From: Greg Kroah-Hartman Date: Tue, 8 Apr 2025 07:11:06 +0000 (+0200) Subject: 6.14-stable patches X-Git-Tag: v5.4.292~54 X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=dc390fa1b39fc3adca56a8444a9e4c39aab82aca;p=thirdparty%2Fkernel%2Fstable-queue.git 6.14-stable patches added patches: acpi-x86-extend-lenovo-yoga-tab-3-quirk-with-skip-gpio-event-handlers.patch kvm-x86-block-kvm_cap_sync_regs-if-guest-state-is-protected.patch loongarch-bpf-don-t-override-subprog-s-return-value.patch loongarch-bpf-fix-off-by-one-error-in-build_prologue.patch loongarch-bpf-use-move_addr-for-bpf_pseudo_func.patch loongarch-increase-arch_dma_minalign-up-to-16.patch loongarch-increase-max_io_pics-up-to-8.patch ntb_perf-delete-duplicate-dmaengine_unmap_put-call-in-perf_copy_chunk.patch perf-x86-intel-apply-static-call-for-drain_pebs.patch perf-x86-intel-avoid-disable-pmu-if-cpuc-enabled-in-sample-read.patch platform-x86-isst-correct-command-storage-data-length.patch platform-x86-thinkpad_acpi-disable-acpi-fan-access-for-t495-and-e560.patch rust-fix-enabling-rust-and-building-with-gcc-for-loongarch.patch rust-pci-require-send-for-driver-trait-implementers.patch rust-platform-require-send-for-driver-trait-implementers.patch uprobes-x86-harden-uretprobe-syscall-trampoline-check.patch usbnet-fix-npe-during-rx_complete.patch x86-hyperv-fix-check-of-return-value-from-snp_set_vmsa.patch x86-mce-use-is_copy_from_user-to-determine-copy-from-user-context.patch x86-microcode-amd-fix-__apply_microcode_amd-s-return-value.patch x86-paravirt-move-halt-paravirt-calls-under-config_paravirt.patch x86-tdx-fix-arch_safe_halt-execution-for-tdx-vms.patch --- diff --git a/queue-6.14/acpi-x86-extend-lenovo-yoga-tab-3-quirk-with-skip-gpio-event-handlers.patch b/queue-6.14/acpi-x86-extend-lenovo-yoga-tab-3-quirk-with-skip-gpio-event-handlers.patch new file mode 100644 index 0000000000..eccf18e581 --- /dev/null +++ b/queue-6.14/acpi-x86-extend-lenovo-yoga-tab-3-quirk-with-skip-gpio-event-handlers.patch @@ -0,0 +1,50 @@ +From 2fa87c71d2adb4b82c105f9191e6120340feff00 Mon Sep 17 00:00:00 2001 +From: Hans de Goede +Date: Tue, 25 Mar 2025 22:04:50 +0100 +Subject: ACPI: x86: Extend Lenovo Yoga Tab 3 quirk with skip GPIO event-handlers + +From: Hans de Goede + +commit 2fa87c71d2adb4b82c105f9191e6120340feff00 upstream. + +Depending on the secureboot signature on EFI\BOOT\BOOTX86.EFI the +Lenovo Yoga Tab 3 UEFI will switch its OSID ACPI variable between +1 (Windows) and 4 (Android(GMIN)). + +In Windows mode a GPIO event handler gets installed for GPO1 pin 5, +causing Linux' x86-android-tables code which deals with the general +brokenness of this device's ACPI tables to fail to probe with: + +[ 17.853705] x86_android_tablets: error -16 getting GPIO INT33FF:01 5 +[ 17.859623] x86_android_tablets x86_android_tablets: probe with driver + +which renders sound, the touchscreen, charging-management, +battery-monitoring and more non functional. + +Add ACPI_QUIRK_SKIP_GPIO_EVENT_HANDLERS to the existing quirks for this +device to fix this. + +Reported-by: Agoston Lorincz +Closes: https://lore.kernel.org/platform-driver-x86/CAMEzqD+DNXrAvUOHviB2O2bjtcbmo3xH=kunKr4nubuMLbb_0A@mail.gmail.com/ +Cc: All applicable +Fixes: fe820db35275 ("ACPI: x86: Add skip i2c clients quirk for Lenovo Yoga Tab 3 Pro (YT3-X90F)") +Signed-off-by: Hans de Goede +Link: https://patch.msgid.link/20250325210450.358506-1-hdegoede@redhat.com +Signed-off-by: Rafael J. 
Wysocki +Signed-off-by: Greg Kroah-Hartman +--- + drivers/acpi/x86/utils.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +--- a/drivers/acpi/x86/utils.c ++++ b/drivers/acpi/x86/utils.c +@@ -374,7 +374,8 @@ static const struct dmi_system_id acpi_q + DMI_MATCH(DMI_PRODUCT_VERSION, "Blade3-10A-001"), + }, + .driver_data = (void *)(ACPI_QUIRK_SKIP_I2C_CLIENTS | +- ACPI_QUIRK_SKIP_ACPI_AC_AND_BATTERY), ++ ACPI_QUIRK_SKIP_ACPI_AC_AND_BATTERY | ++ ACPI_QUIRK_SKIP_GPIO_EVENT_HANDLERS), + }, + { + /* Medion Lifetab S10346 */ diff --git a/queue-6.14/kvm-x86-block-kvm_cap_sync_regs-if-guest-state-is-protected.patch b/queue-6.14/kvm-x86-block-kvm_cap_sync_regs-if-guest-state-is-protected.patch new file mode 100644 index 0000000000..f8f1b2dc91 --- /dev/null +++ b/queue-6.14/kvm-x86-block-kvm_cap_sync_regs-if-guest-state-is-protected.patch @@ -0,0 +1,81 @@ +From 74c1807f6c4feddb3c3cb1056c54531d4adbaea6 Mon Sep 17 00:00:00 2001 +From: Paolo Bonzini +Date: Thu, 6 Mar 2025 21:29:22 +0100 +Subject: KVM: x86: block KVM_CAP_SYNC_REGS if guest state is protected + +From: Paolo Bonzini + +commit 74c1807f6c4feddb3c3cb1056c54531d4adbaea6 upstream. + +KVM_CAP_SYNC_REGS does not make sense for VMs with protected guest state, +since the register values cannot actually be written. Return 0 +when using the VM-level KVM_CHECK_EXTENSION ioctl, and accordingly +return -EINVAL from KVM_RUN if the valid/dirty fields are nonzero. + +However, on exit from KVM_RUN userspace could have placed a nonzero +value into kvm_run->kvm_valid_regs, so check guest_state_protected +again and skip store_regs() in that case. + +Cc: stable@vger.kernel.org +Fixes: 517987e3fb19 ("KVM: x86: add fields to struct kvm_arch for CoCo features") +Signed-off-by: Paolo Bonzini +Message-ID: <20250306202923.646075-1-pbonzini@redhat.com> +Reviewed-by: Pankaj Gupta +Signed-off-by: Paolo Bonzini +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/kvm/x86.c | 15 +++++++++++---- + 1 file changed, 11 insertions(+), 4 deletions(-) + +--- a/arch/x86/kvm/x86.c ++++ b/arch/x86/kvm/x86.c +@@ -4573,6 +4573,11 @@ static bool kvm_is_vm_type_supported(uns + return type < 32 && (kvm_caps.supported_vm_types & BIT(type)); + } + ++static inline u32 kvm_sync_valid_fields(struct kvm *kvm) ++{ ++ return kvm && kvm->arch.has_protected_state ? 
0 : KVM_SYNC_X86_VALID_FIELDS; ++} ++ + int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) + { + int r = 0; +@@ -4681,7 +4686,7 @@ int kvm_vm_ioctl_check_extension(struct + break; + #endif + case KVM_CAP_SYNC_REGS: +- r = KVM_SYNC_X86_VALID_FIELDS; ++ r = kvm_sync_valid_fields(kvm); + break; + case KVM_CAP_ADJUST_CLOCK: + r = KVM_CLOCK_VALID_FLAGS; +@@ -11474,6 +11479,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_v + { + struct kvm_queued_exception *ex = &vcpu->arch.exception; + struct kvm_run *kvm_run = vcpu->run; ++ u32 sync_valid_fields; + int r; + + r = kvm_mmu_post_init_vm(vcpu->kvm); +@@ -11519,8 +11525,9 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_v + goto out; + } + +- if ((kvm_run->kvm_valid_regs & ~KVM_SYNC_X86_VALID_FIELDS) || +- (kvm_run->kvm_dirty_regs & ~KVM_SYNC_X86_VALID_FIELDS)) { ++ sync_valid_fields = kvm_sync_valid_fields(vcpu->kvm); ++ if ((kvm_run->kvm_valid_regs & ~sync_valid_fields) || ++ (kvm_run->kvm_dirty_regs & ~sync_valid_fields)) { + r = -EINVAL; + goto out; + } +@@ -11578,7 +11585,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_v + + out: + kvm_put_guest_fpu(vcpu); +- if (kvm_run->kvm_valid_regs) ++ if (kvm_run->kvm_valid_regs && likely(!vcpu->arch.guest_state_protected)) + store_regs(vcpu); + post_kvm_run_save(vcpu); + kvm_vcpu_srcu_read_unlock(vcpu); diff --git a/queue-6.14/loongarch-bpf-don-t-override-subprog-s-return-value.patch b/queue-6.14/loongarch-bpf-don-t-override-subprog-s-return-value.patch new file mode 100644 index 0000000000..4ee2654cf5 --- /dev/null +++ b/queue-6.14/loongarch-bpf-don-t-override-subprog-s-return-value.patch @@ -0,0 +1,43 @@ +From 60f3caff1492e5b8616b9578c4bedb5c0a88ed14 Mon Sep 17 00:00:00 2001 +From: Hengqi Chen +Date: Sun, 30 Mar 2025 16:31:09 +0800 +Subject: LoongArch: BPF: Don't override subprog's return value + +From: Hengqi Chen + +commit 60f3caff1492e5b8616b9578c4bedb5c0a88ed14 upstream. + +The verifier test `calls: div by 0 in subprog` triggers a panic at the +ld.bu instruction. The ld.bu insn is trying to load byte from memory +address returned by the subprog. The subprog actually set the correct +address at the a5 register (dedicated register for BPF return values). +But at commit 73c359d1d356 ("LoongArch: BPF: Sign-extend return values") +we also sign extended a5 to the a0 register (return value in LoongArch). +For function call insn, we later propagate the a0 register back to a5 +register. This is right for native calls but wrong for bpf2bpf calls +which expect zero-extended return value in a5 register. So only move a0 +to a5 for native calls (i.e. non-BPF_PSEUDO_CALL). 
+ +Cc: stable@vger.kernel.org +Fixes: 73c359d1d356 ("LoongArch: BPF: Sign-extend return values") +Signed-off-by: Hengqi Chen +Signed-off-by: Huacai Chen +Signed-off-by: Greg Kroah-Hartman +--- + arch/loongarch/net/bpf_jit.c | 5 ++++- + 1 file changed, 4 insertions(+), 1 deletion(-) + +--- a/arch/loongarch/net/bpf_jit.c ++++ b/arch/loongarch/net/bpf_jit.c +@@ -907,7 +907,10 @@ static int build_insn(const struct bpf_i + + move_addr(ctx, t1, func_addr); + emit_insn(ctx, jirl, LOONGARCH_GPR_RA, t1, 0); +- move_reg(ctx, regmap[BPF_REG_0], LOONGARCH_GPR_A0); ++ ++ if (insn->src_reg != BPF_PSEUDO_CALL) ++ move_reg(ctx, regmap[BPF_REG_0], LOONGARCH_GPR_A0); ++ + break; + + /* tail call */ diff --git a/queue-6.14/loongarch-bpf-fix-off-by-one-error-in-build_prologue.patch b/queue-6.14/loongarch-bpf-fix-off-by-one-error-in-build_prologue.patch new file mode 100644 index 0000000000..ce8938fc61 --- /dev/null +++ b/queue-6.14/loongarch-bpf-fix-off-by-one-error-in-build_prologue.patch @@ -0,0 +1,60 @@ +From 7e2586991e36663c9bc48c828b83eab180ad30a9 Mon Sep 17 00:00:00 2001 +From: Hengqi Chen +Date: Sun, 30 Mar 2025 16:31:09 +0800 +Subject: LoongArch: BPF: Fix off-by-one error in build_prologue() + +From: Hengqi Chen + +commit 7e2586991e36663c9bc48c828b83eab180ad30a9 upstream. + +Vincent reported that running BPF progs with tailcalls on LoongArch +causes kernel hard lockup. Debugging the issues shows that the JITed +image missing a jirl instruction at the end of the epilogue. + +There are two passes in JIT compiling, the first pass set the flags and +the second pass generates JIT code based on those flags. With BPF progs +mixing bpf2bpf and tailcalls, build_prologue() generates N insns in the +first pass and then generates N+1 insns in the second pass. This makes +epilogue_offset off by one and we will jump to some unexpected insn and +cause lockup. Fix this by inserting a nop insn. + +Cc: stable@vger.kernel.org +Fixes: 5dc615520c4d ("LoongArch: Add BPF JIT support") +Fixes: bb035ef0cc91 ("LoongArch: BPF: Support mixing bpf2bpf and tailcalls") +Reported-by: Vincent Li +Tested-by: Vincent Li +Closes: https://lore.kernel.org/loongarch/CAK3+h2w6WESdBN3UCr3WKHByD7D6Q_Ve1EDAjotVrnx6Or_c8g@mail.gmail.com/ +Closes: https://lore.kernel.org/bpf/CAK3+h2woEjG_N=-XzqEGaAeCmgu2eTCUc7p6bP4u8Q+DFHm-7g@mail.gmail.com/ +Signed-off-by: Hengqi Chen +Signed-off-by: Huacai Chen +Signed-off-by: Greg Kroah-Hartman +--- + arch/loongarch/net/bpf_jit.c | 2 ++ + arch/loongarch/net/bpf_jit.h | 5 +++++ + 2 files changed, 7 insertions(+) + +--- a/arch/loongarch/net/bpf_jit.c ++++ b/arch/loongarch/net/bpf_jit.c +@@ -142,6 +142,8 @@ static void build_prologue(struct jit_ct + */ + if (seen_tail_call(ctx) && seen_call(ctx)) + move_reg(ctx, TCC_SAVED, REG_TCC); ++ else ++ emit_insn(ctx, nop); + + ctx->stack_size = stack_adjust; + } +--- a/arch/loongarch/net/bpf_jit.h ++++ b/arch/loongarch/net/bpf_jit.h +@@ -27,6 +27,11 @@ struct jit_data { + struct jit_ctx ctx; + }; + ++static inline void emit_nop(union loongarch_instruction *insn) ++{ ++ insn->word = INSN_NOP; ++} ++ + #define emit_insn(ctx, func, ...) 
\ + do { \ + if (ctx->image != NULL) { \ diff --git a/queue-6.14/loongarch-bpf-use-move_addr-for-bpf_pseudo_func.patch b/queue-6.14/loongarch-bpf-use-move_addr-for-bpf_pseudo_func.patch new file mode 100644 index 0000000000..a4eb25b488 --- /dev/null +++ b/queue-6.14/loongarch-bpf-use-move_addr-for-bpf_pseudo_func.patch @@ -0,0 +1,54 @@ +From 52266f1015a8b5aabec7d127f83d105f702b388e Mon Sep 17 00:00:00 2001 +From: Hengqi Chen +Date: Sun, 30 Mar 2025 16:31:09 +0800 +Subject: LoongArch: BPF: Use move_addr() for BPF_PSEUDO_FUNC + +From: Hengqi Chen + +commit 52266f1015a8b5aabec7d127f83d105f702b388e upstream. + +Vincent reported that running XDP synproxy program on LoongArch results +in the following error: + + JIT doesn't support bpf-to-bpf calls + +With dmesg: + + multi-func JIT bug 1391 != 1390 + +The root cause is that verifier will refill the imm with the correct +addresses of bpf_calls for BPF_PSEUDO_FUNC instructions and then run +the last pass of JIT. So we generate different JIT code for the same +instruction in two passes (one for placeholder and the other for the +real address). Let's use move_addr() instead. + +See commit 64f50f6575721ef0 ("LoongArch, bpf: Use 4 instructions for +function address in JIT") for a similar fix. + +Cc: stable@vger.kernel.org +Fixes: 69c087ba6225 ("bpf: Add bpf_for_each_map_elem() helper") +Fixes: bb035ef0cc91 ("LoongArch: BPF: Support mixing bpf2bpf and tailcalls") +Reported-by: Vincent Li +Tested-by: Vincent Li +Closes: https://lore.kernel.org/loongarch/CAK3+h2yfM9FTNiXvEQBkvtuoJrvzmN4c_NZsFXqEk4Cj1tsBNA@mail.gmail.com/T/#u +Signed-off-by: Hengqi Chen +Signed-off-by: Huacai Chen +Signed-off-by: Greg Kroah-Hartman +--- + arch/loongarch/net/bpf_jit.c | 5 ++++- + 1 file changed, 4 insertions(+), 1 deletion(-) + +--- a/arch/loongarch/net/bpf_jit.c ++++ b/arch/loongarch/net/bpf_jit.c +@@ -935,7 +935,10 @@ static int build_insn(const struct bpf_i + { + const u64 imm64 = (u64)(insn + 1)->imm << 32 | (u32)insn->imm; + +- move_imm(ctx, dst, imm64, is32); ++ if (bpf_pseudo_func(insn)) ++ move_addr(ctx, dst, imm64); ++ else ++ move_imm(ctx, dst, imm64, is32); + return 1; + } + diff --git a/queue-6.14/loongarch-increase-arch_dma_minalign-up-to-16.patch b/queue-6.14/loongarch-increase-arch_dma_minalign-up-to-16.patch new file mode 100644 index 0000000000..69150f5cf8 --- /dev/null +++ b/queue-6.14/loongarch-increase-arch_dma_minalign-up-to-16.patch @@ -0,0 +1,35 @@ +From 4103cfe9dcb88010ae4911d3ff417457d1b6a720 Mon Sep 17 00:00:00 2001 +From: Huacai Chen +Date: Sun, 30 Mar 2025 16:31:09 +0800 +Subject: LoongArch: Increase ARCH_DMA_MINALIGN up to 16 + +From: Huacai Chen + +commit 4103cfe9dcb88010ae4911d3ff417457d1b6a720 upstream. + +ARCH_DMA_MINALIGN is 1 by default, but some LoongArch-specific devices +(such as APBDMA) require 16 bytes alignment. When the data buffer length +is too small, the hardware may make an error writing cacheline. Thus, it +is dangerous to allocate a small memory buffer for DMA. It's always safe +to define ARCH_DMA_MINALIGN as L1_CACHE_BYTES but unnecessary (kmalloc() +need small memory objects). Therefore, just increase it to 16. 
+ +Cc: stable@vger.kernel.org +Tested-by: Binbin Zhou +Signed-off-by: Huacai Chen +Signed-off-by: Greg Kroah-Hartman +--- + arch/loongarch/include/asm/cache.h | 2 ++ + 1 file changed, 2 insertions(+) + +--- a/arch/loongarch/include/asm/cache.h ++++ b/arch/loongarch/include/asm/cache.h +@@ -8,6 +8,8 @@ + #define L1_CACHE_SHIFT CONFIG_L1_CACHE_SHIFT + #define L1_CACHE_BYTES (1 << L1_CACHE_SHIFT) + ++#define ARCH_DMA_MINALIGN (16) ++ + #define __read_mostly __section(".data..read_mostly") + + #endif /* _ASM_CACHE_H */ diff --git a/queue-6.14/loongarch-increase-max_io_pics-up-to-8.patch b/queue-6.14/loongarch-increase-max_io_pics-up-to-8.patch new file mode 100644 index 0000000000..61dada1475 --- /dev/null +++ b/queue-6.14/loongarch-increase-max_io_pics-up-to-8.patch @@ -0,0 +1,33 @@ +From ec105cadff5d8c0a029a3dc1084cae46cf3f799d Mon Sep 17 00:00:00 2001 +From: Huacai Chen +Date: Sun, 30 Mar 2025 16:31:09 +0800 +Subject: LoongArch: Increase MAX_IO_PICS up to 8 + +From: Huacai Chen + +commit ec105cadff5d8c0a029a3dc1084cae46cf3f799d upstream. + +Begin with Loongson-3C6000, the number of PCI host can be as many as +8 for multi-chip machines, and this number should be the same for I/O +interrupt controllers. To support these machines we also increase the +MAX_IO_PICS up to 8. + +Cc: stable@vger.kernel.org +Tested-by: Mingcong Bai +Signed-off-by: Huacai Chen +Signed-off-by: Greg Kroah-Hartman +--- + arch/loongarch/include/asm/irq.h | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/arch/loongarch/include/asm/irq.h ++++ b/arch/loongarch/include/asm/irq.h +@@ -53,7 +53,7 @@ void spurious_interrupt(void); + #define arch_trigger_cpumask_backtrace arch_trigger_cpumask_backtrace + void arch_trigger_cpumask_backtrace(const struct cpumask *mask, int exclude_cpu); + +-#define MAX_IO_PICS 2 ++#define MAX_IO_PICS 8 + #define NR_IRQS (64 + NR_VECTORS * (NR_CPUS + MAX_IO_PICS)) + + struct acpi_vector_group { diff --git a/queue-6.14/ntb_perf-delete-duplicate-dmaengine_unmap_put-call-in-perf_copy_chunk.patch b/queue-6.14/ntb_perf-delete-duplicate-dmaengine_unmap_put-call-in-perf_copy_chunk.patch new file mode 100644 index 0000000000..f664c695e9 --- /dev/null +++ b/queue-6.14/ntb_perf-delete-duplicate-dmaengine_unmap_put-call-in-perf_copy_chunk.patch @@ -0,0 +1,41 @@ +From 4279e72cab31dd3eb8c89591eb9d2affa90ab6aa Mon Sep 17 00:00:00 2001 +From: Markus Elfring +Date: Mon, 23 Sep 2024 10:38:11 +0200 +Subject: ntb_perf: Delete duplicate dmaengine_unmap_put() call in perf_copy_chunk() +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Markus Elfring + +commit 4279e72cab31dd3eb8c89591eb9d2affa90ab6aa upstream. + +The function call “dmaengine_unmap_put(unmap)” was used in an if branch. +The same call was immediately triggered by a subsequent goto statement. +Thus avoid such a call repetition. + +This issue was detected by using the Coccinelle software. 
+ +Fixes: 5648e56d03fa ("NTB: ntb_perf: Add full multi-port NTB API support") +Cc: stable@vger.kernel.org +Signed-off-by: Markus Elfring +Signed-off-by: Jon Mason +Signed-off-by: Greg Kroah-Hartman +--- + drivers/ntb/test/ntb_perf.c | 4 +--- + 1 file changed, 1 insertion(+), 3 deletions(-) + +--- a/drivers/ntb/test/ntb_perf.c ++++ b/drivers/ntb/test/ntb_perf.c +@@ -839,10 +839,8 @@ static int perf_copy_chunk(struct perf_t + dma_set_unmap(tx, unmap); + + ret = dma_submit_error(dmaengine_submit(tx)); +- if (ret) { +- dmaengine_unmap_put(unmap); ++ if (ret) + goto err_free_resource; +- } + + dmaengine_unmap_put(unmap); + diff --git a/queue-6.14/perf-x86-intel-apply-static-call-for-drain_pebs.patch b/queue-6.14/perf-x86-intel-apply-static-call-for-drain_pebs.patch new file mode 100644 index 0000000000..9ee4ce87e9 --- /dev/null +++ b/queue-6.14/perf-x86-intel-apply-static-call-for-drain_pebs.patch @@ -0,0 +1,60 @@ +From 314dfe10576912e1d786b13c5d4eee8c51b63caa Mon Sep 17 00:00:00 2001 +From: "Peter Zijlstra (Intel)" +Date: Tue, 21 Jan 2025 07:23:00 -0800 +Subject: perf/x86/intel: Apply static call for drain_pebs + +From: Peter Zijlstra (Intel) + +commit 314dfe10576912e1d786b13c5d4eee8c51b63caa upstream. + +The x86_pmu_drain_pebs static call was introduced in commit 7c9903c9bf71 +("x86/perf, static_call: Optimize x86_pmu methods"), but it's not really +used to replace the old method. + +Apply the static call for drain_pebs. + +Fixes: 7c9903c9bf71 ("x86/perf, static_call: Optimize x86_pmu methods") +Signed-off-by: Peter Zijlstra (Intel) +Signed-off-by: Kan Liang +Signed-off-by: Peter Zijlstra (Intel) +Cc: stable@vger.kernel.org +Link: https://lkml.kernel.org/r/20250121152303.3128733-1-kan.liang@linux.intel.com +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/events/intel/core.c | 2 +- + arch/x86/events/intel/ds.c | 2 +- + arch/x86/events/perf_event.h | 1 + + 3 files changed, 3 insertions(+), 2 deletions(-) + +--- a/arch/x86/events/intel/core.c ++++ b/arch/x86/events/intel/core.c +@@ -3070,7 +3070,7 @@ static int handle_pmi_common(struct pt_r + + handled++; + x86_pmu_handle_guest_pebs(regs, &data); +- x86_pmu.drain_pebs(regs, &data); ++ static_call(x86_pmu_drain_pebs)(regs, &data); + status &= intel_ctrl | GLOBAL_STATUS_TRACE_TOPAPMI; + + /* +--- a/arch/x86/events/intel/ds.c ++++ b/arch/x86/events/intel/ds.c +@@ -957,7 +957,7 @@ static inline void intel_pmu_drain_pebs_ + { + struct perf_sample_data data; + +- x86_pmu.drain_pebs(NULL, &data); ++ static_call(x86_pmu_drain_pebs)(NULL, &data); + } + + /* +--- a/arch/x86/events/perf_event.h ++++ b/arch/x86/events/perf_event.h +@@ -1108,6 +1108,7 @@ extern struct x86_pmu x86_pmu __read_mos + + DECLARE_STATIC_CALL(x86_pmu_set_period, *x86_pmu.set_period); + DECLARE_STATIC_CALL(x86_pmu_update, *x86_pmu.update); ++DECLARE_STATIC_CALL(x86_pmu_drain_pebs, *x86_pmu.drain_pebs); + + static __always_inline struct x86_perf_task_context_opt *task_context_opt(void *ctx) + { diff --git a/queue-6.14/perf-x86-intel-avoid-disable-pmu-if-cpuc-enabled-in-sample-read.patch b/queue-6.14/perf-x86-intel-avoid-disable-pmu-if-cpuc-enabled-in-sample-read.patch new file mode 100644 index 0000000000..0eb86b0f4d --- /dev/null +++ b/queue-6.14/perf-x86-intel-avoid-disable-pmu-if-cpuc-enabled-in-sample-read.patch @@ -0,0 +1,128 @@ +From f9bdf1f953392c9edd69a7f884f78c0390127029 Mon Sep 17 00:00:00 2001 +From: Kan Liang +Date: Tue, 21 Jan 2025 07:23:01 -0800 +Subject: perf/x86/intel: Avoid disable PMU if !cpuc->enabled in sample read + +From: Kan Liang + +commit 
f9bdf1f953392c9edd69a7f884f78c0390127029 upstream. + +The WARN_ON(this_cpu_read(cpu_hw_events.enabled)) in the +intel_pmu_save_and_restart_reload() is triggered, when sampling read +topdown events. + +In a NMI handler, the cpu_hw_events.enabled is set and used to indicate +the status of core PMU. The generic pmu->pmu_disable_count, updated in +the perf_pmu_disable/enable pair, is not touched. +However, the perf_pmu_disable/enable pair is invoked when sampling read +in a NMI handler. The cpuc->enabled is mistakenly set by the +perf_pmu_enable(). + +Avoid disabling PMU if the core PMU is already disabled. +Merge the logic together. + +Fixes: 7b2c05a15d29 ("perf/x86/intel: Generic support for hardware TopDown metrics") +Suggested-by: Peter Zijlstra (Intel) +Signed-off-by: Kan Liang +Signed-off-by: Peter Zijlstra (Intel) +Cc: stable@vger.kernel.org +Link: https://lkml.kernel.org/r/20250121152303.3128733-2-kan.liang@linux.intel.com +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/events/intel/core.c | 41 +++++++++++++++++++++++------------------ + arch/x86/events/intel/ds.c | 11 +---------- + arch/x86/events/perf_event.h | 2 +- + 3 files changed, 25 insertions(+), 29 deletions(-) + +--- a/arch/x86/events/intel/core.c ++++ b/arch/x86/events/intel/core.c +@@ -2779,28 +2779,33 @@ static u64 icl_update_topdown_event(stru + + DEFINE_STATIC_CALL(intel_pmu_update_topdown_event, x86_perf_event_update); + +-static void intel_pmu_read_topdown_event(struct perf_event *event) ++static void intel_pmu_read_event(struct perf_event *event) + { +- struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); ++ if (event->hw.flags & (PERF_X86_EVENT_AUTO_RELOAD | PERF_X86_EVENT_TOPDOWN)) { ++ struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); ++ bool pmu_enabled = cpuc->enabled; ++ ++ /* Only need to call update_topdown_event() once for group read. */ ++ if (is_metric_event(event) && (cpuc->txn_flags & PERF_PMU_TXN_READ)) ++ return; ++ ++ cpuc->enabled = 0; ++ if (pmu_enabled) ++ intel_pmu_disable_all(); ++ ++ if (is_topdown_event(event)) ++ static_call(intel_pmu_update_topdown_event)(event); ++ else ++ intel_pmu_drain_pebs_buffer(); ++ ++ cpuc->enabled = pmu_enabled; ++ if (pmu_enabled) ++ intel_pmu_enable_all(0); + +- /* Only need to call update_topdown_event() once for group read. */ +- if ((cpuc->txn_flags & PERF_PMU_TXN_READ) && +- !is_slots_event(event)) + return; ++ } + +- perf_pmu_disable(event->pmu); +- static_call(intel_pmu_update_topdown_event)(event); +- perf_pmu_enable(event->pmu); +-} +- +-static void intel_pmu_read_event(struct perf_event *event) +-{ +- if (event->hw.flags & PERF_X86_EVENT_AUTO_RELOAD) +- intel_pmu_auto_reload_read(event); +- else if (is_topdown_count(event)) +- intel_pmu_read_topdown_event(event); +- else +- x86_perf_event_update(event); ++ x86_perf_event_update(event); + } + + static void intel_pmu_enable_fixed(struct perf_event *event) +--- a/arch/x86/events/intel/ds.c ++++ b/arch/x86/events/intel/ds.c +@@ -953,7 +953,7 @@ unlock: + return 1; + } + +-static inline void intel_pmu_drain_pebs_buffer(void) ++void intel_pmu_drain_pebs_buffer(void) + { + struct perf_sample_data data; + +@@ -2094,15 +2094,6 @@ get_next_pebs_record_by_bit(void *base, + return NULL; + } + +-void intel_pmu_auto_reload_read(struct perf_event *event) +-{ +- WARN_ON(!(event->hw.flags & PERF_X86_EVENT_AUTO_RELOAD)); +- +- perf_pmu_disable(event->pmu); +- intel_pmu_drain_pebs_buffer(); +- perf_pmu_enable(event->pmu); +-} +- + /* + * Special variant of intel_pmu_save_and_restart() for auto-reload. 
+ */ +--- a/arch/x86/events/perf_event.h ++++ b/arch/x86/events/perf_event.h +@@ -1648,7 +1648,7 @@ void intel_pmu_pebs_disable_all(void); + + void intel_pmu_pebs_sched_task(struct perf_event_pmu_context *pmu_ctx, bool sched_in); + +-void intel_pmu_auto_reload_read(struct perf_event *event); ++void intel_pmu_drain_pebs_buffer(void); + + void intel_pmu_store_pebs_lbrs(struct lbr_entry *lbr); + diff --git a/queue-6.14/platform-x86-isst-correct-command-storage-data-length.patch b/queue-6.14/platform-x86-isst-correct-command-storage-data-length.patch new file mode 100644 index 0000000000..8cd041f510 --- /dev/null +++ b/queue-6.14/platform-x86-isst-correct-command-storage-data-length.patch @@ -0,0 +1,45 @@ +From 9462e74c5c983cce34019bfb27f734552bebe59f Mon Sep 17 00:00:00 2001 +From: Srinivas Pandruvada +Date: Fri, 28 Mar 2025 15:47:49 -0700 +Subject: platform/x86: ISST: Correct command storage data length +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Srinivas Pandruvada + +commit 9462e74c5c983cce34019bfb27f734552bebe59f upstream. + +After resume/online turbo limit ratio (TRL) is restored partially if +the admin explicitly changed TRL from user space. + +A hash table is used to store SST mail box and MSR settings when modified +to restore those settings after resume or online. This uses a struct +isst_cmd field "data" to store these settings. This is a 64 bit field. +But isst_store_new_cmd() is only assigning as u32. This results in +truncation of 32 bits. + +Change the argument to u64 from u32. + +Fixes: f607874f35cb ("platform/x86: ISST: Restore state on resume") +Signed-off-by: Srinivas Pandruvada +Cc: stable@vger.kernel.org +Link: https://lore.kernel.org/r/20250328224749.2691272-1-srinivas.pandruvada@linux.intel.com +Reviewed-by: Ilpo Järvinen +Signed-off-by: Ilpo Järvinen +Signed-off-by: Greg Kroah-Hartman +--- + drivers/platform/x86/intel/speed_select_if/isst_if_common.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/drivers/platform/x86/intel/speed_select_if/isst_if_common.c ++++ b/drivers/platform/x86/intel/speed_select_if/isst_if_common.c +@@ -84,7 +84,7 @@ static DECLARE_HASHTABLE(isst_hash, 8); + static DEFINE_MUTEX(isst_hash_lock); + + static int isst_store_new_cmd(int cmd, u32 cpu, int mbox_cmd_type, u32 param, +- u32 data) ++ u64 data) + { + struct isst_cmd *sst_cmd; + diff --git a/queue-6.14/platform-x86-thinkpad_acpi-disable-acpi-fan-access-for-t495-and-e560.patch b/queue-6.14/platform-x86-thinkpad_acpi-disable-acpi-fan-access-for-t495-and-e560.patch new file mode 100644 index 0000000000..e69e07bdd3 --- /dev/null +++ b/queue-6.14/platform-x86-thinkpad_acpi-disable-acpi-fan-access-for-t495-and-e560.patch @@ -0,0 +1,76 @@ +From 2b9f84e7dc863afd63357b867cea246aeedda036 Mon Sep 17 00:00:00 2001 +From: Eduard Christian Dumitrescu +Date: Mon, 24 Mar 2025 11:24:42 -0400 +Subject: platform/x86: thinkpad_acpi: disable ACPI fan access for T495* and E560 +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Eduard Christian Dumitrescu + +commit 2b9f84e7dc863afd63357b867cea246aeedda036 upstream. + +T495, T495s, and E560 laptops have the FANG+FANW ACPI methods +(therefore fang_handle and fanw_handle are not NULL) but they do not +actually work, which results in a "No such device or address" error. +The DSDT table code for the FANG+FANW methods doesn't seem to do +anything special regarding the fan being secondary. 
The bug was +introduced in commit 57d0557dfa49 ("platform/x86: thinkpad_acpi: Add +Thinkpad Edge E531 fan support"), which added a new fan control method +via the FANG+FANW ACPI methods. + +Add a quirk for T495, T495s, and E560 to avoid the FANG+FANW methods. +Fan access and control is restored after forcing the legacy non-ACPI +fan control method by setting both fang_handle and fanw_handle to NULL. + +Reported-by: Vlastimil Holer +Fixes: 57d0557dfa49 ("platform/x86: thinkpad_acpi: Add Thinkpad Edge E531 fan support") +Closes: https://bugzilla.kernel.org/show_bug.cgi?id=219643 +Cc: stable@vger.kernel.org +Tested-by: Alireza Elikahi +Reviewed-by: Kurt Borja +Signed-off-by: Eduard Christian Dumitrescu +Co-developed-by: Seyediman Seyedarab +Signed-off-by: Seyediman Seyedarab +Link: https://lore.kernel.org/r/20250324152442.106113-1-ImanDevel@gmail.com +Reviewed-by: Ilpo Järvinen +Signed-off-by: Ilpo Järvinen +Signed-off-by: Greg Kroah-Hartman +--- + drivers/platform/x86/thinkpad_acpi.c | 11 +++++++++++ + 1 file changed, 11 insertions(+) + +--- a/drivers/platform/x86/thinkpad_acpi.c ++++ b/drivers/platform/x86/thinkpad_acpi.c +@@ -8797,6 +8797,7 @@ static const struct attribute_group fan_ + #define TPACPI_FAN_NS 0x0010 /* For EC with non-Standard register addresses */ + #define TPACPI_FAN_DECRPM 0x0020 /* For ECFW's with RPM in register as decimal */ + #define TPACPI_FAN_TPR 0x0040 /* Fan speed is in Ticks Per Revolution */ ++#define TPACPI_FAN_NOACPI 0x0080 /* Don't use ACPI methods even if detected */ + + static const struct tpacpi_quirk fan_quirk_table[] __initconst = { + TPACPI_QEC_IBM('1', 'Y', TPACPI_FAN_Q1), +@@ -8827,6 +8828,9 @@ static const struct tpacpi_quirk fan_qui + TPACPI_Q_LNV3('N', '1', 'O', TPACPI_FAN_NOFAN), /* X1 Tablet (2nd gen) */ + TPACPI_Q_LNV3('R', '0', 'Q', TPACPI_FAN_DECRPM),/* L480 */ + TPACPI_Q_LNV('8', 'F', TPACPI_FAN_TPR), /* ThinkPad x120e */ ++ TPACPI_Q_LNV3('R', '0', '0', TPACPI_FAN_NOACPI),/* E560 */ ++ TPACPI_Q_LNV3('R', '1', '2', TPACPI_FAN_NOACPI),/* T495 */ ++ TPACPI_Q_LNV3('R', '1', '3', TPACPI_FAN_NOACPI),/* T495s */ + }; + + static int __init fan_init(struct ibm_init_struct *iibm) +@@ -8878,6 +8882,13 @@ static int __init fan_init(struct ibm_in + tp_features.fan_ctrl_status_undef = 1; + } + ++ if (quirks & TPACPI_FAN_NOACPI) { ++ /* E560, T495, T495s */ ++ pr_info("Ignoring buggy ACPI fan access method\n"); ++ fang_handle = NULL; ++ fanw_handle = NULL; ++ } ++ + if (gfan_handle) { + /* 570, 600e/x, 770e, 770x */ + fan_status_access_mode = TPACPI_FAN_RD_ACPI_GFAN; diff --git a/queue-6.14/rust-fix-enabling-rust-and-building-with-gcc-for-loongarch.patch b/queue-6.14/rust-fix-enabling-rust-and-building-with-gcc-for-loongarch.patch new file mode 100644 index 0000000000..85716ea998 --- /dev/null +++ b/queue-6.14/rust-fix-enabling-rust-and-building-with-gcc-for-loongarch.patch @@ -0,0 +1,42 @@ +From 13c23cb4ed09466d73f1beae8956810b95add6ef Mon Sep 17 00:00:00 2001 +From: WANG Rui +Date: Sun, 30 Mar 2025 16:30:20 +0800 +Subject: rust: Fix enabling Rust and building with GCC for LoongArch + +From: WANG Rui + +commit 13c23cb4ed09466d73f1beae8956810b95add6ef upstream. + +This patch fixes a build issue on LoongArch when Rust is enabled and +compiled with GCC by explicitly setting the bindgen target and skipping +C flags that Clang doesn't support. 
+ +Cc: stable@vger.kernel.org +Acked-by: Miguel Ojeda +Signed-off-by: WANG Rui +Signed-off-by: Huacai Chen +Signed-off-by: Greg Kroah-Hartman +--- + rust/Makefile | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +--- a/rust/Makefile ++++ b/rust/Makefile +@@ -232,7 +232,8 @@ bindgen_skip_c_flags := -mno-fp-ret-in-3 + -mfunction-return=thunk-extern -mrecord-mcount -mabi=lp64 \ + -mindirect-branch-cs-prefix -mstack-protector-guard% -mtraceback=no \ + -mno-pointers-to-nested-functions -mno-string \ +- -mno-strict-align -mstrict-align \ ++ -mno-strict-align -mstrict-align -mdirect-extern-access \ ++ -mexplicit-relocs -mno-check-zero-division \ + -fconserve-stack -falign-jumps=% -falign-loops=% \ + -femit-struct-debug-baseonly -fno-ipa-cp-clone -fno-ipa-sra \ + -fno-partial-inlining -fplugin-arg-arm_ssp_per_task_plugin-% \ +@@ -246,6 +247,7 @@ bindgen_skip_c_flags := -mno-fp-ret-in-3 + # Derived from `scripts/Makefile.clang`. + BINDGEN_TARGET_x86 := x86_64-linux-gnu + BINDGEN_TARGET_arm64 := aarch64-linux-gnu ++BINDGEN_TARGET_loongarch := loongarch64-linux-gnusf + BINDGEN_TARGET := $(BINDGEN_TARGET_$(SRCARCH)) + + # All warnings are inhibited since GCC builds are very experimental, diff --git a/queue-6.14/rust-pci-require-send-for-driver-trait-implementers.patch b/queue-6.14/rust-pci-require-send-for-driver-trait-implementers.patch new file mode 100644 index 0000000000..17d830c41c --- /dev/null +++ b/queue-6.14/rust-pci-require-send-for-driver-trait-implementers.patch @@ -0,0 +1,44 @@ +From 935e1d90bf6f14cd190b3a95f3cbf7e298123043 Mon Sep 17 00:00:00 2001 +From: Danilo Krummrich +Date: Wed, 19 Mar 2025 15:52:55 +0100 +Subject: rust: pci: require Send for Driver trait implementers + +From: Danilo Krummrich + +commit 935e1d90bf6f14cd190b3a95f3cbf7e298123043 upstream. + +The instance of Self, returned and created by Driver::probe() is +dropped in the bus' remove() callback. + +Request implementers of the Driver trait to implement Send, since the +remove() callback is not guaranteed to run from the same thread as +probe(). + +Fixes: 1bd8b6b2c5d3 ("rust: pci: add basic PCI device / driver abstractions") +Cc: stable +Reported-by: Alice Ryhl +Closes: https://lore.kernel.org/lkml/Z9rDxOJ2V2bPjj5i@google.com/ +Signed-off-by: Danilo Krummrich +Reviewed-by: Benno Lossin +Link: https://lore.kernel.org/r/20250319145350.69543-1-dakr@kernel.org +Signed-off-by: Greg Kroah-Hartman +--- + rust/kernel/pci.rs | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/rust/kernel/pci.rs b/rust/kernel/pci.rs +index 0d09ae34a64d..22a32172b108 100644 +--- a/rust/kernel/pci.rs ++++ b/rust/kernel/pci.rs +@@ -222,7 +222,7 @@ macro_rules! pci_device_table { + ///``` + /// Drivers must implement this trait in order to get a PCI driver registered. Please refer to the + /// `Adapter` documentation for an example. +-pub trait Driver { ++pub trait Driver: Send { + /// The type holding information about each device id supported by the driver. 
+ /// + /// TODO: Use associated_type_defaults once stabilized: +-- +2.49.0 + diff --git a/queue-6.14/rust-platform-require-send-for-driver-trait-implementers.patch b/queue-6.14/rust-platform-require-send-for-driver-trait-implementers.patch new file mode 100644 index 0000000000..3b1b9fe6bd --- /dev/null +++ b/queue-6.14/rust-platform-require-send-for-driver-trait-implementers.patch @@ -0,0 +1,39 @@ +From 51d0de7596a458096756c895cfed6bc4a7ecac10 Mon Sep 17 00:00:00 2001 +From: Danilo Krummrich +Date: Wed, 19 Mar 2025 15:52:56 +0100 +Subject: rust: platform: require Send for Driver trait implementers + +From: Danilo Krummrich + +commit 51d0de7596a458096756c895cfed6bc4a7ecac10 upstream. + +The instance of Self, returned and created by Driver::probe() is +dropped in the bus' remove() callback. + +Request implementers of the Driver trait to implement Send, since the +remove() callback is not guaranteed to run from the same thread as +probe(). + +Fixes: 683a63befc73 ("rust: platform: add basic platform device / driver abstractions") +Cc: stable +Reported-by: Alice Ryhl +Closes: https://lore.kernel.org/lkml/Z9rDxOJ2V2bPjj5i@google.com/ +Signed-off-by: Danilo Krummrich +Reviewed-by: Benno Lossin +Link: https://lore.kernel.org/r/20250319145350.69543-2-dakr@kernel.org +Signed-off-by: Greg Kroah-Hartman +--- + rust/kernel/platform.rs | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/rust/kernel/platform.rs ++++ b/rust/kernel/platform.rs +@@ -149,7 +149,7 @@ macro_rules! module_platform_driver { + /// } + /// } + ///``` +-pub trait Driver { ++pub trait Driver: Send { + /// The type holding driver private data about each device id supported by the driver. + /// + /// TODO: Use associated_type_defaults once stabilized: diff --git a/queue-6.14/series b/queue-6.14/series index e05297f3b4..80915c12db 100644 --- a/queue-6.14/series +++ b/queue-6.14/series @@ -656,3 +656,25 @@ staging-gpib-fix-oops-after-disconnect-in-agilent-us.patch tty-serial-fsl_lpuart-use-u32-and-u8-for-register-va.patch tty-serial-fsl_lpuart-use-port-struct-directly-to-si.patch tty-serial-lpuart-only-disable-cts-instead-of-overwr.patch +usbnet-fix-npe-during-rx_complete.patch +rust-pci-require-send-for-driver-trait-implementers.patch +rust-platform-require-send-for-driver-trait-implementers.patch +rust-fix-enabling-rust-and-building-with-gcc-for-loongarch.patch +loongarch-increase-arch_dma_minalign-up-to-16.patch +loongarch-increase-max_io_pics-up-to-8.patch +loongarch-bpf-fix-off-by-one-error-in-build_prologue.patch +loongarch-bpf-don-t-override-subprog-s-return-value.patch +loongarch-bpf-use-move_addr-for-bpf_pseudo_func.patch +x86-hyperv-fix-check-of-return-value-from-snp_set_vmsa.patch +kvm-x86-block-kvm_cap_sync_regs-if-guest-state-is-protected.patch +x86-microcode-amd-fix-__apply_microcode_amd-s-return-value.patch +x86-mce-use-is_copy_from_user-to-determine-copy-from-user-context.patch +x86-paravirt-move-halt-paravirt-calls-under-config_paravirt.patch +x86-tdx-fix-arch_safe_halt-execution-for-tdx-vms.patch +acpi-x86-extend-lenovo-yoga-tab-3-quirk-with-skip-gpio-event-handlers.patch +platform-x86-thinkpad_acpi-disable-acpi-fan-access-for-t495-and-e560.patch +platform-x86-isst-correct-command-storage-data-length.patch +ntb_perf-delete-duplicate-dmaengine_unmap_put-call-in-perf_copy_chunk.patch +perf-x86-intel-apply-static-call-for-drain_pebs.patch +perf-x86-intel-avoid-disable-pmu-if-cpuc-enabled-in-sample-read.patch +uprobes-x86-harden-uretprobe-syscall-trampoline-check.patch diff --git 
a/queue-6.14/uprobes-x86-harden-uretprobe-syscall-trampoline-check.patch b/queue-6.14/uprobes-x86-harden-uretprobe-syscall-trampoline-check.patch new file mode 100644 index 0000000000..02ca88fb12 --- /dev/null +++ b/queue-6.14/uprobes-x86-harden-uretprobe-syscall-trampoline-check.patch @@ -0,0 +1,92 @@ +From fa6192adc32f4fdfe5b74edd5b210e12afd6ecc0 Mon Sep 17 00:00:00 2001 +From: Jiri Olsa +Date: Wed, 12 Feb 2025 23:04:33 +0100 +Subject: uprobes/x86: Harden uretprobe syscall trampoline check + +From: Jiri Olsa + +commit fa6192adc32f4fdfe5b74edd5b210e12afd6ecc0 upstream. + +Jann reported a possible issue when trampoline_check_ip returns +address near the bottom of the address space that is allowed to +call into the syscall if uretprobes are not set up: + + https://lore.kernel.org/bpf/202502081235.5A6F352985@keescook/T/#m9d416df341b8fbc11737dacbcd29f0054413cbbf + +Though the mmap minimum address restrictions will typically prevent +creating mappings there, let's make sure uretprobe syscall checks +for that. + +Fixes: ff474a78cef5 ("uprobe: Add uretprobe syscall to speed up return probe") +Reported-by: Jann Horn +Signed-off-by: Jiri Olsa +Signed-off-by: Ingo Molnar +Reviewed-by: Oleg Nesterov +Reviewed-by: Kees Cook +Acked-by: Andrii Nakryiko +Acked-by: Masami Hiramatsu (Google) +Acked-by: Alexei Starovoitov +Cc: Andy Lutomirski +Cc: stable@vger.kernel.org +Link: https://lore.kernel.org/r/20250212220433.3624297-1-jolsa@kernel.org +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/kernel/uprobes.c | 14 +++++++++----- + include/linux/uprobes.h | 2 ++ + kernel/events/uprobes.c | 2 +- + 3 files changed, 12 insertions(+), 6 deletions(-) + +--- a/arch/x86/kernel/uprobes.c ++++ b/arch/x86/kernel/uprobes.c +@@ -357,19 +357,23 @@ void *arch_uprobe_trampoline(unsigned lo + return &insn; + } + +-static unsigned long trampoline_check_ip(void) ++static unsigned long trampoline_check_ip(unsigned long tramp) + { +- unsigned long tramp = uprobe_get_trampoline_vaddr(); +- + return tramp + (uretprobe_syscall_check - uretprobe_trampoline_entry); + } + + SYSCALL_DEFINE0(uretprobe) + { + struct pt_regs *regs = task_pt_regs(current); +- unsigned long err, ip, sp, r11_cx_ax[3]; ++ unsigned long err, ip, sp, r11_cx_ax[3], tramp; ++ ++ /* If there's no trampoline, we are called from wrong place. */ ++ tramp = uprobe_get_trampoline_vaddr(); ++ if (unlikely(tramp == UPROBE_NO_TRAMPOLINE_VADDR)) ++ goto sigill; + +- if (regs->ip != trampoline_check_ip()) ++ /* Make sure the ip matches the only allowed sys_uretprobe caller. 
*/ ++ if (unlikely(regs->ip != trampoline_check_ip(tramp))) + goto sigill; + + err = copy_from_user(r11_cx_ax, (void __user *)regs->sp, sizeof(r11_cx_ax)); +--- a/include/linux/uprobes.h ++++ b/include/linux/uprobes.h +@@ -39,6 +39,8 @@ struct page; + + #define MAX_URETPROBE_DEPTH 64 + ++#define UPROBE_NO_TRAMPOLINE_VADDR (~0UL) ++ + struct uprobe_consumer { + /* + * handler() can return UPROBE_HANDLER_REMOVE to signal the need to +--- a/kernel/events/uprobes.c ++++ b/kernel/events/uprobes.c +@@ -2180,8 +2180,8 @@ void uprobe_copy_process(struct task_str + */ + unsigned long uprobe_get_trampoline_vaddr(void) + { ++ unsigned long trampoline_vaddr = UPROBE_NO_TRAMPOLINE_VADDR; + struct xol_area *area; +- unsigned long trampoline_vaddr = -1; + + /* Pairs with xol_add_vma() smp_store_release() */ + area = READ_ONCE(current->mm->uprobes_state.xol_area); /* ^^^ */ diff --git a/queue-6.14/usbnet-fix-npe-during-rx_complete.patch b/queue-6.14/usbnet-fix-npe-during-rx_complete.patch new file mode 100644 index 0000000000..b9cfdeac0c --- /dev/null +++ b/queue-6.14/usbnet-fix-npe-during-rx_complete.patch @@ -0,0 +1,57 @@ +From 51de3600093429e3b712e5f091d767babc5dd6df Mon Sep 17 00:00:00 2001 +From: Ying Lu +Date: Wed, 2 Apr 2025 16:58:59 +0800 +Subject: usbnet:fix NPE during rx_complete +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Ying Lu + +commit 51de3600093429e3b712e5f091d767babc5dd6df upstream. + +Missing usbnet_going_away Check in Critical Path. +The usb_submit_urb function lacks a usbnet_going_away +validation, whereas __usbnet_queue_skb includes this check. + +This inconsistency creates a race condition where: +A URB request may succeed, but the corresponding SKB data +fails to be queued. + +Subsequent processes: +(e.g., rx_complete → defer_bh → __skb_unlink(skb, list)) +attempt to access skb->next, triggering a NULL pointer +dereference (Kernel Panic). 
+ +Fixes: 04e906839a05 ("usbnet: fix cyclical race on disconnect with work queue") +Cc: stable@vger.kernel.org +Signed-off-by: Ying Lu +Link: https://patch.msgid.link/4c9ef2efaa07eb7f9a5042b74348a67e5a3a7aea.1743584159.git.luying1@xiaomi.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/usb/usbnet.c | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +--- a/drivers/net/usb/usbnet.c ++++ b/drivers/net/usb/usbnet.c +@@ -530,7 +530,8 @@ static int rx_submit (struct usbnet *dev + netif_device_present (dev->net) && + test_bit(EVENT_DEV_OPEN, &dev->flags) && + !test_bit (EVENT_RX_HALT, &dev->flags) && +- !test_bit (EVENT_DEV_ASLEEP, &dev->flags)) { ++ !test_bit (EVENT_DEV_ASLEEP, &dev->flags) && ++ !usbnet_going_away(dev)) { + switch (retval = usb_submit_urb (urb, GFP_ATOMIC)) { + case -EPIPE: + usbnet_defer_kevent (dev, EVENT_RX_HALT); +@@ -551,8 +552,7 @@ static int rx_submit (struct usbnet *dev + tasklet_schedule (&dev->bh); + break; + case 0: +- if (!usbnet_going_away(dev)) +- __usbnet_queue_skb(&dev->rxq, skb, rx_start); ++ __usbnet_queue_skb(&dev->rxq, skb, rx_start); + } + } else { + netif_dbg(dev, ifdown, dev->net, "rx: stopped\n"); diff --git a/queue-6.14/x86-hyperv-fix-check-of-return-value-from-snp_set_vmsa.patch b/queue-6.14/x86-hyperv-fix-check-of-return-value-from-snp_set_vmsa.patch new file mode 100644 index 0000000000..31e2519644 --- /dev/null +++ b/queue-6.14/x86-hyperv-fix-check-of-return-value-from-snp_set_vmsa.patch @@ -0,0 +1,34 @@ +From e792d843aa3c9d039074cdce728d5803262e57a7 Mon Sep 17 00:00:00 2001 +From: Tianyu Lan +Date: Thu, 13 Mar 2025 04:52:17 -0400 +Subject: x86/hyperv: Fix check of return value from snp_set_vmsa() + +From: Tianyu Lan + +commit e792d843aa3c9d039074cdce728d5803262e57a7 upstream. + +snp_set_vmsa() returns 0 as success result and so fix it. + +Cc: stable@vger.kernel.org +Fixes: 44676bb9d566 ("x86/hyperv: Add smp support for SEV-SNP guest") +Signed-off-by: Tianyu Lan +Reviewed-by: Michael Kelley +Link: https://lore.kernel.org/r/20250313085217.45483-1-ltykernel@gmail.com +Signed-off-by: Wei Liu +Message-ID: <20250313085217.45483-1-ltykernel@gmail.com> +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/hyperv/ivm.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/arch/x86/hyperv/ivm.c ++++ b/arch/x86/hyperv/ivm.c +@@ -338,7 +338,7 @@ int hv_snp_boot_ap(u32 cpu, unsigned lon + vmsa->sev_features = sev_status >> 2; + + ret = snp_set_vmsa(vmsa, true); +- if (!ret) { ++ if (ret) { + pr_err("RMPADJUST(%llx) failed: %llx\n", (u64)vmsa, ret); + free_page((u64)vmsa); + return ret; diff --git a/queue-6.14/x86-mce-use-is_copy_from_user-to-determine-copy-from-user-context.patch b/queue-6.14/x86-mce-use-is_copy_from_user-to-determine-copy-from-user-context.patch new file mode 100644 index 0000000000..c8f2a47377 --- /dev/null +++ b/queue-6.14/x86-mce-use-is_copy_from_user-to-determine-copy-from-user-context.patch @@ -0,0 +1,213 @@ +From 1a15bb8303b6b104e78028b6c68f76a0d4562134 Mon Sep 17 00:00:00 2001 +From: Shuai Xue +Date: Wed, 12 Mar 2025 19:28:50 +0800 +Subject: x86/mce: use is_copy_from_user() to determine copy-from-user context + +From: Shuai Xue + +commit 1a15bb8303b6b104e78028b6c68f76a0d4562134 upstream. + +Patch series "mm/hwpoison: Fix regressions in memory failure handling", +v4. + +## 1. What am I trying to do: + +This patchset resolves two critical regressions related to memory failure +handling that have appeared in the upstream kernel since version 5.17, as +compared to 5.10 LTS. 
+ + - copyin case: poison found in user page while kernel copying from user space + - instr case: poison found while instruction fetching in user space + +## 2. What is the expected outcome and why + +- For copyin case: + +Kernel can recover from poison found where kernel is doing get_user() or +copy_from_user() if those places get an error return and the kernel return +-EFAULT to the process instead of crashing. More specifily, MCE handler +checks the fixup handler type to decide whether an in kernel #MC can be +recovered. When EX_TYPE_UACCESS is found, the PC jumps to recovery code +specified in _ASM_EXTABLE_FAULT() and return a -EFAULT to user space. + +- For instr case: + +If a poison found while instruction fetching in user space, full recovery +is possible. User process takes #PF, Linux allocates a new page and fills +by reading from storage. + + +## 3. What actually happens and why + +- For copyin case: kernel panic since v5.17 + +Commit 4c132d1d844a ("x86/futex: Remove .fixup usage") introduced a new +extable fixup type, EX_TYPE_EFAULT_REG, and later patches updated the +extable fixup type for copy-from-user operations, changing it from +EX_TYPE_UACCESS to EX_TYPE_EFAULT_REG. It breaks previous EX_TYPE_UACCESS +handling when posion found in get_user() or copy_from_user(). + +- For instr case: user process is killed by a SIGBUS signal due to #CMCI + and #MCE race + +When an uncorrected memory error is consumed there is a race between the +CMCI from the memory controller reporting an uncorrected error with a UCNA +signature, and the core reporting and SRAR signature machine check when +the data is about to be consumed. + +### Background: why *UN*corrected errors tied to *C*MCI in Intel platform [1] + +Prior to Icelake memory controllers reported patrol scrub events that +detected a previously unseen uncorrected error in memory by signaling a +broadcast machine check with an SRAO (Software Recoverable Action +Optional) signature in the machine check bank. This was overkill because +it's not an urgent problem that no core is on the verge of consuming that +bad data. It's also found that multi SRAO UCE may cause nested MCE +interrupts and finally become an IERR. + +Hence, Intel downgrades the machine check bank signature of patrol scrub +from SRAO to UCNA (Uncorrected, No Action required), and signal changed to +#CMCI. Just to add to the confusion, Linux does take an action (in +uc_decode_notifier()) to try to offline the page despite the UC*NA* +signature name. + +### Background: why #CMCI and #MCE race when poison is consuming in + Intel platform [1] + +Having decided that CMCI/UCNA is the best action for patrol scrub errors, +the memory controller uses it for reads too. But the memory controller is +executing asynchronously from the core, and can't tell the difference +between a "real" read and a speculative read. So it will do CMCI/UCNA if +an error is found in any read. + +Thus: + +1) Core is clever and thinks address A is needed soon, issues a + speculative read. + +2) Core finds it is going to use address A soon after sending the read + request + +3) The CMCI from the memory controller is in a race with MCE from the + core that will soon try to retire the load from address A. + +Quite often (because speculation has got better) the CMCI from the memory +controller is delivered before the core is committed to the instruction +reading address A, so the interrupt is taken, and Linux offlines the page +(marking it as poison). 
+ + +## Why user process is killed for instr case + +Commit 046545a661af ("mm/hwpoison: fix error page recovered but reported +"not recovered"") tries to fix noise message "Memory error not recovered" +and skips duplicate SIGBUSs due to the race. But it also introduced a bug +that kill_accessing_process() return -EHWPOISON for instr case, as result, +kill_me_maybe() send a SIGBUS to user process. + +# 4. The fix, in my opinion, should be: + +- For copyin case: + +The key point is whether the error context is in a read from user memory. +We do not care about the ex-type if we know its a MOV reading from +userspace. + +is_copy_from_user() return true when both of the following two checks are +true: + + - the current instruction is copy + - source address is user memory + +If copy_user is true, we set + +m->kflags |= MCE_IN_KERNEL_COPYIN | MCE_IN_KERNEL_RECOV; + +Then do_machine_check() will try fixup_exception() first. + +- For instr case: let kill_accessing_process() return 0 to prevent a SIGBUS. + +- For patch 3: + +The return value of memory_failure() is quite important while discussed +instr case regression with Tony and Miaohe for patch 2, so add comment +about the return value. + + +This patch (of 3): + +Commit 4c132d1d844a ("x86/futex: Remove .fixup usage") introduced a new +extable fixup type, EX_TYPE_EFAULT_REG, and commit 4c132d1d844a +("x86/futex: Remove .fixup usage") updated the extable fixup type for +copy-from-user operations, changing it from EX_TYPE_UACCESS to +EX_TYPE_EFAULT_REG. The error context for copy-from-user operations no +longer functions as an in-kernel recovery context. Consequently, the +error context for copy-from-user operations no longer functions as an +in-kernel recovery context, resulting in kernel panics with the message: +"Machine check: Data load in unrecoverable area of kernel." + +To address this, it is crucial to identify if an error context involves a +read operation from user memory. The function is_copy_from_user() can be +utilized to determine: + + - the current operation is copy + - when reading user memory + +When these conditions are met, is_copy_from_user() will return true, +confirming that it is indeed a direct copy from user memory. This check +is essential for correctly handling the context of errors in these +operations without relying on the extable fixup types that previously +allowed for in-kernel recovery. + +So, use is_copy_from_user() to determine if a context is copy user directly. + +Link: https://lkml.kernel.org/r/20250312112852.82415-1-xueshuai@linux.alibaba.com +Link: https://lkml.kernel.org/r/20250312112852.82415-2-xueshuai@linux.alibaba.com +Fixes: 4c132d1d844a ("x86/futex: Remove .fixup usage") +Signed-off-by: Shuai Xue +Suggested-by: Peter Zijlstra +Acked-by: Borislav Petkov (AMD) +Tested-by: Tony Luck +Cc: Baolin Wang +Cc: Borislav Betkov +Cc: Catalin Marinas +Cc: Dave Hansen +Cc: "H. 
Peter Anvin" +Cc: Ingo Molnar +Cc: Josh Poimboeuf +Cc: Miaohe Lin +Cc: Naoya Horiguchi +Cc: Ruidong Tian +Cc: Thomas Gleinxer +Cc: Yazen Ghannam +Cc: Jane Chu +Cc: Jarkko Sakkinen +Cc: Jonathan Cameron +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/kernel/cpu/mce/severity.c | 11 +++++------ + 1 file changed, 5 insertions(+), 6 deletions(-) + +--- a/arch/x86/kernel/cpu/mce/severity.c ++++ b/arch/x86/kernel/cpu/mce/severity.c +@@ -300,13 +300,12 @@ static noinstr int error_context(struct + copy_user = is_copy_from_user(regs); + instrumentation_end(); + +- switch (fixup_type) { +- case EX_TYPE_UACCESS: +- if (!copy_user) +- return IN_KERNEL; +- m->kflags |= MCE_IN_KERNEL_COPYIN; +- fallthrough; ++ if (copy_user) { ++ m->kflags |= MCE_IN_KERNEL_COPYIN | MCE_IN_KERNEL_RECOV; ++ return IN_KERNEL_RECOV; ++ } + ++ switch (fixup_type) { + case EX_TYPE_FAULT_MCE_SAFE: + case EX_TYPE_DEFAULT_MCE_SAFE: + m->kflags |= MCE_IN_KERNEL_RECOV; diff --git a/queue-6.14/x86-microcode-amd-fix-__apply_microcode_amd-s-return-value.patch b/queue-6.14/x86-microcode-amd-fix-__apply_microcode_amd-s-return-value.patch new file mode 100644 index 0000000000..92ef4f4344 --- /dev/null +++ b/queue-6.14/x86-microcode-amd-fix-__apply_microcode_amd-s-return-value.patch @@ -0,0 +1,33 @@ +From 31ab12df723543047c3fc19cb8f8c4498ec6267f Mon Sep 17 00:00:00 2001 +From: Boris Ostrovsky +Date: Thu, 27 Mar 2025 19:05:02 -0400 +Subject: x86/microcode/AMD: Fix __apply_microcode_amd()'s return value + +From: Boris Ostrovsky + +commit 31ab12df723543047c3fc19cb8f8c4498ec6267f upstream. + +When verify_sha256_digest() fails, __apply_microcode_amd() should propagate +the failure by returning false (and not -1 which is promoted to true). + +Fixes: 50cef76d5cb0 ("x86/microcode/AMD: Load only SHA256-checksummed patches") +Signed-off-by: Boris Ostrovsky +Signed-off-by: Borislav Petkov (AMD) +Cc: stable@vger.kernel.org +Link: https://lore.kernel.org/r/20250327230503.1850368-2-boris.ostrovsky@oracle.com +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/kernel/cpu/microcode/amd.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/arch/x86/kernel/cpu/microcode/amd.c ++++ b/arch/x86/kernel/cpu/microcode/amd.c +@@ -600,7 +600,7 @@ static bool __apply_microcode_amd(struct + unsigned long p_addr = (unsigned long)&mc->hdr.data_code; + + if (!verify_sha256_digest(mc->hdr.patch_id, *cur_rev, (const u8 *)p_addr, psize)) +- return -1; ++ return false; + + native_wrmsrl(MSR_AMD64_PATCH_LOADER, p_addr); + diff --git a/queue-6.14/x86-paravirt-move-halt-paravirt-calls-under-config_paravirt.patch b/queue-6.14/x86-paravirt-move-halt-paravirt-calls-under-config_paravirt.patch new file mode 100644 index 0000000000..b88f795673 --- /dev/null +++ b/queue-6.14/x86-paravirt-move-halt-paravirt-calls-under-config_paravirt.patch @@ -0,0 +1,196 @@ +From 22cc5ca5de52bbfc36a7d4a55323f91fb4492264 Mon Sep 17 00:00:00 2001 +From: "Kirill A. Shutemov" +Date: Fri, 28 Feb 2025 01:44:14 +0000 +Subject: x86/paravirt: Move halt paravirt calls under CONFIG_PARAVIRT + +From: Kirill A. Shutemov + +commit 22cc5ca5de52bbfc36a7d4a55323f91fb4492264 upstream. + +CONFIG_PARAVIRT_XXL is mainly defined/used by XEN PV guests. For +other VM guest types, features supported under CONFIG_PARAVIRT +are self sufficient. CONFIG_PARAVIRT mainly provides support for +TLB flush operations and time related operations. 
+ +For TDX guest as well, paravirt calls under CONFIG_PARVIRT meets +most of its requirement except the need of HLT and SAFE_HLT +paravirt calls, which is currently defined under +CONFIG_PARAVIRT_XXL. + +Since enabling CONFIG_PARAVIRT_XXL is too bloated for TDX guest +like platforms, move HLT and SAFE_HLT paravirt calls under +CONFIG_PARAVIRT. + +Moving HLT and SAFE_HLT paravirt calls are not fatal and should not +break any functionality for current users of CONFIG_PARAVIRT. + +Fixes: bfe6ed0c6727 ("x86/tdx: Add HLT support for TDX guests") +Co-developed-by: Kuppuswamy Sathyanarayanan +Signed-off-by: Kuppuswamy Sathyanarayanan +Signed-off-by: Kirill A. Shutemov +Signed-off-by: Vishal Annapurve +Signed-off-by: Ingo Molnar +Reviewed-by: Andi Kleen +Reviewed-by: Tony Luck +Reviewed-by: Juergen Gross +Tested-by: Ryan Afranji +Cc: Andy Lutomirski +Cc: Brian Gerst +Cc: H. Peter Anvin +Cc: Linus Torvalds +Cc: Josh Poimboeuf +Cc: stable@kernel.org +Link: https://lore.kernel.org/r/20250228014416.3925664-2-vannapurve@google.com +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/include/asm/irqflags.h | 40 ++++++++++++++++++---------------- + arch/x86/include/asm/paravirt.h | 20 ++++++++--------- + arch/x86/include/asm/paravirt_types.h | 3 -- + arch/x86/kernel/paravirt.c | 14 ++++++----- + 4 files changed, 41 insertions(+), 36 deletions(-) + +--- a/arch/x86/include/asm/irqflags.h ++++ b/arch/x86/include/asm/irqflags.h +@@ -76,6 +76,28 @@ static __always_inline void native_local + + #endif + ++#ifndef CONFIG_PARAVIRT ++#ifndef __ASSEMBLY__ ++/* ++ * Used in the idle loop; sti takes one instruction cycle ++ * to complete: ++ */ ++static __always_inline void arch_safe_halt(void) ++{ ++ native_safe_halt(); ++} ++ ++/* ++ * Used when interrupts are already enabled or to ++ * shutdown the processor: ++ */ ++static __always_inline void halt(void) ++{ ++ native_halt(); ++} ++#endif /* __ASSEMBLY__ */ ++#endif /* CONFIG_PARAVIRT */ ++ + #ifdef CONFIG_PARAVIRT_XXL + #include + #else +@@ -98,24 +120,6 @@ static __always_inline void arch_local_i + } + + /* +- * Used in the idle loop; sti takes one instruction cycle +- * to complete: +- */ +-static __always_inline void arch_safe_halt(void) +-{ +- native_safe_halt(); +-} +- +-/* +- * Used when interrupts are already enabled or to +- * shutdown the processor: +- */ +-static __always_inline void halt(void) +-{ +- native_halt(); +-} +- +-/* + * For spinlocks, etc: + */ + static __always_inline unsigned long arch_local_irq_save(void) +--- a/arch/x86/include/asm/paravirt.h ++++ b/arch/x86/include/asm/paravirt.h +@@ -107,6 +107,16 @@ static inline void notify_page_enc_statu + PVOP_VCALL3(mmu.notify_page_enc_status_changed, pfn, npages, enc); + } + ++static __always_inline void arch_safe_halt(void) ++{ ++ PVOP_VCALL0(irq.safe_halt); ++} ++ ++static inline void halt(void) ++{ ++ PVOP_VCALL0(irq.halt); ++} ++ + #ifdef CONFIG_PARAVIRT_XXL + static inline void load_sp0(unsigned long sp0) + { +@@ -170,16 +180,6 @@ static inline void __write_cr4(unsigned + PVOP_VCALL1(cpu.write_cr4, x); + } + +-static __always_inline void arch_safe_halt(void) +-{ +- PVOP_VCALL0(irq.safe_halt); +-} +- +-static inline void halt(void) +-{ +- PVOP_VCALL0(irq.halt); +-} +- + static inline u64 paravirt_read_msr(unsigned msr) + { + return PVOP_CALL1(u64, cpu.read_msr, msr); +--- a/arch/x86/include/asm/paravirt_types.h ++++ b/arch/x86/include/asm/paravirt_types.h +@@ -120,10 +120,9 @@ struct pv_irq_ops { + struct paravirt_callee_save save_fl; + struct paravirt_callee_save irq_disable; + struct 
+-
++#endif
+ 	void (*safe_halt)(void);
+ 	void (*halt)(void);
+-#endif
+ } __no_randomize_layout;
+
+ struct pv_mmu_ops {
+--- a/arch/x86/kernel/paravirt.c
++++ b/arch/x86/kernel/paravirt.c
+@@ -110,6 +110,11 @@ int paravirt_disable_iospace(void)
+ 	return request_resource(&ioport_resource, &reserve_ioports);
+ }
+
++static noinstr void pv_native_safe_halt(void)
++{
++	native_safe_halt();
++}
++
+ #ifdef CONFIG_PARAVIRT_XXL
+ static noinstr void pv_native_write_cr2(unsigned long val)
+ {
+@@ -125,11 +130,6 @@ static noinstr void pv_native_set_debugr
+ {
+ 	native_set_debugreg(regno, val);
+ }
+-
+-static noinstr void pv_native_safe_halt(void)
+-{
+-	native_safe_halt();
+-}
+ #endif
+
+ struct pv_info pv_info = {
+@@ -186,9 +186,11 @@ struct paravirt_patch_template pv_ops =
+ 	.irq.save_fl = __PV_IS_CALLEE_SAVE(pv_native_save_fl),
+ 	.irq.irq_disable = __PV_IS_CALLEE_SAVE(pv_native_irq_disable),
+ 	.irq.irq_enable = __PV_IS_CALLEE_SAVE(pv_native_irq_enable),
++#endif /* CONFIG_PARAVIRT_XXL */
++
++	/* Irq HLT ops. */
+ 	.irq.safe_halt = pv_native_safe_halt,
+ 	.irq.halt = native_halt,
+-#endif /* CONFIG_PARAVIRT_XXL */
+
+ 	/* Mmu ops. */
+ 	.mmu.flush_tlb_user = native_flush_tlb_local,
diff --git a/queue-6.14/x86-tdx-fix-arch_safe_halt-execution-for-tdx-vms.patch b/queue-6.14/x86-tdx-fix-arch_safe_halt-execution-for-tdx-vms.patch
new file mode 100644
index 0000000000..a21267b418
--- /dev/null
+++ b/queue-6.14/x86-tdx-fix-arch_safe_halt-execution-for-tdx-vms.patch
@@ -0,0 +1,159 @@
+From 9f98a4f4e7216dbe366010b4cdcab6b220f229c4 Mon Sep 17 00:00:00 2001
+From: Vishal Annapurve
+Date: Fri, 28 Feb 2025 01:44:15 +0000
+Subject: x86/tdx: Fix arch_safe_halt() execution for TDX VMs
+
+From: Vishal Annapurve
+
+commit 9f98a4f4e7216dbe366010b4cdcab6b220f229c4 upstream.
+
+Direct HLT instruction execution causes #VEs for TDX VMs, which are
+routed to the hypervisor via TDCALL. If HLT is executed in the STI
+shadow, the resulting #VE handler will enable interrupts before the
+TDCALL is routed to the hypervisor, leading to missed wakeup events,
+as the current TDX spec doesn't expose interruptibility-state
+information that would allow the #VE handler to selectively enable
+interrupts.
+
+Commit bfe6ed0c6727 ("x86/tdx: Add HLT support for TDX guests")
+prevented the idle routines from executing the HLT instruction in the
+STI shadow. But it missed the paravirt routine, which can be reached
+via this path as an example:
+
+  kvm_wait() =>
+    safe_halt() =>
+      raw_safe_halt() =>
+        arch_safe_halt() =>
+          irq.safe_halt() =>
+            pv_native_safe_halt()
+
+To reliably handle arch_safe_halt() for TDX VMs, introduce an explicit
+dependency on CONFIG_PARAVIRT and override the paravirt halt()/safe_halt()
+routines with TDX-safe versions that execute a direct TDCALL and the
+needed interrupt-flag updates. Executing a direct TDCALL brings the
+additional benefit of avoiding HLT-related #VEs altogether.
+
+As tested by Ryan Afranji:
+
+  "Tested with the specjbb2015 benchmark. It has heavy lock contention
+   which leads to many halt calls. TDX VMs suffered a poor score before
+   this patchset.
+
+   Verified the major performance improvement with this patchset applied."
+
+Fixes: bfe6ed0c6727 ("x86/tdx: Add HLT support for TDX guests")
+Signed-off-by: Vishal Annapurve
+Signed-off-by: Ingo Molnar
+Reviewed-by: Kirill A. Shutemov
+Tested-by: Ryan Afranji
+Cc: Andy Lutomirski
+Cc: Brian Gerst
+Cc: Juergen Gross
+Cc: H. Peter Anvin
+Cc: Linus Torvalds
+Cc: Josh Poimboeuf
+Cc: stable@vger.kernel.org
+Link: https://lore.kernel.org/r/20250228014416.3925664-3-vannapurve@google.com
+Signed-off-by: Greg Kroah-Hartman
+---
+ arch/x86/Kconfig           |    1 +
+ arch/x86/coco/tdx/tdx.c    |   26 +++++++++++++++++++++++++-
+ arch/x86/include/asm/tdx.h |    4 ++--
+ arch/x86/kernel/process.c  |    2 +-
+ 4 files changed, 29 insertions(+), 4 deletions(-)
+
+--- a/arch/x86/Kconfig
++++ b/arch/x86/Kconfig
+@@ -902,6 +902,7 @@ config INTEL_TDX_GUEST
+ 	depends on X86_64 && CPU_SUP_INTEL
+ 	depends on X86_X2APIC
+ 	depends on EFI_STUB
++	depends on PARAVIRT
+ 	select ARCH_HAS_CC_PLATFORM
+ 	select X86_MEM_ENCRYPT
+ 	select X86_MCE
+--- a/arch/x86/coco/tdx/tdx.c
++++ b/arch/x86/coco/tdx/tdx.c
+@@ -14,6 +14,7 @@
+ #include
+ #include
+ #include
++#include <asm/paravirt_types.h>
+ #include
+ #include
+ #include
+@@ -398,7 +399,7 @@ static int handle_halt(struct ve_info *v
+ 	return ve_instr_len(ve);
+ }
+
+-void __cpuidle tdx_safe_halt(void)
++void __cpuidle tdx_halt(void)
+ {
+ 	const bool irq_disabled = false;
+
+@@ -409,6 +410,16 @@ void __cpuidle tdx_safe_halt(void)
+ 	WARN_ONCE(1, "HLT instruction emulation failed\n");
+ }
+
++static void __cpuidle tdx_safe_halt(void)
++{
++	tdx_halt();
++	/*
++	 * "__cpuidle" section doesn't support instrumentation, so stick
++	 * with raw_* variant that avoids tracing hooks.
++	 */
++	raw_local_irq_enable();
++}
++
+ static int read_msr(struct pt_regs *regs, struct ve_info *ve)
+ {
+ 	struct tdx_module_args args = {
+@@ -1110,6 +1121,19 @@ void __init tdx_early_init(void)
+ 	x86_platform.guest.enc_kexec_finish = tdx_kexec_finish;
+
+ 	/*
++	 * Avoid "sti;hlt" execution in TDX guests as HLT induces a #VE that
++	 * will enable interrupts before HLT TDCALL invocation if executed
++	 * in STI-shadow, possibly resulting in missed wakeup events.
++	 *
++	 * Modify all possible HLT execution paths to use TDX specific routines
++	 * that directly execute TDCALL and toggle the interrupt state as
++	 * needed after TDCALL completion. This also reduces HLT related #VEs
++	 * in addition to having a reliable halt logic execution.
++	 */
++	pv_ops.irq.safe_halt = tdx_safe_halt;
++	pv_ops.irq.halt = tdx_halt;
++
++	/*
+ 	 * TDX intercepts the RDMSR to read the X2APIC ID in the parallel
+ 	 * bringup low level code. That raises #VE which cannot be handled
+ 	 * there.
+--- a/arch/x86/include/asm/tdx.h
++++ b/arch/x86/include/asm/tdx.h
+@@ -58,7 +58,7 @@ void tdx_get_ve_info(struct ve_info *ve)
+
+ bool tdx_handle_virt_exception(struct pt_regs *regs, struct ve_info *ve);
+
+-void tdx_safe_halt(void);
++void tdx_halt(void);
+
+ bool tdx_early_handle_ve(struct pt_regs *regs);
+
+@@ -72,7 +72,7 @@ void __init tdx_dump_td_ctls(u64 td_ctls
+ #else
+
+ static inline void tdx_early_init(void) { };
+-static inline void tdx_safe_halt(void) { };
++static inline void tdx_halt(void) { };
+
+ static inline bool tdx_early_handle_ve(struct pt_regs *regs) { return false; }
+
+--- a/arch/x86/kernel/process.c
++++ b/arch/x86/kernel/process.c
+@@ -939,7 +939,7 @@ void __init select_idle_routine(void)
+ 		static_call_update(x86_idle, mwait_idle);
+ 	} else if (cpu_feature_enabled(X86_FEATURE_TDX_GUEST)) {
+ 		pr_info("using TDX aware idle routine\n");
+-		static_call_update(x86_idle, tdx_safe_halt);
++		static_call_update(x86_idle, tdx_halt);
+ 	} else {
+ 		static_call_update(x86_idle, default_idle);
+ 	}
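
Side note: the last two x86 patches above share one mechanism. halt() and
safe_halt() are reached through the pv_ops function-pointer table, and
tdx_early_init() repoints irq.halt/irq.safe_halt at TDX-safe routines that
issue the TDCALL first and enable interrupts only afterwards, which closes
the missed-wakeup window. The stand-alone C program below is a minimal
user-space sketch of that override pattern, not kernel code; the names
halt_ops, tdx_like_halt and platform_early_init are hypothetical.

#include <stdbool.h>
#include <stdio.h>

/* Native defaults, standing in for native_halt()/native_safe_halt(). */
static void native_do_halt(void)      { puts("native: hlt"); }
static void native_do_safe_halt(void) { puts("native: sti; hlt"); }

/* Analogue of the halt hooks in struct pv_irq_ops. */
struct halt_ops {
	void (*halt)(void);
	void (*safe_halt)(void);
};

static struct halt_ops ops = {
	.halt      = native_do_halt,
	.safe_halt = native_do_safe_halt,
};

/* TDX-like override: request the halt via a hypercall first... */
static void tdx_like_halt(void)
{
	puts("tdx: tdcall(HLT)");
}

/*
 * ...and enable interrupts only after the hypercall returns, so no
 * interrupts-enabled window opens before the halt request reaches
 * the hypervisor.
 */
static void tdx_like_safe_halt(void)
{
	tdx_like_halt();
	puts("tdx: local_irq_enable()");
}

/* Analogue of tdx_early_init() repointing pv_ops at early boot. */
static void platform_early_init(bool is_tdx_like_guest)
{
	if (is_tdx_like_guest) {
		ops.halt      = tdx_like_halt;
		ops.safe_halt = tdx_like_safe_halt;
	}
}

int main(void)
{
	platform_early_init(true);
	ops.safe_halt();	/* callers are unchanged; the table reroutes them */
	return 0;
}

Because every caller goes through the table, a single early-boot assignment
retargets all halt paths at once. That is also why the first of the two
patches had to move the hooks out of the CONFIG_PARAVIRT_XXL-only section
before the second patch could override them.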