From: Greg Kroah-Hartman Date: Mon, 20 Nov 2023 15:32:21 +0000 (+0100) Subject: 6.6-stable patches X-Git-Tag: v4.14.331~140 X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=b41c113071d44d738d002e051da74ec074e6b81f;p=thirdparty%2Fkernel%2Fstable-queue.git 6.6-stable patches added patches: bpf-fix-check_stack_write_fixed_off-to-correctly-spill-imm.patch bpf-fix-precision-tracking-for-bpf_alu-bpf_to_be-bpf_end.patch crypto-x86-sha-load-modules-based-on-cpu-features.patch drivers-perf-check-find_first_bit-return-value.patch i915-perf-fix-null-deref-bugs-with-drm_dbg-calls.patch kvm-x86-clear-bit12-of-icr-after-apic-write-vm-exit.patch kvm-x86-fix-lapic-timer-interrupt-lost-after-loading-a-snapshot.patch kvm-x86-hyper-v-don-t-auto-enable-stimer-on-write-from-user-space.patch kvm-x86-ignore-msr_amd64_tw_cfg-access.patch media-venus-hfi-add-checks-to-perform-sanity-on-queue-pointers.patch mmc-sdhci-pci-gli-gl9755-mask-the-replay-timer-timeout-of-aer.patch perf-arm_cspmu-reject-events-meant-for-other-pmus.patch perf-core-fix-cpuctx-refcounting.patch perf-intel-pt-fix-async-branch-flags.patch powerpc-perf-fix-disabling-bhrb-and-instruction-sampling.patch randstruct-fix-gcc-plugin-performance-mode-to-stay-in-group.patch scsi-megaraid_sas-increase-register-read-retry-rount-from-3-to-30-for-selected-registers.patch scsi-mpt3sas-fix-loop-logic.patch scsi-qla2xxx-fix-system-crash-due-to-bad-pointer-access.patch scsi-ufs-core-fix-racing-issue-between-ufshcd_mcq_abort-and-isr.patch scsi-ufs-qcom-update-phy-settings-only-when-scaling-to-higher-gears.patch spi-fix-null-dereference-on-suspend.patch x86-apic-msi-fix-misconfigured-non-maskable-msi-quirk.patch x86-cpu-hygon-fix-the-cpu-topology-evaluation-for-real.patch x86-pci-avoid-pme-from-d3hot-d3cold-for-amd-rembrandt-and-phoenix-usb4.patch x86-shstk-delay-signal-entry-ssp-write-until-after-user-accesses.patch --- diff --git a/queue-6.6/bpf-fix-check_stack_write_fixed_off-to-correctly-spill-imm.patch 
b/queue-6.6/bpf-fix-check_stack_write_fixed_off-to-correctly-spill-imm.patch new file mode 100644 index 00000000000..4737b5d6d25 --- /dev/null +++ b/queue-6.6/bpf-fix-check_stack_write_fixed_off-to-correctly-spill-imm.patch @@ -0,0 +1,68 @@ +From 811c363645b33e6e22658634329e95f383dfc705 Mon Sep 17 00:00:00 2001 +From: Hao Sun +Date: Wed, 1 Nov 2023 13:33:51 +0100 +Subject: bpf: Fix check_stack_write_fixed_off() to correctly spill imm + +From: Hao Sun + +commit 811c363645b33e6e22658634329e95f383dfc705 upstream. + +In check_stack_write_fixed_off(), imm value is cast to u32 before being +spilled to the stack. Therefore, the sign information is lost, and the +range information is incorrect when load from the stack again. + +For the following prog: +0: r2 = r10 +1: *(u64*)(r2 -40) = -44 +2: r0 = *(u64*)(r2 - 40) +3: if r0 s<= 0xa goto +2 +4: r0 = 1 +5: exit +6: r0 = 0 +7: exit + +The verifier gives: +func#0 @0 +0: R1=ctx(off=0,imm=0) R10=fp0 +0: (bf) r2 = r10 ; R2_w=fp0 R10=fp0 +1: (7a) *(u64 *)(r2 -40) = -44 ; R2_w=fp0 fp-40_w=4294967252 +2: (79) r0 = *(u64 *)(r2 -40) ; R0_w=4294967252 R2_w=fp0 +fp-40_w=4294967252 +3: (c5) if r0 s< 0xa goto pc+2 +mark_precise: frame0: last_idx 3 first_idx 0 subseq_idx -1 +mark_precise: frame0: regs=r0 stack= before 2: (79) r0 = *(u64 *)(r2 -40) +3: R0_w=4294967252 +4: (b7) r0 = 1 ; R0_w=1 +5: (95) exit +verification time 7971 usec +stack depth 40 +processed 6 insns (limit 1000000) max_states_per_insn 0 total_states 0 +peak_states 0 mark_read 0 + +So remove the incorrect cast, since imm field is declared as s32, and +__mark_reg_known() takes u64, so imm would be correctly sign extended +by compiler. 
+ +Fixes: ecdf985d7615 ("bpf: track immediate values written to stack by BPF_ST instruction") +Cc: stable@vger.kernel.org +Signed-off-by: Hao Sun +Acked-by: Shung-Hsi Yu +Acked-by: Eduard Zingerman +Link: https://lore.kernel.org/r/20231101-fix-check-stack-write-v3-1-f05c2b1473d5@gmail.com +Signed-off-by: Alexei Starovoitov +Signed-off-by: Greg Kroah-Hartman +--- + kernel/bpf/verifier.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/kernel/bpf/verifier.c ++++ b/kernel/bpf/verifier.c +@@ -4376,7 +4376,7 @@ static int check_stack_write_fixed_off(s + insn->imm != 0 && env->bpf_capable) { + struct bpf_reg_state fake_reg = {}; + +- __mark_reg_known(&fake_reg, (u32)insn->imm); ++ __mark_reg_known(&fake_reg, insn->imm); + fake_reg.type = SCALAR_VALUE; + save_register_state(state, spi, &fake_reg, size); + } else if (reg && is_spillable_regtype(reg->type)) { diff --git a/queue-6.6/bpf-fix-precision-tracking-for-bpf_alu-bpf_to_be-bpf_end.patch b/queue-6.6/bpf-fix-precision-tracking-for-bpf_alu-bpf_to_be-bpf_end.patch new file mode 100644 index 00000000000..95f1dd02484 --- /dev/null +++ b/queue-6.6/bpf-fix-precision-tracking-for-bpf_alu-bpf_to_be-bpf_end.patch @@ -0,0 +1,60 @@ +From 291d044fd51f8484066300ee42afecf8c8db7b3a Mon Sep 17 00:00:00 2001 +From: Shung-Hsi Yu +Date: Thu, 2 Nov 2023 13:39:03 +0800 +Subject: bpf: Fix precision tracking for BPF_ALU | BPF_TO_BE | BPF_END +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Shung-Hsi Yu + +commit 291d044fd51f8484066300ee42afecf8c8db7b3a upstream. + +BPF_END and BPF_NEG has a different specification for the source bit in +the opcode compared to other ALU/ALU64 instructions, and is either +reserved or use to specify the byte swap endianness. In both cases the +source bit does not encode source operand location, and src_reg is a +reserved field. 
+ +backtrack_insn() currently does not differentiate BPF_END and BPF_NEG +from other ALU/ALU64 instructions, which leads to r0 being incorrectly +marked as precise when processing BPF_ALU | BPF_TO_BE | BPF_END +instructions. This commit teaches backtrack_insn() to correctly mark +precision for such case. + +While precise tracking of BPF_NEG and other BPF_END instructions are +correct and does not need fixing, this commit opt to process all BPF_NEG +and BPF_END instructions within the same if-clause to better align with +current convention used in the verifier (e.g. check_alu_op). + +Fixes: b5dc0163d8fd ("bpf: precise scalar_value tracking") +Cc: stable@vger.kernel.org +Reported-by: Mohamed Mahmoud +Closes: https://lore.kernel.org/r/87jzrrwptf.fsf@toke.dk +Tested-by: Toke Høiland-Jørgensen +Tested-by: Tao Lyu +Acked-by: Eduard Zingerman +Signed-off-by: Shung-Hsi Yu +Link: https://lore.kernel.org/r/20231102053913.12004-2-shung-hsi.yu@suse.com +Signed-off-by: Alexei Starovoitov +Signed-off-by: Greg Kroah-Hartman +--- + kernel/bpf/verifier.c | 7 ++++++- + 1 file changed, 6 insertions(+), 1 deletion(-) + +--- a/kernel/bpf/verifier.c ++++ b/kernel/bpf/verifier.c +@@ -3444,7 +3444,12 @@ static int backtrack_insn(struct bpf_ver + if (class == BPF_ALU || class == BPF_ALU64) { + if (!bt_is_reg_set(bt, dreg)) + return 0; +- if (opcode == BPF_MOV) { ++ if (opcode == BPF_END || opcode == BPF_NEG) { ++ /* sreg is reserved and unused ++ * dreg still need precision before this insn ++ */ ++ return 0; ++ } else if (opcode == BPF_MOV) { + if (BPF_SRC(insn->code) == BPF_X) { + /* dreg = sreg or dreg = (s8, s16, s32)sreg + * dreg needs precision after this insn diff --git a/queue-6.6/crypto-x86-sha-load-modules-based-on-cpu-features.patch b/queue-6.6/crypto-x86-sha-load-modules-based-on-cpu-features.patch new file mode 100644 index 00000000000..7d205dfd495 --- /dev/null +++ b/queue-6.6/crypto-x86-sha-load-modules-based-on-cpu-features.patch @@ -0,0 +1,102 @@ +From 
1c43c0f1f84aa59dfc98ce66f0a67b2922aa7f9d Mon Sep 17 00:00:00 2001 +From: Roxana Nicolescu +Date: Fri, 15 Sep 2023 12:23:25 +0200 +Subject: crypto: x86/sha - load modules based on CPU features + +From: Roxana Nicolescu + +commit 1c43c0f1f84aa59dfc98ce66f0a67b2922aa7f9d upstream. + +x86 optimized crypto modules are built as modules rather than build-in and +they are not loaded when the crypto API is initialized, resulting in the +generic builtin module (sha1-generic) being used instead. + +It was discovered when creating a sha1/sha256 checksum of a 2Gb file by +using kcapi-tools because it would take significantly longer than creating +a sha512 checksum of the same file. trace-cmd showed that for sha1/256 the +generic module was used, whereas for sha512 the optimized module was used +instead. + +Add module aliases() for these x86 optimized crypto modules based on CPU +feature bits so udev gets a chance to load them later in the boot +process. This resulted in ~3x decrease in the real-time execution of +kcapi-dsg. + +Fix is inspired from commit +aa031b8f702e ("crypto: x86/sha512 - load based on CPU features") +where a similar fix was done for sha512. 
+ +Cc: stable@vger.kernel.org # 5.15+ +Suggested-by: Dimitri John Ledkov +Suggested-by: Julian Andres Klode +Signed-off-by: Roxana Nicolescu +Signed-off-by: Herbert Xu +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/crypto/sha1_ssse3_glue.c | 12 ++++++++++++ + arch/x86/crypto/sha256_ssse3_glue.c | 12 ++++++++++++ + 2 files changed, 24 insertions(+) + +--- a/arch/x86/crypto/sha1_ssse3_glue.c ++++ b/arch/x86/crypto/sha1_ssse3_glue.c +@@ -24,8 +24,17 @@ + #include + #include + #include ++#include + #include + ++static const struct x86_cpu_id module_cpu_ids[] = { ++ X86_MATCH_FEATURE(X86_FEATURE_AVX2, NULL), ++ X86_MATCH_FEATURE(X86_FEATURE_AVX, NULL), ++ X86_MATCH_FEATURE(X86_FEATURE_SSSE3, NULL), ++ {} ++}; ++MODULE_DEVICE_TABLE(x86cpu, module_cpu_ids); ++ + static int sha1_update(struct shash_desc *desc, const u8 *data, + unsigned int len, sha1_block_fn *sha1_xform) + { +@@ -301,6 +310,9 @@ static inline void unregister_sha1_ni(vo + + static int __init sha1_ssse3_mod_init(void) + { ++ if (!x86_match_cpu(module_cpu_ids)) ++ return -ENODEV; ++ + if (register_sha1_ssse3()) + goto fail; + +--- a/arch/x86/crypto/sha256_ssse3_glue.c ++++ b/arch/x86/crypto/sha256_ssse3_glue.c +@@ -38,11 +38,20 @@ + #include + #include + #include ++#include + #include + + asmlinkage void sha256_transform_ssse3(struct sha256_state *state, + const u8 *data, int blocks); + ++static const struct x86_cpu_id module_cpu_ids[] = { ++ X86_MATCH_FEATURE(X86_FEATURE_AVX2, NULL), ++ X86_MATCH_FEATURE(X86_FEATURE_AVX, NULL), ++ X86_MATCH_FEATURE(X86_FEATURE_SSSE3, NULL), ++ {} ++}; ++MODULE_DEVICE_TABLE(x86cpu, module_cpu_ids); ++ + static int _sha256_update(struct shash_desc *desc, const u8 *data, + unsigned int len, sha256_block_fn *sha256_xform) + { +@@ -366,6 +375,9 @@ static inline void unregister_sha256_ni( + + static int __init sha256_ssse3_mod_init(void) + { ++ if (!x86_match_cpu(module_cpu_ids)) ++ return -ENODEV; ++ + if (register_sha256_ssse3()) + goto fail; + diff --git 
a/queue-6.6/drivers-perf-check-find_first_bit-return-value.patch b/queue-6.6/drivers-perf-check-find_first_bit-return-value.patch new file mode 100644 index 00000000000..ceb474d67a5 --- /dev/null +++ b/queue-6.6/drivers-perf-check-find_first_bit-return-value.patch @@ -0,0 +1,67 @@ +From c6e316ac05532febb0c966fa9b55f5258ed037be Mon Sep 17 00:00:00 2001 +From: Alexandre Ghiti +Date: Thu, 9 Nov 2023 09:21:28 +0100 +Subject: drivers: perf: Check find_first_bit() return value + +From: Alexandre Ghiti + +commit c6e316ac05532febb0c966fa9b55f5258ed037be upstream. + +We must check the return value of find_first_bit() before using the +return value as an index array since it happens to overflow the array +and then panic: + +[ 107.318430] Kernel BUG [#1] +[ 107.319434] CPU: 3 PID: 1238 Comm: kill Tainted: G E 6.6.0-rc6ubuntu-defconfig #2 +[ 107.319465] Hardware name: riscv-virtio,qemu (DT) +[ 107.319551] epc : pmu_sbi_ovf_handler+0x3a4/0x3ae +[ 107.319840] ra : pmu_sbi_ovf_handler+0x52/0x3ae +[ 107.319868] epc : ffffffff80a0a77c ra : ffffffff80a0a42a sp : ffffaf83fecda350 +[ 107.319884] gp : ffffffff823961a8 tp : ffffaf8083db1dc0 t0 : ffffaf83fecda480 +[ 107.319899] t1 : ffffffff80cafe62 t2 : 000000000000ff00 s0 : ffffaf83fecda520 +[ 107.319921] s1 : ffffaf83fecda380 a0 : 00000018fca29df0 a1 : ffffffffffffffff +[ 107.319936] a2 : 0000000001073734 a3 : 0000000000000004 a4 : 0000000000000000 +[ 107.319951] a5 : 0000000000000040 a6 : 000000001d1c8774 a7 : 0000000000504d55 +[ 107.319965] s2 : ffffffff82451f10 s3 : ffffffff82724e70 s4 : 000000000000003f +[ 107.319980] s5 : 0000000000000011 s6 : ffffaf8083db27c0 s7 : 0000000000000000 +[ 107.319995] s8 : 0000000000000001 s9 : 00007fffb45d6558 s10: 00007fffb45d81a0 +[ 107.320009] s11: ffffaf7ffff60000 t3 : 0000000000000004 t4 : 0000000000000000 +[ 107.320023] t5 : ffffaf7f80000000 t6 : ffffaf8000000000 +[ 107.320037] status: 0000000200000100 badaddr: 0000000000000000 cause: 0000000000000003 +[ 107.320081] [] 
pmu_sbi_ovf_handler+0x3a4/0x3ae +[ 107.320112] [] handle_percpu_devid_irq+0x9e/0x1a0 +[ 107.320131] [] generic_handle_domain_irq+0x28/0x36 +[ 107.320148] [] riscv_intc_irq+0x36/0x4e +[ 107.320166] [] handle_riscv_irq+0x54/0x86 +[ 107.320189] [] do_irq+0x64/0x96 +[ 107.320271] Code: 85a6 855e b097 ff7f 80e7 9220 b709 9002 4501 bbd9 (9002) 6097 +[ 107.320585] ---[ end trace 0000000000000000 ]--- +[ 107.320704] Kernel panic - not syncing: Fatal exception in interrupt +[ 107.320775] SMP: stopping secondary CPUs +[ 107.321219] Kernel Offset: 0x0 from 0xffffffff80000000 +[ 107.333051] ---[ end Kernel panic - not syncing: Fatal exception in interrupt ]--- + +Fixes: 4905ec2fb7e6 ("RISC-V: Add sscofpmf extension support") +Signed-off-by: Alexandre Ghiti +Link: https://lore.kernel.org/r/20231109082128.40777-1-alexghiti@rivosinc.com +Cc: stable@vger.kernel.org +Signed-off-by: Palmer Dabbelt +Signed-off-by: Greg Kroah-Hartman +--- + drivers/perf/riscv_pmu_sbi.c | 5 +++++ + 1 file changed, 5 insertions(+) + +--- a/drivers/perf/riscv_pmu_sbi.c ++++ b/drivers/perf/riscv_pmu_sbi.c +@@ -687,6 +687,11 @@ static irqreturn_t pmu_sbi_ovf_handler(i + + /* Firmware counter don't support overflow yet */ + fidx = find_first_bit(cpu_hw_evt->used_hw_ctrs, RISCV_MAX_COUNTERS); ++ if (fidx == RISCV_MAX_COUNTERS) { ++ csr_clear(CSR_SIP, BIT(riscv_pmu_irq_num)); ++ return IRQ_NONE; ++ } ++ + event = cpu_hw_evt->events[fidx]; + if (!event) { + csr_clear(CSR_SIP, BIT(riscv_pmu_irq_num)); diff --git a/queue-6.6/i915-perf-fix-null-deref-bugs-with-drm_dbg-calls.patch b/queue-6.6/i915-perf-fix-null-deref-bugs-with-drm_dbg-calls.patch new file mode 100644 index 00000000000..2c33dfc693c --- /dev/null +++ b/queue-6.6/i915-perf-fix-null-deref-bugs-with-drm_dbg-calls.patch @@ -0,0 +1,71 @@ +From 471aa951bf1206d3c10d0daa67005b8e4db4ff83 Mon Sep 17 00:00:00 2001 +From: Harshit Mogalapalli +Date: Fri, 27 Oct 2023 10:28:22 -0700 +Subject: i915/perf: Fix NULL deref bugs with drm_dbg() calls + +From: Harshit 
Mogalapalli + +commit 471aa951bf1206d3c10d0daa67005b8e4db4ff83 upstream. + +When i915 perf interface is not available dereferencing it will lead to +NULL dereferences. + +As returning -ENOTSUPP is pretty clear return when perf interface is not +available. + +Fixes: 2fec539112e8 ("i915/perf: Replace DRM_DEBUG with driver specific drm_dbg call") +Suggested-by: Tvrtko Ursulin +Signed-off-by: Harshit Mogalapalli +Reviewed-by: Tvrtko Ursulin +Cc: # v6.0+ +Signed-off-by: Tvrtko Ursulin +Link: https://patchwork.freedesktop.org/patch/msgid/20231027172822.2753059-1-harshit.m.mogalapalli@oracle.com +[tursulin: added stable tag] +(cherry picked from commit 36f27350ff745bd228ab04d7845dfbffc177a889) +Signed-off-by: Jani Nikula +Signed-off-by: Greg Kroah-Hartman +--- + drivers/gpu/drm/i915/i915_perf.c | 15 +++------------ + 1 file changed, 3 insertions(+), 12 deletions(-) + +--- a/drivers/gpu/drm/i915/i915_perf.c ++++ b/drivers/gpu/drm/i915/i915_perf.c +@@ -4286,11 +4286,8 @@ int i915_perf_open_ioctl(struct drm_devi + u32 known_open_flags; + int ret; + +- if (!perf->i915) { +- drm_dbg(&perf->i915->drm, +- "i915 perf interface not available for this system\n"); ++ if (!perf->i915) + return -ENOTSUPP; +- } + + known_open_flags = I915_PERF_FLAG_FD_CLOEXEC | + I915_PERF_FLAG_FD_NONBLOCK | +@@ -4666,11 +4663,8 @@ int i915_perf_add_config_ioctl(struct dr + struct i915_oa_reg *regs; + int err, id; + +- if (!perf->i915) { +- drm_dbg(&perf->i915->drm, +- "i915 perf interface not available for this system\n"); ++ if (!perf->i915) + return -ENOTSUPP; +- } + + if (!perf->metrics_kobj) { + drm_dbg(&perf->i915->drm, +@@ -4832,11 +4826,8 @@ int i915_perf_remove_config_ioctl(struct + struct i915_oa_config *oa_config; + int ret; + +- if (!perf->i915) { +- drm_dbg(&perf->i915->drm, +- "i915 perf interface not available for this system\n"); ++ if (!perf->i915) + return -ENOTSUPP; +- } + + if (i915_perf_stream_paranoid && !perfmon_capable()) { + drm_dbg(&perf->i915->drm, diff --git 
a/queue-6.6/kvm-x86-clear-bit12-of-icr-after-apic-write-vm-exit.patch b/queue-6.6/kvm-x86-clear-bit12-of-icr-after-apic-write-vm-exit.patch new file mode 100644 index 00000000000..742c5cd0fc7 --- /dev/null +++ b/queue-6.6/kvm-x86-clear-bit12-of-icr-after-apic-write-vm-exit.patch @@ -0,0 +1,85 @@ +From 629d3698f6958ee6f8131ea324af794f973b12ac Mon Sep 17 00:00:00 2001 +From: Tao Su +Date: Thu, 14 Sep 2023 13:55:04 +0800 +Subject: KVM: x86: Clear bit12 of ICR after APIC-write VM-exit + +From: Tao Su + +commit 629d3698f6958ee6f8131ea324af794f973b12ac upstream. + +When IPI virtualization is enabled, a WARN is triggered if bit12 of ICR +MSR is set after APIC-write VM-exit. The reason is kvm_apic_send_ipi() +thinks the APIC_ICR_BUSY bit should be cleared because KVM has no delay, +but kvm_apic_write_nodecode() doesn't clear the APIC_ICR_BUSY bit. + +Under the x2APIC section, regarding ICR, the SDM says: + + It remains readable only to aid in debugging; however, software should + not assume the value returned by reading the ICR is the last written + value. + +I.e. the guest is allowed to set bit 12. However, the SDM also gives KVM +free reign to do whatever it wants with the bit, so long as KVM's behavior +doesn't confuse userspace or break KVM's ABI. + +Clear bit 12 so that it reads back as '0'. This approach is safer than +"do nothing" and is consistent with the case where IPI virtualization is +disabled or not supported, i.e., + + handle_fastpath_set_x2apic_icr_irqoff() -> kvm_x2apic_icr_write() + +Opportunistically replace the TODO with a comment calling out that eating +the write is likely faster than a conditional branch around the busy bit. 
+ +Link: https://lore.kernel.org/all/ZPj6iF0Q7iynn62p@google.com/ +Fixes: 5413bcba7ed5 ("KVM: x86: Add support for vICR APIC-write VM-Exits in x2APIC mode") +Cc: stable@vger.kernel.org +Signed-off-by: Tao Su +Tested-by: Yi Lai +Reviewed-by: Chao Gao +Link: https://lore.kernel.org/r/20230914055504.151365-1-tao1.su@linux.intel.com +[sean: tweak changelog, replace TODO with comment, drop local "val"] +Signed-off-by: Sean Christopherson +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/kvm/lapic.c | 26 +++++++++++++------------- + 1 file changed, 13 insertions(+), 13 deletions(-) + +--- a/arch/x86/kvm/lapic.c ++++ b/arch/x86/kvm/lapic.c +@@ -2444,22 +2444,22 @@ EXPORT_SYMBOL_GPL(kvm_lapic_set_eoi); + void kvm_apic_write_nodecode(struct kvm_vcpu *vcpu, u32 offset) + { + struct kvm_lapic *apic = vcpu->arch.apic; +- u64 val; + + /* +- * ICR is a single 64-bit register when x2APIC is enabled. For legacy +- * xAPIC, ICR writes need to go down the common (slightly slower) path +- * to get the upper half from ICR2. ++ * ICR is a single 64-bit register when x2APIC is enabled, all others ++ * registers hold 32-bit values. For legacy xAPIC, ICR writes need to ++ * go down the common path to get the upper half from ICR2. ++ * ++ * Note, using the write helpers may incur an unnecessary write to the ++ * virtual APIC state, but KVM needs to conditionally modify the value ++ * in certain cases, e.g. to clear the ICR busy bit. The cost of extra ++ * conditional branches is likely a wash relative to the cost of the ++ * maybe-unecessary write, and both are in the noise anyways. 
+ */ +- if (apic_x2apic_mode(apic) && offset == APIC_ICR) { +- val = kvm_lapic_get_reg64(apic, APIC_ICR); +- kvm_apic_send_ipi(apic, (u32)val, (u32)(val >> 32)); +- trace_kvm_apic_write(APIC_ICR, val); +- } else { +- /* TODO: optimize to just emulate side effect w/o one more write */ +- val = kvm_lapic_get_reg(apic, offset); +- kvm_lapic_reg_write(apic, offset, (u32)val); +- } ++ if (apic_x2apic_mode(apic) && offset == APIC_ICR) ++ kvm_x2apic_icr_write(apic, kvm_lapic_get_reg64(apic, APIC_ICR)); ++ else ++ kvm_lapic_reg_write(apic, offset, kvm_lapic_get_reg(apic, offset)); + } + EXPORT_SYMBOL_GPL(kvm_apic_write_nodecode); + diff --git a/queue-6.6/kvm-x86-fix-lapic-timer-interrupt-lost-after-loading-a-snapshot.patch b/queue-6.6/kvm-x86-fix-lapic-timer-interrupt-lost-after-loading-a-snapshot.patch new file mode 100644 index 00000000000..860cc9921ba --- /dev/null +++ b/queue-6.6/kvm-x86-fix-lapic-timer-interrupt-lost-after-loading-a-snapshot.patch @@ -0,0 +1,117 @@ +From 9cfec6d097c607e36199cf0cfbb8cf5acbd8e9b2 Mon Sep 17 00:00:00 2001 +From: Haitao Shan +Date: Tue, 12 Sep 2023 16:55:45 -0700 +Subject: KVM: x86: Fix lapic timer interrupt lost after loading a snapshot. + +From: Haitao Shan + +commit 9cfec6d097c607e36199cf0cfbb8cf5acbd8e9b2 upstream. + +When running android emulator (which is based on QEMU 2.12) on +certain Intel hosts with kernel version 6.3-rc1 or above, guest +will freeze after loading a snapshot. This is almost 100% +reproducible. By default, the android emulator will use snapshot +to speed up the next launching of the same android guest. So +this breaks the android emulator badly. + +I tested QEMU 8.0.4 from Debian 12 with an Ubuntu 22.04 guest by +running command "loadvm" after "savevm". The same issue is +observed. At the same time, none of our AMD platforms is impacted. +More experiments show that loading the KVM module with +"enable_apicv=false" can workaround it. 
+ +The issue started to show up after commit 8e6ed96cdd50 ("KVM: x86: +fire timer when it is migrated and expired, and in oneshot mode"). +However, as is pointed out by Sean Christopherson, it is introduced +by commit 967235d32032 ("KVM: vmx: clear pending interrupts on +KVM_SET_LAPIC"). commit 8e6ed96cdd50 ("KVM: x86: fire timer when +it is migrated and expired, and in oneshot mode") just makes it +easier to hit the issue. + +Having both commits, the oneshot lapic timer gets fired immediately +inside the KVM_SET_LAPIC call when loading the snapshot. On Intel +platforms with APIC virtualization and posted interrupt processing, +this eventually leads to setting the corresponding PIR bit. However, +the whole PIR bits get cleared later in the same KVM_SET_LAPIC call +by apicv_post_state_restore. This leads to timer interrupt lost. + +The fix is to move vmx_apicv_post_state_restore to the beginning of +the KVM_SET_LAPIC call and rename to vmx_apicv_pre_state_restore. +What vmx_apicv_post_state_restore does is actually clearing any +former apicv state and this behavior is more suitable to carry out +in the beginning. 
+ +Fixes: 967235d32032 ("KVM: vmx: clear pending interrupts on KVM_SET_LAPIC") +Cc: stable@vger.kernel.org +Suggested-by: Sean Christopherson +Signed-off-by: Haitao Shan +Link: https://lore.kernel.org/r/20230913000215.478387-1-hshan@google.com +Signed-off-by: Sean Christopherson +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/include/asm/kvm-x86-ops.h | 1 + + arch/x86/include/asm/kvm_host.h | 1 + + arch/x86/kvm/lapic.c | 4 ++++ + arch/x86/kvm/vmx/vmx.c | 4 ++-- + 4 files changed, 8 insertions(+), 2 deletions(-) + +--- a/arch/x86/include/asm/kvm-x86-ops.h ++++ b/arch/x86/include/asm/kvm-x86-ops.h +@@ -108,6 +108,7 @@ KVM_X86_OP_OPTIONAL(vcpu_blocking) + KVM_X86_OP_OPTIONAL(vcpu_unblocking) + KVM_X86_OP_OPTIONAL(pi_update_irte) + KVM_X86_OP_OPTIONAL(pi_start_assignment) ++KVM_X86_OP_OPTIONAL(apicv_pre_state_restore) + KVM_X86_OP_OPTIONAL(apicv_post_state_restore) + KVM_X86_OP_OPTIONAL_RET0(dy_apicv_has_pending_interrupt) + KVM_X86_OP_OPTIONAL(set_hv_timer) +--- a/arch/x86/include/asm/kvm_host.h ++++ b/arch/x86/include/asm/kvm_host.h +@@ -1708,6 +1708,7 @@ struct kvm_x86_ops { + int (*pi_update_irte)(struct kvm *kvm, unsigned int host_irq, + uint32_t guest_irq, bool set); + void (*pi_start_assignment)(struct kvm *kvm); ++ void (*apicv_pre_state_restore)(struct kvm_vcpu *vcpu); + void (*apicv_post_state_restore)(struct kvm_vcpu *vcpu); + bool (*dy_apicv_has_pending_interrupt)(struct kvm_vcpu *vcpu); + +--- a/arch/x86/kvm/lapic.c ++++ b/arch/x86/kvm/lapic.c +@@ -2670,6 +2670,8 @@ void kvm_lapic_reset(struct kvm_vcpu *vc + u64 msr_val; + int i; + ++ static_call_cond(kvm_x86_apicv_pre_state_restore)(vcpu); ++ + if (!init_event) { + msr_val = APIC_DEFAULT_PHYS_BASE | MSR_IA32_APICBASE_ENABLE; + if (kvm_vcpu_is_reset_bsp(vcpu)) +@@ -2981,6 +2983,8 @@ int kvm_apic_set_state(struct kvm_vcpu * + struct kvm_lapic *apic = vcpu->arch.apic; + int r; + ++ static_call_cond(kvm_x86_apicv_pre_state_restore)(vcpu); ++ + kvm_lapic_set_base(vcpu, vcpu->arch.apic_base); + /* set SPIV 
separately to get count of SW disabled APICs right */ + apic_set_spiv(apic, *((u32 *)(s->regs + APIC_SPIV))); +--- a/arch/x86/kvm/vmx/vmx.c ++++ b/arch/x86/kvm/vmx/vmx.c +@@ -6912,7 +6912,7 @@ static void vmx_load_eoi_exitmap(struct + vmcs_write64(EOI_EXIT_BITMAP3, eoi_exit_bitmap[3]); + } + +-static void vmx_apicv_post_state_restore(struct kvm_vcpu *vcpu) ++static void vmx_apicv_pre_state_restore(struct kvm_vcpu *vcpu) + { + struct vcpu_vmx *vmx = to_vmx(vcpu); + +@@ -8286,7 +8286,7 @@ static struct kvm_x86_ops vmx_x86_ops __ + .set_apic_access_page_addr = vmx_set_apic_access_page_addr, + .refresh_apicv_exec_ctrl = vmx_refresh_apicv_exec_ctrl, + .load_eoi_exitmap = vmx_load_eoi_exitmap, +- .apicv_post_state_restore = vmx_apicv_post_state_restore, ++ .apicv_pre_state_restore = vmx_apicv_pre_state_restore, + .required_apicv_inhibits = VMX_REQUIRED_APICV_INHIBITS, + .hwapic_irr_update = vmx_hwapic_irr_update, + .hwapic_isr_update = vmx_hwapic_isr_update, diff --git a/queue-6.6/kvm-x86-hyper-v-don-t-auto-enable-stimer-on-write-from-user-space.patch b/queue-6.6/kvm-x86-hyper-v-don-t-auto-enable-stimer-on-write-from-user-space.patch new file mode 100644 index 00000000000..5c164a80feb --- /dev/null +++ b/queue-6.6/kvm-x86-hyper-v-don-t-auto-enable-stimer-on-write-from-user-space.patch @@ -0,0 +1,51 @@ +From d6800af51c76b6dae20e6023bbdc9b3da3ab5121 Mon Sep 17 00:00:00 2001 +From: Nicolas Saenz Julienne +Date: Tue, 17 Oct 2023 15:51:02 +0000 +Subject: KVM: x86: hyper-v: Don't auto-enable stimer on write from user-space + +From: Nicolas Saenz Julienne + +commit d6800af51c76b6dae20e6023bbdc9b3da3ab5121 upstream. + +Don't apply the stimer's counter side effects when modifying its +value from user-space, as this may trigger spurious interrupts. + +For example: + - The stimer is configured in auto-enable mode. + - The stimer's count is set and the timer enabled. + - The stimer expires, an interrupt is injected. + - The VM is live migrated. 
+ - The stimer config and count are deserialized, auto-enable is ON, the + stimer is re-enabled. + - The stimer expires right away, and injects an unwarranted interrupt. + +Cc: stable@vger.kernel.org +Fixes: 1f4b34f825e8 ("kvm/x86: Hyper-V SynIC timers") +Signed-off-by: Nicolas Saenz Julienne +Reviewed-by: Vitaly Kuznetsov +Link: https://lore.kernel.org/r/20231017155101.40677-1-nsaenz@amazon.com +Signed-off-by: Sean Christopherson +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/kvm/hyperv.c | 10 ++++++---- + 1 file changed, 6 insertions(+), 4 deletions(-) + +--- a/arch/x86/kvm/hyperv.c ++++ b/arch/x86/kvm/hyperv.c +@@ -727,10 +727,12 @@ static int stimer_set_count(struct kvm_v + + stimer_cleanup(stimer); + stimer->count = count; +- if (stimer->count == 0) +- stimer->config.enable = 0; +- else if (stimer->config.auto_enable) +- stimer->config.enable = 1; ++ if (!host) { ++ if (stimer->count == 0) ++ stimer->config.enable = 0; ++ else if (stimer->config.auto_enable) ++ stimer->config.enable = 1; ++ } + + if (stimer->config.enable) + stimer_mark_pending(stimer, false); diff --git a/queue-6.6/kvm-x86-ignore-msr_amd64_tw_cfg-access.patch b/queue-6.6/kvm-x86-ignore-msr_amd64_tw_cfg-access.patch new file mode 100644 index 00000000000..8d163826fee --- /dev/null +++ b/queue-6.6/kvm-x86-ignore-msr_amd64_tw_cfg-access.patch @@ -0,0 +1,78 @@ +From 2770d4722036d6bd24bcb78e9cd7f6e572077d03 Mon Sep 17 00:00:00 2001 +From: "Maciej S. Szmigiero" +Date: Thu, 19 Oct 2023 18:06:57 +0200 +Subject: KVM: x86: Ignore MSR_AMD64_TW_CFG access + +From: Maciej S. Szmigiero + +commit 2770d4722036d6bd24bcb78e9cd7f6e572077d03 upstream. + +Hyper-V enabled Windows Server 2022 KVM VM cannot be started on Zen1 Ryzen +since it crashes at boot with SYSTEM_THREAD_EXCEPTION_NOT_HANDLED + +STATUS_PRIVILEGED_INSTRUCTION (in other words, because of an unexpected #GP +in the guest kernel). + +This is because Windows tries to set bit 8 in MSR_AMD64_TW_CFG and can't +handle receiving a #GP when doing so. 
+ +Give this MSR the same treatment that commit 2e32b7190641 +("x86, kvm: Add MSR_AMD64_BU_CFG2 to the list of ignored MSRs") gave +MSR_AMD64_BU_CFG2 under justification that this MSR is baremetal-relevant +only. +Although apparently it was then needed for Linux guests, not Windows as in +this case. + +With this change, the aforementioned guest setup is able to finish booting +successfully. + +This issue can be reproduced either on a Summit Ridge Ryzen (with +just "-cpu host") or on a Naples EPYC (with "-cpu host,stepping=1" since +EPYC is ordinarily stepping 2). + +Alternatively, userspace could solve the problem by using MSR filters, but +forcing every userspace to define a filter isn't very friendly and doesn't +add much, if any, value. The only potential hiccup is if one of these +"baremetal-only" MSRs ever requires actual emulation and/or has F/M/S +specific behavior. But if that happens, then KVM can still punt *that* +handling to userspace since userspace MSR filters "win" over KVM's default +handling. + +Signed-off-by: Maciej S. 
Szmigiero +Cc: stable@vger.kernel.org +Link: https://lore.kernel.org/r/1ce85d9c7c9e9632393816cf19c902e0a3f411f1.1697731406.git.maciej.szmigiero@oracle.com +[sean: call out MSR filtering alternative] +Signed-off-by: Sean Christopherson +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/include/asm/msr-index.h | 1 + + arch/x86/kvm/x86.c | 2 ++ + 2 files changed, 3 insertions(+) + +--- a/arch/x86/include/asm/msr-index.h ++++ b/arch/x86/include/asm/msr-index.h +@@ -553,6 +553,7 @@ + #define MSR_AMD64_CPUID_FN_1 0xc0011004 + #define MSR_AMD64_LS_CFG 0xc0011020 + #define MSR_AMD64_DC_CFG 0xc0011022 ++#define MSR_AMD64_TW_CFG 0xc0011023 + + #define MSR_AMD64_DE_CFG 0xc0011029 + #define MSR_AMD64_DE_CFG_LFENCE_SERIALIZE_BIT 1 +--- a/arch/x86/kvm/x86.c ++++ b/arch/x86/kvm/x86.c +@@ -3641,6 +3641,7 @@ int kvm_set_msr_common(struct kvm_vcpu * + case MSR_AMD64_PATCH_LOADER: + case MSR_AMD64_BU_CFG2: + case MSR_AMD64_DC_CFG: ++ case MSR_AMD64_TW_CFG: + case MSR_F15H_EX_CFG: + break; + +@@ -4065,6 +4066,7 @@ int kvm_get_msr_common(struct kvm_vcpu * + case MSR_AMD64_BU_CFG2: + case MSR_IA32_PERF_CTL: + case MSR_AMD64_DC_CFG: ++ case MSR_AMD64_TW_CFG: + case MSR_F15H_EX_CFG: + /* + * Intel Sandy Bridge CPUs must support the RAPL (running average power diff --git a/queue-6.6/media-venus-hfi-add-checks-to-perform-sanity-on-queue-pointers.patch b/queue-6.6/media-venus-hfi-add-checks-to-perform-sanity-on-queue-pointers.patch new file mode 100644 index 00000000000..aa19a07d535 --- /dev/null +++ b/queue-6.6/media-venus-hfi-add-checks-to-perform-sanity-on-queue-pointers.patch @@ -0,0 +1,50 @@ +From 5e538fce33589da6d7cb2de1445b84d3a8a692f7 Mon Sep 17 00:00:00 2001 +From: Vikash Garodia +Date: Thu, 10 Aug 2023 07:55:01 +0530 +Subject: media: venus: hfi: add checks to perform sanity on queue pointers + +From: Vikash Garodia + +commit 5e538fce33589da6d7cb2de1445b84d3a8a692f7 upstream. 
+ +Read and write pointers are used to track the packet index in the memory +shared between video driver and firmware. There is a possibility of OOB +access if the read or write pointer goes beyond the queue memory size. +Add checks for the read and write pointer to avoid OOB access. + +Cc: stable@vger.kernel.org +Fixes: d96d3f30c0f2 ("[media] media: venus: hfi: add Venus HFI files") +Signed-off-by: Vikash Garodia +Signed-off-by: Stanimir Varbanov +Signed-off-by: Hans Verkuil +Signed-off-by: Greg Kroah-Hartman +--- + drivers/media/platform/qcom/venus/hfi_venus.c | 10 ++++++++++ + 1 file changed, 10 insertions(+) + +--- a/drivers/media/platform/qcom/venus/hfi_venus.c ++++ b/drivers/media/platform/qcom/venus/hfi_venus.c +@@ -205,6 +205,11 @@ static int venus_write_queue(struct venu + + new_wr_idx = wr_idx + dwords; + wr_ptr = (u32 *)(queue->qmem.kva + (wr_idx << 2)); ++ ++ if (wr_ptr < (u32 *)queue->qmem.kva || ++ wr_ptr > (u32 *)(queue->qmem.kva + queue->qmem.size - sizeof(*wr_ptr))) ++ return -EINVAL; ++ + if (new_wr_idx < qsize) { + memcpy(wr_ptr, packet, dwords << 2); + } else { +@@ -272,6 +277,11 @@ static int venus_read_queue(struct venus + } + + rd_ptr = (u32 *)(queue->qmem.kva + (rd_idx << 2)); ++ ++ if (rd_ptr < (u32 *)queue->qmem.kva || ++ rd_ptr > (u32 *)(queue->qmem.kva + queue->qmem.size - sizeof(*rd_ptr))) ++ return -EINVAL; ++ + dwords = *rd_ptr >> 2; + if (!dwords) + return -EINVAL; diff --git a/queue-6.6/mmc-sdhci-pci-gli-gl9755-mask-the-replay-timer-timeout-of-aer.patch b/queue-6.6/mmc-sdhci-pci-gli-gl9755-mask-the-replay-timer-timeout-of-aer.patch new file mode 100644 index 00000000000..b16aa1af1dd --- /dev/null +++ b/queue-6.6/mmc-sdhci-pci-gli-gl9755-mask-the-replay-timer-timeout-of-aer.patch @@ -0,0 +1,50 @@ +From 85dd3af64965c1c0eb7373b340a1b1f7773586b0 Mon Sep 17 00:00:00 2001 +From: Victor Shih +Date: Tue, 7 Nov 2023 17:57:41 +0800 +Subject: mmc: sdhci-pci-gli: GL9755: Mask the replay timer timeout of AER + +From: Victor Shih + +commit 
85dd3af64965c1c0eb7373b340a1b1f7773586b0 upstream. + +Due to a flaw in the hardware design, the GL9755 replay timer frequently +times out when ASPM is enabled. As a result, the warning messages will +often appear in the system log when the system accesses the GL9755 +PCI config. Therefore, the replay timer timeout must be masked. + +Fixes: 36ed2fd32b2c ("mmc: sdhci-pci-gli: A workaround to allow GL9755 to enter ASPM L1.2") +Signed-off-by: Victor Shih +Acked-by: Adrian Hunter +Acked-by: Kai-Heng Feng +Cc: stable@vger.kernel.org +Link: https://lore.kernel.org/r/20231107095741.8832-3-victorshihgli@gmail.com +Signed-off-by: Ulf Hansson +Signed-off-by: Greg Kroah-Hartman +--- + drivers/mmc/host/sdhci-pci-gli.c | 8 ++++++++ + 1 file changed, 8 insertions(+) + +--- a/drivers/mmc/host/sdhci-pci-gli.c ++++ b/drivers/mmc/host/sdhci-pci-gli.c +@@ -149,6 +149,9 @@ + #define PCI_GLI_9755_PM_CTRL 0xFC + #define PCI_GLI_9755_PM_STATE GENMASK(1, 0) + ++#define PCI_GLI_9755_CORRERR_MASK 0x214 ++#define PCI_GLI_9755_CORRERR_MASK_REPLAY_TIMER_TIMEOUT BIT(12) ++ + #define SDHCI_GLI_9767_GM_BURST_SIZE 0x510 + #define SDHCI_GLI_9767_GM_BURST_SIZE_AXI_ALWAYS_SET BIT(8) + +@@ -756,6 +759,11 @@ static void gl9755_hw_setting(struct sdh + value &= ~PCI_GLI_9755_PM_STATE; + pci_write_config_dword(pdev, PCI_GLI_9755_PM_CTRL, value); + ++ /* mask the replay timer timeout of AER */ ++ pci_read_config_dword(pdev, PCI_GLI_9755_CORRERR_MASK, &value); ++ value |= PCI_GLI_9755_CORRERR_MASK_REPLAY_TIMER_TIMEOUT; ++ pci_write_config_dword(pdev, PCI_GLI_9755_CORRERR_MASK, value); ++ + gl9755_wt_off(pdev); + } + diff --git a/queue-6.6/perf-arm_cspmu-reject-events-meant-for-other-pmus.patch b/queue-6.6/perf-arm_cspmu-reject-events-meant-for-other-pmus.patch new file mode 100644 index 00000000000..37a3d943bc9 --- /dev/null +++ b/queue-6.6/perf-arm_cspmu-reject-events-meant-for-other-pmus.patch @@ -0,0 +1,44 @@ +From 15c7ef7341a2e54cfa12ac502c65d6fd2cce2b62 Mon Sep 17 00:00:00 2001 +From: Ilkka Koskinen 
+Date: Thu, 2 Nov 2023 17:16:54 -0700 +Subject: perf: arm_cspmu: Reject events meant for other PMUs + +From: Ilkka Koskinen + +commit 15c7ef7341a2e54cfa12ac502c65d6fd2cce2b62 upstream. + +Coresight PMU driver didn't reject events meant for other PMUs. +This caused some of the Core PMU events disappearing from +the output of "perf list". In addition, trying to run e.g. + + $ perf stat -e r2 sleep 1 + +made Coresight PMU driver to handle the event instead of letting +Core PMU driver to deal with it. + +Cc: stable@vger.kernel.org +Fixes: e37dfd65731d ("perf: arm_cspmu: Add support for ARM CoreSight PMU driver") +Signed-off-by: Ilkka Koskinen +Acked-by: Will Deacon +Reviewed-by: Besar Wicaksono +Acked-by: Mark Rutland +Reviewed-by: Anshuman Khandual +Link: https://lore.kernel.org/r/20231103001654.35565-1-ilkka@os.amperecomputing.com +Signed-off-by: Catalin Marinas +Signed-off-by: Greg Kroah-Hartman +--- + drivers/perf/arm_cspmu/arm_cspmu.c | 3 +++ + 1 file changed, 3 insertions(+) + +--- a/drivers/perf/arm_cspmu/arm_cspmu.c ++++ b/drivers/perf/arm_cspmu/arm_cspmu.c +@@ -635,6 +635,9 @@ static int arm_cspmu_event_init(struct p + + cspmu = to_arm_cspmu(event->pmu); + ++ if (event->attr.type != event->pmu->type) ++ return -ENOENT; ++ + /* + * Following other "uncore" PMUs, we do not support sampling mode or + * attach to a task (per-process mode). diff --git a/queue-6.6/perf-core-fix-cpuctx-refcounting.patch b/queue-6.6/perf-core-fix-cpuctx-refcounting.patch new file mode 100644 index 00000000000..59c6adca58a --- /dev/null +++ b/queue-6.6/perf-core-fix-cpuctx-refcounting.patch @@ -0,0 +1,99 @@ +From 889c58b3155ff4c8e8671c95daef63d6fabbb6b1 Mon Sep 17 00:00:00 2001 +From: Peter Zijlstra +Date: Fri, 9 Jun 2023 12:34:46 +0200 +Subject: perf/core: Fix cpuctx refcounting + +From: Peter Zijlstra + +commit 889c58b3155ff4c8e8671c95daef63d6fabbb6b1 upstream. + +Audit of the refcounting turned up that perf_pmu_migrate_context() +fails to migrate the ctx refcount. 
+ +Fixes: bd2756811766 ("perf: Rewrite core context handling") +Signed-off-by: Peter Zijlstra (Intel) +Signed-off-by: Ingo Molnar +Link: https://lkml.kernel.org/r/20230612093539.085862001@infradead.org +Cc: +Signed-off-by: Greg Kroah-Hartman +--- + include/linux/perf_event.h | 13 ++++++++----- + kernel/events/core.c | 17 +++++++++++++++++ + 2 files changed, 25 insertions(+), 5 deletions(-) + +--- a/include/linux/perf_event.h ++++ b/include/linux/perf_event.h +@@ -843,11 +843,11 @@ struct perf_event { + }; + + /* +- * ,-----------------------[1:n]----------------------. +- * V V +- * perf_event_context <-[1:n]-> perf_event_pmu_context <--- perf_event +- * ^ ^ | | +- * `--------[1:n]---------' `-[n:1]-> pmu <-[1:n]-' ++ * ,-----------------------[1:n]------------------------. ++ * V V ++ * perf_event_context <-[1:n]-> perf_event_pmu_context <-[1:n]- perf_event ++ * | | ++ * `--[n:1]-> pmu <-[1:n]--' + * + * + * struct perf_event_pmu_context lifetime is refcount based and RCU freed +@@ -865,6 +865,9 @@ struct perf_event { + * ctx->mutex pinning the configuration. Since we hold a reference on + * group_leader (through the filedesc) it can't go away, therefore it's + * associated pmu_ctx must exist and cannot change due to ctx->mutex. ++ * ++ * perf_event holds a refcount on perf_event_context ++ * perf_event holds a refcount on perf_event_pmu_context + */ + struct perf_event_pmu_context { + struct pmu *pmu; +--- a/kernel/events/core.c ++++ b/kernel/events/core.c +@@ -4816,6 +4816,11 @@ find_get_pmu_context(struct pmu *pmu, st + void *task_ctx_data = NULL; + + if (!ctx->task) { ++ /* ++ * perf_pmu_migrate_context() / __perf_pmu_install_event() ++ * relies on the fact that find_get_pmu_context() cannot fail ++ * for CPU contexts. 
++ */ + struct perf_cpu_pmu_context *cpc; + + cpc = per_cpu_ptr(pmu->cpu_pmu_context, event->cpu); +@@ -12877,6 +12882,9 @@ static void __perf_pmu_install_event(str + int cpu, struct perf_event *event) + { + struct perf_event_pmu_context *epc; ++ struct perf_event_context *old_ctx = event->ctx; ++ ++ get_ctx(ctx); /* normally find_get_context() */ + + event->cpu = cpu; + epc = find_get_pmu_context(pmu, ctx, event); +@@ -12885,6 +12893,11 @@ static void __perf_pmu_install_event(str + if (event->state >= PERF_EVENT_STATE_OFF) + event->state = PERF_EVENT_STATE_INACTIVE; + perf_install_in_context(ctx, event, cpu); ++ ++ /* ++ * Now that event->ctx is updated and visible, put the old ctx. ++ */ ++ put_ctx(old_ctx); + } + + static void __perf_pmu_install(struct perf_event_context *ctx, +@@ -12923,6 +12936,10 @@ void perf_pmu_migrate_context(struct pmu + struct perf_event_context *src_ctx, *dst_ctx; + LIST_HEAD(events); + ++ /* ++ * Since per-cpu context is persistent, no need to grab an extra ++ * reference. ++ */ + src_ctx = &per_cpu_ptr(&perf_cpu_context, src_cpu)->ctx; + dst_ctx = &per_cpu_ptr(&perf_cpu_context, dst_cpu)->ctx; + diff --git a/queue-6.6/perf-intel-pt-fix-async-branch-flags.patch b/queue-6.6/perf-intel-pt-fix-async-branch-flags.patch new file mode 100644 index 00000000000..f45610f1f16 --- /dev/null +++ b/queue-6.6/perf-intel-pt-fix-async-branch-flags.patch @@ -0,0 +1,37 @@ +From f2d87895cbc4af80649850dcf5da36de6b2ed3dd Mon Sep 17 00:00:00 2001 +From: Adrian Hunter +Date: Thu, 28 Sep 2023 10:29:53 +0300 +Subject: perf intel-pt: Fix async branch flags + +From: Adrian Hunter + +commit f2d87895cbc4af80649850dcf5da36de6b2ed3dd upstream. + +Ensure PERF_IP_FLAG_ASYNC is set always for asynchronous branches (i.e. +interrupts etc). 
+ +Fixes: 90e457f7be08 ("perf tools: Add Intel PT support") +Cc: stable@vger.kernel.org +Signed-off-by: Adrian Hunter +Acked-by: Namhyung Kim +Link: https://lore.kernel.org/r/20230928072953.19369-1-adrian.hunter@intel.com +Signed-off-by: Namhyung Kim +Signed-off-by: Greg Kroah-Hartman +--- + tools/perf/util/intel-pt.c | 2 ++ + 1 file changed, 2 insertions(+) + +--- a/tools/perf/util/intel-pt.c ++++ b/tools/perf/util/intel-pt.c +@@ -1512,9 +1512,11 @@ static void intel_pt_sample_flags(struct + } else if (ptq->state->flags & INTEL_PT_ASYNC) { + if (!ptq->state->to_ip) + ptq->flags = PERF_IP_FLAG_BRANCH | ++ PERF_IP_FLAG_ASYNC | + PERF_IP_FLAG_TRACE_END; + else if (ptq->state->from_nr && !ptq->state->to_nr) + ptq->flags = PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_CALL | ++ PERF_IP_FLAG_ASYNC | + PERF_IP_FLAG_VMEXIT; + else + ptq->flags = PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_CALL | diff --git a/queue-6.6/powerpc-perf-fix-disabling-bhrb-and-instruction-sampling.patch b/queue-6.6/powerpc-perf-fix-disabling-bhrb-and-instruction-sampling.patch new file mode 100644 index 00000000000..1b25c58bfcb --- /dev/null +++ b/queue-6.6/powerpc-perf-fix-disabling-bhrb-and-instruction-sampling.patch @@ -0,0 +1,44 @@ +From ea142e590aec55ba40c5affb4d49e68c713c63dc Mon Sep 17 00:00:00 2001 +From: Nicholas Piggin +Date: Thu, 19 Oct 2023 01:34:23 +1000 +Subject: powerpc/perf: Fix disabling BHRB and instruction sampling + +From: Nicholas Piggin + +commit ea142e590aec55ba40c5affb4d49e68c713c63dc upstream. + +When the PMU is disabled, MMCRA is not updated to disable BHRB and +instruction sampling. This can lead to those features remaining enabled, +which can slow down a real or emulated CPU. 
+ +Fixes: 1cade527f6e9 ("powerpc/perf: BHRB control to disable BHRB logic when not used") +Cc: stable@vger.kernel.org # v5.9+ +Signed-off-by: Nicholas Piggin +Signed-off-by: Michael Ellerman +Link: https://msgid.link/20231018153423.298373-1-npiggin@gmail.com +Signed-off-by: Greg Kroah-Hartman +--- + arch/powerpc/perf/core-book3s.c | 5 ++--- + 1 file changed, 2 insertions(+), 3 deletions(-) + +--- a/arch/powerpc/perf/core-book3s.c ++++ b/arch/powerpc/perf/core-book3s.c +@@ -1371,8 +1371,7 @@ static void power_pmu_disable(struct pmu + /* + * Disable instruction sampling if it was enabled + */ +- if (cpuhw->mmcr.mmcra & MMCRA_SAMPLE_ENABLE) +- val &= ~MMCRA_SAMPLE_ENABLE; ++ val &= ~MMCRA_SAMPLE_ENABLE; + + /* Disable BHRB via mmcra (BHRBRD) for p10 */ + if (ppmu->flags & PPMU_ARCH_31) +@@ -1383,7 +1382,7 @@ static void power_pmu_disable(struct pmu + * instruction sampling or BHRB. + */ + if (val != mmcra) { +- mtspr(SPRN_MMCRA, mmcra); ++ mtspr(SPRN_MMCRA, val); + mb(); + isync(); + } diff --git a/queue-6.6/randstruct-fix-gcc-plugin-performance-mode-to-stay-in-group.patch b/queue-6.6/randstruct-fix-gcc-plugin-performance-mode-to-stay-in-group.patch new file mode 100644 index 00000000000..7e158c6254e --- /dev/null +++ b/queue-6.6/randstruct-fix-gcc-plugin-performance-mode-to-stay-in-group.patch @@ -0,0 +1,59 @@ +From 381fdb73d1e2a48244de7260550e453d1003bb8e Mon Sep 17 00:00:00 2001 +From: Kees Cook +Date: Fri, 6 Oct 2023 21:09:28 -0700 +Subject: randstruct: Fix gcc-plugin performance mode to stay in group + +From: Kees Cook + +commit 381fdb73d1e2a48244de7260550e453d1003bb8e upstream. + +The performance mode of the gcc-plugin randstruct was shuffling struct +members outside of the cache-line groups. Limit the range to the +specified group indexes. 
+ +Cc: linux-hardening@vger.kernel.org +Cc: stable@vger.kernel.org +Reported-by: Lukas Loidolt +Closes: https://lore.kernel.org/all/f3ca77f0-e414-4065-83a5-ae4c4d25545d@student.tuwien.ac.at +Fixes: 313dd1b62921 ("gcc-plugins: Add the randstruct plugin") +Signed-off-by: Kees Cook +Signed-off-by: Greg Kroah-Hartman +--- + scripts/gcc-plugins/randomize_layout_plugin.c | 11 ++++++++--- + 1 file changed, 8 insertions(+), 3 deletions(-) + +--- a/scripts/gcc-plugins/randomize_layout_plugin.c ++++ b/scripts/gcc-plugins/randomize_layout_plugin.c +@@ -191,12 +191,14 @@ static void partition_struct(tree *field + + static void performance_shuffle(tree *newtree, unsigned long length, ranctx *prng_state) + { +- unsigned long i, x; ++ unsigned long i, x, index; + struct partition_group size_group[length]; + unsigned long num_groups = 0; + unsigned long randnum; + + partition_struct(newtree, length, (struct partition_group *)&size_group, &num_groups); ++ ++ /* FIXME: this group shuffle is currently a no-op. 
*/ + for (i = num_groups - 1; i > 0; i--) { + struct partition_group tmp; + randnum = ranval(prng_state) % (i + 1); +@@ -206,11 +208,14 @@ static void performance_shuffle(tree *ne + } + + for (x = 0; x < num_groups; x++) { +- for (i = size_group[x].start + size_group[x].length - 1; i > size_group[x].start; i--) { ++ for (index = size_group[x].length - 1; index > 0; index--) { + tree tmp; ++ ++ i = size_group[x].start + index; + if (DECL_BIT_FIELD_TYPE(newtree[i])) + continue; +- randnum = ranval(prng_state) % (i + 1); ++ randnum = ranval(prng_state) % (index + 1); ++ randnum += size_group[x].start; + // we could handle this case differently if desired + if (DECL_BIT_FIELD_TYPE(newtree[randnum])) + continue; diff --git a/queue-6.6/scsi-megaraid_sas-increase-register-read-retry-rount-from-3-to-30-for-selected-registers.patch b/queue-6.6/scsi-megaraid_sas-increase-register-read-retry-rount-from-3-to-30-for-selected-registers.patch new file mode 100644 index 00000000000..db34d7b569b --- /dev/null +++ b/queue-6.6/scsi-megaraid_sas-increase-register-read-retry-rount-from-3-to-30-for-selected-registers.patch @@ -0,0 +1,47 @@ +From 8e3ed9e786511ad800c33605ed904b9de49323cf Mon Sep 17 00:00:00 2001 +From: Chandrakanth patil +Date: Tue, 3 Oct 2023 16:30:18 +0530 +Subject: scsi: megaraid_sas: Increase register read retry rount from 3 to 30 for selected registers + +From: Chandrakanth patil + +commit 8e3ed9e786511ad800c33605ed904b9de49323cf upstream. + +In BMC environments with concurrent access to multiple registers, certain +registers occasionally yield a value of 0 even after 3 retries due to +hardware errata. As a fix, we have extended the retry count from 3 to 30. + +The same errata applies to the mpt3sas driver, and a similar patch has +been accepted. Please find more details in the mpt3sas patch reference +link. 
+ +Link: https://lore.kernel.org/r/20230829090020.5417-2-ranjan.kumar@broadcom.com +Fixes: 272652fcbf1a ("scsi: megaraid_sas: add retry logic in megasas_readl") +Cc: stable@vger.kernel.org +Signed-off-by: Chandrakanth patil +Signed-off-by: Sumit Saxena +Link: https://lore.kernel.org/r/20231003110021.168862-2-chandrakanth.patil@broadcom.com +Signed-off-by: Martin K. Petersen +Signed-off-by: Greg Kroah-Hartman +--- + drivers/scsi/megaraid/megaraid_sas_base.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +--- a/drivers/scsi/megaraid/megaraid_sas_base.c ++++ b/drivers/scsi/megaraid/megaraid_sas_base.c +@@ -263,13 +263,13 @@ u32 megasas_readl(struct megasas_instanc + * Fusion registers could intermittently return all zeroes. + * This behavior is transient in nature and subsequent reads will + * return valid value. As a workaround in driver, retry readl for +- * upto three times until a non-zero value is read. ++ * up to thirty times until a non-zero value is read. + */ + if (instance->adapter_type == AERO_SERIES) { + do { + ret_val = readl(addr); + i++; +- } while (ret_val == 0 && i < 3); ++ } while (ret_val == 0 && i < 30); + return ret_val; + } else { + return readl(addr); diff --git a/queue-6.6/scsi-mpt3sas-fix-loop-logic.patch b/queue-6.6/scsi-mpt3sas-fix-loop-logic.patch new file mode 100644 index 00000000000..00056d7a17e --- /dev/null +++ b/queue-6.6/scsi-mpt3sas-fix-loop-logic.patch @@ -0,0 +1,35 @@ +From 3c978492c333f0c08248a8d51cecbe5eb5f617c9 Mon Sep 17 00:00:00 2001 +From: Ranjan Kumar +Date: Fri, 20 Oct 2023 16:28:49 +0530 +Subject: scsi: mpt3sas: Fix loop logic + +From: Ranjan Kumar + +commit 3c978492c333f0c08248a8d51cecbe5eb5f617c9 upstream. + +The retry loop continues to iterate until the count reaches 30, even after +receiving the correct value. Exit loop when a non-zero value is read. 
+ +Fixes: 4ca10f3e3174 ("scsi: mpt3sas: Perform additional retries if doorbell read returns 0") +Cc: stable@vger.kernel.org +Signed-off-by: Ranjan Kumar +Link: https://lore.kernel.org/r/20231020105849.6350-1-ranjan.kumar@broadcom.com +Signed-off-by: Martin K. Petersen +Signed-off-by: Greg Kroah-Hartman +--- + drivers/scsi/mpt3sas/mpt3sas_base.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +--- a/drivers/scsi/mpt3sas/mpt3sas_base.c ++++ b/drivers/scsi/mpt3sas/mpt3sas_base.c +@@ -223,8 +223,8 @@ _base_readl_ext_retry(const void __iomem + + for (i = 0 ; i < 30 ; i++) { + ret_val = readl(addr); +- if (ret_val == 0) +- continue; ++ if (ret_val != 0) ++ break; + } + + return ret_val; diff --git a/queue-6.6/scsi-qla2xxx-fix-system-crash-due-to-bad-pointer-access.patch b/queue-6.6/scsi-qla2xxx-fix-system-crash-due-to-bad-pointer-access.patch new file mode 100644 index 00000000000..ecf3a029cfc --- /dev/null +++ b/queue-6.6/scsi-qla2xxx-fix-system-crash-due-to-bad-pointer-access.patch @@ -0,0 +1,72 @@ +From 19597cad64d608aa8ac2f8aef50a50187a565223 Mon Sep 17 00:00:00 2001 +From: Quinn Tran +Date: Mon, 30 Oct 2023 12:19:12 +0530 +Subject: scsi: qla2xxx: Fix system crash due to bad pointer access + +From: Quinn Tran + +commit 19597cad64d608aa8ac2f8aef50a50187a565223 upstream. + +User experiences system crash when running AER error injection. The +perturbation causes the abort-all-I/O path to trigger. The driver assumes +all I/O on this path is FCP only. If there is both NVMe & FCP traffic, a +system crash happens. Add additional check to see if I/O is FCP or not +before access. 
+ +PID: 999019 TASK: ff35d769f24722c0 CPU: 53 COMMAND: "kworker/53:1" + 0 [ff3f78b964847b58] machine_kexec at ffffffffae86973d + 1 [ff3f78b964847ba8] __crash_kexec at ffffffffae9be29d + 2 [ff3f78b964847c70] crash_kexec at ffffffffae9bf528 + 3 [ff3f78b964847c78] oops_end at ffffffffae8282ab + 4 [ff3f78b964847c98] exc_page_fault at ffffffffaf2da502 + 5 [ff3f78b964847cc0] asm_exc_page_fault at ffffffffaf400b62 + [exception RIP: qla2x00_abort_srb+444] + RIP: ffffffffc07b5f8c RSP: ff3f78b964847d78 RFLAGS: 00010046 + RAX: 0000000000000282 RBX: ff35d74a0195a200 RCX: ff35d76886fd03a0 + RDX: 0000000000000001 RSI: ffffffffc07c5ec8 RDI: ff35d74a0195a200 + RBP: ff35d76913d22080 R8: ff35d7694d103200 R9: ff35d7694d103200 + R10: 0000000100000000 R11: ffffffffb05d6630 R12: 0000000000010000 + R13: ff3f78b964847df8 R14: ff35d768d8754000 R15: ff35d768877248e0 + ORIG_RAX: ffffffffffffffff CS: 0010 SS: 0018 + 6 [ff3f78b964847d70] qla2x00_abort_srb at ffffffffc07b5f84 [qla2xxx] + 7 [ff3f78b964847de0] __qla2x00_abort_all_cmds at ffffffffc07b6238 [qla2xxx] + 8 [ff3f78b964847e38] qla2x00_abort_all_cmds at ffffffffc07ba635 [qla2xxx] + 9 [ff3f78b964847e58] qla2x00_terminate_rport_io at ffffffffc08145eb [qla2xxx] +10 [ff3f78b964847e70] fc_terminate_rport_io at ffffffffc045987e [scsi_transport_fc] +11 [ff3f78b964847e88] process_one_work at ffffffffae914f15 +12 [ff3f78b964847ed0] worker_thread at ffffffffae9154c0 +13 [ff3f78b964847f10] kthread at ffffffffae91c456 +14 [ff3f78b964847f50] ret_from_fork at ffffffffae8036ef + +Cc: stable@vger.kernel.org +Fixes: f45bca8c5052 ("scsi: qla2xxx: Fix double scsi_done for abort path") +Signed-off-by: Quinn Tran +Signed-off-by: Nilesh Javali +Link: https://lore.kernel.org/r/20231030064912.37912-1-njavali@marvell.com +Signed-off-by: Martin K. 
Petersen +Signed-off-by: Greg Kroah-Hartman +--- + drivers/scsi/qla2xxx/qla_os.c | 12 ++++++++++-- + 1 file changed, 10 insertions(+), 2 deletions(-) + +--- a/drivers/scsi/qla2xxx/qla_os.c ++++ b/drivers/scsi/qla2xxx/qla_os.c +@@ -1836,8 +1836,16 @@ static void qla2x00_abort_srb(struct qla + } + + spin_lock_irqsave(qp->qp_lock_ptr, *flags); +- if (ret_cmd && blk_mq_request_started(scsi_cmd_to_rq(cmd))) +- sp->done(sp, res); ++ switch (sp->type) { ++ case SRB_SCSI_CMD: ++ if (ret_cmd && blk_mq_request_started(scsi_cmd_to_rq(cmd))) ++ sp->done(sp, res); ++ break; ++ default: ++ if (ret_cmd) ++ sp->done(sp, res); ++ break; ++ } + } else { + sp->done(sp, res); + } diff --git a/queue-6.6/scsi-ufs-core-fix-racing-issue-between-ufshcd_mcq_abort-and-isr.patch b/queue-6.6/scsi-ufs-core-fix-racing-issue-between-ufshcd_mcq_abort-and-isr.patch new file mode 100644 index 00000000000..5ed877cacb3 --- /dev/null +++ b/queue-6.6/scsi-ufs-core-fix-racing-issue-between-ufshcd_mcq_abort-and-isr.patch @@ -0,0 +1,51 @@ +From 27900d7119c464b43cd9eac69c85884d17bae240 Mon Sep 17 00:00:00 2001 +From: Peter Wang +Date: Mon, 6 Nov 2023 15:51:17 +0800 +Subject: scsi: ufs: core: Fix racing issue between ufshcd_mcq_abort() and ISR + +From: Peter Wang + +commit 27900d7119c464b43cd9eac69c85884d17bae240 upstream. + +If command timeout happens and cq complete IRQ is raised at the same time, +ufshcd_mcq_abort clears lrbp->cmd and a NULL pointer deref happens in the +ISR. Error log: + +ufshcd_abort: Device abort task at tag 18 +Unable to handle kernel NULL pointer dereference at virtual address +0000000000000108 +pc : [0xffffffe27ef867ac] scsi_dma_unmap+0xc/0x44 +lr : [0xffffffe27f1b898c] ufshcd_release_scsi_cmd+0x24/0x114 + +Fixes: f1304d442077 ("scsi: ufs: mcq: Added ufshcd_mcq_abort()") +Cc: stable@vger.kernel.org +Signed-off-by: Peter Wang +Link: https://lore.kernel.org/r/20231106075117.8995-1-peter.wang@mediatek.com +Reviewed-by: Bart Van Assche +Signed-off-by: Martin K. 
Petersen +Signed-off-by: Greg Kroah-Hartman +--- + drivers/ufs/core/ufs-mcq.c | 3 +++ + 1 file changed, 3 insertions(+) + +--- a/drivers/ufs/core/ufs-mcq.c ++++ b/drivers/ufs/core/ufs-mcq.c +@@ -630,6 +630,7 @@ int ufshcd_mcq_abort(struct scsi_cmnd *c + int tag = scsi_cmd_to_rq(cmd)->tag; + struct ufshcd_lrb *lrbp = &hba->lrb[tag]; + struct ufs_hw_queue *hwq; ++ unsigned long flags; + int err = FAILED; + + if (!ufshcd_cmd_inflight(lrbp->cmd)) { +@@ -670,8 +671,10 @@ int ufshcd_mcq_abort(struct scsi_cmnd *c + } + + err = SUCCESS; ++ spin_lock_irqsave(&hwq->cq_lock, flags); + if (ufshcd_cmd_inflight(lrbp->cmd)) + ufshcd_release_scsi_cmd(hba, lrbp); ++ spin_unlock_irqrestore(&hwq->cq_lock, flags); + + out: + return err; diff --git a/queue-6.6/scsi-ufs-qcom-update-phy-settings-only-when-scaling-to-higher-gears.patch b/queue-6.6/scsi-ufs-qcom-update-phy-settings-only-when-scaling-to-higher-gears.patch new file mode 100644 index 00000000000..d8b4743904a --- /dev/null +++ b/queue-6.6/scsi-ufs-qcom-update-phy-settings-only-when-scaling-to-higher-gears.patch @@ -0,0 +1,71 @@ +From fc88ca19ad0989dc0e4d4b126d5d0ba91f6cb616 Mon Sep 17 00:00:00 2001 +From: Manivannan Sadhasivam +Date: Fri, 8 Sep 2023 20:23:28 +0530 +Subject: scsi: ufs: qcom: Update PHY settings only when scaling to higher gears + +From: Manivannan Sadhasivam + +commit fc88ca19ad0989dc0e4d4b126d5d0ba91f6cb616 upstream. + +The "hs_gear" variable is used to program the PHY settings (submode) during +ufs_qcom_power_up_sequence(). Currently, it is being updated every time the +agreed gear changes. Due to this, if the gear got downscaled before suspend +(runtime/system), then while resuming, the PHY settings for the lower gear +will be applied first and later when scaling to max gear with REINIT, the +PHY settings for the max gear will be applied. 
+ +This adds a latency while resuming and also really not needed as the PHY +gear settings are backwards compatible i.e., we can continue using the PHY +settings for max gear with lower gear speed. + +So let's update the "hs_gear" variable _only_ when the agreed gear is +greater than the current one. This guarantees that the PHY settings will be +changed only during probe time and fatal error condition. + +Due to this, UFSHCD_QUIRK_REINIT_AFTER_MAX_GEAR_SWITCH can now be skipped +when the PM operation is in progress. + +Cc: stable@vger.kernel.org +Fixes: 96a7141da332 ("scsi: ufs: core: Add support for reinitializing the UFS device") +Reported-by: Can Guo +Signed-off-by: Manivannan Sadhasivam +Link: https://lore.kernel.org/r/20230908145329.154024-1-manivannan.sadhasivam@linaro.org +Reviewed-by: Can Guo +Tested-by: Can Guo +Signed-off-by: Martin K. Petersen +Signed-off-by: Greg Kroah-Hartman +--- + drivers/ufs/core/ufshcd.c | 3 ++- + drivers/ufs/host/ufs-qcom.c | 9 +++++++-- + 2 files changed, 9 insertions(+), 3 deletions(-) + +--- a/drivers/ufs/core/ufshcd.c ++++ b/drivers/ufs/core/ufshcd.c +@@ -8723,7 +8723,8 @@ static int ufshcd_probe_hba(struct ufs_h + if (ret) + goto out; + +- if (hba->quirks & UFSHCD_QUIRK_REINIT_AFTER_MAX_GEAR_SWITCH) { ++ if (!hba->pm_op_in_progress && ++ (hba->quirks & UFSHCD_QUIRK_REINIT_AFTER_MAX_GEAR_SWITCH)) { + /* Reset the device and controller before doing reinit */ + ufshcd_device_reset(hba); + ufshcd_hba_stop(hba); +--- a/drivers/ufs/host/ufs-qcom.c ++++ b/drivers/ufs/host/ufs-qcom.c +@@ -909,8 +909,13 @@ static int ufs_qcom_pwr_change_notify(st + return ret; + } + +- /* Use the agreed gear */ +- host->hs_gear = dev_req_params->gear_tx; ++ /* ++ * Update hs_gear only when the gears are scaled to a higher value. This is because, ++ * the PHY gear settings are backwards compatible and we only need to change the PHY ++ * settings while scaling to higher gears. 
++ */ ++ if (dev_req_params->gear_tx > host->hs_gear) ++ host->hs_gear = dev_req_params->gear_tx; + + /* enable the device ref clock before changing to HS mode */ + if (!ufshcd_is_hs_mode(&hba->pwr_info) && diff --git a/queue-6.6/series b/queue-6.6/series index 6e09018e4f3..c82996cacef 100644 --- a/queue-6.6/series +++ b/queue-6.6/series @@ -233,3 +233,29 @@ tools-power-turbostat-enable-the-c-state-pre-wake-pr.patch scsi-ufs-core-expand-mcq-queue-slot-to-devicequeuede.patch cifs-spnego-add-in-host_key_len.patch cifs-fix-check-of-rc-in-function-generate_smb3signin.patch +perf-core-fix-cpuctx-refcounting.patch +i915-perf-fix-null-deref-bugs-with-drm_dbg-calls.patch +perf-arm_cspmu-reject-events-meant-for-other-pmus.patch +drivers-perf-check-find_first_bit-return-value.patch +media-venus-hfi-add-checks-to-perform-sanity-on-queue-pointers.patch +perf-intel-pt-fix-async-branch-flags.patch +powerpc-perf-fix-disabling-bhrb-and-instruction-sampling.patch +randstruct-fix-gcc-plugin-performance-mode-to-stay-in-group.patch +spi-fix-null-dereference-on-suspend.patch +bpf-fix-check_stack_write_fixed_off-to-correctly-spill-imm.patch +bpf-fix-precision-tracking-for-bpf_alu-bpf_to_be-bpf_end.patch +scsi-mpt3sas-fix-loop-logic.patch +scsi-megaraid_sas-increase-register-read-retry-rount-from-3-to-30-for-selected-registers.patch +scsi-ufs-qcom-update-phy-settings-only-when-scaling-to-higher-gears.patch +scsi-qla2xxx-fix-system-crash-due-to-bad-pointer-access.patch +scsi-ufs-core-fix-racing-issue-between-ufshcd_mcq_abort-and-isr.patch +x86-shstk-delay-signal-entry-ssp-write-until-after-user-accesses.patch +crypto-x86-sha-load-modules-based-on-cpu-features.patch +x86-pci-avoid-pme-from-d3hot-d3cold-for-amd-rembrandt-and-phoenix-usb4.patch +x86-apic-msi-fix-misconfigured-non-maskable-msi-quirk.patch +x86-cpu-hygon-fix-the-cpu-topology-evaluation-for-real.patch +kvm-x86-hyper-v-don-t-auto-enable-stimer-on-write-from-user-space.patch +kvm-x86-ignore-msr_amd64_tw_cfg-access.patch 
+kvm-x86-clear-bit12-of-icr-after-apic-write-vm-exit.patch +kvm-x86-fix-lapic-timer-interrupt-lost-after-loading-a-snapshot.patch +mmc-sdhci-pci-gli-gl9755-mask-the-replay-timer-timeout-of-aer.patch diff --git a/queue-6.6/spi-fix-null-dereference-on-suspend.patch b/queue-6.6/spi-fix-null-dereference-on-suspend.patch new file mode 100644 index 00000000000..23c5b75c832 --- /dev/null +++ b/queue-6.6/spi-fix-null-dereference-on-suspend.patch @@ -0,0 +1,146 @@ +From bef4a48f4ef798c4feddf045d49e53c8a97d5e37 Mon Sep 17 00:00:00 2001 +From: Mark Hasemeyer +Date: Tue, 7 Nov 2023 14:47:43 -0700 +Subject: spi: Fix null dereference on suspend + +From: Mark Hasemeyer + +commit bef4a48f4ef798c4feddf045d49e53c8a97d5e37 upstream. + +A race condition exists where a synchronous (noqueue) transfer can be +active during a system suspend. This can cause a null pointer +dereference exception to occur when the system resumes. + +Example order of events leading to the exception: +1. spi_sync() calls __spi_transfer_message_noqueue() which sets + ctlr->cur_msg +2. Spi transfer begins via spi_transfer_one_message() +3. System is suspended interrupting the transfer context +4. System is resumed +5. spi_controller_resume() calls spi_start_queue() which resets cur_msg + to NULL +6. Spi transfer context resumes and spi_finalize_current_message() is + called which dereferences cur_msg (which is now NULL) + +Wait for synchronous transfers to complete before suspending by +acquiring the bus mutex and setting/checking a suspend flag. 
+ +Signed-off-by: Mark Hasemeyer +Link: https://lore.kernel.org/r/20231107144743.v1.1.I7987f05f61901f567f7661763646cb7d7919b528@changeid +Signed-off-by: Mark Brown +Cc: stable@kernel.org +Signed-off-by: Greg Kroah-Hartman +--- + drivers/spi/spi.c | 56 +++++++++++++++++++++++++++++++++--------------- + include/linux/spi/spi.h | 1 + 2 files changed, 40 insertions(+), 17 deletions(-) + +--- a/drivers/spi/spi.c ++++ b/drivers/spi/spi.c +@@ -3323,33 +3323,52 @@ void spi_unregister_controller(struct sp + } + EXPORT_SYMBOL_GPL(spi_unregister_controller); + ++static inline int __spi_check_suspended(const struct spi_controller *ctlr) ++{ ++ return ctlr->flags & SPI_CONTROLLER_SUSPENDED ? -ESHUTDOWN : 0; ++} ++ ++static inline void __spi_mark_suspended(struct spi_controller *ctlr) ++{ ++ mutex_lock(&ctlr->bus_lock_mutex); ++ ctlr->flags |= SPI_CONTROLLER_SUSPENDED; ++ mutex_unlock(&ctlr->bus_lock_mutex); ++} ++ ++static inline void __spi_mark_resumed(struct spi_controller *ctlr) ++{ ++ mutex_lock(&ctlr->bus_lock_mutex); ++ ctlr->flags &= ~SPI_CONTROLLER_SUSPENDED; ++ mutex_unlock(&ctlr->bus_lock_mutex); ++} ++ + int spi_controller_suspend(struct spi_controller *ctlr) + { +- int ret; ++ int ret = 0; + + /* Basically no-ops for non-queued controllers */ +- if (!ctlr->queued) +- return 0; +- +- ret = spi_stop_queue(ctlr); +- if (ret) +- dev_err(&ctlr->dev, "queue stop failed\n"); ++ if (ctlr->queued) { ++ ret = spi_stop_queue(ctlr); ++ if (ret) ++ dev_err(&ctlr->dev, "queue stop failed\n"); ++ } + ++ __spi_mark_suspended(ctlr); + return ret; + } + EXPORT_SYMBOL_GPL(spi_controller_suspend); + + int spi_controller_resume(struct spi_controller *ctlr) + { +- int ret; +- +- if (!ctlr->queued) +- return 0; ++ int ret = 0; + +- ret = spi_start_queue(ctlr); +- if (ret) +- dev_err(&ctlr->dev, "queue restart failed\n"); ++ __spi_mark_resumed(ctlr); + ++ if (ctlr->queued) { ++ ret = spi_start_queue(ctlr); ++ if (ret) ++ dev_err(&ctlr->dev, "queue restart failed\n"); ++ } + return ret; + } 
+ EXPORT_SYMBOL_GPL(spi_controller_resume); +@@ -4153,8 +4172,7 @@ static void __spi_transfer_message_noque + ctlr->cur_msg = msg; + ret = __spi_pump_transfer_message(ctlr, msg, was_busy); + if (ret) +- goto out; +- ++ dev_err(&ctlr->dev, "noqueue transfer failed\n"); + ctlr->cur_msg = NULL; + ctlr->fallback = false; + +@@ -4170,7 +4188,6 @@ static void __spi_transfer_message_noque + spi_idle_runtime_pm(ctlr); + } + +-out: + mutex_unlock(&ctlr->io_mutex); + } + +@@ -4193,6 +4210,11 @@ static int __spi_sync(struct spi_device + int status; + struct spi_controller *ctlr = spi->controller; + ++ if (__spi_check_suspended(ctlr)) { ++ dev_warn_once(&spi->dev, "Attempted to sync while suspend\n"); ++ return -ESHUTDOWN; ++ } ++ + status = __spi_validate(spi, message); + if (status != 0) + return status; +--- a/include/linux/spi/spi.h ++++ b/include/linux/spi/spi.h +@@ -566,6 +566,7 @@ struct spi_controller { + #define SPI_CONTROLLER_MUST_RX BIT(3) /* Requires rx */ + #define SPI_CONTROLLER_MUST_TX BIT(4) /* Requires tx */ + #define SPI_CONTROLLER_GPIO_SS BIT(5) /* GPIO CS must select slave */ ++#define SPI_CONTROLLER_SUSPENDED BIT(6) /* Currently suspended */ + + /* Flag indicating if the allocation of this struct is devres-managed */ + bool devm_allocated; diff --git a/queue-6.6/x86-apic-msi-fix-misconfigured-non-maskable-msi-quirk.patch b/queue-6.6/x86-apic-msi-fix-misconfigured-non-maskable-msi-quirk.patch new file mode 100644 index 00000000000..efadb6ec642 --- /dev/null +++ b/queue-6.6/x86-apic-msi-fix-misconfigured-non-maskable-msi-quirk.patch @@ -0,0 +1,191 @@ +From b56ebe7c896dc78b5865ec2c4b1dae3c93537517 Mon Sep 17 00:00:00 2001 +From: Koichiro Den +Date: Thu, 26 Oct 2023 12:20:36 +0900 +Subject: x86/apic/msi: Fix misconfigured non-maskable MSI quirk + +From: Koichiro Den + +commit b56ebe7c896dc78b5865ec2c4b1dae3c93537517 upstream. 
+ +commit ef8dd01538ea ("genirq/msi: Make interrupt allocation less +convoluted"), reworked the code so that the x86 specific quirk for affinity +setting of non-maskable PCI/MSI interrupts is no longer activated if +necessary. + +This could be solved by restoring the original logic in the core MSI code, +but after a deeper analysis it turned out that the quirk flag is not +required at all. + +The quirk is only required when the PCI/MSI device cannot mask the MSI +interrupts, which in turn also prevents reservation mode from being enabled +for the affected interrupt. + +This allows to remove the NOMASK quirk bit completely as msi_set_affinity() +can instead check whether reservation mode is enabled for the interrupt, +which gives exactly the same answer. + +Even in the momentary non-existing case that the reservation mode would be +not set for a maskable MSI interrupt this would not cause any harm as it +just would cause msi_set_affinity() to go needlessly through the +functionally equivalent slow path, which works perfectly fine with maskable +interrupts as well. + +Rework msi_set_affinity() to query the reservation mode and remove all +NOMASK quirk logic from the core code. + +[ tglx: Massaged changelog ] + +Fixes: ef8dd01538ea ("genirq/msi: Make interrupt allocation less convoluted") +Suggested-by: Thomas Gleixner +Signed-off-by: Koichiro Den +Signed-off-by: Thomas Gleixner +Cc: stable@vger.kernel.org +Link: https://lore.kernel.org/r/20231026032036.2462428-1-den@valinux.co.jp +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/kernel/apic/msi.c | 8 +++----- + include/linux/irq.h | 26 ++++---------------------- + include/linux/msi.h | 6 ------ + kernel/irq/debugfs.c | 1 - + kernel/irq/msi.c | 12 +----------- + 5 files changed, 8 insertions(+), 45 deletions(-) + +--- a/arch/x86/kernel/apic/msi.c ++++ b/arch/x86/kernel/apic/msi.c +@@ -55,14 +55,14 @@ msi_set_affinity(struct irq_data *irqd, + * caused by the non-atomic update of the address/data pair. 
+ * + * Direct update is possible when: +- * - The MSI is maskable (remapped MSI does not use this code path)). +- * The quirk bit is not set in this case. ++ * - The MSI is maskable (remapped MSI does not use this code path). ++ * The reservation mode bit is set in this case. + * - The new vector is the same as the old vector + * - The old vector is MANAGED_IRQ_SHUTDOWN_VECTOR (interrupt starts up) + * - The interrupt is not yet started up + * - The new destination CPU is the same as the old destination CPU + */ +- if (!irqd_msi_nomask_quirk(irqd) || ++ if (!irqd_can_reserve(irqd) || + cfg->vector == old_cfg.vector || + old_cfg.vector == MANAGED_IRQ_SHUTDOWN_VECTOR || + !irqd_is_started(irqd) || +@@ -215,8 +215,6 @@ static bool x86_init_dev_msi_info(struct + if (WARN_ON_ONCE(domain != real_parent)) + return false; + info->chip->irq_set_affinity = msi_set_affinity; +- /* See msi_set_affinity() for the gory details */ +- info->flags |= MSI_FLAG_NOMASK_QUIRK; + break; + case DOMAIN_BUS_DMAR: + case DOMAIN_BUS_AMDVI: +--- a/include/linux/irq.h ++++ b/include/linux/irq.h +@@ -215,8 +215,6 @@ struct irq_data { + * IRQD_SINGLE_TARGET - IRQ allows only a single affinity target + * IRQD_DEFAULT_TRIGGER_SET - Expected trigger already been set + * IRQD_CAN_RESERVE - Can use reservation mode +- * IRQD_MSI_NOMASK_QUIRK - Non-maskable MSI quirk for affinity change +- * required + * IRQD_HANDLE_ENFORCE_IRQCTX - Enforce that handle_irq_*() is only invoked + * from actual interrupt context. + * IRQD_AFFINITY_ON_ACTIVATE - Affinity is set on activation. 
Don't call +@@ -247,11 +245,10 @@ enum { + IRQD_SINGLE_TARGET = BIT(24), + IRQD_DEFAULT_TRIGGER_SET = BIT(25), + IRQD_CAN_RESERVE = BIT(26), +- IRQD_MSI_NOMASK_QUIRK = BIT(27), +- IRQD_HANDLE_ENFORCE_IRQCTX = BIT(28), +- IRQD_AFFINITY_ON_ACTIVATE = BIT(29), +- IRQD_IRQ_ENABLED_ON_SUSPEND = BIT(30), +- IRQD_RESEND_WHEN_IN_PROGRESS = BIT(31), ++ IRQD_HANDLE_ENFORCE_IRQCTX = BIT(27), ++ IRQD_AFFINITY_ON_ACTIVATE = BIT(28), ++ IRQD_IRQ_ENABLED_ON_SUSPEND = BIT(29), ++ IRQD_RESEND_WHEN_IN_PROGRESS = BIT(30), + }; + + #define __irqd_to_state(d) ACCESS_PRIVATE((d)->common, state_use_accessors) +@@ -426,21 +423,6 @@ static inline bool irqd_can_reserve(stru + return __irqd_to_state(d) & IRQD_CAN_RESERVE; + } + +-static inline void irqd_set_msi_nomask_quirk(struct irq_data *d) +-{ +- __irqd_to_state(d) |= IRQD_MSI_NOMASK_QUIRK; +-} +- +-static inline void irqd_clr_msi_nomask_quirk(struct irq_data *d) +-{ +- __irqd_to_state(d) &= ~IRQD_MSI_NOMASK_QUIRK; +-} +- +-static inline bool irqd_msi_nomask_quirk(struct irq_data *d) +-{ +- return __irqd_to_state(d) & IRQD_MSI_NOMASK_QUIRK; +-} +- + static inline void irqd_set_affinity_on_activate(struct irq_data *d) + { + __irqd_to_state(d) |= IRQD_AFFINITY_ON_ACTIVATE; +--- a/include/linux/msi.h ++++ b/include/linux/msi.h +@@ -547,12 +547,6 @@ enum { + MSI_FLAG_ALLOC_SIMPLE_MSI_DESCS = (1 << 5), + /* Free MSI descriptors */ + MSI_FLAG_FREE_MSI_DESCS = (1 << 6), +- /* +- * Quirk to handle MSI implementations which do not provide +- * masking. Currently known to affect x86, but has to be partially +- * handled in the core MSI code. 
+- */ +- MSI_FLAG_NOMASK_QUIRK = (1 << 7), + + /* Mask for the generic functionality */ + MSI_GENERIC_FLAGS_MASK = GENMASK(15, 0), +--- a/kernel/irq/debugfs.c ++++ b/kernel/irq/debugfs.c +@@ -121,7 +121,6 @@ static const struct irq_bit_descr irqdat + BIT_MASK_DESCR(IRQD_AFFINITY_ON_ACTIVATE), + BIT_MASK_DESCR(IRQD_MANAGED_SHUTDOWN), + BIT_MASK_DESCR(IRQD_CAN_RESERVE), +- BIT_MASK_DESCR(IRQD_MSI_NOMASK_QUIRK), + + BIT_MASK_DESCR(IRQD_FORWARDED_TO_VCPU), + +--- a/kernel/irq/msi.c ++++ b/kernel/irq/msi.c +@@ -1204,7 +1204,6 @@ static int msi_handle_pci_fail(struct ir + + #define VIRQ_CAN_RESERVE 0x01 + #define VIRQ_ACTIVATE 0x02 +-#define VIRQ_NOMASK_QUIRK 0x04 + + static int msi_init_virq(struct irq_domain *domain, int virq, unsigned int vflags) + { +@@ -1213,8 +1212,6 @@ static int msi_init_virq(struct irq_doma + + if (!(vflags & VIRQ_CAN_RESERVE)) { + irqd_clr_can_reserve(irqd); +- if (vflags & VIRQ_NOMASK_QUIRK) +- irqd_set_msi_nomask_quirk(irqd); + + /* + * If the interrupt is managed but no CPU is available to +@@ -1275,15 +1272,8 @@ static int __msi_domain_alloc_irqs(struc + * Interrupt can use a reserved vector and will not occupy + * a real device vector until the interrupt is requested. + */ +- if (msi_check_reservation_mode(domain, info, dev)) { ++ if (msi_check_reservation_mode(domain, info, dev)) + vflags |= VIRQ_CAN_RESERVE; +- /* +- * MSI affinity setting requires a special quirk (X86) when +- * reservation mode is active. 
+- */ +- if (info->flags & MSI_FLAG_NOMASK_QUIRK) +- vflags |= VIRQ_NOMASK_QUIRK; +- } + + xa_for_each_range(xa, idx, desc, ctrl->first, ctrl->last) { + if (!msi_desc_match(desc, MSI_DESC_NOTASSOCIATED)) diff --git a/queue-6.6/x86-cpu-hygon-fix-the-cpu-topology-evaluation-for-real.patch b/queue-6.6/x86-cpu-hygon-fix-the-cpu-topology-evaluation-for-real.patch new file mode 100644 index 00000000000..890737d6e90 --- /dev/null +++ b/queue-6.6/x86-cpu-hygon-fix-the-cpu-topology-evaluation-for-real.patch @@ -0,0 +1,42 @@ +From ee545b94d39a00c93dc98b1dbcbcf731d2eadeb4 Mon Sep 17 00:00:00 2001 +From: Pu Wen +Date: Mon, 14 Aug 2023 10:18:26 +0200 +Subject: x86/cpu/hygon: Fix the CPU topology evaluation for real + +From: Pu Wen + +commit ee545b94d39a00c93dc98b1dbcbcf731d2eadeb4 upstream. + +Hygon processors with a model ID > 3 have CPUID leaf 0xB correctly +populated and don't need the fixed package ID shift workaround. The fixup +is also incorrect when running in a guest. + +Fixes: e0ceeae708ce ("x86/CPU/hygon: Fix phys_proc_id calculation logic for multi-die processors") +Signed-off-by: Pu Wen +Signed-off-by: Thomas Gleixner +Acked-by: Peter Zijlstra (Intel) +Cc: +Link: https://lore.kernel.org/r/tencent_594804A808BD93A4EBF50A994F228E3A7F07@qq.com +Link: https://lore.kernel.org/r/20230814085112.089607918@linutronix.de +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/kernel/cpu/hygon.c | 8 ++++++-- + 1 file changed, 6 insertions(+), 2 deletions(-) + +--- a/arch/x86/kernel/cpu/hygon.c ++++ b/arch/x86/kernel/cpu/hygon.c +@@ -87,8 +87,12 @@ static void hygon_get_topology(struct cp + if (!err) + c->x86_coreid_bits = get_count_order(c->x86_max_cores); + +- /* Socket ID is ApicId[6] for these processors. */ +- c->phys_proc_id = c->apicid >> APICID_SOCKET_ID_BIT; ++ /* ++ * Socket ID is ApicId[6] for the processors with model <= 0x3 ++ * when running on host. 
++ */ ++ if (!boot_cpu_has(X86_FEATURE_HYPERVISOR) && c->x86_model <= 0x3) ++ c->phys_proc_id = c->apicid >> APICID_SOCKET_ID_BIT; + + cacheinfo_hygon_init_llc_id(c, cpu); + } else if (cpu_has(c, X86_FEATURE_NODEID_MSR)) { diff --git a/queue-6.6/x86-pci-avoid-pme-from-d3hot-d3cold-for-amd-rembrandt-and-phoenix-usb4.patch b/queue-6.6/x86-pci-avoid-pme-from-d3hot-d3cold-for-amd-rembrandt-and-phoenix-usb4.patch new file mode 100644 index 00000000000..2c20fd08469 --- /dev/null +++ b/queue-6.6/x86-pci-avoid-pme-from-d3hot-d3cold-for-amd-rembrandt-and-phoenix-usb4.patch @@ -0,0 +1,119 @@ +From 7d08f21f8c6307cb05cabb8d86e90ff6ccba57e9 Mon Sep 17 00:00:00 2001 +From: Mario Limonciello +Date: Wed, 4 Oct 2023 09:49:59 -0500 +Subject: x86/PCI: Avoid PME from D3hot/D3cold for AMD Rembrandt and Phoenix USB4 + +From: Mario Limonciello + +commit 7d08f21f8c6307cb05cabb8d86e90ff6ccba57e9 upstream. + +Iain reports that USB devices can't be used to wake a Lenovo Z13 from +suspend. This occurs because on some AMD platforms, even though the Root +Ports advertise PME_Support for D3hot and D3cold, wakeup events from +devices on a USB4 controller don't result in wakeup interrupts from the +Root Port when amd-pmc has put the platform in a hardware sleep state. + +If amd-pmc will be involved in the suspend, remove D3hot and D3cold from +the PME_Support mask of Root Ports above USB4 controllers so we avoid those +states if we need wakeups. + +Restore D3 support at resume so that it can be used by runtime suspend. + +This affects both AMD Rembrandt and Phoenix SoCs. + +"pm_suspend_target_state == PM_SUSPEND_ON" means we're doing runtime +suspend, and amd-pmc will not be involved. In that case PMEs work as +advertised in D3hot/D3cold, so we don't need to do anything. + +Note that amd-pmc is technically optional, and there's no need for this +quirk if it's not present, but we assume it's always present because power +consumption is so high without it. 
+ +Fixes: 9d26d3a8f1b0 ("PCI: Put PCIe ports into D3 during suspend") +Link: https://lore.kernel.org/r/20231004144959.158840-1-mario.limonciello@amd.com +Reported-by: Iain Lane +Closes: https://forums.lenovo.com/t5/Ubuntu/Z13-can-t-resume-from-suspend-with-external-USB-keyboard/m-p/5217121 +Signed-off-by: Mario Limonciello +[bhelgaas: commit log, move to arch/x86/pci/fixup.c, add #includes] +Signed-off-by: Bjorn Helgaas +Cc: stable@vger.kernel.org +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/pci/fixup.c | 59 +++++++++++++++++++++++++++++++++++++++++++++++++++ + 1 file changed, 59 insertions(+) + +--- a/arch/x86/pci/fixup.c ++++ b/arch/x86/pci/fixup.c +@@ -3,9 +3,11 @@ + * Exceptions for specific devices. Usually work-arounds for fatal design flaws. + */ + ++#include + #include + #include + #include ++#include + #include + #include + #include +@@ -904,3 +906,60 @@ static void chromeos_fixup_apl_pci_l1ss_ + } + DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x5ad6, chromeos_save_apl_pci_l1ss_capability); + DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_INTEL, 0x5ad6, chromeos_fixup_apl_pci_l1ss_capability); ++ ++#ifdef CONFIG_SUSPEND ++/* ++ * Root Ports on some AMD SoCs advertise PME_Support for D3hot and D3cold, but ++ * if the SoC is put into a hardware sleep state by the amd-pmc driver, the ++ * Root Ports don't generate wakeup interrupts for USB devices. ++ * ++ * When suspending, remove D3hot and D3cold from the PME_Support advertised ++ * by the Root Port so we don't use those states if we're expecting wakeup ++ * interrupts. Restore the advertised PME_Support when resuming. ++ */ ++static void amd_rp_pme_suspend(struct pci_dev *dev) ++{ ++ struct pci_dev *rp; ++ ++ /* ++ * PM_SUSPEND_ON means we're doing runtime suspend, which means ++ * amd-pmc will not be involved so PMEs during D3 work as advertised. ++ * ++ * The PMEs *do* work if amd-pmc doesn't put the SoC in the hardware ++ * sleep state, but we assume amd-pmc is always present. 
++ */ ++ if (pm_suspend_target_state == PM_SUSPEND_ON) ++ return; ++ ++ rp = pcie_find_root_port(dev); ++ if (!rp->pm_cap) ++ return; ++ ++ rp->pme_support &= ~((PCI_PM_CAP_PME_D3hot|PCI_PM_CAP_PME_D3cold) >> ++ PCI_PM_CAP_PME_SHIFT); ++ dev_info_once(&rp->dev, "quirk: disabling D3cold for suspend\n"); ++} ++ ++static void amd_rp_pme_resume(struct pci_dev *dev) ++{ ++ struct pci_dev *rp; ++ u16 pmc; ++ ++ rp = pcie_find_root_port(dev); ++ if (!rp->pm_cap) ++ return; ++ ++ pci_read_config_word(rp, rp->pm_cap + PCI_PM_PMC, &pmc); ++ rp->pme_support = FIELD_GET(PCI_PM_CAP_PME_MASK, pmc); ++} ++/* Rembrandt (yellow_carp) */ ++DECLARE_PCI_FIXUP_SUSPEND(PCI_VENDOR_ID_AMD, 0x162e, amd_rp_pme_suspend); ++DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_AMD, 0x162e, amd_rp_pme_resume); ++DECLARE_PCI_FIXUP_SUSPEND(PCI_VENDOR_ID_AMD, 0x162f, amd_rp_pme_suspend); ++DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_AMD, 0x162f, amd_rp_pme_resume); ++/* Phoenix (pink_sardine) */ ++DECLARE_PCI_FIXUP_SUSPEND(PCI_VENDOR_ID_AMD, 0x1668, amd_rp_pme_suspend); ++DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_AMD, 0x1668, amd_rp_pme_resume); ++DECLARE_PCI_FIXUP_SUSPEND(PCI_VENDOR_ID_AMD, 0x1669, amd_rp_pme_suspend); ++DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_AMD, 0x1669, amd_rp_pme_resume); ++#endif /* CONFIG_SUSPEND */ diff --git a/queue-6.6/x86-shstk-delay-signal-entry-ssp-write-until-after-user-accesses.patch b/queue-6.6/x86-shstk-delay-signal-entry-ssp-write-until-after-user-accesses.patch new file mode 100644 index 00000000000..3016c243a32 --- /dev/null +++ b/queue-6.6/x86-shstk-delay-signal-entry-ssp-write-until-after-user-accesses.patch @@ -0,0 +1,99 @@ +From 31255e072b2e91f97645d792d25b2db744186dd1 Mon Sep 17 00:00:00 2001 +From: Rick Edgecombe +Date: Tue, 7 Nov 2023 10:22:51 -0800 +Subject: x86/shstk: Delay signal entry SSP write until after user accesses +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Rick Edgecombe + +commit 
31255e072b2e91f97645d792d25b2db744186dd1 upstream. + +When a signal is being delivered, the kernel needs to make accesses to +userspace. These accesses could encounter an access error, in which case +the signal delivery itself will trigger a segfault. Usually this would +result in the kernel killing the process. But in the case of a SEGV signal +handler being configured, the failure of the first signal delivery will +result in *another* signal getting delivered. The second signal may +succeed if another thread has resolved the issue that triggered the +segfault (i.e. a well timed mprotect()/mmap()), or the second signal is +being delivered to another stack (i.e. an alt stack). + +On x86, in the non-shadow stack case, all the accesses to userspace are +done before changes to the registers (in pt_regs). The operation is +aborted when an access error occurs, so although there may be writes done +for the first signal, control flow changes for the signal (regs->ip, +regs->sp, etc) are not committed until all the accesses have already +completed successfully. This means that the second signal will be +delivered as if it happened at the time of the first signal. It will +effectively replace the first aborted signal, overwriting the half-written +frame of the aborted signal. So on sigreturn from the second signal, +control flow will resume happily from the point of control flow where the +original signal was delivered. + +The problem is, when shadow stack is active, the shadow stack SSP +register/MSR is updated *before* some of the userspace accesses. This +means if the earlier accesses succeed and the later ones fail, the second +signal will not be delivered at the same spot on the shadow stack as the +first one. So on sigreturn from the second signal, the SSP will be +pointing to the wrong location on the shadow stack (off by a frame). + +Pengfei privately reported that while using a shadow stack enabled glibc, +the “signal06” test in the LTP test-suite hung. 
It turns out it is +testing the above described double signal scenario. When this test was +compiled with shadow stack, the first signal pushed a shadow stack +sigframe, then the second pushed another. When the second signal was +handled, the SSP was at the first shadow stack signal frame instead of +the original location. The test then got stuck as the #CP from the twice +incremented SSP was incorrect and generated segfaults in a loop. + +Fix this by adjusting the SSP register only after any userspace accesses, +such that there can be no failures after the SSP is adjusted. Do this by +moving the shadow stack sigframe push logic to happen after all other +userspace accesses. + +Note, sigreturn (as opposed to the signal delivery dealt with in this +patch) has ordering behavior that could lead to similar failures. The +ordering issues there extend beyond shadow stack to include the alt stack +restoration. Fixing that would require cross-arch changes, and the +ordering today does not cause any known test or apps breakages. So leave +it as is, for now. 
+ +[ dhansen: minor changelog/subject tweak ] + +Fixes: 05e36022c054 ("x86/shstk: Handle signals for shadow stack") +Reported-by: Pengfei Xu +Signed-off-by: Rick Edgecombe +Signed-off-by: Dave Hansen +Tested-by: Pengfei Xu +Cc:stable@vger.kernel.org +Link: https://lore.kernel.org/all/20231107182251.91276-1-rick.p.edgecombe%40intel.com +Link: https://github.com/linux-test-project/ltp/blob/master/testcases/kernel/syscalls/signal/signal06.c +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/kernel/signal_64.c | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +--- a/arch/x86/kernel/signal_64.c ++++ b/arch/x86/kernel/signal_64.c +@@ -175,9 +175,6 @@ int x64_setup_rt_frame(struct ksignal *k + frame = get_sigframe(ksig, regs, sizeof(struct rt_sigframe), &fp); + uc_flags = frame_uc_flags(regs); + +- if (setup_signal_shadow_stack(ksig)) +- return -EFAULT; +- + if (!user_access_begin(frame, sizeof(*frame))) + return -EFAULT; + +@@ -198,6 +195,9 @@ int x64_setup_rt_frame(struct ksignal *k + return -EFAULT; + } + ++ if (setup_signal_shadow_stack(ksig)) ++ return -EFAULT; ++ + /* Set up registers for signal handler */ + regs->di = ksig->sig; + /* In case the signal handler was declared without prototypes */