--- /dev/null
+From 811c363645b33e6e22658634329e95f383dfc705 Mon Sep 17 00:00:00 2001
+From: Hao Sun <sunhao.th@gmail.com>
+Date: Wed, 1 Nov 2023 13:33:51 +0100
+Subject: bpf: Fix check_stack_write_fixed_off() to correctly spill imm
+
+From: Hao Sun <sunhao.th@gmail.com>
+
+commit 811c363645b33e6e22658634329e95f383dfc705 upstream.
+
+In check_stack_write_fixed_off(), imm value is cast to u32 before being
+spilled to the stack. Therefore, the sign information is lost, and the
+range information is incorrect when load from the stack again.
+
+For the following prog:
+0: r2 = r10
+1: *(u64*)(r2 -40) = -44
+2: r0 = *(u64*)(r2 - 40)
+3: if r0 s<= 0xa goto +2
+4: r0 = 1
+5: exit
+6: r0 = 0
+7: exit
+
+The verifier gives:
+func#0 @0
+0: R1=ctx(off=0,imm=0) R10=fp0
+0: (bf) r2 = r10 ; R2_w=fp0 R10=fp0
+1: (7a) *(u64 *)(r2 -40) = -44 ; R2_w=fp0 fp-40_w=4294967252
+2: (79) r0 = *(u64 *)(r2 -40) ; R0_w=4294967252 R2_w=fp0
+fp-40_w=4294967252
+3: (c5) if r0 s< 0xa goto pc+2
+mark_precise: frame0: last_idx 3 first_idx 0 subseq_idx -1
+mark_precise: frame0: regs=r0 stack= before 2: (79) r0 = *(u64 *)(r2 -40)
+3: R0_w=4294967252
+4: (b7) r0 = 1 ; R0_w=1
+5: (95) exit
+verification time 7971 usec
+stack depth 40
+processed 6 insns (limit 1000000) max_states_per_insn 0 total_states 0
+peak_states 0 mark_read 0
+
+So remove the incorrect cast, since imm field is declared as s32, and
+__mark_reg_known() takes u64, so imm would be correctly sign extended
+by compiler.
+
+Fixes: ecdf985d7615 ("bpf: track immediate values written to stack by BPF_ST instruction")
+Cc: stable@vger.kernel.org
+Signed-off-by: Hao Sun <sunhao.th@gmail.com>
+Acked-by: Shung-Hsi Yu <shung-hsi.yu@suse.com>
+Acked-by: Eduard Zingerman <eddyz87@gmail.com>
+Link: https://lore.kernel.org/r/20231101-fix-check-stack-write-v3-1-f05c2b1473d5@gmail.com
+Signed-off-by: Alexei Starovoitov <ast@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ kernel/bpf/verifier.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/kernel/bpf/verifier.c
++++ b/kernel/bpf/verifier.c
+@@ -4368,7 +4368,7 @@ static int check_stack_write_fixed_off(s
+ insn->imm != 0 && env->bpf_capable) {
+ struct bpf_reg_state fake_reg = {};
+
+- __mark_reg_known(&fake_reg, (u32)insn->imm);
++ __mark_reg_known(&fake_reg, insn->imm);
+ fake_reg.type = SCALAR_VALUE;
+ save_register_state(state, spi, &fake_reg, size);
+ } else if (reg && is_spillable_regtype(reg->type)) {
--- /dev/null
+From 291d044fd51f8484066300ee42afecf8c8db7b3a Mon Sep 17 00:00:00 2001
+From: Shung-Hsi Yu <shung-hsi.yu@suse.com>
+Date: Thu, 2 Nov 2023 13:39:03 +0800
+Subject: bpf: Fix precision tracking for BPF_ALU | BPF_TO_BE | BPF_END
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Shung-Hsi Yu <shung-hsi.yu@suse.com>
+
+commit 291d044fd51f8484066300ee42afecf8c8db7b3a upstream.
+
+BPF_END and BPF_NEG has a different specification for the source bit in
+the opcode compared to other ALU/ALU64 instructions, and is either
+reserved or use to specify the byte swap endianness. In both cases the
+source bit does not encode source operand location, and src_reg is a
+reserved field.
+
+backtrack_insn() currently does not differentiate BPF_END and BPF_NEG
+from other ALU/ALU64 instructions, which leads to r0 being incorrectly
+marked as precise when processing BPF_ALU | BPF_TO_BE | BPF_END
+instructions. This commit teaches backtrack_insn() to correctly mark
+precision for such case.
+
+While precise tracking of BPF_NEG and other BPF_END instructions are
+correct and does not need fixing, this commit opt to process all BPF_NEG
+and BPF_END instructions within the same if-clause to better align with
+current convention used in the verifier (e.g. check_alu_op).
+
+Fixes: b5dc0163d8fd ("bpf: precise scalar_value tracking")
+Cc: stable@vger.kernel.org
+Reported-by: Mohamed Mahmoud <mmahmoud@redhat.com>
+Closes: https://lore.kernel.org/r/87jzrrwptf.fsf@toke.dk
+Tested-by: Toke Høiland-Jørgensen <toke@redhat.com>
+Tested-by: Tao Lyu <tao.lyu@epfl.ch>
+Acked-by: Eduard Zingerman <eddyz87@gmail.com>
+Signed-off-by: Shung-Hsi Yu <shung-hsi.yu@suse.com>
+Link: https://lore.kernel.org/r/20231102053913.12004-2-shung-hsi.yu@suse.com
+Signed-off-by: Alexei Starovoitov <ast@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ kernel/bpf/verifier.c | 7 ++++++-
+ 1 file changed, 6 insertions(+), 1 deletion(-)
+
+--- a/kernel/bpf/verifier.c
++++ b/kernel/bpf/verifier.c
+@@ -3436,7 +3436,12 @@ static int backtrack_insn(struct bpf_ver
+ if (class == BPF_ALU || class == BPF_ALU64) {
+ if (!bt_is_reg_set(bt, dreg))
+ return 0;
+- if (opcode == BPF_MOV) {
++ if (opcode == BPF_END || opcode == BPF_NEG) {
++ /* sreg is reserved and unused
++ * dreg still need precision before this insn
++ */
++ return 0;
++ } else if (opcode == BPF_MOV) {
+ if (BPF_SRC(insn->code) == BPF_X) {
+ /* dreg = sreg
+ * dreg needs precision after this insn
--- /dev/null
+From 1c43c0f1f84aa59dfc98ce66f0a67b2922aa7f9d Mon Sep 17 00:00:00 2001
+From: Roxana Nicolescu <roxana.nicolescu@canonical.com>
+Date: Fri, 15 Sep 2023 12:23:25 +0200
+Subject: crypto: x86/sha - load modules based on CPU features
+
+From: Roxana Nicolescu <roxana.nicolescu@canonical.com>
+
+commit 1c43c0f1f84aa59dfc98ce66f0a67b2922aa7f9d upstream.
+
+x86 optimized crypto modules are built as modules rather than build-in and
+they are not loaded when the crypto API is initialized, resulting in the
+generic builtin module (sha1-generic) being used instead.
+
+It was discovered when creating a sha1/sha256 checksum of a 2Gb file by
+using kcapi-tools because it would take significantly longer than creating
+a sha512 checksum of the same file. trace-cmd showed that for sha1/256 the
+generic module was used, whereas for sha512 the optimized module was used
+instead.
+
+Add module aliases() for these x86 optimized crypto modules based on CPU
+feature bits so udev gets a chance to load them later in the boot
+process. This resulted in ~3x decrease in the real-time execution of
+kcapi-dsg.
+
+Fix is inspired from commit
+aa031b8f702e ("crypto: x86/sha512 - load based on CPU features")
+where a similar fix was done for sha512.
+
+Cc: stable@vger.kernel.org # 5.15+
+Suggested-by: Dimitri John Ledkov <dimitri.ledkov@canonical.com>
+Suggested-by: Julian Andres Klode <julian.klode@canonical.com>
+Signed-off-by: Roxana Nicolescu <roxana.nicolescu@canonical.com>
+Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/crypto/sha1_ssse3_glue.c | 12 ++++++++++++
+ arch/x86/crypto/sha256_ssse3_glue.c | 12 ++++++++++++
+ 2 files changed, 24 insertions(+)
+
+--- a/arch/x86/crypto/sha1_ssse3_glue.c
++++ b/arch/x86/crypto/sha1_ssse3_glue.c
+@@ -24,8 +24,17 @@
+ #include <linux/types.h>
+ #include <crypto/sha1.h>
+ #include <crypto/sha1_base.h>
++#include <asm/cpu_device_id.h>
+ #include <asm/simd.h>
+
++static const struct x86_cpu_id module_cpu_ids[] = {
++ X86_MATCH_FEATURE(X86_FEATURE_AVX2, NULL),
++ X86_MATCH_FEATURE(X86_FEATURE_AVX, NULL),
++ X86_MATCH_FEATURE(X86_FEATURE_SSSE3, NULL),
++ {}
++};
++MODULE_DEVICE_TABLE(x86cpu, module_cpu_ids);
++
+ static int sha1_update(struct shash_desc *desc, const u8 *data,
+ unsigned int len, sha1_block_fn *sha1_xform)
+ {
+@@ -301,6 +310,9 @@ static inline void unregister_sha1_ni(vo
+
+ static int __init sha1_ssse3_mod_init(void)
+ {
++ if (!x86_match_cpu(module_cpu_ids))
++ return -ENODEV;
++
+ if (register_sha1_ssse3())
+ goto fail;
+
+--- a/arch/x86/crypto/sha256_ssse3_glue.c
++++ b/arch/x86/crypto/sha256_ssse3_glue.c
+@@ -38,11 +38,20 @@
+ #include <crypto/sha2.h>
+ #include <crypto/sha256_base.h>
+ #include <linux/string.h>
++#include <asm/cpu_device_id.h>
+ #include <asm/simd.h>
+
+ asmlinkage void sha256_transform_ssse3(struct sha256_state *state,
+ const u8 *data, int blocks);
+
++static const struct x86_cpu_id module_cpu_ids[] = {
++ X86_MATCH_FEATURE(X86_FEATURE_AVX2, NULL),
++ X86_MATCH_FEATURE(X86_FEATURE_AVX, NULL),
++ X86_MATCH_FEATURE(X86_FEATURE_SSSE3, NULL),
++ {}
++};
++MODULE_DEVICE_TABLE(x86cpu, module_cpu_ids);
++
+ static int _sha256_update(struct shash_desc *desc, const u8 *data,
+ unsigned int len, sha256_block_fn *sha256_xform)
+ {
+@@ -366,6 +375,9 @@ static inline void unregister_sha256_ni(
+
+ static int __init sha256_ssse3_mod_init(void)
+ {
++ if (!x86_match_cpu(module_cpu_ids))
++ return -ENODEV;
++
+ if (register_sha256_ssse3())
+ goto fail;
+
--- /dev/null
+From c6e316ac05532febb0c966fa9b55f5258ed037be Mon Sep 17 00:00:00 2001
+From: Alexandre Ghiti <alexghiti@rivosinc.com>
+Date: Thu, 9 Nov 2023 09:21:28 +0100
+Subject: drivers: perf: Check find_first_bit() return value
+
+From: Alexandre Ghiti <alexghiti@rivosinc.com>
+
+commit c6e316ac05532febb0c966fa9b55f5258ed037be upstream.
+
+We must check the return value of find_first_bit() before using the
+return value as an index array since it happens to overflow the array
+and then panic:
+
+[ 107.318430] Kernel BUG [#1]
+[ 107.319434] CPU: 3 PID: 1238 Comm: kill Tainted: G E 6.6.0-rc6ubuntu-defconfig #2
+[ 107.319465] Hardware name: riscv-virtio,qemu (DT)
+[ 107.319551] epc : pmu_sbi_ovf_handler+0x3a4/0x3ae
+[ 107.319840] ra : pmu_sbi_ovf_handler+0x52/0x3ae
+[ 107.319868] epc : ffffffff80a0a77c ra : ffffffff80a0a42a sp : ffffaf83fecda350
+[ 107.319884] gp : ffffffff823961a8 tp : ffffaf8083db1dc0 t0 : ffffaf83fecda480
+[ 107.319899] t1 : ffffffff80cafe62 t2 : 000000000000ff00 s0 : ffffaf83fecda520
+[ 107.319921] s1 : ffffaf83fecda380 a0 : 00000018fca29df0 a1 : ffffffffffffffff
+[ 107.319936] a2 : 0000000001073734 a3 : 0000000000000004 a4 : 0000000000000000
+[ 107.319951] a5 : 0000000000000040 a6 : 000000001d1c8774 a7 : 0000000000504d55
+[ 107.319965] s2 : ffffffff82451f10 s3 : ffffffff82724e70 s4 : 000000000000003f
+[ 107.319980] s5 : 0000000000000011 s6 : ffffaf8083db27c0 s7 : 0000000000000000
+[ 107.319995] s8 : 0000000000000001 s9 : 00007fffb45d6558 s10: 00007fffb45d81a0
+[ 107.320009] s11: ffffaf7ffff60000 t3 : 0000000000000004 t4 : 0000000000000000
+[ 107.320023] t5 : ffffaf7f80000000 t6 : ffffaf8000000000
+[ 107.320037] status: 0000000200000100 badaddr: 0000000000000000 cause: 0000000000000003
+[ 107.320081] [<ffffffff80a0a77c>] pmu_sbi_ovf_handler+0x3a4/0x3ae
+[ 107.320112] [<ffffffff800b42d0>] handle_percpu_devid_irq+0x9e/0x1a0
+[ 107.320131] [<ffffffff800ad92c>] generic_handle_domain_irq+0x28/0x36
+[ 107.320148] [<ffffffff8065f9f8>] riscv_intc_irq+0x36/0x4e
+[ 107.320166] [<ffffffff80caf4a0>] handle_riscv_irq+0x54/0x86
+[ 107.320189] [<ffffffff80cb0036>] do_irq+0x64/0x96
+[ 107.320271] Code: 85a6 855e b097 ff7f 80e7 9220 b709 9002 4501 bbd9 (9002) 6097
+[ 107.320585] ---[ end trace 0000000000000000 ]---
+[ 107.320704] Kernel panic - not syncing: Fatal exception in interrupt
+[ 107.320775] SMP: stopping secondary CPUs
+[ 107.321219] Kernel Offset: 0x0 from 0xffffffff80000000
+[ 107.333051] ---[ end Kernel panic - not syncing: Fatal exception in interrupt ]---
+
+Fixes: 4905ec2fb7e6 ("RISC-V: Add sscofpmf extension support")
+Signed-off-by: Alexandre Ghiti <alexghiti@rivosinc.com>
+Link: https://lore.kernel.org/r/20231109082128.40777-1-alexghiti@rivosinc.com
+Cc: stable@vger.kernel.org
+Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/perf/riscv_pmu_sbi.c | 5 +++++
+ 1 file changed, 5 insertions(+)
+
+--- a/drivers/perf/riscv_pmu_sbi.c
++++ b/drivers/perf/riscv_pmu_sbi.c
+@@ -629,6 +629,11 @@ static irqreturn_t pmu_sbi_ovf_handler(i
+
+ /* Firmware counter don't support overflow yet */
+ fidx = find_first_bit(cpu_hw_evt->used_hw_ctrs, RISCV_MAX_COUNTERS);
++ if (fidx == RISCV_MAX_COUNTERS) {
++ csr_clear(CSR_SIP, BIT(riscv_pmu_irq_num));
++ return IRQ_NONE;
++ }
++
+ event = cpu_hw_evt->events[fidx];
+ if (!event) {
+ csr_clear(CSR_SIP, BIT(riscv_pmu_irq_num));
--- /dev/null
+From 471aa951bf1206d3c10d0daa67005b8e4db4ff83 Mon Sep 17 00:00:00 2001
+From: Harshit Mogalapalli <harshit.m.mogalapalli@oracle.com>
+Date: Fri, 27 Oct 2023 10:28:22 -0700
+Subject: i915/perf: Fix NULL deref bugs with drm_dbg() calls
+
+From: Harshit Mogalapalli <harshit.m.mogalapalli@oracle.com>
+
+commit 471aa951bf1206d3c10d0daa67005b8e4db4ff83 upstream.
+
+When i915 perf interface is not available dereferencing it will lead to
+NULL dereferences.
+
+As returning -ENOTSUPP is pretty clear return when perf interface is not
+available.
+
+Fixes: 2fec539112e8 ("i915/perf: Replace DRM_DEBUG with driver specific drm_dbg call")
+Suggested-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
+Signed-off-by: Harshit Mogalapalli <harshit.m.mogalapalli@oracle.com>
+Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
+Cc: <stable@vger.kernel.org> # v6.0+
+Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
+Link: https://patchwork.freedesktop.org/patch/msgid/20231027172822.2753059-1-harshit.m.mogalapalli@oracle.com
+[tursulin: added stable tag]
+(cherry picked from commit 36f27350ff745bd228ab04d7845dfbffc177a889)
+Signed-off-by: Jani Nikula <jani.nikula@intel.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/gpu/drm/i915/i915_perf.c | 15 +++------------
+ 1 file changed, 3 insertions(+), 12 deletions(-)
+
+--- a/drivers/gpu/drm/i915/i915_perf.c
++++ b/drivers/gpu/drm/i915/i915_perf.c
+@@ -4286,11 +4286,8 @@ int i915_perf_open_ioctl(struct drm_devi
+ u32 known_open_flags;
+ int ret;
+
+- if (!perf->i915) {
+- drm_dbg(&perf->i915->drm,
+- "i915 perf interface not available for this system\n");
++ if (!perf->i915)
+ return -ENOTSUPP;
+- }
+
+ known_open_flags = I915_PERF_FLAG_FD_CLOEXEC |
+ I915_PERF_FLAG_FD_NONBLOCK |
+@@ -4666,11 +4663,8 @@ int i915_perf_add_config_ioctl(struct dr
+ struct i915_oa_reg *regs;
+ int err, id;
+
+- if (!perf->i915) {
+- drm_dbg(&perf->i915->drm,
+- "i915 perf interface not available for this system\n");
++ if (!perf->i915)
+ return -ENOTSUPP;
+- }
+
+ if (!perf->metrics_kobj) {
+ drm_dbg(&perf->i915->drm,
+@@ -4832,11 +4826,8 @@ int i915_perf_remove_config_ioctl(struct
+ struct i915_oa_config *oa_config;
+ int ret;
+
+- if (!perf->i915) {
+- drm_dbg(&perf->i915->drm,
+- "i915 perf interface not available for this system\n");
++ if (!perf->i915)
+ return -ENOTSUPP;
+- }
+
+ if (i915_perf_stream_paranoid && !perfmon_capable()) {
+ drm_dbg(&perf->i915->drm,
--- /dev/null
+From 629d3698f6958ee6f8131ea324af794f973b12ac Mon Sep 17 00:00:00 2001
+From: Tao Su <tao1.su@linux.intel.com>
+Date: Thu, 14 Sep 2023 13:55:04 +0800
+Subject: KVM: x86: Clear bit12 of ICR after APIC-write VM-exit
+
+From: Tao Su <tao1.su@linux.intel.com>
+
+commit 629d3698f6958ee6f8131ea324af794f973b12ac upstream.
+
+When IPI virtualization is enabled, a WARN is triggered if bit12 of ICR
+MSR is set after APIC-write VM-exit. The reason is kvm_apic_send_ipi()
+thinks the APIC_ICR_BUSY bit should be cleared because KVM has no delay,
+but kvm_apic_write_nodecode() doesn't clear the APIC_ICR_BUSY bit.
+
+Under the x2APIC section, regarding ICR, the SDM says:
+
+ It remains readable only to aid in debugging; however, software should
+ not assume the value returned by reading the ICR is the last written
+ value.
+
+I.e. the guest is allowed to set bit 12. However, the SDM also gives KVM
+free reign to do whatever it wants with the bit, so long as KVM's behavior
+doesn't confuse userspace or break KVM's ABI.
+
+Clear bit 12 so that it reads back as '0'. This approach is safer than
+"do nothing" and is consistent with the case where IPI virtualization is
+disabled or not supported, i.e.,
+
+ handle_fastpath_set_x2apic_icr_irqoff() -> kvm_x2apic_icr_write()
+
+Opportunistically replace the TODO with a comment calling out that eating
+the write is likely faster than a conditional branch around the busy bit.
+
+Link: https://lore.kernel.org/all/ZPj6iF0Q7iynn62p@google.com/
+Fixes: 5413bcba7ed5 ("KVM: x86: Add support for vICR APIC-write VM-Exits in x2APIC mode")
+Cc: stable@vger.kernel.org
+Signed-off-by: Tao Su <tao1.su@linux.intel.com>
+Tested-by: Yi Lai <yi1.lai@intel.com>
+Reviewed-by: Chao Gao <chao.gao@intel.com>
+Link: https://lore.kernel.org/r/20230914055504.151365-1-tao1.su@linux.intel.com
+[sean: tweak changelog, replace TODO with comment, drop local "val"]
+Signed-off-by: Sean Christopherson <seanjc@google.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/lapic.c | 26 +++++++++++++-------------
+ 1 file changed, 13 insertions(+), 13 deletions(-)
+
+--- a/arch/x86/kvm/lapic.c
++++ b/arch/x86/kvm/lapic.c
+@@ -2423,22 +2423,22 @@ EXPORT_SYMBOL_GPL(kvm_lapic_set_eoi);
+ void kvm_apic_write_nodecode(struct kvm_vcpu *vcpu, u32 offset)
+ {
+ struct kvm_lapic *apic = vcpu->arch.apic;
+- u64 val;
+
+ /*
+- * ICR is a single 64-bit register when x2APIC is enabled. For legacy
+- * xAPIC, ICR writes need to go down the common (slightly slower) path
+- * to get the upper half from ICR2.
++ * ICR is a single 64-bit register when x2APIC is enabled, all others
++ * registers hold 32-bit values. For legacy xAPIC, ICR writes need to
++ * go down the common path to get the upper half from ICR2.
++ *
++ * Note, using the write helpers may incur an unnecessary write to the
++ * virtual APIC state, but KVM needs to conditionally modify the value
++ * in certain cases, e.g. to clear the ICR busy bit. The cost of extra
++ * conditional branches is likely a wash relative to the cost of the
++ * maybe-unecessary write, and both are in the noise anyways.
+ */
+- if (apic_x2apic_mode(apic) && offset == APIC_ICR) {
+- val = kvm_lapic_get_reg64(apic, APIC_ICR);
+- kvm_apic_send_ipi(apic, (u32)val, (u32)(val >> 32));
+- trace_kvm_apic_write(APIC_ICR, val);
+- } else {
+- /* TODO: optimize to just emulate side effect w/o one more write */
+- val = kvm_lapic_get_reg(apic, offset);
+- kvm_lapic_reg_write(apic, offset, (u32)val);
+- }
++ if (apic_x2apic_mode(apic) && offset == APIC_ICR)
++ kvm_x2apic_icr_write(apic, kvm_lapic_get_reg64(apic, APIC_ICR));
++ else
++ kvm_lapic_reg_write(apic, offset, kvm_lapic_get_reg(apic, offset));
+ }
+ EXPORT_SYMBOL_GPL(kvm_apic_write_nodecode);
+
--- /dev/null
+From 9cfec6d097c607e36199cf0cfbb8cf5acbd8e9b2 Mon Sep 17 00:00:00 2001
+From: Haitao Shan <hshan@google.com>
+Date: Tue, 12 Sep 2023 16:55:45 -0700
+Subject: KVM: x86: Fix lapic timer interrupt lost after loading a snapshot.
+
+From: Haitao Shan <hshan@google.com>
+
+commit 9cfec6d097c607e36199cf0cfbb8cf5acbd8e9b2 upstream.
+
+When running android emulator (which is based on QEMU 2.12) on
+certain Intel hosts with kernel version 6.3-rc1 or above, guest
+will freeze after loading a snapshot. This is almost 100%
+reproducible. By default, the android emulator will use snapshot
+to speed up the next launching of the same android guest. So
+this breaks the android emulator badly.
+
+I tested QEMU 8.0.4 from Debian 12 with an Ubuntu 22.04 guest by
+running command "loadvm" after "savevm". The same issue is
+observed. At the same time, none of our AMD platforms is impacted.
+More experiments show that loading the KVM module with
+"enable_apicv=false" can workaround it.
+
+The issue started to show up after commit 8e6ed96cdd50 ("KVM: x86:
+fire timer when it is migrated and expired, and in oneshot mode").
+However, as is pointed out by Sean Christopherson, it is introduced
+by commit 967235d32032 ("KVM: vmx: clear pending interrupts on
+KVM_SET_LAPIC"). commit 8e6ed96cdd50 ("KVM: x86: fire timer when
+it is migrated and expired, and in oneshot mode") just makes it
+easier to hit the issue.
+
+Having both commits, the oneshot lapic timer gets fired immediately
+inside the KVM_SET_LAPIC call when loading the snapshot. On Intel
+platforms with APIC virtualization and posted interrupt processing,
+this eventually leads to setting the corresponding PIR bit. However,
+the whole PIR bits get cleared later in the same KVM_SET_LAPIC call
+by apicv_post_state_restore. This leads to timer interrupt lost.
+
+The fix is to move vmx_apicv_post_state_restore to the beginning of
+the KVM_SET_LAPIC call and rename to vmx_apicv_pre_state_restore.
+What vmx_apicv_post_state_restore does is actually clearing any
+former apicv state and this behavior is more suitable to carry out
+in the beginning.
+
+Fixes: 967235d32032 ("KVM: vmx: clear pending interrupts on KVM_SET_LAPIC")
+Cc: stable@vger.kernel.org
+Suggested-by: Sean Christopherson <seanjc@google.com>
+Signed-off-by: Haitao Shan <hshan@google.com>
+Link: https://lore.kernel.org/r/20230913000215.478387-1-hshan@google.com
+Signed-off-by: Sean Christopherson <seanjc@google.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/include/asm/kvm-x86-ops.h | 1 +
+ arch/x86/include/asm/kvm_host.h | 1 +
+ arch/x86/kvm/lapic.c | 4 ++++
+ arch/x86/kvm/vmx/vmx.c | 4 ++--
+ 4 files changed, 8 insertions(+), 2 deletions(-)
+
+--- a/arch/x86/include/asm/kvm-x86-ops.h
++++ b/arch/x86/include/asm/kvm-x86-ops.h
+@@ -108,6 +108,7 @@ KVM_X86_OP_OPTIONAL(vcpu_blocking)
+ KVM_X86_OP_OPTIONAL(vcpu_unblocking)
+ KVM_X86_OP_OPTIONAL(pi_update_irte)
+ KVM_X86_OP_OPTIONAL(pi_start_assignment)
++KVM_X86_OP_OPTIONAL(apicv_pre_state_restore)
+ KVM_X86_OP_OPTIONAL(apicv_post_state_restore)
+ KVM_X86_OP_OPTIONAL_RET0(dy_apicv_has_pending_interrupt)
+ KVM_X86_OP_OPTIONAL(set_hv_timer)
+--- a/arch/x86/include/asm/kvm_host.h
++++ b/arch/x86/include/asm/kvm_host.h
+@@ -1690,6 +1690,7 @@ struct kvm_x86_ops {
+ int (*pi_update_irte)(struct kvm *kvm, unsigned int host_irq,
+ uint32_t guest_irq, bool set);
+ void (*pi_start_assignment)(struct kvm *kvm);
++ void (*apicv_pre_state_restore)(struct kvm_vcpu *vcpu);
+ void (*apicv_post_state_restore)(struct kvm_vcpu *vcpu);
+ bool (*dy_apicv_has_pending_interrupt)(struct kvm_vcpu *vcpu);
+
+--- a/arch/x86/kvm/lapic.c
++++ b/arch/x86/kvm/lapic.c
+@@ -2649,6 +2649,8 @@ void kvm_lapic_reset(struct kvm_vcpu *vc
+ u64 msr_val;
+ int i;
+
++ static_call_cond(kvm_x86_apicv_pre_state_restore)(vcpu);
++
+ if (!init_event) {
+ msr_val = APIC_DEFAULT_PHYS_BASE | MSR_IA32_APICBASE_ENABLE;
+ if (kvm_vcpu_is_reset_bsp(vcpu))
+@@ -2960,6 +2962,8 @@ int kvm_apic_set_state(struct kvm_vcpu *
+ struct kvm_lapic *apic = vcpu->arch.apic;
+ int r;
+
++ static_call_cond(kvm_x86_apicv_pre_state_restore)(vcpu);
++
+ kvm_lapic_set_base(vcpu, vcpu->arch.apic_base);
+ /* set SPIV separately to get count of SW disabled APICs right */
+ apic_set_spiv(apic, *((u32 *)(s->regs + APIC_SPIV)));
+--- a/arch/x86/kvm/vmx/vmx.c
++++ b/arch/x86/kvm/vmx/vmx.c
+@@ -6909,7 +6909,7 @@ static void vmx_load_eoi_exitmap(struct
+ vmcs_write64(EOI_EXIT_BITMAP3, eoi_exit_bitmap[3]);
+ }
+
+-static void vmx_apicv_post_state_restore(struct kvm_vcpu *vcpu)
++static void vmx_apicv_pre_state_restore(struct kvm_vcpu *vcpu)
+ {
+ struct vcpu_vmx *vmx = to_vmx(vcpu);
+
+@@ -8275,7 +8275,7 @@ static struct kvm_x86_ops vmx_x86_ops __
+ .set_apic_access_page_addr = vmx_set_apic_access_page_addr,
+ .refresh_apicv_exec_ctrl = vmx_refresh_apicv_exec_ctrl,
+ .load_eoi_exitmap = vmx_load_eoi_exitmap,
+- .apicv_post_state_restore = vmx_apicv_post_state_restore,
++ .apicv_pre_state_restore = vmx_apicv_pre_state_restore,
+ .required_apicv_inhibits = VMX_REQUIRED_APICV_INHIBITS,
+ .hwapic_irr_update = vmx_hwapic_irr_update,
+ .hwapic_isr_update = vmx_hwapic_isr_update,
--- /dev/null
+From d6800af51c76b6dae20e6023bbdc9b3da3ab5121 Mon Sep 17 00:00:00 2001
+From: Nicolas Saenz Julienne <nsaenz@amazon.com>
+Date: Tue, 17 Oct 2023 15:51:02 +0000
+Subject: KVM: x86: hyper-v: Don't auto-enable stimer on write from user-space
+
+From: Nicolas Saenz Julienne <nsaenz@amazon.com>
+
+commit d6800af51c76b6dae20e6023bbdc9b3da3ab5121 upstream.
+
+Don't apply the stimer's counter side effects when modifying its
+value from user-space, as this may trigger spurious interrupts.
+
+For example:
+ - The stimer is configured in auto-enable mode.
+ - The stimer's count is set and the timer enabled.
+ - The stimer expires, an interrupt is injected.
+ - The VM is live migrated.
+ - The stimer config and count are deserialized, auto-enable is ON, the
+ stimer is re-enabled.
+ - The stimer expires right away, and injects an unwarranted interrupt.
+
+Cc: stable@vger.kernel.org
+Fixes: 1f4b34f825e8 ("kvm/x86: Hyper-V SynIC timers")
+Signed-off-by: Nicolas Saenz Julienne <nsaenz@amazon.com>
+Reviewed-by: Vitaly Kuznetsov <vkuznets@redhat.com>
+Link: https://lore.kernel.org/r/20231017155101.40677-1-nsaenz@amazon.com
+Signed-off-by: Sean Christopherson <seanjc@google.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/hyperv.c | 10 ++++++----
+ 1 file changed, 6 insertions(+), 4 deletions(-)
+
+--- a/arch/x86/kvm/hyperv.c
++++ b/arch/x86/kvm/hyperv.c
+@@ -727,10 +727,12 @@ static int stimer_set_count(struct kvm_v
+
+ stimer_cleanup(stimer);
+ stimer->count = count;
+- if (stimer->count == 0)
+- stimer->config.enable = 0;
+- else if (stimer->config.auto_enable)
+- stimer->config.enable = 1;
++ if (!host) {
++ if (stimer->count == 0)
++ stimer->config.enable = 0;
++ else if (stimer->config.auto_enable)
++ stimer->config.enable = 1;
++ }
+
+ if (stimer->config.enable)
+ stimer_mark_pending(stimer, false);
--- /dev/null
+From 2770d4722036d6bd24bcb78e9cd7f6e572077d03 Mon Sep 17 00:00:00 2001
+From: "Maciej S. Szmigiero" <maciej.szmigiero@oracle.com>
+Date: Thu, 19 Oct 2023 18:06:57 +0200
+Subject: KVM: x86: Ignore MSR_AMD64_TW_CFG access
+
+From: Maciej S. Szmigiero <maciej.szmigiero@oracle.com>
+
+commit 2770d4722036d6bd24bcb78e9cd7f6e572077d03 upstream.
+
+Hyper-V enabled Windows Server 2022 KVM VM cannot be started on Zen1 Ryzen
+since it crashes at boot with SYSTEM_THREAD_EXCEPTION_NOT_HANDLED +
+STATUS_PRIVILEGED_INSTRUCTION (in other words, because of an unexpected #GP
+in the guest kernel).
+
+This is because Windows tries to set bit 8 in MSR_AMD64_TW_CFG and can't
+handle receiving a #GP when doing so.
+
+Give this MSR the same treatment that commit 2e32b7190641
+("x86, kvm: Add MSR_AMD64_BU_CFG2 to the list of ignored MSRs") gave
+MSR_AMD64_BU_CFG2 under justification that this MSR is baremetal-relevant
+only.
+Although apparently it was then needed for Linux guests, not Windows as in
+this case.
+
+With this change, the aforementioned guest setup is able to finish booting
+successfully.
+
+This issue can be reproduced either on a Summit Ridge Ryzen (with
+just "-cpu host") or on a Naples EPYC (with "-cpu host,stepping=1" since
+EPYC is ordinarily stepping 2).
+
+Alternatively, userspace could solve the problem by using MSR filters, but
+forcing every userspace to define a filter isn't very friendly and doesn't
+add much, if any, value. The only potential hiccup is if one of these
+"baremetal-only" MSRs ever requires actual emulation and/or has F/M/S
+specific behavior. But if that happens, then KVM can still punt *that*
+handling to userspace since userspace MSR filters "win" over KVM's default
+handling.
+
+Signed-off-by: Maciej S. Szmigiero <maciej.szmigiero@oracle.com>
+Cc: stable@vger.kernel.org
+Link: https://lore.kernel.org/r/1ce85d9c7c9e9632393816cf19c902e0a3f411f1.1697731406.git.maciej.szmigiero@oracle.com
+[sean: call out MSR filtering alternative]
+Signed-off-by: Sean Christopherson <seanjc@google.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/include/asm/msr-index.h | 1 +
+ arch/x86/kvm/x86.c | 2 ++
+ 2 files changed, 3 insertions(+)
+
+--- a/arch/x86/include/asm/msr-index.h
++++ b/arch/x86/include/asm/msr-index.h
+@@ -553,6 +553,7 @@
+ #define MSR_AMD64_CPUID_FN_1 0xc0011004
+ #define MSR_AMD64_LS_CFG 0xc0011020
+ #define MSR_AMD64_DC_CFG 0xc0011022
++#define MSR_AMD64_TW_CFG 0xc0011023
+
+ #define MSR_AMD64_DE_CFG 0xc0011029
+ #define MSR_AMD64_DE_CFG_LFENCE_SERIALIZE_BIT 1
+--- a/arch/x86/kvm/x86.c
++++ b/arch/x86/kvm/x86.c
+@@ -3643,6 +3643,7 @@ int kvm_set_msr_common(struct kvm_vcpu *
+ case MSR_AMD64_PATCH_LOADER:
+ case MSR_AMD64_BU_CFG2:
+ case MSR_AMD64_DC_CFG:
++ case MSR_AMD64_TW_CFG:
+ case MSR_F15H_EX_CFG:
+ break;
+
+@@ -4067,6 +4068,7 @@ int kvm_get_msr_common(struct kvm_vcpu *
+ case MSR_AMD64_BU_CFG2:
+ case MSR_IA32_PERF_CTL:
+ case MSR_AMD64_DC_CFG:
++ case MSR_AMD64_TW_CFG:
+ case MSR_F15H_EX_CFG:
+ /*
+ * Intel Sandy Bridge CPUs must support the RAPL (running average power
--- /dev/null
+From 5e538fce33589da6d7cb2de1445b84d3a8a692f7 Mon Sep 17 00:00:00 2001
+From: Vikash Garodia <quic_vgarodia@quicinc.com>
+Date: Thu, 10 Aug 2023 07:55:01 +0530
+Subject: media: venus: hfi: add checks to perform sanity on queue pointers
+
+From: Vikash Garodia <quic_vgarodia@quicinc.com>
+
+commit 5e538fce33589da6d7cb2de1445b84d3a8a692f7 upstream.
+
+Read and write pointers are used to track the packet index in the memory
+shared between video driver and firmware. There is a possibility of OOB
+access if the read or write pointer goes beyond the queue memory size.
+Add checks for the read and write pointer to avoid OOB access.
+
+Cc: stable@vger.kernel.org
+Fixes: d96d3f30c0f2 ("[media] media: venus: hfi: add Venus HFI files")
+Signed-off-by: Vikash Garodia <quic_vgarodia@quicinc.com>
+Signed-off-by: Stanimir Varbanov <stanimir.k.varbanov@gmail.com>
+Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/media/platform/qcom/venus/hfi_venus.c | 10 ++++++++++
+ 1 file changed, 10 insertions(+)
+
+--- a/drivers/media/platform/qcom/venus/hfi_venus.c
++++ b/drivers/media/platform/qcom/venus/hfi_venus.c
+@@ -205,6 +205,11 @@ static int venus_write_queue(struct venu
+
+ new_wr_idx = wr_idx + dwords;
+ wr_ptr = (u32 *)(queue->qmem.kva + (wr_idx << 2));
++
++ if (wr_ptr < (u32 *)queue->qmem.kva ||
++ wr_ptr > (u32 *)(queue->qmem.kva + queue->qmem.size - sizeof(*wr_ptr)))
++ return -EINVAL;
++
+ if (new_wr_idx < qsize) {
+ memcpy(wr_ptr, packet, dwords << 2);
+ } else {
+@@ -272,6 +277,11 @@ static int venus_read_queue(struct venus
+ }
+
+ rd_ptr = (u32 *)(queue->qmem.kva + (rd_idx << 2));
++
++ if (rd_ptr < (u32 *)queue->qmem.kva ||
++ rd_ptr > (u32 *)(queue->qmem.kva + queue->qmem.size - sizeof(*rd_ptr)))
++ return -EINVAL;
++
+ dwords = *rd_ptr >> 2;
+ if (!dwords)
+ return -EINVAL;
--- /dev/null
+From 85dd3af64965c1c0eb7373b340a1b1f7773586b0 Mon Sep 17 00:00:00 2001
+From: Victor Shih <victor.shih@genesyslogic.com.tw>
+Date: Tue, 7 Nov 2023 17:57:41 +0800
+Subject: mmc: sdhci-pci-gli: GL9755: Mask the replay timer timeout of AER
+
+From: Victor Shih <victor.shih@genesyslogic.com.tw>
+
+commit 85dd3af64965c1c0eb7373b340a1b1f7773586b0 upstream.
+
+Due to a flaw in the hardware design, the GL9755 replay timer frequently
+times out when ASPM is enabled. As a result, the warning messages will
+often appear in the system log when the system accesses the GL9755
+PCI config. Therefore, the replay timer timeout must be masked.
+
+Fixes: 36ed2fd32b2c ("mmc: sdhci-pci-gli: A workaround to allow GL9755 to enter ASPM L1.2")
+Signed-off-by: Victor Shih <victor.shih@genesyslogic.com.tw>
+Acked-by: Adrian Hunter <adrian.hunter@intel.com>
+Acked-by: Kai-Heng Feng <kai.heng.geng@canonical.com>
+Cc: stable@vger.kernel.org
+Link: https://lore.kernel.org/r/20231107095741.8832-3-victorshihgli@gmail.com
+Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/mmc/host/sdhci-pci-gli.c | 8 ++++++++
+ 1 file changed, 8 insertions(+)
+
+--- a/drivers/mmc/host/sdhci-pci-gli.c
++++ b/drivers/mmc/host/sdhci-pci-gli.c
+@@ -149,6 +149,9 @@
+ #define PCI_GLI_9755_PM_CTRL 0xFC
+ #define PCI_GLI_9755_PM_STATE GENMASK(1, 0)
+
++#define PCI_GLI_9755_CORRERR_MASK 0x214
++#define PCI_GLI_9755_CORRERR_MASK_REPLAY_TIMER_TIMEOUT BIT(12)
++
+ #define SDHCI_GLI_9767_GM_BURST_SIZE 0x510
+ #define SDHCI_GLI_9767_GM_BURST_SIZE_AXI_ALWAYS_SET BIT(8)
+
+@@ -756,6 +759,11 @@ static void gl9755_hw_setting(struct sdh
+ value &= ~PCI_GLI_9755_PM_STATE;
+ pci_write_config_dword(pdev, PCI_GLI_9755_PM_CTRL, value);
+
++ /* mask the replay timer timeout of AER */
++ pci_read_config_dword(pdev, PCI_GLI_9755_CORRERR_MASK, &value);
++ value |= PCI_GLI_9755_CORRERR_MASK_REPLAY_TIMER_TIMEOUT;
++ pci_write_config_dword(pdev, PCI_GLI_9755_CORRERR_MASK, value);
++
+ gl9755_wt_off(pdev);
+ }
+
--- /dev/null
+From 15c7ef7341a2e54cfa12ac502c65d6fd2cce2b62 Mon Sep 17 00:00:00 2001
+From: Ilkka Koskinen <ilkka@os.amperecomputing.com>
+Date: Thu, 2 Nov 2023 17:16:54 -0700
+Subject: perf: arm_cspmu: Reject events meant for other PMUs
+
+From: Ilkka Koskinen <ilkka@os.amperecomputing.com>
+
+commit 15c7ef7341a2e54cfa12ac502c65d6fd2cce2b62 upstream.
+
+Coresight PMU driver didn't reject events meant for other PMUs.
+This caused some of the Core PMU events to disappear from
+the output of "perf list". In addition, trying to run e.g.
+
+ $ perf stat -e r2 sleep 1
+
+made the Coresight PMU driver handle the event instead of letting the
+Core PMU driver deal with it.
+
+Cc: stable@vger.kernel.org
+Fixes: e37dfd65731d ("perf: arm_cspmu: Add support for ARM CoreSight PMU driver")
+Signed-off-by: Ilkka Koskinen <ilkka@os.amperecomputing.com>
+Acked-by: Will Deacon <will@kernel.org>
+Reviewed-by: Besar Wicaksono <bwicaksono@nvidia.com>
+Acked-by: Mark Rutland <mark.rutland@arm.com>
+Reviewed-by: Anshuman Khandual <anshuman.khandual@arm.com>
+Link: https://lore.kernel.org/r/20231103001654.35565-1-ilkka@os.amperecomputing.com
+Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/perf/arm_cspmu/arm_cspmu.c | 3 +++
+ 1 file changed, 3 insertions(+)
+
+--- a/drivers/perf/arm_cspmu/arm_cspmu.c
++++ b/drivers/perf/arm_cspmu/arm_cspmu.c
+@@ -635,6 +635,9 @@ static int arm_cspmu_event_init(struct p
+
+ cspmu = to_arm_cspmu(event->pmu);
+
++ if (event->attr.type != event->pmu->type)
++ return -ENOENT;
++
+ /*
+ * Following other "uncore" PMUs, we do not support sampling mode or
+ * attach to a task (per-process mode).
--- /dev/null
+From 889c58b3155ff4c8e8671c95daef63d6fabbb6b1 Mon Sep 17 00:00:00 2001
+From: Peter Zijlstra <peterz@infradead.org>
+Date: Fri, 9 Jun 2023 12:34:46 +0200
+Subject: perf/core: Fix cpuctx refcounting
+
+From: Peter Zijlstra <peterz@infradead.org>
+
+commit 889c58b3155ff4c8e8671c95daef63d6fabbb6b1 upstream.
+
+Audit of the refcounting turned up that perf_pmu_migrate_context()
+fails to migrate the ctx refcount.
+
+Fixes: bd2756811766 ("perf: Rewrite core context handling")
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Link: https://lkml.kernel.org/r/20230612093539.085862001@infradead.org
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/linux/perf_event.h | 13 ++++++++-----
+ kernel/events/core.c | 17 +++++++++++++++++
+ 2 files changed, 25 insertions(+), 5 deletions(-)
+
+--- a/include/linux/perf_event.h
++++ b/include/linux/perf_event.h
+@@ -843,11 +843,11 @@ struct perf_event {
+ };
+
+ /*
+- * ,-----------------------[1:n]----------------------.
+- * V V
+- * perf_event_context <-[1:n]-> perf_event_pmu_context <--- perf_event
+- * ^ ^ | |
+- * `--------[1:n]---------' `-[n:1]-> pmu <-[1:n]-'
++ * ,-----------------------[1:n]------------------------.
++ * V V
++ * perf_event_context <-[1:n]-> perf_event_pmu_context <-[1:n]- perf_event
++ * | |
++ * `--[n:1]-> pmu <-[1:n]--'
+ *
+ *
+ * struct perf_event_pmu_context lifetime is refcount based and RCU freed
+@@ -865,6 +865,9 @@ struct perf_event {
+ * ctx->mutex pinning the configuration. Since we hold a reference on
+ * group_leader (through the filedesc) it can't go away, therefore it's
+ * associated pmu_ctx must exist and cannot change due to ctx->mutex.
++ *
++ * perf_event holds a refcount on perf_event_context
++ * perf_event holds a refcount on perf_event_pmu_context
+ */
+ struct perf_event_pmu_context {
+ struct pmu *pmu;
+--- a/kernel/events/core.c
++++ b/kernel/events/core.c
+@@ -4816,6 +4816,11 @@ find_get_pmu_context(struct pmu *pmu, st
+ void *task_ctx_data = NULL;
+
+ if (!ctx->task) {
++ /*
++ * perf_pmu_migrate_context() / __perf_pmu_install_event()
++ * relies on the fact that find_get_pmu_context() cannot fail
++ * for CPU contexts.
++ */
+ struct perf_cpu_pmu_context *cpc;
+
+ cpc = per_cpu_ptr(pmu->cpu_pmu_context, event->cpu);
+@@ -12888,6 +12893,9 @@ static void __perf_pmu_install_event(str
+ int cpu, struct perf_event *event)
+ {
+ struct perf_event_pmu_context *epc;
++ struct perf_event_context *old_ctx = event->ctx;
++
++ get_ctx(ctx); /* normally find_get_context() */
+
+ event->cpu = cpu;
+ epc = find_get_pmu_context(pmu, ctx, event);
+@@ -12896,6 +12904,11 @@ static void __perf_pmu_install_event(str
+ if (event->state >= PERF_EVENT_STATE_OFF)
+ event->state = PERF_EVENT_STATE_INACTIVE;
+ perf_install_in_context(ctx, event, cpu);
++
++ /*
++ * Now that event->ctx is updated and visible, put the old ctx.
++ */
++ put_ctx(old_ctx);
+ }
+
+ static void __perf_pmu_install(struct perf_event_context *ctx,
+@@ -12934,6 +12947,10 @@ void perf_pmu_migrate_context(struct pmu
+ struct perf_event_context *src_ctx, *dst_ctx;
+ LIST_HEAD(events);
+
++ /*
++ * Since per-cpu context is persistent, no need to grab an extra
++ * reference.
++ */
+ src_ctx = &per_cpu_ptr(&perf_cpu_context, src_cpu)->ctx;
+ dst_ctx = &per_cpu_ptr(&perf_cpu_context, dst_cpu)->ctx;
+
--- /dev/null
+From f2d87895cbc4af80649850dcf5da36de6b2ed3dd Mon Sep 17 00:00:00 2001
+From: Adrian Hunter <adrian.hunter@intel.com>
+Date: Thu, 28 Sep 2023 10:29:53 +0300
+Subject: perf intel-pt: Fix async branch flags
+
+From: Adrian Hunter <adrian.hunter@intel.com>
+
+commit f2d87895cbc4af80649850dcf5da36de6b2ed3dd upstream.
+
+Ensure PERF_IP_FLAG_ASYNC is set always for asynchronous branches (i.e.
+interrupts etc).
+
+Fixes: 90e457f7be08 ("perf tools: Add Intel PT support")
+Cc: stable@vger.kernel.org
+Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
+Acked-by: Namhyung Kim <namhyung@kernel.org>
+Link: https://lore.kernel.org/r/20230928072953.19369-1-adrian.hunter@intel.com
+Signed-off-by: Namhyung Kim <namhyung@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ tools/perf/util/intel-pt.c | 2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/tools/perf/util/intel-pt.c
++++ b/tools/perf/util/intel-pt.c
+@@ -1512,9 +1512,11 @@ static void intel_pt_sample_flags(struct
+ } else if (ptq->state->flags & INTEL_PT_ASYNC) {
+ if (!ptq->state->to_ip)
+ ptq->flags = PERF_IP_FLAG_BRANCH |
++ PERF_IP_FLAG_ASYNC |
+ PERF_IP_FLAG_TRACE_END;
+ else if (ptq->state->from_nr && !ptq->state->to_nr)
+ ptq->flags = PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_CALL |
++ PERF_IP_FLAG_ASYNC |
+ PERF_IP_FLAG_VMEXIT;
+ else
+ ptq->flags = PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_CALL |
--- /dev/null
+From ea142e590aec55ba40c5affb4d49e68c713c63dc Mon Sep 17 00:00:00 2001
+From: Nicholas Piggin <npiggin@gmail.com>
+Date: Thu, 19 Oct 2023 01:34:23 +1000
+Subject: powerpc/perf: Fix disabling BHRB and instruction sampling
+
+From: Nicholas Piggin <npiggin@gmail.com>
+
+commit ea142e590aec55ba40c5affb4d49e68c713c63dc upstream.
+
+When the PMU is disabled, MMCRA is not updated to disable BHRB and
+instruction sampling. This can lead to those features remaining enabled,
+which can slow down a real or emulated CPU.
+
+Fixes: 1cade527f6e9 ("powerpc/perf: BHRB control to disable BHRB logic when not used")
+Cc: stable@vger.kernel.org # v5.9+
+Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
+Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
+Link: https://msgid.link/20231018153423.298373-1-npiggin@gmail.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/powerpc/perf/core-book3s.c | 5 ++---
+ 1 file changed, 2 insertions(+), 3 deletions(-)
+
+--- a/arch/powerpc/perf/core-book3s.c
++++ b/arch/powerpc/perf/core-book3s.c
+@@ -1371,8 +1371,7 @@ static void power_pmu_disable(struct pmu
+ /*
+ * Disable instruction sampling if it was enabled
+ */
+- if (cpuhw->mmcr.mmcra & MMCRA_SAMPLE_ENABLE)
+- val &= ~MMCRA_SAMPLE_ENABLE;
++ val &= ~MMCRA_SAMPLE_ENABLE;
+
+ /* Disable BHRB via mmcra (BHRBRD) for p10 */
+ if (ppmu->flags & PPMU_ARCH_31)
+@@ -1383,7 +1382,7 @@ static void power_pmu_disable(struct pmu
+ * instruction sampling or BHRB.
+ */
+ if (val != mmcra) {
+- mtspr(SPRN_MMCRA, mmcra);
++ mtspr(SPRN_MMCRA, val);
+ mb();
+ isync();
+ }
--- /dev/null
+From 381fdb73d1e2a48244de7260550e453d1003bb8e Mon Sep 17 00:00:00 2001
+From: Kees Cook <keescook@chromium.org>
+Date: Fri, 6 Oct 2023 21:09:28 -0700
+Subject: randstruct: Fix gcc-plugin performance mode to stay in group
+
+From: Kees Cook <keescook@chromium.org>
+
+commit 381fdb73d1e2a48244de7260550e453d1003bb8e upstream.
+
+The performance mode of the gcc-plugin randstruct was shuffling struct
+members outside of the cache-line groups. Limit the range to the
+specified group indexes.
+
+Cc: linux-hardening@vger.kernel.org
+Cc: stable@vger.kernel.org
+Reported-by: Lukas Loidolt <e1634039@student.tuwien.ac.at>
+Closes: https://lore.kernel.org/all/f3ca77f0-e414-4065-83a5-ae4c4d25545d@student.tuwien.ac.at
+Fixes: 313dd1b62921 ("gcc-plugins: Add the randstruct plugin")
+Signed-off-by: Kees Cook <keescook@chromium.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ scripts/gcc-plugins/randomize_layout_plugin.c | 11 ++++++++---
+ 1 file changed, 8 insertions(+), 3 deletions(-)
+
+--- a/scripts/gcc-plugins/randomize_layout_plugin.c
++++ b/scripts/gcc-plugins/randomize_layout_plugin.c
+@@ -191,12 +191,14 @@ static void partition_struct(tree *field
+
+ static void performance_shuffle(tree *newtree, unsigned long length, ranctx *prng_state)
+ {
+- unsigned long i, x;
++ unsigned long i, x, index;
+ struct partition_group size_group[length];
+ unsigned long num_groups = 0;
+ unsigned long randnum;
+
+ partition_struct(newtree, length, (struct partition_group *)&size_group, &num_groups);
++
++ /* FIXME: this group shuffle is currently a no-op. */
+ for (i = num_groups - 1; i > 0; i--) {
+ struct partition_group tmp;
+ randnum = ranval(prng_state) % (i + 1);
+@@ -206,11 +208,14 @@ static void performance_shuffle(tree *ne
+ }
+
+ for (x = 0; x < num_groups; x++) {
+- for (i = size_group[x].start + size_group[x].length - 1; i > size_group[x].start; i--) {
++ for (index = size_group[x].length - 1; index > 0; index--) {
+ tree tmp;
++
++ i = size_group[x].start + index;
+ if (DECL_BIT_FIELD_TYPE(newtree[i]))
+ continue;
+- randnum = ranval(prng_state) % (i + 1);
++ randnum = ranval(prng_state) % (index + 1);
++ randnum += size_group[x].start;
+ // we could handle this case differently if desired
+ if (DECL_BIT_FIELD_TYPE(newtree[randnum]))
+ continue;
--- /dev/null
+From 8e3ed9e786511ad800c33605ed904b9de49323cf Mon Sep 17 00:00:00 2001
+From: Chandrakanth patil <chandrakanth.patil@broadcom.com>
+Date: Tue, 3 Oct 2023 16:30:18 +0530
+Subject: scsi: megaraid_sas: Increase register read retry rount from 3 to 30 for selected registers
+
+From: Chandrakanth patil <chandrakanth.patil@broadcom.com>
+
+commit 8e3ed9e786511ad800c33605ed904b9de49323cf upstream.
+
+In BMC environments with concurrent access to multiple registers, certain
+registers occasionally yield a value of 0 even after 3 retries due to
+hardware errata. As a fix, we have extended the retry count from 3 to 30.
+
+The same errata applies to the mpt3sas driver, and a similar patch has
+been accepted. Please find more details in the mpt3sas patch reference
+link.
+
+Link: https://lore.kernel.org/r/20230829090020.5417-2-ranjan.kumar@broadcom.com
+Fixes: 272652fcbf1a ("scsi: megaraid_sas: add retry logic in megasas_readl")
+Cc: stable@vger.kernel.org
+Signed-off-by: Chandrakanth patil <chandrakanth.patil@broadcom.com>
+Signed-off-by: Sumit Saxena <sumit.saxena@broadcom.com>
+Link: https://lore.kernel.org/r/20231003110021.168862-2-chandrakanth.patil@broadcom.com
+Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/scsi/megaraid/megaraid_sas_base.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/drivers/scsi/megaraid/megaraid_sas_base.c
++++ b/drivers/scsi/megaraid/megaraid_sas_base.c
+@@ -263,13 +263,13 @@ u32 megasas_readl(struct megasas_instanc
+ * Fusion registers could intermittently return all zeroes.
+ * This behavior is transient in nature and subsequent reads will
+ * return valid value. As a workaround in driver, retry readl for
+- * upto three times until a non-zero value is read.
++ * up to thirty times until a non-zero value is read.
+ */
+ if (instance->adapter_type == AERO_SERIES) {
+ do {
+ ret_val = readl(addr);
+ i++;
+- } while (ret_val == 0 && i < 3);
++ } while (ret_val == 0 && i < 30);
+ return ret_val;
+ } else {
+ return readl(addr);
--- /dev/null
+From 3c978492c333f0c08248a8d51cecbe5eb5f617c9 Mon Sep 17 00:00:00 2001
+From: Ranjan Kumar <ranjan.kumar@broadcom.com>
+Date: Fri, 20 Oct 2023 16:28:49 +0530
+Subject: scsi: mpt3sas: Fix loop logic
+
+From: Ranjan Kumar <ranjan.kumar@broadcom.com>
+
+commit 3c978492c333f0c08248a8d51cecbe5eb5f617c9 upstream.
+
+The retry loop continues to iterate until the count reaches 30, even after
+receiving the correct value. Exit loop when a non-zero value is read.
+
+Fixes: 4ca10f3e3174 ("scsi: mpt3sas: Perform additional retries if doorbell read returns 0")
+Cc: stable@vger.kernel.org
+Signed-off-by: Ranjan Kumar <ranjan.kumar@broadcom.com>
+Link: https://lore.kernel.org/r/20231020105849.6350-1-ranjan.kumar@broadcom.com
+Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/scsi/mpt3sas/mpt3sas_base.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/drivers/scsi/mpt3sas/mpt3sas_base.c
++++ b/drivers/scsi/mpt3sas/mpt3sas_base.c
+@@ -223,8 +223,8 @@ _base_readl_ext_retry(const volatile voi
+
+ for (i = 0 ; i < 30 ; i++) {
+ ret_val = readl(addr);
+- if (ret_val == 0)
+- continue;
++ if (ret_val != 0)
++ break;
+ }
+
+ return ret_val;
--- /dev/null
+From 19597cad64d608aa8ac2f8aef50a50187a565223 Mon Sep 17 00:00:00 2001
+From: Quinn Tran <qutran@marvell.com>
+Date: Mon, 30 Oct 2023 12:19:12 +0530
+Subject: scsi: qla2xxx: Fix system crash due to bad pointer access
+
+From: Quinn Tran <qutran@marvell.com>
+
+commit 19597cad64d608aa8ac2f8aef50a50187a565223 upstream.
+
+User experiences system crash when running AER error injection. The
+perturbation causes the abort-all-I/O path to trigger. The driver assumes
+all I/O on this path is FCP only. If there is both NVMe & FCP traffic, a
+system crash happens. Add additional check to see if I/O is FCP or not
+before access.
+
+PID: 999019 TASK: ff35d769f24722c0 CPU: 53 COMMAND: "kworker/53:1"
+ 0 [ff3f78b964847b58] machine_kexec at ffffffffae86973d
+ 1 [ff3f78b964847ba8] __crash_kexec at ffffffffae9be29d
+ 2 [ff3f78b964847c70] crash_kexec at ffffffffae9bf528
+ 3 [ff3f78b964847c78] oops_end at ffffffffae8282ab
+ 4 [ff3f78b964847c98] exc_page_fault at ffffffffaf2da502
+ 5 [ff3f78b964847cc0] asm_exc_page_fault at ffffffffaf400b62
+ [exception RIP: qla2x00_abort_srb+444]
+ RIP: ffffffffc07b5f8c RSP: ff3f78b964847d78 RFLAGS: 00010046
+ RAX: 0000000000000282 RBX: ff35d74a0195a200 RCX: ff35d76886fd03a0
+ RDX: 0000000000000001 RSI: ffffffffc07c5ec8 RDI: ff35d74a0195a200
+ RBP: ff35d76913d22080 R8: ff35d7694d103200 R9: ff35d7694d103200
+ R10: 0000000100000000 R11: ffffffffb05d6630 R12: 0000000000010000
+ R13: ff3f78b964847df8 R14: ff35d768d8754000 R15: ff35d768877248e0
+ ORIG_RAX: ffffffffffffffff CS: 0010 SS: 0018
+ 6 [ff3f78b964847d70] qla2x00_abort_srb at ffffffffc07b5f84 [qla2xxx]
+ 7 [ff3f78b964847de0] __qla2x00_abort_all_cmds at ffffffffc07b6238 [qla2xxx]
+ 8 [ff3f78b964847e38] qla2x00_abort_all_cmds at ffffffffc07ba635 [qla2xxx]
+ 9 [ff3f78b964847e58] qla2x00_terminate_rport_io at ffffffffc08145eb [qla2xxx]
+10 [ff3f78b964847e70] fc_terminate_rport_io at ffffffffc045987e [scsi_transport_fc]
+11 [ff3f78b964847e88] process_one_work at ffffffffae914f15
+12 [ff3f78b964847ed0] worker_thread at ffffffffae9154c0
+13 [ff3f78b964847f10] kthread at ffffffffae91c456
+14 [ff3f78b964847f50] ret_from_fork at ffffffffae8036ef
+
+Cc: stable@vger.kernel.org
+Fixes: f45bca8c5052 ("scsi: qla2xxx: Fix double scsi_done for abort path")
+Signed-off-by: Quinn Tran <qutran@marvell.com>
+Signed-off-by: Nilesh Javali <njavali@marvell.com>
+Link: https://lore.kernel.org/r/20231030064912.37912-1-njavali@marvell.com
+Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/scsi/qla2xxx/qla_os.c | 12 ++++++++++--
+ 1 file changed, 10 insertions(+), 2 deletions(-)
+
+--- a/drivers/scsi/qla2xxx/qla_os.c
++++ b/drivers/scsi/qla2xxx/qla_os.c
+@@ -1835,8 +1835,16 @@ static void qla2x00_abort_srb(struct qla
+ }
+
+ spin_lock_irqsave(qp->qp_lock_ptr, *flags);
+- if (ret_cmd && blk_mq_request_started(scsi_cmd_to_rq(cmd)))
+- sp->done(sp, res);
++ switch (sp->type) {
++ case SRB_SCSI_CMD:
++ if (ret_cmd && blk_mq_request_started(scsi_cmd_to_rq(cmd)))
++ sp->done(sp, res);
++ break;
++ default:
++ if (ret_cmd)
++ sp->done(sp, res);
++ break;
++ }
+ } else {
+ sp->done(sp, res);
+ }
--- /dev/null
+From 27900d7119c464b43cd9eac69c85884d17bae240 Mon Sep 17 00:00:00 2001
+From: Peter Wang <peter.wang@mediatek.com>
+Date: Mon, 6 Nov 2023 15:51:17 +0800
+Subject: scsi: ufs: core: Fix racing issue between ufshcd_mcq_abort() and ISR
+
+From: Peter Wang <peter.wang@mediatek.com>
+
+commit 27900d7119c464b43cd9eac69c85884d17bae240 upstream.
+
+If command timeout happens and cq complete IRQ is raised at the same time,
+ufshcd_mcq_abort clears lprb->cmd and a NULL pointer deref happens in the
+ISR. Error log:
+
+ufshcd_abort: Device abort task at tag 18
+Unable to handle kernel NULL pointer dereference at virtual address
+0000000000000108
+pc : [0xffffffe27ef867ac] scsi_dma_unmap+0xc/0x44
+lr : [0xffffffe27f1b898c] ufshcd_release_scsi_cmd+0x24/0x114
+
+Fixes: f1304d442077 ("scsi: ufs: mcq: Added ufshcd_mcq_abort()")
+Cc: stable@vger.kernel.org
+Signed-off-by: Peter Wang <peter.wang@mediatek.com>
+Link: https://lore.kernel.org/r/20231106075117.8995-1-peter.wang@mediatek.com
+Reviewed-by: Bart Van Assche <bvanassche@acm.org>
+Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/ufs/core/ufs-mcq.c | 3 +++
+ 1 file changed, 3 insertions(+)
+
+--- a/drivers/ufs/core/ufs-mcq.c
++++ b/drivers/ufs/core/ufs-mcq.c
+@@ -632,6 +632,7 @@ int ufshcd_mcq_abort(struct scsi_cmnd *c
+ int tag = scsi_cmd_to_rq(cmd)->tag;
+ struct ufshcd_lrb *lrbp = &hba->lrb[tag];
+ struct ufs_hw_queue *hwq;
++ unsigned long flags;
+ int err = FAILED;
+
+ if (!ufshcd_cmd_inflight(lrbp->cmd)) {
+@@ -672,8 +673,10 @@ int ufshcd_mcq_abort(struct scsi_cmnd *c
+ }
+
+ err = SUCCESS;
++ spin_lock_irqsave(&hwq->cq_lock, flags);
+ if (ufshcd_cmd_inflight(lrbp->cmd))
+ ufshcd_release_scsi_cmd(hba, lrbp);
++ spin_unlock_irqrestore(&hwq->cq_lock, flags);
+
+ out:
+ return err;
--- /dev/null
+From fc88ca19ad0989dc0e4d4b126d5d0ba91f6cb616 Mon Sep 17 00:00:00 2001
+From: Manivannan Sadhasivam <manivannan.sadhasivam@linaro.org>
+Date: Fri, 8 Sep 2023 20:23:28 +0530
+Subject: scsi: ufs: qcom: Update PHY settings only when scaling to higher gears
+
+From: Manivannan Sadhasivam <manivannan.sadhasivam@linaro.org>
+
+commit fc88ca19ad0989dc0e4d4b126d5d0ba91f6cb616 upstream.
+
+The "hs_gear" variable is used to program the PHY settings (submode) during
+ufs_qcom_power_up_sequence(). Currently, it is being updated every time the
+agreed gear changes. Due to this, if the gear got downscaled before suspend
+(runtime/system), then while resuming, the PHY settings for the lower gear
+will be applied first and later when scaling to max gear with REINIT, the
+PHY settings for the max gear will be applied.
+
+This adds a latency while resuming and also really not needed as the PHY
+gear settings are backwards compatible i.e., we can continue using the PHY
+settings for max gear with lower gear speed.
+
+So let's update the "hs_gear" variable _only_ when the agreed gear is
+greater than the current one. This guarantees that the PHY settings will be
+changed only during probe time and fatal error condition.
+
+Due to this, UFSHCD_QUIRK_REINIT_AFTER_MAX_GEAR_SWITCH can now be skipped
+when the PM operation is in progress.
+
+Cc: stable@vger.kernel.org
+Fixes: 96a7141da332 ("scsi: ufs: core: Add support for reinitializing the UFS device")
+Reported-by: Can Guo <quic_cang@quicinc.com>
+Signed-off-by: Manivannan Sadhasivam <manivannan.sadhasivam@linaro.org>
+Link: https://lore.kernel.org/r/20230908145329.154024-1-manivannan.sadhasivam@linaro.org
+Reviewed-by: Can Guo <quic_cang@quicinc.com>
+Tested-by: Can Guo <quic_cang@quicinc.com>
+Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/ufs/core/ufshcd.c | 3 ++-
+ drivers/ufs/host/ufs-qcom.c | 9 +++++++--
+ 2 files changed, 9 insertions(+), 3 deletions(-)
+
+--- a/drivers/ufs/core/ufshcd.c
++++ b/drivers/ufs/core/ufshcd.c
+@@ -8798,7 +8798,8 @@ static int ufshcd_probe_hba(struct ufs_h
+ if (ret)
+ goto out;
+
+- if (hba->quirks & UFSHCD_QUIRK_REINIT_AFTER_MAX_GEAR_SWITCH) {
++ if (!hba->pm_op_in_progress &&
++ (hba->quirks & UFSHCD_QUIRK_REINIT_AFTER_MAX_GEAR_SWITCH)) {
+ /* Reset the device and controller before doing reinit */
+ ufshcd_device_reset(hba);
+ ufshcd_hba_stop(hba);
+--- a/drivers/ufs/host/ufs-qcom.c
++++ b/drivers/ufs/host/ufs-qcom.c
+@@ -820,8 +820,13 @@ static int ufs_qcom_pwr_change_notify(st
+ return ret;
+ }
+
+- /* Use the agreed gear */
+- host->hs_gear = dev_req_params->gear_tx;
++ /*
++ * Update hs_gear only when the gears are scaled to a higher value. This is because,
++ * the PHY gear settings are backwards compatible and we only need to change the PHY
++ * settings while scaling to higher gears.
++ */
++ if (dev_req_params->gear_tx > host->hs_gear)
++ host->hs_gear = dev_req_params->gear_tx;
+
+ /* enable the device ref clock before changing to HS mode */
+ if (!ufshcd_is_hs_mode(&hba->pwr_info) &&
scsi-ufs-core-expand-mcq-queue-slot-to-devicequeuede.patch
cifs-spnego-add-in-host_key_len.patch
cifs-fix-check-of-rc-in-function-generate_smb3signin.patch
+perf-core-fix-cpuctx-refcounting.patch
+i915-perf-fix-null-deref-bugs-with-drm_dbg-calls.patch
+perf-arm_cspmu-reject-events-meant-for-other-pmus.patch
+drivers-perf-check-find_first_bit-return-value.patch
+media-venus-hfi-add-checks-to-perform-sanity-on-queue-pointers.patch
+perf-intel-pt-fix-async-branch-flags.patch
+powerpc-perf-fix-disabling-bhrb-and-instruction-sampling.patch
+randstruct-fix-gcc-plugin-performance-mode-to-stay-in-group.patch
+bpf-fix-check_stack_write_fixed_off-to-correctly-spill-imm.patch
+bpf-fix-precision-tracking-for-bpf_alu-bpf_to_be-bpf_end.patch
+scsi-mpt3sas-fix-loop-logic.patch
+scsi-megaraid_sas-increase-register-read-retry-rount-from-3-to-30-for-selected-registers.patch
+scsi-ufs-qcom-update-phy-settings-only-when-scaling-to-higher-gears.patch
+scsi-qla2xxx-fix-system-crash-due-to-bad-pointer-access.patch
+scsi-ufs-core-fix-racing-issue-between-ufshcd_mcq_abort-and-isr.patch
+crypto-x86-sha-load-modules-based-on-cpu-features.patch
+x86-pci-avoid-pme-from-d3hot-d3cold-for-amd-rembrandt-and-phoenix-usb4.patch
+x86-apic-msi-fix-misconfigured-non-maskable-msi-quirk.patch
+x86-cpu-hygon-fix-the-cpu-topology-evaluation-for-real.patch
+kvm-x86-hyper-v-don-t-auto-enable-stimer-on-write-from-user-space.patch
+kvm-x86-ignore-msr_amd64_tw_cfg-access.patch
+kvm-x86-clear-bit12-of-icr-after-apic-write-vm-exit.patch
+kvm-x86-fix-lapic-timer-interrupt-lost-after-loading-a-snapshot.patch
+mmc-sdhci-pci-gli-gl9755-mask-the-replay-timer-timeout-of-aer.patch
--- /dev/null
+From b56ebe7c896dc78b5865ec2c4b1dae3c93537517 Mon Sep 17 00:00:00 2001
+From: Koichiro Den <den@valinux.co.jp>
+Date: Thu, 26 Oct 2023 12:20:36 +0900
+Subject: x86/apic/msi: Fix misconfigured non-maskable MSI quirk
+
+From: Koichiro Den <den@valinux.co.jp>
+
+commit b56ebe7c896dc78b5865ec2c4b1dae3c93537517 upstream.
+
+commit ef8dd01538ea ("genirq/msi: Make interrupt allocation less
+convoluted"), reworked the code so that the x86 specific quirk for affinity
+setting of non-maskable PCI/MSI interrupts is no longer activated when
+necessary.
+
+This could be solved by restoring the original logic in the core MSI code,
+but after a deeper analysis it turned out that the quirk flag is not
+required at all.
+
+The quirk is only required when the PCI/MSI device cannot mask the MSI
+interrupts, which in turn also prevents reservation mode from being enabled
+for the affected interrupt.
+
+This allows removing the NOMASK quirk bit completely as msi_set_affinity()
+can instead check whether reservation mode is enabled for the interrupt,
+which gives exactly the same answer.
+
+Even in the momentary non-existing case that the reservation mode would be
+not set for a maskable MSI interrupt this would not cause any harm as it
+just would cause msi_set_affinity() to go needlessly through the
+functionally equivalent slow path, which works perfectly fine with maskable
+interrupts as well.
+
+Rework msi_set_affinity() to query the reservation mode and remove all
+NOMASK quirk logic from the core code.
+
+[ tglx: Massaged changelog ]
+
+Fixes: ef8dd01538ea ("genirq/msi: Make interrupt allocation less convoluted")
+Suggested-by: Thomas Gleixner <tglx@linutronix.de>
+Signed-off-by: Koichiro Den <den@valinux.co.jp>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: stable@vger.kernel.org
+Link: https://lore.kernel.org/r/20231026032036.2462428-1-den@valinux.co.jp
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kernel/apic/msi.c | 8 +++-----
+ include/linux/irq.h | 26 ++++----------------------
+ include/linux/msi.h | 6 ------
+ kernel/irq/debugfs.c | 1 -
+ kernel/irq/msi.c | 12 +-----------
+ 5 files changed, 8 insertions(+), 45 deletions(-)
+
+--- a/arch/x86/kernel/apic/msi.c
++++ b/arch/x86/kernel/apic/msi.c
+@@ -55,14 +55,14 @@ msi_set_affinity(struct irq_data *irqd,
+ * caused by the non-atomic update of the address/data pair.
+ *
+ * Direct update is possible when:
+- * - The MSI is maskable (remapped MSI does not use this code path)).
+- * The quirk bit is not set in this case.
++ * - The MSI is maskable (remapped MSI does not use this code path).
++ * The reservation mode bit is set in this case.
+ * - The new vector is the same as the old vector
+ * - The old vector is MANAGED_IRQ_SHUTDOWN_VECTOR (interrupt starts up)
+ * - The interrupt is not yet started up
+ * - The new destination CPU is the same as the old destination CPU
+ */
+- if (!irqd_msi_nomask_quirk(irqd) ||
++ if (!irqd_can_reserve(irqd) ||
+ cfg->vector == old_cfg.vector ||
+ old_cfg.vector == MANAGED_IRQ_SHUTDOWN_VECTOR ||
+ !irqd_is_started(irqd) ||
+@@ -215,8 +215,6 @@ static bool x86_init_dev_msi_info(struct
+ if (WARN_ON_ONCE(domain != real_parent))
+ return false;
+ info->chip->irq_set_affinity = msi_set_affinity;
+- /* See msi_set_affinity() for the gory details */
+- info->flags |= MSI_FLAG_NOMASK_QUIRK;
+ break;
+ case DOMAIN_BUS_DMAR:
+ case DOMAIN_BUS_AMDVI:
+--- a/include/linux/irq.h
++++ b/include/linux/irq.h
+@@ -215,8 +215,6 @@ struct irq_data {
+ * IRQD_SINGLE_TARGET - IRQ allows only a single affinity target
+ * IRQD_DEFAULT_TRIGGER_SET - Expected trigger already been set
+ * IRQD_CAN_RESERVE - Can use reservation mode
+- * IRQD_MSI_NOMASK_QUIRK - Non-maskable MSI quirk for affinity change
+- * required
+ * IRQD_HANDLE_ENFORCE_IRQCTX - Enforce that handle_irq_*() is only invoked
+ * from actual interrupt context.
+ * IRQD_AFFINITY_ON_ACTIVATE - Affinity is set on activation. Don't call
+@@ -247,11 +245,10 @@ enum {
+ IRQD_SINGLE_TARGET = BIT(24),
+ IRQD_DEFAULT_TRIGGER_SET = BIT(25),
+ IRQD_CAN_RESERVE = BIT(26),
+- IRQD_MSI_NOMASK_QUIRK = BIT(27),
+- IRQD_HANDLE_ENFORCE_IRQCTX = BIT(28),
+- IRQD_AFFINITY_ON_ACTIVATE = BIT(29),
+- IRQD_IRQ_ENABLED_ON_SUSPEND = BIT(30),
+- IRQD_RESEND_WHEN_IN_PROGRESS = BIT(31),
++ IRQD_HANDLE_ENFORCE_IRQCTX = BIT(27),
++ IRQD_AFFINITY_ON_ACTIVATE = BIT(28),
++ IRQD_IRQ_ENABLED_ON_SUSPEND = BIT(29),
++ IRQD_RESEND_WHEN_IN_PROGRESS = BIT(30),
+ };
+
+ #define __irqd_to_state(d) ACCESS_PRIVATE((d)->common, state_use_accessors)
+@@ -426,21 +423,6 @@ static inline bool irqd_can_reserve(stru
+ return __irqd_to_state(d) & IRQD_CAN_RESERVE;
+ }
+
+-static inline void irqd_set_msi_nomask_quirk(struct irq_data *d)
+-{
+- __irqd_to_state(d) |= IRQD_MSI_NOMASK_QUIRK;
+-}
+-
+-static inline void irqd_clr_msi_nomask_quirk(struct irq_data *d)
+-{
+- __irqd_to_state(d) &= ~IRQD_MSI_NOMASK_QUIRK;
+-}
+-
+-static inline bool irqd_msi_nomask_quirk(struct irq_data *d)
+-{
+- return __irqd_to_state(d) & IRQD_MSI_NOMASK_QUIRK;
+-}
+-
+ static inline void irqd_set_affinity_on_activate(struct irq_data *d)
+ {
+ __irqd_to_state(d) |= IRQD_AFFINITY_ON_ACTIVATE;
+--- a/include/linux/msi.h
++++ b/include/linux/msi.h
+@@ -547,12 +547,6 @@ enum {
+ MSI_FLAG_ALLOC_SIMPLE_MSI_DESCS = (1 << 5),
+ /* Free MSI descriptors */
+ MSI_FLAG_FREE_MSI_DESCS = (1 << 6),
+- /*
+- * Quirk to handle MSI implementations which do not provide
+- * masking. Currently known to affect x86, but has to be partially
+- * handled in the core MSI code.
+- */
+- MSI_FLAG_NOMASK_QUIRK = (1 << 7),
+
+ /* Mask for the generic functionality */
+ MSI_GENERIC_FLAGS_MASK = GENMASK(15, 0),
+--- a/kernel/irq/debugfs.c
++++ b/kernel/irq/debugfs.c
+@@ -121,7 +121,6 @@ static const struct irq_bit_descr irqdat
+ BIT_MASK_DESCR(IRQD_AFFINITY_ON_ACTIVATE),
+ BIT_MASK_DESCR(IRQD_MANAGED_SHUTDOWN),
+ BIT_MASK_DESCR(IRQD_CAN_RESERVE),
+- BIT_MASK_DESCR(IRQD_MSI_NOMASK_QUIRK),
+
+ BIT_MASK_DESCR(IRQD_FORWARDED_TO_VCPU),
+
+--- a/kernel/irq/msi.c
++++ b/kernel/irq/msi.c
+@@ -1204,7 +1204,6 @@ static int msi_handle_pci_fail(struct ir
+
+ #define VIRQ_CAN_RESERVE 0x01
+ #define VIRQ_ACTIVATE 0x02
+-#define VIRQ_NOMASK_QUIRK 0x04
+
+ static int msi_init_virq(struct irq_domain *domain, int virq, unsigned int vflags)
+ {
+@@ -1213,8 +1212,6 @@ static int msi_init_virq(struct irq_doma
+
+ if (!(vflags & VIRQ_CAN_RESERVE)) {
+ irqd_clr_can_reserve(irqd);
+- if (vflags & VIRQ_NOMASK_QUIRK)
+- irqd_set_msi_nomask_quirk(irqd);
+
+ /*
+ * If the interrupt is managed but no CPU is available to
+@@ -1275,15 +1272,8 @@ static int __msi_domain_alloc_irqs(struc
+ * Interrupt can use a reserved vector and will not occupy
+ * a real device vector until the interrupt is requested.
+ */
+- if (msi_check_reservation_mode(domain, info, dev)) {
++ if (msi_check_reservation_mode(domain, info, dev))
+ vflags |= VIRQ_CAN_RESERVE;
+- /*
+- * MSI affinity setting requires a special quirk (X86) when
+- * reservation mode is active.
+- */
+- if (info->flags & MSI_FLAG_NOMASK_QUIRK)
+- vflags |= VIRQ_NOMASK_QUIRK;
+- }
+
+ xa_for_each_range(xa, idx, desc, ctrl->first, ctrl->last) {
+ if (!msi_desc_match(desc, MSI_DESC_NOTASSOCIATED))
--- /dev/null
+From ee545b94d39a00c93dc98b1dbcbcf731d2eadeb4 Mon Sep 17 00:00:00 2001
+From: Pu Wen <puwen@hygon.cn>
+Date: Mon, 14 Aug 2023 10:18:26 +0200
+Subject: x86/cpu/hygon: Fix the CPU topology evaluation for real
+
+From: Pu Wen <puwen@hygon.cn>
+
+commit ee545b94d39a00c93dc98b1dbcbcf731d2eadeb4 upstream.
+
+Hygon processors with a model ID > 3 have CPUID leaf 0xB correctly
+populated and don't need the fixed package ID shift workaround. The fixup
+is also incorrect when running in a guest.
+
+Fixes: e0ceeae708ce ("x86/CPU/hygon: Fix phys_proc_id calculation logic for multi-die processors")
+Signed-off-by: Pu Wen <puwen@hygon.cn>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Cc: <stable@vger.kernel.org>
+Link: https://lore.kernel.org/r/tencent_594804A808BD93A4EBF50A994F228E3A7F07@qq.com
+Link: https://lore.kernel.org/r/20230814085112.089607918@linutronix.de
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kernel/cpu/hygon.c | 8 ++++++--
+ 1 file changed, 6 insertions(+), 2 deletions(-)
+
+--- a/arch/x86/kernel/cpu/hygon.c
++++ b/arch/x86/kernel/cpu/hygon.c
+@@ -86,8 +86,12 @@ static void hygon_get_topology(struct cp
+ if (!err)
+ c->x86_coreid_bits = get_count_order(c->x86_max_cores);
+
+- /* Socket ID is ApicId[6] for these processors. */
+- c->phys_proc_id = c->apicid >> APICID_SOCKET_ID_BIT;
++ /*
++ * Socket ID is ApicId[6] for the processors with model <= 0x3
++ * when running on host.
++ */
++ if (!boot_cpu_has(X86_FEATURE_HYPERVISOR) && c->x86_model <= 0x3)
++ c->phys_proc_id = c->apicid >> APICID_SOCKET_ID_BIT;
+
+ cacheinfo_hygon_init_llc_id(c, cpu);
+ } else if (cpu_has(c, X86_FEATURE_NODEID_MSR)) {
--- /dev/null
+From 7d08f21f8c6307cb05cabb8d86e90ff6ccba57e9 Mon Sep 17 00:00:00 2001
+From: Mario Limonciello <mario.limonciello@amd.com>
+Date: Wed, 4 Oct 2023 09:49:59 -0500
+Subject: x86/PCI: Avoid PME from D3hot/D3cold for AMD Rembrandt and Phoenix USB4
+
+From: Mario Limonciello <mario.limonciello@amd.com>
+
+commit 7d08f21f8c6307cb05cabb8d86e90ff6ccba57e9 upstream.
+
+Iain reports that USB devices can't be used to wake a Lenovo Z13 from
+suspend. This occurs because on some AMD platforms, even though the Root
+Ports advertise PME_Support for D3hot and D3cold, wakeup events from
+devices on a USB4 controller don't result in wakeup interrupts from the
+Root Port when amd-pmc has put the platform in a hardware sleep state.
+
+If amd-pmc will be involved in the suspend, remove D3hot and D3cold from
+the PME_Support mask of Root Ports above USB4 controllers so we avoid those
+states if we need wakeups.
+
+Restore D3 support at resume so that it can be used by runtime suspend.
+
+This affects both AMD Rembrandt and Phoenix SoCs.
+
+"pm_suspend_target_state == PM_SUSPEND_ON" means we're doing runtime
+suspend, and amd-pmc will not be involved. In that case PMEs work as
+advertised in D3hot/D3cold, so we don't need to do anything.
+
+Note that amd-pmc is technically optional, and there's no need for this
+quirk if it's not present, but we assume it's always present because power
+consumption is so high without it.
+
+Fixes: 9d26d3a8f1b0 ("PCI: Put PCIe ports into D3 during suspend")
+Link: https://lore.kernel.org/r/20231004144959.158840-1-mario.limonciello@amd.com
+Reported-by: Iain Lane <iain@orangesquash.org.uk>
+Closes: https://forums.lenovo.com/t5/Ubuntu/Z13-can-t-resume-from-suspend-with-external-USB-keyboard/m-p/5217121
+Signed-off-by: Mario Limonciello <mario.limonciello@amd.com>
+[bhelgaas: commit log, move to arch/x86/pci/fixup.c, add #includes]
+Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
+Cc: stable@vger.kernel.org
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/pci/fixup.c | 59 +++++++++++++++++++++++++++++++++++++++++++++++++++
+ 1 file changed, 59 insertions(+)
+
+--- a/arch/x86/pci/fixup.c
++++ b/arch/x86/pci/fixup.c
+@@ -3,9 +3,11 @@
+ * Exceptions for specific devices. Usually work-arounds for fatal design flaws.
+ */
+
++#include <linux/bitfield.h>
+ #include <linux/delay.h>
+ #include <linux/dmi.h>
+ #include <linux/pci.h>
++#include <linux/suspend.h>
+ #include <linux/vgaarb.h>
+ #include <asm/amd_nb.h>
+ #include <asm/hpet.h>
+@@ -904,3 +906,60 @@ static void chromeos_fixup_apl_pci_l1ss_
+ }
+ DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x5ad6, chromeos_save_apl_pci_l1ss_capability);
+ DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_INTEL, 0x5ad6, chromeos_fixup_apl_pci_l1ss_capability);
++
++#ifdef CONFIG_SUSPEND
++/*
++ * Root Ports on some AMD SoCs advertise PME_Support for D3hot and D3cold, but
++ * if the SoC is put into a hardware sleep state by the amd-pmc driver, the
++ * Root Ports don't generate wakeup interrupts for USB devices.
++ *
++ * When suspending, remove D3hot and D3cold from the PME_Support advertised
++ * by the Root Port so we don't use those states if we're expecting wakeup
++ * interrupts. Restore the advertised PME_Support when resuming.
++ */
++static void amd_rp_pme_suspend(struct pci_dev *dev)
++{
++ struct pci_dev *rp;
++
++ /*
++ * PM_SUSPEND_ON means we're doing runtime suspend, which means
++ * amd-pmc will not be involved so PMEs during D3 work as advertised.
++ *
++ * The PMEs *do* work if amd-pmc doesn't put the SoC in the hardware
++ * sleep state, but we assume amd-pmc is always present.
++ */
++ if (pm_suspend_target_state == PM_SUSPEND_ON)
++ return;
++
++ rp = pcie_find_root_port(dev);
++ if (!rp->pm_cap)
++ return;
++
++ rp->pme_support &= ~((PCI_PM_CAP_PME_D3hot|PCI_PM_CAP_PME_D3cold) >>
++ PCI_PM_CAP_PME_SHIFT);
++ dev_info_once(&rp->dev, "quirk: disabling D3cold for suspend\n");
++}
++
++static void amd_rp_pme_resume(struct pci_dev *dev)
++{
++ struct pci_dev *rp;
++ u16 pmc;
++
++ rp = pcie_find_root_port(dev);
++ if (!rp->pm_cap)
++ return;
++
++ pci_read_config_word(rp, rp->pm_cap + PCI_PM_PMC, &pmc);
++ rp->pme_support = FIELD_GET(PCI_PM_CAP_PME_MASK, pmc);
++}
++/* Rembrandt (yellow_carp) */
++DECLARE_PCI_FIXUP_SUSPEND(PCI_VENDOR_ID_AMD, 0x162e, amd_rp_pme_suspend);
++DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_AMD, 0x162e, amd_rp_pme_resume);
++DECLARE_PCI_FIXUP_SUSPEND(PCI_VENDOR_ID_AMD, 0x162f, amd_rp_pme_suspend);
++DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_AMD, 0x162f, amd_rp_pme_resume);
++/* Phoenix (pink_sardine) */
++DECLARE_PCI_FIXUP_SUSPEND(PCI_VENDOR_ID_AMD, 0x1668, amd_rp_pme_suspend);
++DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_AMD, 0x1668, amd_rp_pme_resume);
++DECLARE_PCI_FIXUP_SUSPEND(PCI_VENDOR_ID_AMD, 0x1669, amd_rp_pme_suspend);
++DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_AMD, 0x1669, amd_rp_pme_resume);
++#endif /* CONFIG_SUSPEND */