git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
6.1-stable patches
author Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Wed, 4 Oct 2023 14:12:41 +0000 (16:12 +0200)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Wed, 4 Oct 2023 14:12:41 +0000 (16:12 +0200)
added patches:
kvm-svm-fix-tsc_aux-virtualization-setup.patch
kvm-svm-intercept_rdtscp-is-never-intercepted-anyway.patch
kvm-x86-mmu-do-not-filter-address-spaces-in-for_each_tdp_mmu_root_yield_safe.patch
kvm-x86-mmu-open-code-leaf-invalidation-from-mmu_notifier.patch
misc-rtsx-fix-some-platforms-can-not-boot-and-move-the-l1ss-judgment-to-probe.patch
mptcp-fix-bogus-receive-window-shrinkage-with-multiple-subflows.patch
nilfs2-fix-potential-use-after-free-in-nilfs_gccache_submit_read_data.patch
revert-tty-n_gsm-fix-uaf-in-gsm_cleanup_mux.patch
serial-8250_port-check-irq-data-before-use.patch
x86-sgx-resolves-secs-reclaim-vs.-page-fault-for-eaug-race.patch
x86-srso-add-srso-mitigation-for-hygon-processors.patch

12 files changed:
queue-6.1/kvm-svm-fix-tsc_aux-virtualization-setup.patch [new file with mode: 0644]
queue-6.1/kvm-svm-intercept_rdtscp-is-never-intercepted-anyway.patch [new file with mode: 0644]
queue-6.1/kvm-x86-mmu-do-not-filter-address-spaces-in-for_each_tdp_mmu_root_yield_safe.patch [new file with mode: 0644]
queue-6.1/kvm-x86-mmu-open-code-leaf-invalidation-from-mmu_notifier.patch [new file with mode: 0644]
queue-6.1/misc-rtsx-fix-some-platforms-can-not-boot-and-move-the-l1ss-judgment-to-probe.patch [new file with mode: 0644]
queue-6.1/mptcp-fix-bogus-receive-window-shrinkage-with-multiple-subflows.patch [new file with mode: 0644]
queue-6.1/nilfs2-fix-potential-use-after-free-in-nilfs_gccache_submit_read_data.patch [new file with mode: 0644]
queue-6.1/revert-tty-n_gsm-fix-uaf-in-gsm_cleanup_mux.patch [new file with mode: 0644]
queue-6.1/serial-8250_port-check-irq-data-before-use.patch [new file with mode: 0644]
queue-6.1/series
queue-6.1/x86-sgx-resolves-secs-reclaim-vs.-page-fault-for-eaug-race.patch [new file with mode: 0644]
queue-6.1/x86-srso-add-srso-mitigation-for-hygon-processors.patch [new file with mode: 0644]

diff --git a/queue-6.1/kvm-svm-fix-tsc_aux-virtualization-setup.patch b/queue-6.1/kvm-svm-fix-tsc_aux-virtualization-setup.patch
new file mode 100644 (file)
index 0000000..0e7fc2d
--- /dev/null
@@ -0,0 +1,124 @@
+From e0096d01c4fcb8c96c05643cfc2c20ab78eae4da Mon Sep 17 00:00:00 2001
+From: Tom Lendacky <thomas.lendacky@amd.com>
+Date: Fri, 15 Sep 2023 15:54:30 -0500
+Subject: KVM: SVM: Fix TSC_AUX virtualization setup
+
+From: Tom Lendacky <thomas.lendacky@amd.com>
+
+commit e0096d01c4fcb8c96c05643cfc2c20ab78eae4da upstream.
+
+The checks for virtualizing TSC_AUX occur during the vCPU reset processing
+path. However, at the time of initial vCPU reset processing, when the vCPU
+is first created, not all of the guest CPUID information has been set. In
+this case the RDTSCP and RDPID feature support for the guest is not in
+place and so TSC_AUX virtualization is not established.
+
+This continues for each vCPU created for the guest. On the first boot of
+an AP, vCPU reset processing is executed as a result of an APIC INIT
+event, this time with all of the guest CPUID information set, resulting
+in TSC_AUX virtualization being enabled, but only for the APs. The BSP
+always sees a TSC_AUX value of 0, which probably went unnoticed because,
+at least for Linux, the BSP TSC_AUX value is 0.
+
+Move the TSC_AUX virtualization enablement out of the init_vmcb() path and
+into the vcpu_after_set_cpuid() path to allow for proper initialization of
+the support after the guest CPUID information has been set.
+
+With the TSC_AUX virtualization support now in the vcpu_after_set_cpuid()
+path, the intercepts must be either cleared or set based on the guest
+CPUID input.
+
+Fixes: 296d5a17e793 ("KVM: SEV-ES: Use V_TSC_AUX if available instead of RDTSC/MSR_TSC_AUX intercepts")
+Signed-off-by: Tom Lendacky <thomas.lendacky@amd.com>
+Message-Id: <4137fbcb9008951ab5f0befa74a0399d2cce809a.1694811272.git.thomas.lendacky@amd.com>
+Cc: stable@vger.kernel.org
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/svm/sev.c |   31 ++++++++++++++++++++++++++-----
+ arch/x86/kvm/svm/svm.c |    9 ++-------
+ arch/x86/kvm/svm/svm.h |    1 +
+ 3 files changed, 29 insertions(+), 12 deletions(-)
+
+--- a/arch/x86/kvm/svm/sev.c
++++ b/arch/x86/kvm/svm/sev.c
+@@ -2941,6 +2941,32 @@ int sev_es_string_io(struct vcpu_svm *sv
+                                   count, in);
+ }
++static void sev_es_vcpu_after_set_cpuid(struct vcpu_svm *svm)
++{
++      struct kvm_vcpu *vcpu = &svm->vcpu;
++
++      if (boot_cpu_has(X86_FEATURE_V_TSC_AUX)) {
++              bool v_tsc_aux = guest_cpuid_has(vcpu, X86_FEATURE_RDTSCP) ||
++                               guest_cpuid_has(vcpu, X86_FEATURE_RDPID);
++
++              set_msr_interception(vcpu, svm->msrpm, MSR_TSC_AUX, v_tsc_aux, v_tsc_aux);
++      }
++}
++
++void sev_vcpu_after_set_cpuid(struct vcpu_svm *svm)
++{
++      struct kvm_vcpu *vcpu = &svm->vcpu;
++      struct kvm_cpuid_entry2 *best;
++
++      /* For sev guests, the memory encryption bit is not reserved in CR3.  */
++      best = kvm_find_cpuid_entry(vcpu, 0x8000001F);
++      if (best)
++              vcpu->arch.reserved_gpa_bits &= ~(1UL << (best->ebx & 0x3f));
++
++      if (sev_es_guest(svm->vcpu.kvm))
++              sev_es_vcpu_after_set_cpuid(svm);
++}
++
+ static void sev_es_init_vmcb(struct vcpu_svm *svm)
+ {
+       struct kvm_vcpu *vcpu = &svm->vcpu;
+@@ -2987,11 +3013,6 @@ static void sev_es_init_vmcb(struct vcpu
+       set_msr_interception(vcpu, svm->msrpm, MSR_IA32_LASTBRANCHTOIP, 1, 1);
+       set_msr_interception(vcpu, svm->msrpm, MSR_IA32_LASTINTFROMIP, 1, 1);
+       set_msr_interception(vcpu, svm->msrpm, MSR_IA32_LASTINTTOIP, 1, 1);
+-
+-      if (boot_cpu_has(X86_FEATURE_V_TSC_AUX) &&
+-          (guest_cpuid_has(&svm->vcpu, X86_FEATURE_RDTSCP) ||
+-           guest_cpuid_has(&svm->vcpu, X86_FEATURE_RDPID)))
+-              set_msr_interception(vcpu, svm->msrpm, MSR_TSC_AUX, 1, 1);
+ }
+ void sev_init_vmcb(struct vcpu_svm *svm)
+--- a/arch/x86/kvm/svm/svm.c
++++ b/arch/x86/kvm/svm/svm.c
+@@ -4173,7 +4173,6 @@ static bool svm_has_emulated_msr(struct
+ static void svm_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu)
+ {
+       struct vcpu_svm *svm = to_svm(vcpu);
+-      struct kvm_cpuid_entry2 *best;
+       vcpu->arch.xsaves_enabled = guest_cpuid_has(vcpu, X86_FEATURE_XSAVE) &&
+                                   boot_cpu_has(X86_FEATURE_XSAVE) &&
+@@ -4198,12 +4197,8 @@ static void svm_vcpu_after_set_cpuid(str
+       svm_recalc_instruction_intercepts(vcpu, svm);
+-      /* For sev guests, the memory encryption bit is not reserved in CR3.  */
+-      if (sev_guest(vcpu->kvm)) {
+-              best = kvm_find_cpuid_entry(vcpu, 0x8000001F);
+-              if (best)
+-                      vcpu->arch.reserved_gpa_bits &= ~(1UL << (best->ebx & 0x3f));
+-      }
++      if (sev_guest(vcpu->kvm))
++              sev_vcpu_after_set_cpuid(svm);
+       init_vmcb_after_set_cpuid(vcpu);
+ }
+--- a/arch/x86/kvm/svm/svm.h
++++ b/arch/x86/kvm/svm/svm.h
+@@ -677,6 +677,7 @@ void __init sev_hardware_setup(void);
+ void sev_hardware_unsetup(void);
+ int sev_cpu_init(struct svm_cpu_data *sd);
+ void sev_init_vmcb(struct vcpu_svm *svm);
++void sev_vcpu_after_set_cpuid(struct vcpu_svm *svm);
+ void sev_free_vcpu(struct kvm_vcpu *vcpu);
+ int sev_handle_vmgexit(struct kvm_vcpu *vcpu);
+ int sev_es_string_io(struct vcpu_svm *svm, int size, unsigned int port, int in);
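
To make the ordering bug concrete, here is a minimal, self-contained C sketch (all names hypothetical, not kernel code): a one-time decision made at vCPU creation bakes in the wrong answer because its CPUID input only arrives afterwards, while re-evaluating after the CPUID is set gets it right.

```c
/*
 * Hypothetical sketch of the ordering bug fixed above (not kernel
 * code): a decision made at vCPU creation bakes in "no TSC_AUX
 * virtualization" because the guest CPUID is supplied only later.
 */
#include <stdbool.h>
#include <stdio.h>

struct vcpu {
	bool cpuid_has_rdtscp;     /* supplied by the VMM after creation */
	bool tsc_aux_virtualized;  /* the decision being made */
};

/* Buggy placement: runs at creation, before CPUID is known. */
static void init_vmcb(struct vcpu *v)
{
	v->tsc_aux_virtualized = v->cpuid_has_rdtscp;
}

/* Fixed placement: runs after the VMM has set the guest CPUID. */
static void vcpu_after_set_cpuid(struct vcpu *v)
{
	v->tsc_aux_virtualized = v->cpuid_has_rdtscp;
}

int main(void)
{
	struct vcpu bsp = { false, false };

	init_vmcb(&bsp);              /* decision taken too early... */
	bsp.cpuid_has_rdtscp = true;  /* ...its input arrives afterwards */
	printf("buggy path: virtualized=%d\n", bsp.tsc_aux_virtualized);

	vcpu_after_set_cpuid(&bsp);   /* re-evaluate once input exists */
	printf("fixed path: virtualized=%d\n", bsp.tsc_aux_virtualized);
	return 0;
}
```
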
diff --git a/queue-6.1/kvm-svm-intercept_rdtscp-is-never-intercepted-anyway.patch b/queue-6.1/kvm-svm-intercept_rdtscp-is-never-intercepted-anyway.patch
new file mode 100644 (file)
index 0000000..faef4ff
--- /dev/null
@@ -0,0 +1,38 @@
+From e8d93d5d93f85949e7299be289c6e7e1154b2f78 Mon Sep 17 00:00:00 2001
+From: Paolo Bonzini <pbonzini@redhat.com>
+Date: Fri, 22 Sep 2023 17:06:34 -0400
+Subject: KVM: SVM: INTERCEPT_RDTSCP is never intercepted anyway
+
+From: Paolo Bonzini <pbonzini@redhat.com>
+
+commit e8d93d5d93f85949e7299be289c6e7e1154b2f78 upstream.
+
+svm_recalc_instruction_intercepts() is always called at least once
+before the vCPU is started, so the setting or clearing of the RDTSCP
+intercept can be dropped from the TSC_AUX virtualization support.
+
+Extracted from a patch by Tom Lendacky.
+
+Cc: stable@vger.kernel.org
+Fixes: 296d5a17e793 ("KVM: SEV-ES: Use V_TSC_AUX if available instead of RDTSC/MSR_TSC_AUX intercepts")
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/svm/sev.c |    5 +----
+ 1 file changed, 1 insertion(+), 4 deletions(-)
+
+--- a/arch/x86/kvm/svm/sev.c
++++ b/arch/x86/kvm/svm/sev.c
+@@ -2990,11 +2990,8 @@ static void sev_es_init_vmcb(struct vcpu
+       if (boot_cpu_has(X86_FEATURE_V_TSC_AUX) &&
+           (guest_cpuid_has(&svm->vcpu, X86_FEATURE_RDTSCP) ||
+-           guest_cpuid_has(&svm->vcpu, X86_FEATURE_RDPID))) {
++           guest_cpuid_has(&svm->vcpu, X86_FEATURE_RDPID)))
+               set_msr_interception(vcpu, svm->msrpm, MSR_TSC_AUX, 1, 1);
+-              if (guest_cpuid_has(&svm->vcpu, X86_FEATURE_RDTSCP))
+-                      svm_clr_intercept(svm, INTERCEPT_RDTSCP);
+-      }
+ }
+ void sev_init_vmcb(struct vcpu_svm *svm)
diff --git a/queue-6.1/kvm-x86-mmu-do-not-filter-address-spaces-in-for_each_tdp_mmu_root_yield_safe.patch b/queue-6.1/kvm-x86-mmu-do-not-filter-address-spaces-in-for_each_tdp_mmu_root_yield_safe.patch
new file mode 100644 (file)
index 0000000..09be0dd
--- /dev/null
@@ -0,0 +1,122 @@
+From 441a5dfcd96854cbcb625709e2694a9c60adfaab Mon Sep 17 00:00:00 2001
+From: Paolo Bonzini <pbonzini@redhat.com>
+Date: Thu, 21 Sep 2023 05:44:56 -0400
+Subject: KVM: x86/mmu: Do not filter address spaces in for_each_tdp_mmu_root_yield_safe()
+
+From: Paolo Bonzini <pbonzini@redhat.com>
+
+commit 441a5dfcd96854cbcb625709e2694a9c60adfaab upstream.
+
+All callers except the MMU notifier want to process all address spaces.
+Remove the address space ID argument of for_each_tdp_mmu_root_yield_safe()
+and switch the MMU notifier to use __for_each_tdp_mmu_root_yield_safe().
+
+Extracted out of a patch by Sean Christopherson <seanjc@google.com>
+
+Cc: stable@vger.kernel.org
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/mmu/mmu.c     |    8 ++------
+ arch/x86/kvm/mmu/tdp_mmu.c |   22 +++++++++++-----------
+ arch/x86/kvm/mmu/tdp_mmu.h |    3 +--
+ 3 files changed, 14 insertions(+), 19 deletions(-)
+
+--- a/arch/x86/kvm/mmu/mmu.c
++++ b/arch/x86/kvm/mmu/mmu.c
+@@ -6079,7 +6079,6 @@ static bool kvm_rmap_zap_gfn_range(struc
+ void kvm_zap_gfn_range(struct kvm *kvm, gfn_t gfn_start, gfn_t gfn_end)
+ {
+       bool flush;
+-      int i;
+       if (WARN_ON_ONCE(gfn_end <= gfn_start))
+               return;
+@@ -6090,11 +6089,8 @@ void kvm_zap_gfn_range(struct kvm *kvm,
+       flush = kvm_rmap_zap_gfn_range(kvm, gfn_start, gfn_end);
+-      if (is_tdp_mmu_enabled(kvm)) {
+-              for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++)
+-                      flush = kvm_tdp_mmu_zap_leafs(kvm, i, gfn_start,
+-                                                    gfn_end, flush);
+-      }
++      if (is_tdp_mmu_enabled(kvm))
++              flush = kvm_tdp_mmu_zap_leafs(kvm, gfn_start, gfn_end, flush);
+       if (flush)
+               kvm_flush_remote_tlbs_with_address(kvm, gfn_start,
+--- a/arch/x86/kvm/mmu/tdp_mmu.c
++++ b/arch/x86/kvm/mmu/tdp_mmu.c
+@@ -222,8 +222,12 @@ static struct kvm_mmu_page *tdp_mmu_next
+ #define for_each_valid_tdp_mmu_root_yield_safe(_kvm, _root, _as_id, _shared)  \
+       __for_each_tdp_mmu_root_yield_safe(_kvm, _root, _as_id, _shared, true)
+-#define for_each_tdp_mmu_root_yield_safe(_kvm, _root, _as_id)                 \
+-      __for_each_tdp_mmu_root_yield_safe(_kvm, _root, _as_id, false, false)
++#define for_each_tdp_mmu_root_yield_safe(_kvm, _root)                 \
++      for (_root = tdp_mmu_next_root(_kvm, NULL, false, false);               \
++           _root;                                                             \
++           _root = tdp_mmu_next_root(_kvm, _root, false, false))              \
++              if (!kvm_lockdep_assert_mmu_lock_held(_kvm, false)) {           \
++              } else
+ /*
+  * Iterate over all TDP MMU roots.  Requires that mmu_lock be held for write,
+@@ -955,12 +959,11 @@ static bool tdp_mmu_zap_leafs(struct kvm
+  * true if a TLB flush is needed before releasing the MMU lock, i.e. if one or
+  * more SPTEs were zapped since the MMU lock was last acquired.
+  */
+-bool kvm_tdp_mmu_zap_leafs(struct kvm *kvm, int as_id, gfn_t start, gfn_t end,
+-                         bool flush)
++bool kvm_tdp_mmu_zap_leafs(struct kvm *kvm, gfn_t start, gfn_t end, bool flush)
+ {
+       struct kvm_mmu_page *root;
+-      for_each_tdp_mmu_root_yield_safe(kvm, root, as_id)
++      for_each_tdp_mmu_root_yield_safe(kvm, root)
+               flush = tdp_mmu_zap_leafs(kvm, root, start, end, true, flush);
+       return flush;
+@@ -969,7 +972,6 @@ bool kvm_tdp_mmu_zap_leafs(struct kvm *k
+ void kvm_tdp_mmu_zap_all(struct kvm *kvm)
+ {
+       struct kvm_mmu_page *root;
+-      int i;
+       /*
+        * Zap all roots, including invalid roots, as all SPTEs must be dropped
+@@ -983,10 +985,8 @@ void kvm_tdp_mmu_zap_all(struct kvm *kvm
+        * is being destroyed or the userspace VMM has exited.  In both cases,
+        * KVM_RUN is unreachable, i.e. no vCPUs will ever service the request.
+        */
+-      for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++) {
+-              for_each_tdp_mmu_root_yield_safe(kvm, root, i)
+-                      tdp_mmu_zap_root(kvm, root, false);
+-      }
++      for_each_tdp_mmu_root_yield_safe(kvm, root)
++              tdp_mmu_zap_root(kvm, root, false);
+ }
+ /*
+@@ -1223,7 +1223,7 @@ bool kvm_tdp_mmu_unmap_gfn_range(struct
+ {
+       struct kvm_mmu_page *root;
+-      for_each_tdp_mmu_root_yield_safe(kvm, root, range->slot->as_id)
++      __for_each_tdp_mmu_root_yield_safe(kvm, root, range->slot->as_id, false, false)
+               flush = tdp_mmu_zap_leafs(kvm, root, range->start, range->end,
+                                         range->may_block, flush);
+--- a/arch/x86/kvm/mmu/tdp_mmu.h
++++ b/arch/x86/kvm/mmu/tdp_mmu.h
+@@ -15,8 +15,7 @@ __must_check static inline bool kvm_tdp_
+ void kvm_tdp_mmu_put_root(struct kvm *kvm, struct kvm_mmu_page *root,
+                         bool shared);
+-bool kvm_tdp_mmu_zap_leafs(struct kvm *kvm, int as_id, gfn_t start, gfn_t end,
+-                         bool flush);
++bool kvm_tdp_mmu_zap_leafs(struct kvm *kvm, gfn_t start, gfn_t end, bool flush);
+ bool kvm_tdp_mmu_zap_sp(struct kvm *kvm, struct kvm_mmu_page *sp);
+ void kvm_tdp_mmu_zap_all(struct kvm *kvm);
+ void kvm_tdp_mmu_invalidate_all_roots(struct kvm *kvm);
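
The for_each_tdp_mmu_root_yield_safe() expansion above follows the usual "safe" list-walk shape: the next root is fetched before the current one is processed, so the walk survives the body dropping (and freeing) the root it is visiting. A reduced, non-kernel sketch of that shape:

```c
/*
 * Non-kernel sketch of the "yield safe" walk: read the next node
 * before processing the current one, so the loop is unaffected when
 * the body drops (frees) the node it is visiting.
 */
#include <stdio.h>
#include <stdlib.h>

struct root {
	struct root *next;
	int id;
};

#define for_each_root_yield_safe(head, pos, n)                 \
	for ((pos) = (head), (n) = (pos) ? (pos)->next : NULL; \
	     (pos);                                            \
	     (pos) = (n), (n) = (pos) ? (pos)->next : NULL)

static void drop_root(struct root *r)  /* may free the node */
{
	printf("dropping root %d\n", r->id);
	free(r);
}

int main(void)
{
	struct root *head = NULL, *pos, *n;

	for (int i = 3; i >= 1; i--) {  /* build list 1 -> 2 -> 3 */
		struct root *r = malloc(sizeof(*r));
		r->id = i;
		r->next = head;
		head = r;
	}

	/* Safe even though drop_root() frees pos: n was read first. */
	for_each_root_yield_safe(head, pos, n)
		drop_root(pos);
	return 0;
}
```
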
diff --git a/queue-6.1/kvm-x86-mmu-open-code-leaf-invalidation-from-mmu_notifier.patch b/queue-6.1/kvm-x86-mmu-open-code-leaf-invalidation-from-mmu_notifier.patch
new file mode 100644 (file)
index 0000000..d66af4b
--- /dev/null
@@ -0,0 +1,87 @@
+From 50107e8b2a8a59d8cec7e8454e27c1f8e365acdb Mon Sep 17 00:00:00 2001
+From: Sean Christopherson <seanjc@google.com>
+Date: Fri, 15 Sep 2023 17:39:14 -0700
+Subject: KVM: x86/mmu: Open code leaf invalidation from mmu_notifier
+
+From: Sean Christopherson <seanjc@google.com>
+
+commit 50107e8b2a8a59d8cec7e8454e27c1f8e365acdb upstream.
+
+The mmu_notifier path is a bit of a special snowflake, e.g. it zaps only a
+single address space (because it's per-slot), and can't always yield.
+Because of this, it calls kvm_tdp_mmu_zap_leafs() in ways that no one
+else does.
+
+Iterate manually over the leafs in response to an mmu_notifier
+invalidation, instead of invoking kvm_tdp_mmu_zap_leafs().  Drop the
+@can_yield param from kvm_tdp_mmu_zap_leafs() as its sole remaining
+caller unconditionally passes "true".
+
+Cc: stable@vger.kernel.org
+Signed-off-by: Sean Christopherson <seanjc@google.com>
+Message-Id: <20230916003916.2545000-2-seanjc@google.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/mmu/mmu.c     |    2 +-
+ arch/x86/kvm/mmu/tdp_mmu.c |   13 +++++++++----
+ arch/x86/kvm/mmu/tdp_mmu.h |    4 ++--
+ 3 files changed, 12 insertions(+), 7 deletions(-)
+
+--- a/arch/x86/kvm/mmu/mmu.c
++++ b/arch/x86/kvm/mmu/mmu.c
+@@ -6093,7 +6093,7 @@ void kvm_zap_gfn_range(struct kvm *kvm,
+       if (is_tdp_mmu_enabled(kvm)) {
+               for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++)
+                       flush = kvm_tdp_mmu_zap_leafs(kvm, i, gfn_start,
+-                                                    gfn_end, true, flush);
++                                                    gfn_end, flush);
+       }
+       if (flush)
+--- a/arch/x86/kvm/mmu/tdp_mmu.c
++++ b/arch/x86/kvm/mmu/tdp_mmu.c
+@@ -956,12 +956,12 @@ static bool tdp_mmu_zap_leafs(struct kvm
+  * more SPTEs were zapped since the MMU lock was last acquired.
+  */
+ bool kvm_tdp_mmu_zap_leafs(struct kvm *kvm, int as_id, gfn_t start, gfn_t end,
+-                         bool can_yield, bool flush)
++                         bool flush)
+ {
+       struct kvm_mmu_page *root;
+       for_each_tdp_mmu_root_yield_safe(kvm, root, as_id)
+-              flush = tdp_mmu_zap_leafs(kvm, root, start, end, can_yield, flush);
++              flush = tdp_mmu_zap_leafs(kvm, root, start, end, true, flush);
+       return flush;
+ }
+@@ -1221,8 +1221,13 @@ int kvm_tdp_mmu_map(struct kvm_vcpu *vcp
+ bool kvm_tdp_mmu_unmap_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range,
+                                bool flush)
+ {
+-      return kvm_tdp_mmu_zap_leafs(kvm, range->slot->as_id, range->start,
+-                                   range->end, range->may_block, flush);
++      struct kvm_mmu_page *root;
++
++      for_each_tdp_mmu_root_yield_safe(kvm, root, range->slot->as_id)
++              flush = tdp_mmu_zap_leafs(kvm, root, range->start, range->end,
++                                        range->may_block, flush);
++
++      return flush;
+ }
+ typedef bool (*tdp_handler_t)(struct kvm *kvm, struct tdp_iter *iter,
+--- a/arch/x86/kvm/mmu/tdp_mmu.h
++++ b/arch/x86/kvm/mmu/tdp_mmu.h
+@@ -15,8 +15,8 @@ __must_check static inline bool kvm_tdp_
+ void kvm_tdp_mmu_put_root(struct kvm *kvm, struct kvm_mmu_page *root,
+                         bool shared);
+-bool kvm_tdp_mmu_zap_leafs(struct kvm *kvm, int as_id, gfn_t start,
+-                               gfn_t end, bool can_yield, bool flush);
++bool kvm_tdp_mmu_zap_leafs(struct kvm *kvm, int as_id, gfn_t start, gfn_t end,
++                         bool flush);
+ bool kvm_tdp_mmu_zap_sp(struct kvm *kvm, struct kvm_mmu_page *sp);
+ void kvm_tdp_mmu_zap_all(struct kvm *kvm);
+ void kvm_tdp_mmu_invalidate_all_roots(struct kvm *kvm);
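
A reduced sketch of the refactoring pattern in this patch (hypothetical names, not the KVM code): the special-case caller walks the roots itself and keeps its own may_block choice, which lets the shared helper drop a parameter that every remaining caller passes as a constant.

```c
/*
 * Hypothetical sketch of the refactor: the special-case caller
 * open-codes the walk with its own may_block decision, so the shared
 * helper can drop a parameter that all remaining callers pass as true.
 */
#include <stdbool.h>
#include <stdio.h>

struct root { struct root *next; int id; };

static bool zap_one(struct root *r, bool can_yield, bool flush)
{
	printf("zap root %d (can_yield=%d)\n", r->id, can_yield);
	return true;  /* pretend SPTEs were zapped: a flush is needed */
}

/* Shared helper after the refactor: can_yield is hardcoded to true. */
static bool zap_leafs(struct root *head, bool flush)
{
	for (struct root *r = head; r; r = r->next)
		flush = zap_one(r, true, flush);
	return flush;
}

/* The mmu_notifier-like caller open-codes the walk instead. */
static bool unmap_range(struct root *head, bool may_block, bool flush)
{
	for (struct root *r = head; r; r = r->next)
		flush = zap_one(r, may_block, flush);
	return flush;
}

int main(void)
{
	struct root b = { NULL, 2 }, a = { &b, 1 };

	zap_leafs(&a, false);
	unmap_range(&a, false, false);
	return 0;
}
```
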
diff --git a/queue-6.1/misc-rtsx-fix-some-platforms-can-not-boot-and-move-the-l1ss-judgment-to-probe.patch b/queue-6.1/misc-rtsx-fix-some-platforms-can-not-boot-and-move-the-l1ss-judgment-to-probe.patch
new file mode 100644 (file)
index 0000000..caf3e4b
--- /dev/null
@@ -0,0 +1,517 @@
+From 0e4cac557531a4c93de108d9ff11329fcad482ff Mon Sep 17 00:00:00 2001
+From: Ricky WU <ricky_wu@realtek.com>
+Date: Wed, 20 Sep 2023 09:11:19 +0000
+Subject: misc: rtsx: Fix some platforms can not boot and move the l1ss judgment to probe
+
+From: Ricky WU <ricky_wu@realtek.com>
+
+commit 0e4cac557531a4c93de108d9ff11329fcad482ff upstream.
+
+After commit 101bd907b424 ("misc: rtsx: judge ASPM Mode to set PETXCFG
+Reg"), some readers no longer force #CLKREQ low when the system needs
+to enter ASPM. But some platforms may not implement ASPM completely,
+which causes those platforms to fail to boot.
+
+As in the past, release #CLKREQ only on platforms that support L1SS.
+Move the L1SS judgment to probe; reading the config space once when
+the driver starts is enough.
+
+Fixes: 101bd907b424 ("misc: rtsx: judge ASPM Mode to set PETXCFG Reg")
+Cc: stable <stable@kernel.org>
+Reported-by: Paul Grandperrin <paul.grandperrin@gmail.com>
+Signed-off-by: Ricky Wu <ricky_wu@realtek.com>
+Tested-By: Jade Lovelace <lists@jade.fyi>
+Link: https://lore.kernel.org/r/37b1afb997f14946a8784c73d1f9a4f5@realtek.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/misc/cardreader/rts5227.c  |   55 +++--------------------------------
+ drivers/misc/cardreader/rts5228.c  |   57 +++++++++++--------------------------
+ drivers/misc/cardreader/rts5249.c  |   56 ++++--------------------------------
+ drivers/misc/cardreader/rts5260.c  |   43 ++++++++-------------------
+ drivers/misc/cardreader/rts5261.c  |   52 ++++++++-------------------------
+ drivers/misc/cardreader/rtsx_pcr.c |   51 +++++++++++++++++++++++++++++----
+ 6 files changed, 102 insertions(+), 212 deletions(-)
+
+--- a/drivers/misc/cardreader/rts5227.c
++++ b/drivers/misc/cardreader/rts5227.c
+@@ -83,63 +83,20 @@ static void rts5227_fetch_vendor_setting
+ static void rts5227_init_from_cfg(struct rtsx_pcr *pcr)
+ {
+-      struct pci_dev *pdev = pcr->pci;
+-      int l1ss;
+-      u32 lval;
+       struct rtsx_cr_option *option = &pcr->option;
+-      l1ss = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_L1SS);
+-      if (!l1ss)
+-              return;
+-
+-      pci_read_config_dword(pdev, l1ss + PCI_L1SS_CTL1, &lval);
+-
+       if (CHK_PCI_PID(pcr, 0x522A)) {
+-              if (0 == (lval & 0x0F))
+-                      rtsx_pci_enable_oobs_polling(pcr);
+-              else
++              if (rtsx_check_dev_flag(pcr, ASPM_L1_1_EN | ASPM_L1_2_EN
++                              | PM_L1_1_EN | PM_L1_2_EN))
+                       rtsx_pci_disable_oobs_polling(pcr);
++              else
++                      rtsx_pci_enable_oobs_polling(pcr);
+       }
+-      if (lval & PCI_L1SS_CTL1_ASPM_L1_1)
+-              rtsx_set_dev_flag(pcr, ASPM_L1_1_EN);
+-      else
+-              rtsx_clear_dev_flag(pcr, ASPM_L1_1_EN);
+-
+-      if (lval & PCI_L1SS_CTL1_ASPM_L1_2)
+-              rtsx_set_dev_flag(pcr, ASPM_L1_2_EN);
+-      else
+-              rtsx_clear_dev_flag(pcr, ASPM_L1_2_EN);
+-
+-      if (lval & PCI_L1SS_CTL1_PCIPM_L1_1)
+-              rtsx_set_dev_flag(pcr, PM_L1_1_EN);
+-      else
+-              rtsx_clear_dev_flag(pcr, PM_L1_1_EN);
+-
+-      if (lval & PCI_L1SS_CTL1_PCIPM_L1_2)
+-              rtsx_set_dev_flag(pcr, PM_L1_2_EN);
+-      else
+-              rtsx_clear_dev_flag(pcr, PM_L1_2_EN);
+-
+       if (option->ltr_en) {
+-              u16 val;
+-
+-              pcie_capability_read_word(pcr->pci, PCI_EXP_DEVCTL2, &val);
+-              if (val & PCI_EXP_DEVCTL2_LTR_EN) {
+-                      option->ltr_enabled = true;
+-                      option->ltr_active = true;
++              if (option->ltr_enabled)
+                       rtsx_set_ltr_latency(pcr, option->ltr_active_latency);
+-              } else {
+-                      option->ltr_enabled = false;
+-              }
+       }
+-
+-      if (rtsx_check_dev_flag(pcr, ASPM_L1_1_EN | ASPM_L1_2_EN
+-                              | PM_L1_1_EN | PM_L1_2_EN))
+-              option->force_clkreq_0 = false;
+-      else
+-              option->force_clkreq_0 = true;
+-
+ }
+ static int rts5227_extra_init_hw(struct rtsx_pcr *pcr)
+@@ -195,7 +152,7 @@ static int rts5227_extra_init_hw(struct
+               }
+       }
+-      if (option->force_clkreq_0 && pcr->aspm_mode == ASPM_MODE_CFG)
++      if (option->force_clkreq_0)
+               rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, PETXCFG,
+                               FORCE_CLKREQ_DELINK_MASK, FORCE_CLKREQ_LOW);
+       else
+--- a/drivers/misc/cardreader/rts5228.c
++++ b/drivers/misc/cardreader/rts5228.c
+@@ -386,59 +386,25 @@ static void rts5228_process_ocp(struct r
+ static void rts5228_init_from_cfg(struct rtsx_pcr *pcr)
+ {
+-      struct pci_dev *pdev = pcr->pci;
+-      int l1ss;
+-      u32 lval;
+       struct rtsx_cr_option *option = &pcr->option;
+-      l1ss = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_L1SS);
+-      if (!l1ss)
+-              return;
+-
+-      pci_read_config_dword(pdev, l1ss + PCI_L1SS_CTL1, &lval);
+-
+-      if (0 == (lval & 0x0F))
+-              rtsx_pci_enable_oobs_polling(pcr);
+-      else
++      if (rtsx_check_dev_flag(pcr, ASPM_L1_1_EN | ASPM_L1_2_EN
++                              | PM_L1_1_EN | PM_L1_2_EN))
+               rtsx_pci_disable_oobs_polling(pcr);
+-
+-      if (lval & PCI_L1SS_CTL1_ASPM_L1_1)
+-              rtsx_set_dev_flag(pcr, ASPM_L1_1_EN);
+-      else
+-              rtsx_clear_dev_flag(pcr, ASPM_L1_1_EN);
+-
+-      if (lval & PCI_L1SS_CTL1_ASPM_L1_2)
+-              rtsx_set_dev_flag(pcr, ASPM_L1_2_EN);
+-      else
+-              rtsx_clear_dev_flag(pcr, ASPM_L1_2_EN);
+-
+-      if (lval & PCI_L1SS_CTL1_PCIPM_L1_1)
+-              rtsx_set_dev_flag(pcr, PM_L1_1_EN);
+       else
+-              rtsx_clear_dev_flag(pcr, PM_L1_1_EN);
+-
+-      if (lval & PCI_L1SS_CTL1_PCIPM_L1_2)
+-              rtsx_set_dev_flag(pcr, PM_L1_2_EN);
+-      else
+-              rtsx_clear_dev_flag(pcr, PM_L1_2_EN);
++              rtsx_pci_enable_oobs_polling(pcr);
+       rtsx_pci_write_register(pcr, ASPM_FORCE_CTL, 0xFF, 0);
+-      if (option->ltr_en) {
+-              u16 val;
+-              pcie_capability_read_word(pcr->pci, PCI_EXP_DEVCTL2, &val);
+-              if (val & PCI_EXP_DEVCTL2_LTR_EN) {
+-                      option->ltr_enabled = true;
+-                      option->ltr_active = true;
++      if (option->ltr_en) {
++              if (option->ltr_enabled)
+                       rtsx_set_ltr_latency(pcr, option->ltr_active_latency);
+-              } else {
+-                      option->ltr_enabled = false;
+-              }
+       }
+ }
+ static int rts5228_extra_init_hw(struct rtsx_pcr *pcr)
+ {
++      struct rtsx_cr_option *option = &pcr->option;
+       rtsx_pci_write_register(pcr, RTS5228_AUTOLOAD_CFG1,
+                       CD_RESUME_EN_MASK, CD_RESUME_EN_MASK);
+@@ -469,6 +435,17 @@ static int rts5228_extra_init_hw(struct
+       else
+               rtsx_pci_write_register(pcr, PETXCFG, 0x30, 0x00);
++      /*
++       * If u_force_clkreq_0 is enabled, CLKREQ# PIN will be forced
++       * to drive low, and we forcibly request clock.
++       */
++      if (option->force_clkreq_0)
++              rtsx_pci_write_register(pcr, PETXCFG,
++                               FORCE_CLKREQ_DELINK_MASK, FORCE_CLKREQ_LOW);
++      else
++              rtsx_pci_write_register(pcr, PETXCFG,
++                               FORCE_CLKREQ_DELINK_MASK, FORCE_CLKREQ_HIGH);
++
+       rtsx_pci_write_register(pcr, PWD_SUSPEND_EN, 0xFF, 0xFB);
+       if (pcr->rtd3_en) {
+--- a/drivers/misc/cardreader/rts5249.c
++++ b/drivers/misc/cardreader/rts5249.c
+@@ -86,64 +86,22 @@ static void rtsx_base_fetch_vendor_setti
+ static void rts5249_init_from_cfg(struct rtsx_pcr *pcr)
+ {
+-      struct pci_dev *pdev = pcr->pci;
+-      int l1ss;
+       struct rtsx_cr_option *option = &(pcr->option);
+-      u32 lval;
+-
+-      l1ss = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_L1SS);
+-      if (!l1ss)
+-              return;
+-
+-      pci_read_config_dword(pdev, l1ss + PCI_L1SS_CTL1, &lval);
+       if (CHK_PCI_PID(pcr, PID_524A) || CHK_PCI_PID(pcr, PID_525A)) {
+-              if (0 == (lval & 0x0F))
+-                      rtsx_pci_enable_oobs_polling(pcr);
+-              else
++              if (rtsx_check_dev_flag(pcr, ASPM_L1_1_EN | ASPM_L1_2_EN
++                              | PM_L1_1_EN | PM_L1_2_EN))
+                       rtsx_pci_disable_oobs_polling(pcr);
++              else
++                      rtsx_pci_enable_oobs_polling(pcr);
+       }
+-
+-      if (lval & PCI_L1SS_CTL1_ASPM_L1_1)
+-              rtsx_set_dev_flag(pcr, ASPM_L1_1_EN);
+-
+-      if (lval & PCI_L1SS_CTL1_ASPM_L1_2)
+-              rtsx_set_dev_flag(pcr, ASPM_L1_2_EN);
+-
+-      if (lval & PCI_L1SS_CTL1_PCIPM_L1_1)
+-              rtsx_set_dev_flag(pcr, PM_L1_1_EN);
+-
+-      if (lval & PCI_L1SS_CTL1_PCIPM_L1_2)
+-              rtsx_set_dev_flag(pcr, PM_L1_2_EN);
+-
+       if (option->ltr_en) {
+-              u16 val;
+-
+-              pcie_capability_read_word(pdev, PCI_EXP_DEVCTL2, &val);
+-              if (val & PCI_EXP_DEVCTL2_LTR_EN) {
+-                      option->ltr_enabled = true;
+-                      option->ltr_active = true;
++              if (option->ltr_enabled)
+                       rtsx_set_ltr_latency(pcr, option->ltr_active_latency);
+-              } else {
+-                      option->ltr_enabled = false;
+-              }
+       }
+ }
+-static int rts5249_init_from_hw(struct rtsx_pcr *pcr)
+-{
+-      struct rtsx_cr_option *option = &(pcr->option);
+-
+-      if (rtsx_check_dev_flag(pcr, ASPM_L1_1_EN | ASPM_L1_2_EN
+-                              | PM_L1_1_EN | PM_L1_2_EN))
+-              option->force_clkreq_0 = false;
+-      else
+-              option->force_clkreq_0 = true;
+-
+-      return 0;
+-}
+-
+ static void rts52xa_force_power_down(struct rtsx_pcr *pcr, u8 pm_state, bool runtime)
+ {
+       /* Set relink_time to 0 */
+@@ -276,7 +234,6 @@ static int rts5249_extra_init_hw(struct
+       struct rtsx_cr_option *option = &(pcr->option);
+       rts5249_init_from_cfg(pcr);
+-      rts5249_init_from_hw(pcr);
+       rtsx_pci_init_cmd(pcr);
+@@ -327,11 +284,12 @@ static int rts5249_extra_init_hw(struct
+               }
+       }
++
+       /*
+        * If u_force_clkreq_0 is enabled, CLKREQ# PIN will be forced
+        * to drive low, and we forcibly request clock.
+        */
+-      if (option->force_clkreq_0 && pcr->aspm_mode == ASPM_MODE_CFG)
++      if (option->force_clkreq_0)
+               rtsx_pci_write_register(pcr, PETXCFG,
+                       FORCE_CLKREQ_DELINK_MASK, FORCE_CLKREQ_LOW);
+       else
+--- a/drivers/misc/cardreader/rts5260.c
++++ b/drivers/misc/cardreader/rts5260.c
+@@ -480,47 +480,19 @@ static void rts5260_pwr_saving_setting(s
+ static void rts5260_init_from_cfg(struct rtsx_pcr *pcr)
+ {
+-      struct pci_dev *pdev = pcr->pci;
+-      int l1ss;
+       struct rtsx_cr_option *option = &pcr->option;
+-      u32 lval;
+-
+-      l1ss = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_L1SS);
+-      if (!l1ss)
+-              return;
+-
+-      pci_read_config_dword(pdev, l1ss + PCI_L1SS_CTL1, &lval);
+-
+-      if (lval & PCI_L1SS_CTL1_ASPM_L1_1)
+-              rtsx_set_dev_flag(pcr, ASPM_L1_1_EN);
+-
+-      if (lval & PCI_L1SS_CTL1_ASPM_L1_2)
+-              rtsx_set_dev_flag(pcr, ASPM_L1_2_EN);
+-
+-      if (lval & PCI_L1SS_CTL1_PCIPM_L1_1)
+-              rtsx_set_dev_flag(pcr, PM_L1_1_EN);
+-
+-      if (lval & PCI_L1SS_CTL1_PCIPM_L1_2)
+-              rtsx_set_dev_flag(pcr, PM_L1_2_EN);
+       rts5260_pwr_saving_setting(pcr);
+       if (option->ltr_en) {
+-              u16 val;
+-
+-              pcie_capability_read_word(pdev, PCI_EXP_DEVCTL2, &val);
+-              if (val & PCI_EXP_DEVCTL2_LTR_EN) {
+-                      option->ltr_enabled = true;
+-                      option->ltr_active = true;
++              if (option->ltr_enabled)
+                       rtsx_set_ltr_latency(pcr, option->ltr_active_latency);
+-              } else {
+-                      option->ltr_enabled = false;
+-              }
+       }
+ }
+ static int rts5260_extra_init_hw(struct rtsx_pcr *pcr)
+ {
++      struct rtsx_cr_option *option = &pcr->option;
+       /* Set mcu_cnt to 7 to ensure data can be sampled properly */
+       rtsx_pci_write_register(pcr, 0xFC03, 0x7F, 0x07);
+@@ -539,6 +511,17 @@ static int rts5260_extra_init_hw(struct
+       rts5260_init_hw(pcr);
++      /*
++       * If u_force_clkreq_0 is enabled, CLKREQ# PIN will be forced
++       * to drive low, and we forcibly request clock.
++       */
++      if (option->force_clkreq_0)
++              rtsx_pci_write_register(pcr, PETXCFG,
++                               FORCE_CLKREQ_DELINK_MASK, FORCE_CLKREQ_LOW);
++      else
++              rtsx_pci_write_register(pcr, PETXCFG,
++                               FORCE_CLKREQ_DELINK_MASK, FORCE_CLKREQ_HIGH);
++
+       rtsx_pci_write_register(pcr, pcr->reg_pm_ctrl3, 0x10, 0x00);
+       return 0;
+--- a/drivers/misc/cardreader/rts5261.c
++++ b/drivers/misc/cardreader/rts5261.c
+@@ -454,54 +454,17 @@ static void rts5261_init_from_hw(struct
+ static void rts5261_init_from_cfg(struct rtsx_pcr *pcr)
+ {
+-      struct pci_dev *pdev = pcr->pci;
+-      int l1ss;
+-      u32 lval;
+       struct rtsx_cr_option *option = &pcr->option;
+-      l1ss = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_L1SS);
+-      if (!l1ss)
+-              return;
+-
+-      pci_read_config_dword(pdev, l1ss + PCI_L1SS_CTL1, &lval);
+-
+-      if (lval & PCI_L1SS_CTL1_ASPM_L1_1)
+-              rtsx_set_dev_flag(pcr, ASPM_L1_1_EN);
+-      else
+-              rtsx_clear_dev_flag(pcr, ASPM_L1_1_EN);
+-
+-      if (lval & PCI_L1SS_CTL1_ASPM_L1_2)
+-              rtsx_set_dev_flag(pcr, ASPM_L1_2_EN);
+-      else
+-              rtsx_clear_dev_flag(pcr, ASPM_L1_2_EN);
+-
+-      if (lval & PCI_L1SS_CTL1_PCIPM_L1_1)
+-              rtsx_set_dev_flag(pcr, PM_L1_1_EN);
+-      else
+-              rtsx_clear_dev_flag(pcr, PM_L1_1_EN);
+-
+-      if (lval & PCI_L1SS_CTL1_PCIPM_L1_2)
+-              rtsx_set_dev_flag(pcr, PM_L1_2_EN);
+-      else
+-              rtsx_clear_dev_flag(pcr, PM_L1_2_EN);
+-
+-      rtsx_pci_write_register(pcr, ASPM_FORCE_CTL, 0xFF, 0);
+       if (option->ltr_en) {
+-              u16 val;
+-
+-              pcie_capability_read_word(pdev, PCI_EXP_DEVCTL2, &val);
+-              if (val & PCI_EXP_DEVCTL2_LTR_EN) {
+-                      option->ltr_enabled = true;
+-                      option->ltr_active = true;
++              if (option->ltr_enabled)
+                       rtsx_set_ltr_latency(pcr, option->ltr_active_latency);
+-              } else {
+-                      option->ltr_enabled = false;
+-              }
+       }
+ }
+ static int rts5261_extra_init_hw(struct rtsx_pcr *pcr)
+ {
++      struct rtsx_cr_option *option = &pcr->option;
+       u32 val;
+       rtsx_pci_write_register(pcr, RTS5261_AUTOLOAD_CFG1,
+@@ -547,6 +510,17 @@ static int rts5261_extra_init_hw(struct
+       else
+               rtsx_pci_write_register(pcr, PETXCFG, 0x30, 0x00);
++      /*
++       * If u_force_clkreq_0 is enabled, CLKREQ# PIN will be forced
++       * to drive low, and we forcibly request clock.
++       */
++      if (option->force_clkreq_0)
++              rtsx_pci_write_register(pcr, PETXCFG,
++                               FORCE_CLKREQ_DELINK_MASK, FORCE_CLKREQ_LOW);
++      else
++              rtsx_pci_write_register(pcr, PETXCFG,
++                               FORCE_CLKREQ_DELINK_MASK, FORCE_CLKREQ_HIGH);
++
+       rtsx_pci_write_register(pcr, PWD_SUSPEND_EN, 0xFF, 0xFB);
+       if (pcr->rtd3_en) {
+--- a/drivers/misc/cardreader/rtsx_pcr.c
++++ b/drivers/misc/cardreader/rtsx_pcr.c
+@@ -1326,11 +1326,8 @@ static int rtsx_pci_init_hw(struct rtsx_
+                       return err;
+       }
+-      if (pcr->aspm_mode == ASPM_MODE_REG) {
++      if (pcr->aspm_mode == ASPM_MODE_REG)
+               rtsx_pci_write_register(pcr, ASPM_FORCE_CTL, 0x30, 0x30);
+-              rtsx_pci_write_register(pcr, PETXCFG,
+-                              FORCE_CLKREQ_DELINK_MASK, FORCE_CLKREQ_HIGH);
+-      }
+       /* No CD interrupt if probing driver with card inserted.
+        * So we need to initialize pcr->card_exist here.
+@@ -1345,7 +1342,9 @@ static int rtsx_pci_init_hw(struct rtsx_
+ static int rtsx_pci_init_chip(struct rtsx_pcr *pcr)
+ {
+-      int err;
++      struct rtsx_cr_option *option = &(pcr->option);
++      int err, l1ss;
++      u32 lval;
+       u16 cfg_val;
+       u8 val;
+@@ -1430,6 +1429,48 @@ static int rtsx_pci_init_chip(struct rts
+                       pcr->aspm_enabled = true;
+       }
++      l1ss = pci_find_ext_capability(pcr->pci, PCI_EXT_CAP_ID_L1SS);
++      if (l1ss) {
++              pci_read_config_dword(pcr->pci, l1ss + PCI_L1SS_CTL1, &lval);
++
++              if (lval & PCI_L1SS_CTL1_ASPM_L1_1)
++                      rtsx_set_dev_flag(pcr, ASPM_L1_1_EN);
++              else
++                      rtsx_clear_dev_flag(pcr, ASPM_L1_1_EN);
++
++              if (lval & PCI_L1SS_CTL1_ASPM_L1_2)
++                      rtsx_set_dev_flag(pcr, ASPM_L1_2_EN);
++              else
++                      rtsx_clear_dev_flag(pcr, ASPM_L1_2_EN);
++
++              if (lval & PCI_L1SS_CTL1_PCIPM_L1_1)
++                      rtsx_set_dev_flag(pcr, PM_L1_1_EN);
++              else
++                      rtsx_clear_dev_flag(pcr, PM_L1_1_EN);
++
++              if (lval & PCI_L1SS_CTL1_PCIPM_L1_2)
++                      rtsx_set_dev_flag(pcr, PM_L1_2_EN);
++              else
++                      rtsx_clear_dev_flag(pcr, PM_L1_2_EN);
++
++              pcie_capability_read_word(pcr->pci, PCI_EXP_DEVCTL2, &cfg_val);
++              if (cfg_val & PCI_EXP_DEVCTL2_LTR_EN) {
++                      option->ltr_enabled = true;
++                      option->ltr_active = true;
++              } else {
++                      option->ltr_enabled = false;
++              }
++
++              if (rtsx_check_dev_flag(pcr, ASPM_L1_1_EN | ASPM_L1_2_EN
++                              | PM_L1_1_EN | PM_L1_2_EN))
++                      option->force_clkreq_0 = false;
++              else
++                      option->force_clkreq_0 = true;
++      } else {
++              option->ltr_enabled = false;
++              option->force_clkreq_0 = true;
++      }
++
+       if (pcr->ops->fetch_vendor_settings)
+               pcr->ops->fetch_vendor_settings(pcr);
diff --git a/queue-6.1/mptcp-fix-bogus-receive-window-shrinkage-with-multiple-subflows.patch b/queue-6.1/mptcp-fix-bogus-receive-window-shrinkage-with-multiple-subflows.patch
new file mode 100644 (file)
index 0000000..2258c16
--- /dev/null
@@ -0,0 +1,50 @@
+From 6bec041147a2a64a490d1f813e8a004443061b38 Mon Sep 17 00:00:00 2001
+From: Paolo Abeni <pabeni@redhat.com>
+Date: Sat, 16 Sep 2023 12:52:45 +0200
+Subject: mptcp: fix bogus receive window shrinkage with multiple subflows
+
+From: Paolo Abeni <pabeni@redhat.com>
+
+commit 6bec041147a2a64a490d1f813e8a004443061b38 upstream.
+
+In case multiple subflows race to update the mptcp-level receive
+window, the subflow losing the race should use the window value
+provided by the "winning" subflow to update its own tcp-level
+rcv_wnd.
+
+To that end, the current code bogusly uses the mptcp-level rcv_wnd
+value as observed before the update attempt. Under unlucky
+circumstances that may lead to TCP-level window shrinkage and stall
+the other end.
+
+Address the issue by feeding the correct value to the rcv_wnd update.
+
+Fixes: f3589be0c420 ("mptcp: never shrink offered window")
+Cc: stable@vger.kernel.org
+Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/427
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Reviewed-by: Mat Martineau <martineau@kernel.org>
+Signed-off-by: Matthieu Baerts <matthieu.baerts@tessares.net>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/mptcp/options.c |    5 +++--
+ 1 file changed, 3 insertions(+), 2 deletions(-)
+
+--- a/net/mptcp/options.c
++++ b/net/mptcp/options.c
+@@ -1248,12 +1248,13 @@ static void mptcp_set_rwin(struct tcp_so
+                       if (rcv_wnd == rcv_wnd_old)
+                               break;
+-                      if (before64(rcv_wnd_new, rcv_wnd)) {
++
++                      rcv_wnd_old = rcv_wnd;
++                      if (before64(rcv_wnd_new, rcv_wnd_old)) {
+                               MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_RCVWNDCONFLICTUPDATE);
+                               goto raise_win;
+                       }
+                       MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_RCVWNDCONFLICT);
+-                      rcv_wnd_old = rcv_wnd;
+               }
+               return;
+       }
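
The invariant being repaired is the lock-free "never shrink" update: whoever loses the race to raise a shared maximum must compare against the value that beat it, not against its pre-race snapshot. A minimal illustrative sketch using C11 atomics (a sketch of the pattern, not the mptcp code itself):

```c
/*
 * Sketch of the "never shrink the shared window" update: on a lost
 * compare-and-swap, atomic_compare_exchange_weak() refreshes `old`
 * with the winner's value, and the loser must compare against THAT,
 * not against its pre-race snapshot (the bug fixed above).
 */
#include <stdatomic.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

static _Atomic uint64_t shared_rcv_wnd;

/* Returns true if this caller managed to raise the shared window. */
static bool raise_window(uint64_t new_wnd)
{
	uint64_t old = atomic_load(&shared_rcv_wnd);

	while (new_wnd > old) {
		/* On failure, `old` is reloaded with the winner's value. */
		if (atomic_compare_exchange_weak(&shared_rcv_wnd, &old,
						 new_wnd))
			return true;
	}
	/* Lost the race: new_wnd <= old, the value that beat us. */
	return false;
}

int main(void)
{
	atomic_store(&shared_rcv_wnd, 1000);
	raise_window(2000);             /* "winning" subflow */
	printf("loser raised: %d, wnd now %llu\n",
	       raise_window(1500),      /* loser compares against 2000 */
	       (unsigned long long)atomic_load(&shared_rcv_wnd));
	return 0;
}
```
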
diff --git a/queue-6.1/nilfs2-fix-potential-use-after-free-in-nilfs_gccache_submit_read_data.patch b/queue-6.1/nilfs2-fix-potential-use-after-free-in-nilfs_gccache_submit_read_data.patch
new file mode 100644 (file)
index 0000000..095d33a
--- /dev/null
@@ -0,0 +1,61 @@
+From 7ee29facd8a9c5a26079148e36bcf07141b3a6bc Mon Sep 17 00:00:00 2001
+From: Pan Bian <bianpan2016@163.com>
+Date: Thu, 21 Sep 2023 23:17:31 +0900
+Subject: nilfs2: fix potential use after free in nilfs_gccache_submit_read_data()
+
+From: Pan Bian <bianpan2016@163.com>
+
+commit 7ee29facd8a9c5a26079148e36bcf07141b3a6bc upstream.
+
+In nilfs_gccache_submit_read_data(), brelse(bh) is called to drop the
+reference count of bh when the call to nilfs_dat_translate() fails.  If
+the reference count hits 0 and its owner page gets unlocked, bh may be
+freed.  However, bh->b_page is dereferenced to put the page after that,
+which may result in a use-after-free bug.  This patch moves the release
+operation after unlocking and putting the page.
+
+NOTE: The function in question is only called in GC, and in combination
+with current userland tools, address translation using DAT does not occur
+in that function, so the code path that causes this issue will not be
+executed.  However, it is possible to run that code path by intentionally
+modifying the userland GC library or by calling the GC ioctl directly.
+
+[konishi.ryusuke@gmail.com: NOTE added to the commit log]
+Link: https://lkml.kernel.org/r/1543201709-53191-1-git-send-email-bianpan2016@163.com
+Link: https://lkml.kernel.org/r/20230921141731.10073-1-konishi.ryusuke@gmail.com
+Fixes: a3d93f709e89 ("nilfs2: block cache for garbage collection")
+Signed-off-by: Pan Bian <bianpan2016@163.com>
+Reported-by: Ferry Meng <mengferry@linux.alibaba.com>
+Closes: https://lkml.kernel.org/r/20230818092022.111054-1-mengferry@linux.alibaba.com
+Signed-off-by: Ryusuke Konishi <konishi.ryusuke@gmail.com>
+Tested-by: Ryusuke Konishi <konishi.ryusuke@gmail.com>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/nilfs2/gcinode.c |    6 +++---
+ 1 file changed, 3 insertions(+), 3 deletions(-)
+
+--- a/fs/nilfs2/gcinode.c
++++ b/fs/nilfs2/gcinode.c
+@@ -73,10 +73,8 @@ int nilfs_gccache_submit_read_data(struc
+               struct the_nilfs *nilfs = inode->i_sb->s_fs_info;
+               err = nilfs_dat_translate(nilfs->ns_dat, vbn, &pbn);
+-              if (unlikely(err)) { /* -EIO, -ENOMEM, -ENOENT */
+-                      brelse(bh);
++              if (unlikely(err)) /* -EIO, -ENOMEM, -ENOENT */
+                       goto failed;
+-              }
+       }
+       lock_buffer(bh);
+@@ -102,6 +100,8 @@ int nilfs_gccache_submit_read_data(struc
+  failed:
+       unlock_page(bh->b_page);
+       put_page(bh->b_page);
++      if (unlikely(err))
++              brelse(bh);
+       return err;
+ }
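
The rule the fix restores is plain release ordering: a reference may be dropped only after the last dereference of anything reached through it. A self-contained sketch of the bug shape (hypothetical types, not the nilfs code):

```c
/*
 * Hypothetical sketch of the release-ordering bug: dropping the last
 * reference to an object and then dereferencing a pointer read through
 * it. The fix orders the dereferences before the release.
 */
#include <stdio.h>
#include <stdlib.h>

struct page { int refcount; };
struct buffer_head { struct page *b_page; int refcount; };

static void put_bh(struct buffer_head *bh)
{
	if (--bh->refcount == 0) {
		free(bh->b_page);
		free(bh);  /* bh and bh->b_page may be gone now */
	}
}

static int submit_read(struct buffer_head *bh, int err)
{
	if (err) {
		/*
		 * BUGGY ORDER (before the patch):
		 *   put_bh(bh);        -- may free bh
		 *   use(bh->b_page);   -- use-after-free
		 *
		 * FIXED ORDER (after the patch): use first, release last.
		 */
		printf("unlocking page %p\n", (void *)bh->b_page);
		put_bh(bh);
		return err;
	}
	return 0;
}

int main(void)
{
	struct buffer_head *bh = malloc(sizeof(*bh));

	bh->b_page = malloc(sizeof(*bh->b_page));
	bh->refcount = 1;
	return submit_read(bh, -5 /* stand-in for -EIO */) ? 1 : 0;
}
```
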
diff --git a/queue-6.1/revert-tty-n_gsm-fix-uaf-in-gsm_cleanup_mux.patch b/queue-6.1/revert-tty-n_gsm-fix-uaf-in-gsm_cleanup_mux.patch
new file mode 100644 (file)
index 0000000..15c922b
--- /dev/null
@@ -0,0 +1,68 @@
+From 29346e217b8ab8a52889b88f00b268278d6b7668 Mon Sep 17 00:00:00 2001
+From: Daniel Starke <daniel.starke@siemens.com>
+Date: Thu, 14 Sep 2023 07:15:07 +0200
+Subject: Revert "tty: n_gsm: fix UAF in gsm_cleanup_mux"
+
+From: Daniel Starke <daniel.starke@siemens.com>
+
+commit 29346e217b8ab8a52889b88f00b268278d6b7668 upstream.
+
+This reverts commit 9b9c8195f3f0d74a826077fc1c01b9ee74907239.
+
+The commit above is reverted as it did not solve the original issue.
+
+gsm_cleanup_mux() tries to free up the virtual ttys by calling
+gsm_dlci_release() for each available DLCI. There, dlci_put() is called to
+decrease the reference counter for the DLCI via tty_port_put() which
+finally calls gsm_dlci_free(). This already clears the pointer which is
+being checked in gsm_cleanup_mux() before calling gsm_dlci_release().
+Therefore, it is not necessary to clear this pointer in gsm_cleanup_mux()
+as done in the reverted commit. The commit introduces a null pointer
+dereference:
+ <TASK>
+ ? __die+0x1f/0x70
+ ? page_fault_oops+0x156/0x420
+ ? search_exception_tables+0x37/0x50
+ ? fixup_exception+0x21/0x310
+ ? exc_page_fault+0x69/0x150
+ ? asm_exc_page_fault+0x26/0x30
+ ? tty_port_put+0x19/0xa0
+ gsmtty_cleanup+0x29/0x80 [n_gsm]
+ release_one_tty+0x37/0xe0
+ process_one_work+0x1e6/0x3e0
+ worker_thread+0x4c/0x3d0
+ ? __pfx_worker_thread+0x10/0x10
+ kthread+0xe1/0x110
+ ? __pfx_kthread+0x10/0x10
+ ret_from_fork+0x2f/0x50
+ ? __pfx_kthread+0x10/0x10
+ ret_from_fork_asm+0x1b/0x30
+ </TASK>
+
+The actual issue is that nothing guards dlci_put() from being called
+multiple times while the tty driver was triggered but has not yet finished
+calling gsm_dlci_free().
+
+Fixes: 9b9c8195f3f0 ("tty: n_gsm: fix UAF in gsm_cleanup_mux")
+Cc: stable <stable@kernel.org>
+Signed-off-by: Daniel Starke <daniel.starke@siemens.com>
+Link: https://lore.kernel.org/r/20230914051507.3240-1-daniel.starke@siemens.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/tty/n_gsm.c |    4 +---
+ 1 file changed, 1 insertion(+), 3 deletions(-)
+
+--- a/drivers/tty/n_gsm.c
++++ b/drivers/tty/n_gsm.c
+@@ -2509,10 +2509,8 @@ static void gsm_cleanup_mux(struct gsm_m
+               gsm->has_devices = false;
+       }
+       for (i = NUM_DLCI - 1; i >= 0; i--)
+-              if (gsm->dlci[i]) {
++              if (gsm->dlci[i])
+                       gsm_dlci_release(gsm->dlci[i]);
+-                      gsm->dlci[i] = NULL;
+-              }
+       mutex_unlock(&gsm->mutex);
+       /* Now wipe the queues */
+       tty_ldisc_flush(gsm->tty);
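
A reduced sketch of why the reverted hunk was redundant (hypothetical types, not the n_gsm code): in a refcounted scheme the final put is what clears the owner's pointer slot and frees the object, so clearing the slot again in the caller changes nothing, and it cannot guard against a second put racing in.

```c
/*
 * Hypothetical sketch of the revert's rationale: the release path
 * already clears the owner's pointer slot on the final put, so
 * clearing it again in the caller is redundant.
 */
#include <stdio.h>
#include <stdlib.h>

struct dlci {
	int refcount;
	struct dlci **owner_slot;  /* e.g. &gsm->dlci[i] */
};

static void dlci_put(struct dlci *d)
{
	if (--d->refcount == 0) {
		*d->owner_slot = NULL;  /* slot cleared on the final put */
		free(d);
	}
}

int main(void)
{
	struct dlci *table[1] = { NULL };
	struct dlci *d = malloc(sizeof(*d));

	d->refcount = 1;
	d->owner_slot = &table[0];
	table[0] = d;

	if (table[0])
		dlci_put(table[0]);  /* frees d AND NULLs table[0] */

	printf("slot after release: %p\n", (void *)table[0]);  /* (nil) */
	return 0;
}
```
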
diff --git a/queue-6.1/serial-8250_port-check-irq-data-before-use.patch b/queue-6.1/serial-8250_port-check-irq-data-before-use.patch
new file mode 100644 (file)
index 0000000..e363daf
--- /dev/null
@@ -0,0 +1,49 @@
+From cce7fc8b29961b64fadb1ce398dc5ff32a79643b Mon Sep 17 00:00:00 2001
+From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
+Date: Fri, 1 Sep 2023 01:25:55 +0300
+Subject: serial: 8250_port: Check IRQ data before use
+
+From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
+
+commit cce7fc8b29961b64fadb1ce398dc5ff32a79643b upstream.
+
+In case the leaf driver wants to use IRQ polling (irq = 0) and the
+IIR register shows that an interrupt happened in the 8250 hardware,
+the IRQ data can be NULL. In such a case we need to skip the wake
+event as we came to this path from the timer interrupt and the
+system is quite likely already awake.
+
+Without this fix we have got an Oops:
+
+    serial8250: ttyS0 at I/O 0x3f8 (irq = 0, base_baud = 115200) is a 16550A
+    ...
+    BUG: kernel NULL pointer dereference, address: 0000000000000010
+    RIP: 0010:serial8250_handle_irq+0x7c/0x240
+    Call Trace:
+     ? serial8250_handle_irq+0x7c/0x240
+     ? __pfx_serial8250_timeout+0x10/0x10
+
+Fixes: 0ba9e3a13c6a ("serial: 8250: Add missing wakeup event reporting")
+Cc: stable <stable@kernel.org>
+Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
+Reviewed-by: Florian Fainelli <florian.fainelli@broadcom.com>
+Link: https://lore.kernel.org/r/20230831222555.614426-1-andriy.shevchenko@linux.intel.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/tty/serial/8250/8250_port.c |    5 ++++-
+ 1 file changed, 4 insertions(+), 1 deletion(-)
+
+--- a/drivers/tty/serial/8250/8250_port.c
++++ b/drivers/tty/serial/8250/8250_port.c
+@@ -1953,7 +1953,10 @@ int serial8250_handle_irq(struct uart_po
+               skip_rx = true;
+       if (status & (UART_LSR_DR | UART_LSR_BI) && !skip_rx) {
+-              if (irqd_is_wakeup_set(irq_get_irq_data(port->irq)))
++              struct irq_data *d;
++
++              d = irq_get_irq_data(port->irq);
++              if (d && irqd_is_wakeup_set(d))
+                       pm_wakeup_event(tport->tty->dev, 0);
+               if (!up->dma || handle_rx_dma(up, iir))
+                       status = serial8250_rx_chars(up, status);
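
The fix is the standard defensive shape for lookups that can legitimately return NULL: in polling mode (irq = 0) there is no IRQ descriptor to fetch, so the lookup result must be checked before the wakeup query dereferences it. A self-contained sketch (hypothetical stand-ins for the genirq calls):

```c
/*
 * Hypothetical sketch of the defensive pattern: a lookup keyed by
 * "irq" returns NULL when polling is used (irq == 0), so the result
 * must be checked before the wakeup query dereferences it.
 */
#include <stdbool.h>
#include <stdio.h>

struct irq_data { bool wakeup_set; };

static struct irq_data *irq_lookup(int irq)
{
	static struct irq_data real = { .wakeup_set = true };

	return irq ? &real : NULL;  /* polling mode has no IRQ data */
}

static void handle_irq(int irq)
{
	struct irq_data *d = irq_lookup(irq);

	/* Before the fix: irq_lookup(0)->wakeup_set was a NULL deref. */
	if (d && d->wakeup_set)
		printf("irq %d: reporting wakeup event\n", irq);
	else
		printf("irq %d: no wakeup report\n", irq);
}

int main(void)
{
	handle_irq(4);  /* normal interrupt path */
	handle_irq(0);  /* IRQ polling: lookup returns NULL */
	return 0;
}
```
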
diff --git a/queue-6.1/series b/queue-6.1/series
index 14de272cd4bacaf8a85095ed01f16f6c2fdbd748..262811aff368ebac6ea2f05afbd7dc32698392fc 100644 (file)
@@ -206,3 +206,14 @@ nvme-pci-always-return-an-err_ptr-from-nvme_pci_allo.patch
 smack-record-transmuting-in-smk_transmuted.patch
 smack-retrieve-transmuting-information-in-smack_inod.patch
 iommu-arm-smmu-v3-fix-soft-lockup-triggered-by-arm_s.patch
+x86-sgx-resolves-secs-reclaim-vs.-page-fault-for-eaug-race.patch
+x86-srso-add-srso-mitigation-for-hygon-processors.patch
+kvm-svm-intercept_rdtscp-is-never-intercepted-anyway.patch
+kvm-svm-fix-tsc_aux-virtualization-setup.patch
+kvm-x86-mmu-open-code-leaf-invalidation-from-mmu_notifier.patch
+kvm-x86-mmu-do-not-filter-address-spaces-in-for_each_tdp_mmu_root_yield_safe.patch
+mptcp-fix-bogus-receive-window-shrinkage-with-multiple-subflows.patch
+misc-rtsx-fix-some-platforms-can-not-boot-and-move-the-l1ss-judgment-to-probe.patch
+revert-tty-n_gsm-fix-uaf-in-gsm_cleanup_mux.patch
+serial-8250_port-check-irq-data-before-use.patch
+nilfs2-fix-potential-use-after-free-in-nilfs_gccache_submit_read_data.patch
diff --git a/queue-6.1/x86-sgx-resolves-secs-reclaim-vs.-page-fault-for-eaug-race.patch b/queue-6.1/x86-sgx-resolves-secs-reclaim-vs.-page-fault-for-eaug-race.patch
new file mode 100644 (file)
index 0000000..bae3377
--- /dev/null
@@ -0,0 +1,117 @@
+From c6c2adcba50c2622ed25ba5d5e7f05f584711358 Mon Sep 17 00:00:00 2001
+From: Haitao Huang <haitao.huang@linux.intel.com>
+Date: Thu, 27 Jul 2023 22:10:24 -0700
+Subject: x86/sgx: Resolves SECS reclaim vs. page fault for EAUG race
+
+From: Haitao Huang <haitao.huang@linux.intel.com>
+
+commit c6c2adcba50c2622ed25ba5d5e7f05f584711358 upstream.
+
+The SGX EPC reclaimer (ksgxd) may reclaim the SECS EPC page for an
+enclave and set secs.epc_page to NULL. The SECS page is used for EAUG
+and ELDU in the SGX page fault handler. However, the NULL check for
+secs.epc_page is only done for ELDU, not EAUG before being used.
+
+Fix this by doing the same NULL check and reloading of the SECS page as
+needed for both EAUG and ELDU.
+
+The SECS page holds global enclave metadata. It can only be reclaimed
+when there are no other enclave pages remaining. At that point,
+virtually nothing can be done with the enclave until the SECS page is
+paged back in.
+
+An enclave can neither run nor generate page faults without a resident
+SECS page. But it is still possible for a #PF for a non-SECS page to
+race with paging out the SECS page: the last resident non-SECS page A
+triggers a #PF in a non-resident page B, and then page A and the SECS
+page are both paged out before the #PF on B is handled.
+
+Hitting this bug requires that the race be triggered by a #PF for EAUG.
+The following is a trace of when that happens.
+
+BUG: kernel NULL pointer dereference, address: 0000000000000000
+RIP: 0010:sgx_encl_eaug_page+0xc7/0x210
+Call Trace:
+ ? __kmem_cache_alloc_node+0x16a/0x440
+ ? xa_load+0x6e/0xa0
+ sgx_vma_fault+0x119/0x230
+ __do_fault+0x36/0x140
+ do_fault+0x12f/0x400
+ __handle_mm_fault+0x728/0x1110
+ handle_mm_fault+0x105/0x310
+ do_user_addr_fault+0x1ee/0x750
+ ? __this_cpu_preempt_check+0x13/0x20
+ exc_page_fault+0x76/0x180
+ asm_exc_page_fault+0x27/0x30
+
+Fixes: 5a90d2c3f5ef ("x86/sgx: Support adding of pages to an initialized enclave")
+Signed-off-by: Haitao Huang <haitao.huang@linux.intel.com>
+Signed-off-by: Dave Hansen <dave.hansen@linux.intel.com>
+Reviewed-by: Jarkko Sakkinen <jarkko@kernel.org>
+Reviewed-by: Kai Huang <kai.huang@intel.com>
+Acked-by: Reinette Chatre <reinette.chatre@intel.com>
+Cc: stable@vger.kernel.org
+Link: https://lore.kernel.org/all/20230728051024.33063-1-haitao.huang%40linux.intel.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kernel/cpu/sgx/encl.c | 30 +++++++++++++++++++++++++-----
+ 1 file changed, 25 insertions(+), 5 deletions(-)
+
+diff --git a/arch/x86/kernel/cpu/sgx/encl.c b/arch/x86/kernel/cpu/sgx/encl.c
+index 91fa70e51004..279148e72459 100644
+--- a/arch/x86/kernel/cpu/sgx/encl.c
++++ b/arch/x86/kernel/cpu/sgx/encl.c
+@@ -235,6 +235,21 @@ static struct sgx_epc_page *sgx_encl_eldu(struct sgx_encl_page *encl_page,
+       return epc_page;
+ }
++/*
++ * Ensure the SECS page is not swapped out.  Must be called with encl->lock
++ * to protect the enclave states including SECS and ensure the SECS page is
++ * not swapped out again while being used.
++ */
++static struct sgx_epc_page *sgx_encl_load_secs(struct sgx_encl *encl)
++{
++      struct sgx_epc_page *epc_page = encl->secs.epc_page;
++
++      if (!epc_page)
++              epc_page = sgx_encl_eldu(&encl->secs, NULL);
++
++      return epc_page;
++}
++
+ static struct sgx_encl_page *__sgx_encl_load_page(struct sgx_encl *encl,
+                                                 struct sgx_encl_page *entry)
+ {
+@@ -248,11 +263,9 @@ static struct sgx_encl_page *__sgx_encl_load_page(struct sgx_encl *encl,
+               return entry;
+       }
+-      if (!(encl->secs.epc_page)) {
+-              epc_page = sgx_encl_eldu(&encl->secs, NULL);
+-              if (IS_ERR(epc_page))
+-                      return ERR_CAST(epc_page);
+-      }
++      epc_page = sgx_encl_load_secs(encl);
++      if (IS_ERR(epc_page))
++              return ERR_CAST(epc_page);
+       epc_page = sgx_encl_eldu(entry, encl->secs.epc_page);
+       if (IS_ERR(epc_page))
+@@ -339,6 +352,13 @@ static vm_fault_t sgx_encl_eaug_page(struct vm_area_struct *vma,
+       mutex_lock(&encl->lock);
++      epc_page = sgx_encl_load_secs(encl);
++      if (IS_ERR(epc_page)) {
++              if (PTR_ERR(epc_page) == -EBUSY)
++                      vmret = VM_FAULT_NOPAGE;
++              goto err_out_unlock;
++      }
++
+       epc_page = sgx_alloc_epc_page(encl_page, false);
+       if (IS_ERR(epc_page)) {
+               if (PTR_ERR(epc_page) == -EBUSY)
+-- 
+2.42.0
+
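
The shape of the fix, reduced to a hypothetical sketch: the fault handler first ensures the prerequisite (SECS) page is resident, reloading it if a reclaimer evicted it, and maps a transient -EBUSY into "retry the fault" rather than treating it as fatal. Names and constants below are illustrative, not the kernel's:

```c
/*
 * Hypothetical sketch of the fix's shape: before EAUG can reference the
 * SECS page, the handler reloads it if the reclaimer evicted it, and a
 * transient -EBUSY becomes "retry the fault" instead of a fatal error.
 */
#include <errno.h>
#include <stdio.h>

#define VM_FAULT_NOPAGE 0x0100  /* illustrative values, not the kernel's */
#define VM_FAULT_SIGBUS 0x0002

struct enclave { int secs_resident; };

/*
 * Stand-in for sgx_encl_load_secs(): 0 on success, -EBUSY if the page
 * is transiently unavailable, another negative errno on hard failure.
 */
static int load_secs(struct enclave *encl)
{
	if (!encl->secs_resident)
		encl->secs_resident = 1;  /* pretend ELDU paged it back in */
	return 0;
}

static int eaug_fault(struct enclave *encl)
{
	int err = load_secs(encl);

	if (err == -EBUSY)
		return VM_FAULT_NOPAGE;  /* transient: retry the fault */
	if (err)
		return VM_FAULT_SIGBUS;

	/* SECS is now resident; safe to EAUG a new page. */
	printf("SECS resident, EAUG can proceed\n");
	return 0;
}

int main(void)
{
	struct enclave encl = { .secs_resident = 0 };  /* reclaimed */

	return eaug_fault(&encl);
}
```
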
diff --git a/queue-6.1/x86-srso-add-srso-mitigation-for-hygon-processors.patch b/queue-6.1/x86-srso-add-srso-mitigation-for-hygon-processors.patch
new file mode 100644 (file)
index 0000000..9bfad00
--- /dev/null
@@ -0,0 +1,33 @@
+From a5ef7d68cea1344cf524f04981c2b3f80bedbb0d Mon Sep 17 00:00:00 2001
+From: Pu Wen <puwen@hygon.cn>
+Date: Thu, 28 Sep 2023 14:59:16 +0800
+Subject: x86/srso: Add SRSO mitigation for Hygon processors
+
+From: Pu Wen <puwen@hygon.cn>
+
+commit a5ef7d68cea1344cf524f04981c2b3f80bedbb0d upstream.
+
+Add mitigation for the speculative return stack overflow vulnerability
+which exists on Hygon processors too.
+
+Signed-off-by: Pu Wen <puwen@hygon.cn>
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Acked-by: Borislav Petkov (AMD) <bp@alien8.de>
+Cc: <stable@vger.kernel.org>
+Link: https://lore.kernel.org/r/tencent_4A14812842F104E93AA722EC939483CEFF05@qq.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kernel/cpu/common.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/arch/x86/kernel/cpu/common.c
++++ b/arch/x86/kernel/cpu/common.c
+@@ -1282,7 +1282,7 @@ static const struct x86_cpu_id cpu_vuln_
+       VULNBL_AMD(0x15, RETBLEED),
+       VULNBL_AMD(0x16, RETBLEED),
+       VULNBL_AMD(0x17, RETBLEED | SMT_RSB | SRSO),
+-      VULNBL_HYGON(0x18, RETBLEED | SMT_RSB),
++      VULNBL_HYGON(0x18, RETBLEED | SMT_RSB | SRSO),
+       VULNBL_AMD(0x19, SRSO),
+       {}
+ };
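
The one-line change works because vulnerability selection is table-driven: each (vendor, family) row carries a bitmask of applicable issues, and matching a row sets the CPU's bug flags, after which the generic SRSO mitigation engages. A reduced, hypothetical sketch of that matching pattern:

```c
/*
 * Hypothetical sketch of the table-driven vulnerability matching the
 * one-line change plugs into: each (vendor, family) row carries a
 * bitmask, and lookup returns whatever issues the row lists.
 */
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

enum { RETBLEED = 1 << 0, SMT_RSB = 1 << 1, SRSO = 1 << 2 };
enum vendor { AMD, HYGON };

struct cpu_vuln_entry {
	enum vendor vendor;
	uint8_t family;
	uint32_t vulns;
};

static const struct cpu_vuln_entry cpu_vuln_blacklist[] = {
	{ AMD,   0x17, RETBLEED | SMT_RSB | SRSO },
	{ HYGON, 0x18, RETBLEED | SMT_RSB | SRSO },  /* SRSO newly added */
	{ AMD,   0x19, SRSO },
};

static uint32_t lookup_vulns(enum vendor v, uint8_t family)
{
	size_t n = sizeof(cpu_vuln_blacklist) / sizeof(cpu_vuln_blacklist[0]);

	for (size_t i = 0; i < n; i++)
		if (cpu_vuln_blacklist[i].vendor == v &&
		    cpu_vuln_blacklist[i].family == family)
			return cpu_vuln_blacklist[i].vulns;
	return 0;
}

int main(void)
{
	printf("Hygon 0x18 needs SRSO mitigation: %s\n",
	       (lookup_vulns(HYGON, 0x18) & SRSO) ? "yes" : "no");
	return 0;
}
```
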