]> git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
6.1-stable patches
authorGreg Kroah-Hartman <gregkh@linuxfoundation.org>
Sun, 3 May 2026 12:44:42 +0000 (14:44 +0200)
committerGreg Kroah-Hartman <gregkh@linuxfoundation.org>
Sun, 3 May 2026 12:44:42 +0000 (14:44 +0200)
added patches:
kvm-nsvm-add-missing-consistency-check-for-efer-cr0-cr4-and-cs.patch
kvm-nsvm-add-missing-consistency-check-for-ncr3-validity.patch
kvm-nsvm-always-inject-a-gp-if-mapping-vmcb12-fails-on-nested-vmrun.patch
kvm-nsvm-clear-gif-on-nested-vmexit-invalid.patch
kvm-nsvm-clear-tracking-of-l1-l2-nmi-and-soft-irq-on-nested-vmexit.patch
kvm-nsvm-ensure-avic-is-inhibited-when-restoring-a-vcpu-to-guest-mode.patch
kvm-nsvm-mark-all-of-vmcb02-dirty-when-restoring-nested-state.patch
kvm-nsvm-sync-interrupt-shadow-to-cached-vmcb12-after-vmrun-of-l2.patch
kvm-nsvm-sync-nextrip-to-cached-vmcb12-after-vmrun-of-l2.patch
kvm-nsvm-use-vcpu-arch.cr2-when-updating-vmcb12-on-nested-vmexit.patch
kvm-svm-explicitly-mark-vmcb01-dirty-after-modifying-vmcb-intercepts.patch
kvm-svm-inject-ud-for-invlpga-if-efer.svme-0.patch
mmc-block-use-single-block-write-in-retry.patch
mmc-sdhci-of-dwcmshc-disable-clock-before-dll-configuration.patch
randomize_kstack-maintain-kstack_offset-per-task.patch
rtc-ntxec-fix-of-node-reference-imbalance.patch
tpm-tpm_tis-add-error-logging-for-data-transfer.patch
userfaultfd-allow-registration-of-ranges-below-mmap_min_addr.patch

19 files changed:
queue-6.1/kvm-nsvm-add-missing-consistency-check-for-efer-cr0-cr4-and-cs.patch [new file with mode: 0644]
queue-6.1/kvm-nsvm-add-missing-consistency-check-for-ncr3-validity.patch [new file with mode: 0644]
queue-6.1/kvm-nsvm-always-inject-a-gp-if-mapping-vmcb12-fails-on-nested-vmrun.patch [new file with mode: 0644]
queue-6.1/kvm-nsvm-clear-gif-on-nested-vmexit-invalid.patch [new file with mode: 0644]
queue-6.1/kvm-nsvm-clear-tracking-of-l1-l2-nmi-and-soft-irq-on-nested-vmexit.patch [new file with mode: 0644]
queue-6.1/kvm-nsvm-ensure-avic-is-inhibited-when-restoring-a-vcpu-to-guest-mode.patch [new file with mode: 0644]
queue-6.1/kvm-nsvm-mark-all-of-vmcb02-dirty-when-restoring-nested-state.patch [new file with mode: 0644]
queue-6.1/kvm-nsvm-sync-interrupt-shadow-to-cached-vmcb12-after-vmrun-of-l2.patch [new file with mode: 0644]
queue-6.1/kvm-nsvm-sync-nextrip-to-cached-vmcb12-after-vmrun-of-l2.patch [new file with mode: 0644]
queue-6.1/kvm-nsvm-use-vcpu-arch.cr2-when-updating-vmcb12-on-nested-vmexit.patch [new file with mode: 0644]
queue-6.1/kvm-svm-explicitly-mark-vmcb01-dirty-after-modifying-vmcb-intercepts.patch [new file with mode: 0644]
queue-6.1/kvm-svm-inject-ud-for-invlpga-if-efer.svme-0.patch [new file with mode: 0644]
queue-6.1/mmc-block-use-single-block-write-in-retry.patch [new file with mode: 0644]
queue-6.1/mmc-sdhci-of-dwcmshc-disable-clock-before-dll-configuration.patch [new file with mode: 0644]
queue-6.1/randomize_kstack-maintain-kstack_offset-per-task.patch [new file with mode: 0644]
queue-6.1/rtc-ntxec-fix-of-node-reference-imbalance.patch [new file with mode: 0644]
queue-6.1/series
queue-6.1/tpm-tpm_tis-add-error-logging-for-data-transfer.patch [new file with mode: 0644]
queue-6.1/userfaultfd-allow-registration-of-ranges-below-mmap_min_addr.patch [new file with mode: 0644]

diff --git a/queue-6.1/kvm-nsvm-add-missing-consistency-check-for-efer-cr0-cr4-and-cs.patch b/queue-6.1/kvm-nsvm-add-missing-consistency-check-for-efer-cr0-cr4-and-cs.patch
new file mode 100644 (file)
index 0000000..c9ffee5
--- /dev/null
@@ -0,0 +1,69 @@
+From 96bd3e76a171a8e21a6387e54e4c420a81968492 Mon Sep 17 00:00:00 2001
+From: Yosry Ahmed <yosry@kernel.org>
+Date: Tue, 3 Mar 2026 00:34:10 +0000
+Subject: KVM: nSVM: Add missing consistency check for EFER, CR0, CR4, and CS
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Yosry Ahmed <yosry@kernel.org>
+
+commit 96bd3e76a171a8e21a6387e54e4c420a81968492 upstream.
+
+According to the APM Volume #2, 15.5, Canonicalization and Consistency
+Checks (24593—Rev. 3.42—March 2024), the following condition (among
+others) results in a #VMEXIT with VMEXIT_INVALID (aka SVM_EXIT_ERR):
+
+  EFER.LME, CR0.PG, CR4.PAE, CS.L, and CS.D are all non-zero.
+
+In the list of consistency checks done when EFER.LME and CR0.PG are set,
+add a check that CS.L and CS.D are not both set, after the existing
+check that CR4.PAE is set.
+
+This is functionally a nop because the nested VMRUN results in
+SVM_EXIT_ERR in HW, which is forwarded to L1, but KVM makes all
+consistency checks before a VMRUN is actually attempted.
+
+Fixes: 3d6368ef580a ("KVM: SVM: Add VMRUN handler")
+Cc: stable@vger.kernel.org
+Signed-off-by: Yosry Ahmed <yosry@kernel.org>
+Link: https://patch.msgid.link/20260303003421.2185681-17-yosry@kernel.org
+Signed-off-by: Sean Christopherson <seanjc@google.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/svm/nested.c |    6 ++++++
+ arch/x86/kvm/svm/svm.h    |    1 +
+ 2 files changed, 7 insertions(+)
+
+--- a/arch/x86/kvm/svm/nested.c
++++ b/arch/x86/kvm/svm/nested.c
+@@ -292,6 +292,10 @@ static bool __nested_vmcb_check_save(str
+                   CC(!(save->cr0 & X86_CR0_PE)) ||
+                   CC(kvm_vcpu_is_illegal_gpa(vcpu, save->cr3)))
+                       return false;
++
++              if (CC((save->cs.attrib & SVM_SELECTOR_L_MASK) &&
++                     (save->cs.attrib & SVM_SELECTOR_DB_MASK)))
++                      return false;
+       }
+       /* Note, SVM doesn't have any additional restrictions on CR4. */
+@@ -378,6 +382,8 @@ static void __nested_copy_vmcb_save_to_c
+        * Copy only fields that are validated, as we need them
+        * to avoid TOC/TOU races.
+        */
++      to->cs = from->cs;
++
+       to->efer = from->efer;
+       to->cr0 = from->cr0;
+       to->cr3 = from->cr3;
+--- a/arch/x86/kvm/svm/svm.h
++++ b/arch/x86/kvm/svm/svm.h
+@@ -118,6 +118,7 @@ struct kvm_vmcb_info {
+ };
+ struct vmcb_save_area_cached {
++      struct vmcb_seg cs;
+       u64 efer;
+       u64 cr4;
+       u64 cr3;
diff --git a/queue-6.1/kvm-nsvm-add-missing-consistency-check-for-ncr3-validity.patch b/queue-6.1/kvm-nsvm-add-missing-consistency-check-for-ncr3-validity.patch
new file mode 100644 (file)
index 0000000..5175bdf
--- /dev/null
@@ -0,0 +1,49 @@
+From b71138fcc362c67ebe66747bb22cb4e6b4d6a651 Mon Sep 17 00:00:00 2001
+From: Yosry Ahmed <yosry@kernel.org>
+Date: Tue, 3 Mar 2026 00:34:09 +0000
+Subject: KVM: nSVM: Add missing consistency check for nCR3 validity
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Yosry Ahmed <yosry@kernel.org>
+
+commit b71138fcc362c67ebe66747bb22cb4e6b4d6a651 upstream.
+
+From the APM Volume #2, 15.25.4 (24593—Rev. 3.42—March 2024):
+
+  When VMRUN is executed with nested paging enabled (NP_ENABLE = 1), the
+  following conditions are considered illegal state combinations, in
+  addition to those mentioned in “Canonicalization and Consistency Checks”:
+      • Any MBZ bit of nCR3 is set.
+      • Any G_PAT.PA field has an unsupported type encoding or any
+        reserved field in G_PAT has a nonzero value.
+
+Add the consistency check for nCR3 being a legal GPA with no MBZ bits
+set.  Note, the G_PAT.PA check is being handled separately[*].
+
+Link: https://lore.kernel.org/kvm/20260205214326.1029278-3-jmattson@google.com [*]
+Fixes: 4b16184c1cca ("KVM: SVM: Initialize Nested Nested MMU context on VMRUN")
+Cc: stable@vger.kernel.org
+Signed-off-by: Yosry Ahmed <yosry@kernel.org>
+Link: https://patch.msgid.link/20260303003421.2185681-16-yosry@kernel.org
+[sean: capture everything in CC(), massage changelog formatting]
+Signed-off-by: Sean Christopherson <seanjc@google.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/svm/nested.c |    4 ++++
+ 1 file changed, 4 insertions(+)
+
+--- a/arch/x86/kvm/svm/nested.c
++++ b/arch/x86/kvm/svm/nested.c
+@@ -257,6 +257,10 @@ static bool __nested_vmcb_check_controls
+       if (CC((control->nested_ctl & SVM_NESTED_CTL_NP_ENABLE) && !npt_enabled))
+               return false;
++      if (CC((control->nested_ctl & SVM_NESTED_CTL_NP_ENABLE) &&
++             !kvm_vcpu_is_legal_gpa(vcpu, control->nested_cr3)))
++              return false;
++
+       if (CC(!nested_svm_check_bitmap_pa(vcpu, control->msrpm_base_pa,
+                                          MSRPM_SIZE)))
+               return false;
diff --git a/queue-6.1/kvm-nsvm-always-inject-a-gp-if-mapping-vmcb12-fails-on-nested-vmrun.patch b/queue-6.1/kvm-nsvm-always-inject-a-gp-if-mapping-vmcb12-fails-on-nested-vmrun.patch
new file mode 100644 (file)
index 0000000..389e33a
--- /dev/null
@@ -0,0 +1,41 @@
+From 01ddcdc55e097ca38c28ae656711b8e6d1df71f8 Mon Sep 17 00:00:00 2001
+From: Yosry Ahmed <yosry@kernel.org>
+Date: Tue, 3 Mar 2026 00:33:59 +0000
+Subject: KVM: nSVM: Always inject a #GP if mapping VMCB12 fails on nested VMRUN
+
+From: Yosry Ahmed <yosry@kernel.org>
+
+commit 01ddcdc55e097ca38c28ae656711b8e6d1df71f8 upstream.
+
+nested_svm_vmrun() currently only injects a #GP if kvm_vcpu_map() fails
+with -EINVAL. But it could also fail with -EFAULT if creating a host
+mapping failed. Inject a #GP in all cases, no reason to treat failure
+modes differently.
+
+Fixes: 8c5fbf1a7231 ("KVM/nSVM: Use the new mapping API for mapping guest memory")
+CC: stable@vger.kernel.org
+Co-developed-by: Sean Christopherson <seanjc@google.com>
+Signed-off-by: Yosry Ahmed <yosry@kernel.org>
+Link: https://patch.msgid.link/20260303003421.2185681-6-yosry@kernel.org
+Signed-off-by: Sean Christopherson <seanjc@google.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/svm/nested.c |    5 +----
+ 1 file changed, 1 insertion(+), 4 deletions(-)
+
+--- a/arch/x86/kvm/svm/nested.c
++++ b/arch/x86/kvm/svm/nested.c
+@@ -816,12 +816,9 @@ int nested_svm_vmrun(struct kvm_vcpu *vc
+       }
+       vmcb12_gpa = svm->vmcb->save.rax;
+-      ret = kvm_vcpu_map(vcpu, gpa_to_gfn(vmcb12_gpa), &map);
+-      if (ret == -EINVAL) {
++      if (kvm_vcpu_map(vcpu, gpa_to_gfn(vmcb12_gpa), &map)) {
+               kvm_inject_gp(vcpu, 0);
+               return 1;
+-      } else if (ret) {
+-              return kvm_skip_emulated_instruction(vcpu);
+       }
+       ret = kvm_skip_emulated_instruction(vcpu);
diff --git a/queue-6.1/kvm-nsvm-clear-gif-on-nested-vmexit-invalid.patch b/queue-6.1/kvm-nsvm-clear-gif-on-nested-vmexit-invalid.patch
new file mode 100644 (file)
index 0000000..b7598a1
--- /dev/null
@@ -0,0 +1,33 @@
+From f85a6ce06e4a0d49652f57967a649ab09e06287c Mon Sep 17 00:00:00 2001
+From: Yosry Ahmed <yosry@kernel.org>
+Date: Tue, 3 Mar 2026 00:34:04 +0000
+Subject: KVM: nSVM: Clear GIF on nested #VMEXIT(INVALID)
+
+From: Yosry Ahmed <yosry@kernel.org>
+
+commit f85a6ce06e4a0d49652f57967a649ab09e06287c upstream.
+
+According to the APM, GIF is set to 0 on any #VMEXIT, including
+an #VMEXIT(INVALID) due to failed consistency checks. Clear GIF on
+consistency check failures.
+
+Fixes: 3d6368ef580a ("KVM: SVM: Add VMRUN handler")
+Cc: stable@vger.kernel.org
+Signed-off-by: Yosry Ahmed <yosry@kernel.org>
+Link: https://patch.msgid.link/20260303003421.2185681-11-yosry@kernel.org
+Signed-off-by: Sean Christopherson <seanjc@google.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/svm/nested.c |    1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/arch/x86/kvm/svm/nested.c
++++ b/arch/x86/kvm/svm/nested.c
+@@ -837,6 +837,7 @@ int nested_svm_vmrun(struct kvm_vcpu *vc
+               vmcb12->control.exit_code_hi = -1u;
+               vmcb12->control.exit_info_1  = 0;
+               vmcb12->control.exit_info_2  = 0;
++              svm_set_gif(svm, false);
+               goto out;
+       }
diff --git a/queue-6.1/kvm-nsvm-clear-tracking-of-l1-l2-nmi-and-soft-irq-on-nested-vmexit.patch b/queue-6.1/kvm-nsvm-clear-tracking-of-l1-l2-nmi-and-soft-irq-on-nested-vmexit.patch
new file mode 100644 (file)
index 0000000..1d10b7c
--- /dev/null
@@ -0,0 +1,64 @@
+From 8998e1d012f3f45d0456f16706682cef04c3c436 Mon Sep 17 00:00:00 2001
+From: Yosry Ahmed <yosry@kernel.org>
+Date: Tue, 3 Mar 2026 00:34:06 +0000
+Subject: KVM: nSVM: Clear tracking of L1->L2 NMI and soft IRQ on nested #VMEXIT
+
+From: Yosry Ahmed <yosry@kernel.org>
+
+commit 8998e1d012f3f45d0456f16706682cef04c3c436 upstream.
+
+KVM clears tracking of L1->L2 injected NMIs (i.e. nmi_l1_to_l2) and soft
+IRQs (i.e. soft_int_injected) on a synthesized #VMEXIT(INVALID) due to
+failed VMRUN. However, they are not explicitly cleared in other
+synthesized #VMEXITs.
+
+soft_int_injected is always cleared after the first VMRUN of L2 when
+completing interrupts, as any re-injection is then tracked by KVM
+(instead of purely in vmcb02).
+
+nmi_l1_to_l2 is not cleared after the first VMRUN if NMI injection
+failed, as KVM still needs to keep track that the NMI originated from L1
+to avoid blocking NMIs for L1. It is only cleared when the NMI injection
+succeeds.
+
+KVM could synthesize a #VMEXIT to L1 before successfully injecting the
+NMI into L2 (e.g. due to a #NPF on L2's NMI handler in L1's NPTs). In
+this case, nmi_l1_to_l2 will remain true, and KVM may not correctly mask
+NMIs and intercept IRET when injecting an NMI into L1.
+
+Clear both nmi_l1_to_l2 and soft_int_injected in nested_svm_vmexit(), i.e.
+for all #VMEXITs except those that occur due to failed consistency checks,
+as those happen before nmi_l1_to_l2 or soft_int_injected are set.
+
+Fixes: 159fc6fa3b7d ("KVM: nSVM: Transparently handle L1 -> L2 NMI re-injection")
+Cc: stable@vger.kernel.org
+Signed-off-by: Yosry Ahmed <yosry@kernel.org>
+Link: https://patch.msgid.link/20260303003421.2185681-13-yosry@kernel.org
+Signed-off-by: Sean Christopherson <seanjc@google.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/svm/nested.c |    6 ++++--
+ 1 file changed, 4 insertions(+), 2 deletions(-)
+
+--- a/arch/x86/kvm/svm/nested.c
++++ b/arch/x86/kvm/svm/nested.c
+@@ -864,8 +864,6 @@ int nested_svm_vmrun(struct kvm_vcpu *vc
+ out_exit_err:
+       svm->nested.nested_run_pending = 0;
+-      svm->nmi_l1_to_l2 = false;
+-      svm->soft_int_injected = false;
+       svm->vmcb->control.exit_code    = SVM_EXIT_ERR;
+       svm->vmcb->control.exit_code_hi = -1u;
+@@ -1131,6 +1129,10 @@ void svm_free_nested(struct vcpu_svm *sv
+       __free_page(virt_to_page(svm->nested.vmcb02.ptr));
+       svm->nested.vmcb02.ptr = NULL;
++      /* Drop tracking for L1->L2 injected NMIs and soft IRQs */
++      svm->nmi_l1_to_l2 = false;
++      svm->soft_int_injected = false;
++
+       /*
+        * When last_vmcb12_gpa matches the current vmcb12 gpa,
+        * some vmcb12 fields are not loaded if they are marked clean
diff --git a/queue-6.1/kvm-nsvm-ensure-avic-is-inhibited-when-restoring-a-vcpu-to-guest-mode.patch b/queue-6.1/kvm-nsvm-ensure-avic-is-inhibited-when-restoring-a-vcpu-to-guest-mode.patch
new file mode 100644 (file)
index 0000000..eb4a41d
--- /dev/null
@@ -0,0 +1,42 @@
+From 24f7d36b824b65cf1a2db3db478059187b2a37b0 Mon Sep 17 00:00:00 2001
+From: Yosry Ahmed <yosry@kernel.org>
+Date: Tue, 24 Feb 2026 22:50:17 +0000
+Subject: KVM: nSVM: Ensure AVIC is inhibited when restoring a vCPU to guest mode
+
+From: Yosry Ahmed <yosry@kernel.org>
+
+commit 24f7d36b824b65cf1a2db3db478059187b2a37b0 upstream.
+
+On nested VMRUN, KVM ensures AVIC is inhibited by requesting
+KVM_REQ_APICV_UPDATE, triggering a check of inhibit reasons, finding
+APICV_INHIBIT_REASON_NESTED, and disabling AVIC.
+
+However, when KVM_SET_NESTED_STATE is performed on a vCPU not in guest
+mode with AVIC enabled, KVM_REQ_APICV_UPDATE is not requested, and AVIC
+is not inhibited.
+
+Request KVM_REQ_APICV_UPDATE in the KVM_SET_NESTED_STATE path if AVIC is
+active, similar to the nested VMRUN path.
+
+Fixes: f44509f849fe ("KVM: x86: SVM: allow AVIC to co-exist with a nested guest running")
+Cc: stable@vger.kernel.org
+Signed-off-by: Yosry Ahmed <yosry@kernel.org>
+Link: https://patch.msgid.link/20260224225017.3303870-1-yosry@kernel.org
+Signed-off-by: Sean Christopherson <seanjc@google.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/svm/nested.c |    3 +++
+ 1 file changed, 3 insertions(+)
+
+--- a/arch/x86/kvm/svm/nested.c
++++ b/arch/x86/kvm/svm/nested.c
+@@ -1682,6 +1682,9 @@ static int svm_set_nested_state(struct k
+       svm->nested.force_msr_bitmap_recalc = true;
++      if (kvm_vcpu_apicv_active(vcpu))
++              kvm_make_request(KVM_REQ_APICV_UPDATE, vcpu);
++
+       kvm_make_request(KVM_REQ_GET_NESTED_STATE_PAGES, vcpu);
+       ret = 0;
+ out_free:
diff --git a/queue-6.1/kvm-nsvm-mark-all-of-vmcb02-dirty-when-restoring-nested-state.patch b/queue-6.1/kvm-nsvm-mark-all-of-vmcb02-dirty-when-restoring-nested-state.patch
new file mode 100644 (file)
index 0000000..a040d79
--- /dev/null
@@ -0,0 +1,42 @@
+From e63fb1379f4b9300a44739964e69549bebbcdca4 Mon Sep 17 00:00:00 2001
+From: Yosry Ahmed <yosry.ahmed@linux.dev>
+Date: Tue, 10 Feb 2026 01:08:06 +0000
+Subject: KVM: nSVM: Mark all of vmcb02 dirty when restoring nested state
+
+From: Yosry Ahmed <yosry.ahmed@linux.dev>
+
+commit e63fb1379f4b9300a44739964e69549bebbcdca4 upstream.
+
+When restoring a vCPU in guest mode, any state restored before
+KVM_SET_NESTED_STATE (e.g. KVM_SET_SREGS) will mark the corresponding
+dirty bits in vmcb01, as it is the active VMCB before switching to
+vmcb02 in svm_set_nested_state().
+
+Hence, mark all fields in vmcb02 dirty in svm_set_nested_state() to
+capture any previously restored fields.
+
+Fixes: cc440cdad5b7 ("KVM: nSVM: implement KVM_GET_NESTED_STATE and KVM_SET_NESTED_STATE")
+CC: stable@vger.kernel.org
+Signed-off-by: Yosry Ahmed <yosry.ahmed@linux.dev>
+Link: https://patch.msgid.link/20260210010806.3204289-1-yosry.ahmed@linux.dev
+Signed-off-by: Sean Christopherson <seanjc@google.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/svm/nested.c |    6 ++++++
+ 1 file changed, 6 insertions(+)
+
+--- a/arch/x86/kvm/svm/nested.c
++++ b/arch/x86/kvm/svm/nested.c
+@@ -1661,6 +1661,12 @@ static int svm_set_nested_state(struct k
+       nested_vmcb02_prepare_control(svm, svm->vmcb->save.rip, svm->vmcb->save.cs.base);
+       /*
++       * Any previously restored state (e.g. KVM_SET_SREGS) would mark fields
++       * dirty in vmcb01 instead of vmcb02, so mark all of vmcb02 dirty here.
++       */
++      vmcb_mark_all_dirty(svm->vmcb);
++
++      /*
+        * While the nested guest CR3 is already checked and set by
+        * KVM_SET_SREGS, it was set when nested state was yet loaded,
+        * thus MMU might not be initialized correctly.
diff --git a/queue-6.1/kvm-nsvm-sync-interrupt-shadow-to-cached-vmcb12-after-vmrun-of-l2.patch b/queue-6.1/kvm-nsvm-sync-interrupt-shadow-to-cached-vmcb12-after-vmrun-of-l2.patch
new file mode 100644 (file)
index 0000000..13968d2
--- /dev/null
@@ -0,0 +1,52 @@
+From 03bee264f8ebfd39e0254c98e112d033a7aa9055 Mon Sep 17 00:00:00 2001
+From: Yosry Ahmed <yosry@kernel.org>
+Date: Wed, 25 Feb 2026 00:59:44 +0000
+Subject: KVM: nSVM: Sync interrupt shadow to cached vmcb12 after VMRUN of L2
+
+From: Yosry Ahmed <yosry@kernel.org>
+
+commit 03bee264f8ebfd39e0254c98e112d033a7aa9055 upstream.
+
+After VMRUN in guest mode, nested_sync_control_from_vmcb02() syncs
+fields written by the CPU from vmcb02 to the cached vmcb12. This is
+because the cached vmcb12 is used as the authoritative copy of some of
+the controls, and is the payload when saving/restoring nested state.
+
+int_state is also written by the CPU, specifically bit 0 (i.e.
+SVM_INTERRUPT_SHADOW_MASK) for nested VMs, but it is not sync'd to
+cached vmcb12. This does not cause a problem if KVM_SET_NESTED_STATE
+precedes KVM_SET_VCPU_EVENTS in the restore path, as an interrupt shadow
+would be correctly restored to vmcb02 (KVM_SET_VCPU_EVENTS overwrites
+what KVM_SET_NESTED_STATE restored in int_state).
+
+However, if KVM_SET_VCPU_EVENTS precedes KVM_SET_NESTED_STATE, an
+interrupt shadow would be restored into vmcb01 instead of vmcb02. This
+would mostly be benign for L1 (delays an interrupt), but not for L2. For
+L2, the vCPU could hang (e.g. if a wakeup interrupt is delivered before
+a HLT that should have been in an interrupt shadow).
+
+Sync int_state to the cached vmcb12 in nested_sync_control_from_vmcb02()
+to avoid this problem. With that, KVM_SET_NESTED_STATE restores the
+correct interrupt shadow state, and if KVM_SET_VCPU_EVENTS follows it
+would overwrite it with the same value.
+
+Fixes: cc440cdad5b7 ("KVM: nSVM: implement KVM_GET_NESTED_STATE and KVM_SET_NESTED_STATE")
+CC: stable@vger.kernel.org
+Signed-off-by: Yosry Ahmed <yosry@kernel.org>
+Link: https://patch.msgid.link/20260225005950.3739782-3-yosry@kernel.org
+Signed-off-by: Sean Christopherson <seanjc@google.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/svm/nested.c |    1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/arch/x86/kvm/svm/nested.c
++++ b/arch/x86/kvm/svm/nested.c
+@@ -400,6 +400,7 @@ void nested_sync_control_from_vmcb02(str
+       u32 mask;
+       svm->nested.ctl.event_inj      = svm->vmcb->control.event_inj;
+       svm->nested.ctl.event_inj_err  = svm->vmcb->control.event_inj_err;
++      svm->nested.ctl.int_state       = svm->vmcb->control.int_state;
+       /* Only a few fields of int_ctl are written by the processor.  */
+       mask = V_IRQ_MASK | V_TPR_MASK;
diff --git a/queue-6.1/kvm-nsvm-sync-nextrip-to-cached-vmcb12-after-vmrun-of-l2.patch b/queue-6.1/kvm-nsvm-sync-nextrip-to-cached-vmcb12-after-vmrun-of-l2.patch
new file mode 100644 (file)
index 0000000..19aa537
--- /dev/null
@@ -0,0 +1,55 @@
+From 778d8c1b2a6ffe622ddcd3bb35b620e6e41f4da0 Mon Sep 17 00:00:00 2001
+From: Yosry Ahmed <yosry@kernel.org>
+Date: Wed, 25 Feb 2026 00:59:43 +0000
+Subject: KVM: nSVM: Sync NextRIP to cached vmcb12 after VMRUN of L2
+
+From: Yosry Ahmed <yosry@kernel.org>
+
+commit 778d8c1b2a6ffe622ddcd3bb35b620e6e41f4da0 upstream.
+
+After VMRUN in guest mode, nested_sync_control_from_vmcb02() syncs
+fields written by the CPU from vmcb02 to the cached vmcb12. This is
+because the cached vmcb12 is used as the authoritative copy of some of
+the controls, and is the payload when saving/restoring nested state.
+
+NextRIP is also written by the CPU (in some cases) after VMRUN, but is
+not sync'd to the cached vmcb12. As a result, it is corrupted after
+save/restore (replaced by the original value written by L1 on nested
+VMRUN). This could cause problems for both KVM (e.g. when injecting a
+soft IRQ) or L1 (e.g. when using NextRIP to advance RIP after emulating
+an instruction).
+
+Fix this by sync'ing NextRIP to the cache after VMRUN of L2, but only
+after completing interrupts (not in nested_sync_control_from_vmcb02()),
+as KVM may update NextRIP (e.g. when re-injecting a soft IRQ).
+
+Fixes: cc440cdad5b7 ("KVM: nSVM: implement KVM_GET_NESTED_STATE and KVM_SET_NESTED_STATE")
+CC: stable@vger.kernel.org
+Co-developed-by: Sean Christopherson <seanjc@google.com>
+Signed-off-by: Yosry Ahmed <yosry@kernel.org>
+Link: https://patch.msgid.link/20260225005950.3739782-2-yosry@kernel.org
+Signed-off-by: Sean Christopherson <seanjc@google.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/svm/svm.c |   10 ++++++++++
+ 1 file changed, 10 insertions(+)
+
+--- a/arch/x86/kvm/svm/svm.c
++++ b/arch/x86/kvm/svm/svm.c
+@@ -4146,6 +4146,16 @@ static __no_kcsan fastpath_t svm_vcpu_ru
+       svm_complete_interrupts(vcpu);
++      /*
++       * Update the cache after completing interrupts to get an accurate
++       * NextRIP, e.g. when re-injecting a soft interrupt.
++       *
++       * FIXME: Rework svm_get_nested_state() to not pull data from the
++       *        cache (except for maybe int_ctl).
++       */
++      if (is_guest_mode(vcpu))
++              svm->nested.ctl.next_rip = svm->vmcb->control.next_rip;
++
+       return svm_exit_handlers_fastpath(vcpu);
+ }
diff --git a/queue-6.1/kvm-nsvm-use-vcpu-arch.cr2-when-updating-vmcb12-on-nested-vmexit.patch b/queue-6.1/kvm-nsvm-use-vcpu-arch.cr2-when-updating-vmcb12-on-nested-vmexit.patch
new file mode 100644 (file)
index 0000000..2d97e0e
--- /dev/null
@@ -0,0 +1,73 @@
+From 5c247d08bc81bbad4c662dcf5654137a2f8483ec Mon Sep 17 00:00:00 2001
+From: Yosry Ahmed <yosry.ahmed@linux.dev>
+Date: Tue, 3 Feb 2026 20:10:10 +0000
+Subject: KVM: nSVM: Use vcpu->arch.cr2 when updating vmcb12 on nested #VMEXIT
+
+From: Yosry Ahmed <yosry.ahmed@linux.dev>
+
+commit 5c247d08bc81bbad4c662dcf5654137a2f8483ec upstream.
+
+KVM currently uses the value of CR2 from vmcb02 to update vmcb12 on
+nested #VMEXIT. This value is incorrect in some cases, causing L1 to run
+L2 with a corrupted CR2. This could lead to segfaults or data corruption
+if L2 is in the middle of handling a #PF and reads a corrupted CR2. Use
+the correct value in vcpu->arch.cr2 instead.
+
+The value in vcpu->arch.cr2 is sync'd to vmcb02 shortly before a VMRUN
+of L2, and sync'd back to vcpu->arch.cr2 shortly after. The values are
+only out-of-sync in two cases: after save+restore, and after a #PF is
+injected into L2. In either case, if a #VMEXIT to L1 is synthesized
+before L2 runs, using the value in vmcb02 would be incorrect.
+
+After save+restore, the value of CR2 is restored by KVM_SET_SREGS into
+vcpu->arch.cr2. It is not reflected in vmcb02 until a VMRUN of L2. Before
+that, it holds whatever was in vmcb02 before restore, which would be
+zero on a new vCPU that never ran nested. If a #VMEXIT to L1 is
+synthesized before L2 ever runs, using vcpu->arch.cr2 to update vmcb12
+is the right thing to do.
+
+The #PF injection case is more nuanced.  Although the APM is a bit
+unclear about when CR2 is written during a #PF, the SDM is more clear:
+
+       Processors update CR2 whenever a page fault is detected. If a
+       second page fault occurs while an earlier page fault is being
+       delivered, the faulting linear address of the second fault will
+       overwrite the contents of CR2 (replacing the previous address).
+       These updates to CR2 occur even if the page fault results in a
+       double fault or occurs during the delivery of a double fault.
+
+KVM injecting the exception surely counts as the #PF being "detected".
+More importantly, when an exception is injected into L2 at the time of a
+synthesized #VMEXIT, KVM updates exit_int_info in vmcb12 accordingly,
+such that an L1 hypervisor can re-inject the exception. If CR2 is not
+written at that point, the L1 hypervisor has no way of correctly
+re-injecting the #PF. Hence, if a #VMEXIT to L1 is synthesized after
+the #PF is injected into L2 but before it actually runs, using
+vcpu->arch.cr2 to update vmcb12 is also the right thing to do.
+
+Note that KVM does _not_ update vcpu->arch.cr2 when a #PF is pending for
+L2, only when it is injected. The distinction is important, because only
+injected (but not intercepted) exceptions are propagated to L1 through
+exit_int_info. It would be incorrect to update CR2 in vmcb12 for a
+pending #PF, as L1 would perceive an updated CR2 value with no #PF.
+
+Cc: stable@vger.kernel.org
+Signed-off-by: Yosry Ahmed <yosry.ahmed@linux.dev>
+Link: https://patch.msgid.link/20260203201010.1871056-1-yosry.ahmed@linux.dev
+Signed-off-by: Sean Christopherson <seanjc@google.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/svm/nested.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/arch/x86/kvm/svm/nested.c
++++ b/arch/x86/kvm/svm/nested.c
+@@ -958,7 +958,7 @@ int nested_svm_vmexit(struct vcpu_svm *s
+       vmcb12->save.efer   = svm->vcpu.arch.efer;
+       vmcb12->save.cr0    = kvm_read_cr0(vcpu);
+       vmcb12->save.cr3    = kvm_read_cr3(vcpu);
+-      vmcb12->save.cr2    = vmcb02->save.cr2;
++      vmcb12->save.cr2    = vcpu->arch.cr2;
+       vmcb12->save.cr4    = svm->vcpu.arch.cr4;
+       vmcb12->save.rflags = kvm_get_rflags(vcpu);
+       vmcb12->save.rip    = kvm_rip_read(vcpu);
diff --git a/queue-6.1/kvm-svm-explicitly-mark-vmcb01-dirty-after-modifying-vmcb-intercepts.patch b/queue-6.1/kvm-svm-explicitly-mark-vmcb01-dirty-after-modifying-vmcb-intercepts.patch
new file mode 100644 (file)
index 0000000..984bf5c
--- /dev/null
@@ -0,0 +1,42 @@
+From d5bde6113aed8315a2bfe708730b721be9c2f48b Mon Sep 17 00:00:00 2001
+From: Sean Christopherson <seanjc@google.com>
+Date: Wed, 18 Feb 2026 15:09:51 -0800
+Subject: KVM: SVM: Explicitly mark vmcb01 dirty after modifying VMCB intercepts
+
+From: Sean Christopherson <seanjc@google.com>
+
+commit d5bde6113aed8315a2bfe708730b721be9c2f48b upstream.
+
+When reacting to an intercept update, explicitly mark vmcb01's intercepts
+dirty, as KVM always initially operates on vmcb01, and nested_svm_vmexit()
+isn't guaranteed to mark VMCB_INTERCEPTS as dirty.  I.e. if L2 is active,
+KVM will modify the intercepts for L1, but might not mark them as dirty
+before the next VMRUN of L1.
+
+Fixes: 116a0a23676e ("KVM: SVM: Add clean-bit for intercetps, tsc-offset and pause filter count")
+Cc: stable@vger.kernel.org
+Reviewed-by: Yosry Ahmed <yosry.ahmed@linux.dev>
+Link: https://patch.msgid.link/20260218230958.2877682-2-seanjc@google.com
+Signed-off-by: Sean Christopherson <seanjc@google.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/svm/nested.c |    4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+--- a/arch/x86/kvm/svm/nested.c
++++ b/arch/x86/kvm/svm/nested.c
+@@ -128,11 +128,13 @@ void recalc_intercepts(struct vcpu_svm *
+       struct vmcb_ctrl_area_cached *g;
+       unsigned int i;
+-      vmcb_mark_dirty(svm->vmcb, VMCB_INTERCEPTS);
++      vmcb_mark_dirty(svm->vmcb01.ptr, VMCB_INTERCEPTS);
+       if (!is_guest_mode(&svm->vcpu))
+               return;
++      vmcb_mark_dirty(svm->vmcb, VMCB_INTERCEPTS);
++
+       c = &svm->vmcb->control;
+       h = &svm->vmcb01.ptr->control;
+       g = &svm->nested.ctl;
diff --git a/queue-6.1/kvm-svm-inject-ud-for-invlpga-if-efer.svme-0.patch b/queue-6.1/kvm-svm-inject-ud-for-invlpga-if-efer.svme-0.patch
new file mode 100644 (file)
index 0000000..24bd205
--- /dev/null
@@ -0,0 +1,36 @@
+From d99df02ff427f461102230f9c5b90a6c64ee8e23 Mon Sep 17 00:00:00 2001
+From: Kevin Cheng <chengkev@google.com>
+Date: Sat, 28 Feb 2026 03:33:26 +0000
+Subject: KVM: SVM: Inject #UD for INVLPGA if EFER.SVME=0
+
+From: Kevin Cheng <chengkev@google.com>
+
+commit d99df02ff427f461102230f9c5b90a6c64ee8e23 upstream.
+
+INVLPGA should cause a #UD when EFER.SVME is not set. Add a check to
+properly inject #UD when EFER.SVME=0.
+
+Fixes: ff092385e828 ("KVM: SVM: Implement INVLPGA")
+Cc: stable@vger.kernel.org
+Signed-off-by: Kevin Cheng <chengkev@google.com>
+Reviewed-by: Yosry Ahmed <yosry.ahmed@linux.dev>
+Link: https://patch.msgid.link/20260228033328.2285047-3-chengkev@google.com
+[sean: tag for stable@]
+Signed-off-by: Sean Christopherson <seanjc@google.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/svm/svm.c |    3 +++
+ 1 file changed, 3 insertions(+)
+
+--- a/arch/x86/kvm/svm/svm.c
++++ b/arch/x86/kvm/svm/svm.c
+@@ -2399,6 +2399,9 @@ static int invlpga_interception(struct k
+       gva_t gva = kvm_rax_read(vcpu);
+       u32 asid = kvm_rcx_read(vcpu);
++      if (nested_svm_check_permissions(vcpu))
++              return 1;
++
+       /* FIXME: Handle an address size prefix. */
+       if (!is_long_mode(vcpu))
+               gva = (u32)gva;
diff --git a/queue-6.1/mmc-block-use-single-block-write-in-retry.patch b/queue-6.1/mmc-block-use-single-block-write-in-retry.patch
new file mode 100644 (file)
index 0000000..c0d7dde
--- /dev/null
@@ -0,0 +1,92 @@
+From c7c6d4f5103864f73ee3a78bfd6da241f84197dd Mon Sep 17 00:00:00 2001
+From: Bin Liu <b-liu@ti.com>
+Date: Wed, 25 Mar 2026 08:49:47 -0500
+Subject: mmc: block: use single block write in retry
+
+From: Bin Liu <b-liu@ti.com>
+
+commit c7c6d4f5103864f73ee3a78bfd6da241f84197dd upstream.
+
+Due to errata i2493[0], multi-block write would still fail in retries.
+
+With i2493, the MMC interface has the potential of write failures when
+issuing multi-block writes operating in HS200 mode with excessive IO
+supply noise.
+
+While the errata provides guidance in hardware design and layout to
+minimize the IO supply noise, in theory the write failure cannot be
+resolved in hardware. The software solution to ensure the data integrity
+is to add minimum 5us delay between block writes. Single-block write is
+the practical way to introduce the delay.
+
+This patch reuses recovery_mode flag, and switches to single-block
+write in retry when multi-block write fails. It covers both CQE and
+non-CQE cases.
+
+[0] https://www.ti.com/lit/pdf/sprz582
+Cc: stable@vger.kernel.org
+Suggested-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Bin Liu <b-liu@ti.com>
+Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/mmc/core/block.c |   12 ++++++++++--
+ drivers/mmc/core/queue.h |    3 +++
+ 2 files changed, 13 insertions(+), 2 deletions(-)
+
+--- a/drivers/mmc/core/block.c
++++ b/drivers/mmc/core/block.c
+@@ -1401,6 +1401,9 @@ static void mmc_blk_data_prep(struct mmc
+                   rq_data_dir(req) == WRITE &&
+                   (md->flags & MMC_BLK_REL_WR);
++      if (mqrq->flags & MQRQ_XFER_SINGLE_BLOCK)
++              recovery_mode = 1;
++
+       memset(brq, 0, sizeof(struct mmc_blk_request));
+       mmc_crypto_prepare_req(mqrq);
+@@ -1540,10 +1543,13 @@ static void mmc_blk_cqe_complete_rq(stru
+               err = 0;
+       if (err) {
+-              if (mqrq->retries++ < MMC_CQE_RETRIES)
++              if (mqrq->retries++ < MMC_CQE_RETRIES) {
++                      if (rq_data_dir(req) == WRITE)
++                              mqrq->flags |= MQRQ_XFER_SINGLE_BLOCK;
+                       blk_mq_requeue_request(req, true);
+-              else
++              } else {
+                       blk_mq_end_request(req, BLK_STS_IOERR);
++              }
+       } else if (mrq->data) {
+               if (blk_update_request(req, BLK_STS_OK, mrq->data->bytes_xfered))
+                       blk_mq_requeue_request(req, true);
+@@ -2081,6 +2087,8 @@ static void mmc_blk_mq_complete_rq(struc
+       } else if (!blk_rq_bytes(req)) {
+               __blk_mq_end_request(req, BLK_STS_IOERR);
+       } else if (mqrq->retries++ < MMC_MAX_RETRIES) {
++              if (rq_data_dir(req) == WRITE)
++                      mqrq->flags |= MQRQ_XFER_SINGLE_BLOCK;
+               blk_mq_requeue_request(req, true);
+       } else {
+               if (mmc_card_removed(mq->card))
+--- a/drivers/mmc/core/queue.h
++++ b/drivers/mmc/core/queue.h
+@@ -61,6 +61,8 @@ enum mmc_drv_op {
+       MMC_DRV_OP_GET_EXT_CSD,
+ };
++#define       MQRQ_XFER_SINGLE_BLOCK          BIT(0)
++
+ struct mmc_queue_req {
+       struct mmc_blk_request  brq;
+       struct scatterlist      *sg;
+@@ -69,6 +71,7 @@ struct mmc_queue_req {
+       void                    *drv_op_data;
+       unsigned int            ioc_count;
+       int                     retries;
++      u32                     flags;
+ };
+ struct mmc_queue {
diff --git a/queue-6.1/mmc-sdhci-of-dwcmshc-disable-clock-before-dll-configuration.patch b/queue-6.1/mmc-sdhci-of-dwcmshc-disable-clock-before-dll-configuration.patch
new file mode 100644 (file)
index 0000000..0410353
--- /dev/null
@@ -0,0 +1,81 @@
+From 6546a49bbe656981d99a389195560999058c89c4 Mon Sep 17 00:00:00 2001
+From: Shawn Lin <shawn.lin@rock-chips.com>
+Date: Wed, 8 Apr 2026 15:18:49 +0800
+Subject: mmc: sdhci-of-dwcmshc: Disable clock before DLL configuration
+
+From: Shawn Lin <shawn.lin@rock-chips.com>
+
+commit 6546a49bbe656981d99a389195560999058c89c4 upstream.
+
+According to the ASIC design recommendations, the clock must be
+disabled before operating the DLL to prevent glitches that could
+affect the internal digital logic. In extreme cases, failing to
+do so may cause the controller to malfunction completely.
+
+Adds a step to disable the clock before DLL configuration and
+re-enables it at the end.
+
+Fixes: 08f3dff799d4 ("mmc: sdhci-of-dwcmshc: add rockchip platform support")
+Cc: stable@vger.kernel.org
+Signed-off-by: Shawn Lin <shawn.lin@rock-chips.com>
+Acked-by: Adrian Hunter <adrian.hunter@intel.com>
+Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/mmc/host/sdhci-of-dwcmshc.c |   19 ++++++++++++++++---
+ 1 file changed, 16 insertions(+), 3 deletions(-)
+
+--- a/drivers/mmc/host/sdhci-of-dwcmshc.c
++++ b/drivers/mmc/host/sdhci-of-dwcmshc.c
+@@ -235,12 +235,15 @@ static void dwcmshc_rk3568_set_clock(str
+       extra &= ~BIT(0);
+       sdhci_writel(host, extra, reg);
++      /* Disable clock while config DLL */
++      sdhci_writew(host, 0, SDHCI_CLOCK_CONTROL);
++
+       if (clock <= 52000000) {
+               if (host->mmc->ios.timing == MMC_TIMING_MMC_HS200 ||
+                   host->mmc->ios.timing == MMC_TIMING_MMC_HS400) {
+                       dev_err(mmc_dev(host->mmc),
+                               "Can't reduce the clock below 52MHz in HS200/HS400 mode");
+-                      return;
++                      goto enable_clk;
+               }
+               /*
+@@ -260,7 +263,7 @@ static void dwcmshc_rk3568_set_clock(str
+                       DLL_STRBIN_DELAY_NUM_SEL |
+                       DLL_STRBIN_DELAY_NUM_DEFAULT << DLL_STRBIN_DELAY_NUM_OFFSET;
+               sdhci_writel(host, extra, DWCMSHC_EMMC_DLL_STRBIN);
+-              return;
++              goto enable_clk;
+       }
+       /* Reset DLL */
+@@ -287,7 +290,7 @@ static void dwcmshc_rk3568_set_clock(str
+                                500 * USEC_PER_MSEC);
+       if (err) {
+               dev_err(mmc_dev(host->mmc), "DLL lock timeout!\n");
+-              return;
++              goto enable_clk;
+       }
+       extra = 0x1 << 16 | /* tune clock stop en */
+@@ -320,6 +323,16 @@ static void dwcmshc_rk3568_set_clock(str
+               DLL_STRBIN_TAPNUM_DEFAULT |
+               DLL_STRBIN_TAPNUM_FROM_SW;
+       sdhci_writel(host, extra, DWCMSHC_EMMC_DLL_STRBIN);
++
++enable_clk:
++      /*
++       * The sdclk frequency select bits in SDHCI_CLOCK_CONTROL are not functional
++       * on Rockchip's SDHCI implementation. Instead, the clock frequency is fully
++       * controlled via external clk provider by calling clk_set_rate(). Consequently,
++       * passing 0 to sdhci_enable_clk() only re-enables the already-configured clock,
++       * which matches the hardware's actual behavior.
++       */
++      sdhci_enable_clk(host, 0);
+ }
+ static void rk35xx_sdhci_reset(struct sdhci_host *host, u8 mask)
diff --git a/queue-6.1/randomize_kstack-maintain-kstack_offset-per-task.patch b/queue-6.1/randomize_kstack-maintain-kstack_offset-per-task.patch
new file mode 100644 (file)
index 0000000..02480b1
--- /dev/null
@@ -0,0 +1,155 @@
+From 37beb42560165869838e7d91724f3e629db64129 Mon Sep 17 00:00:00 2001
+From: Ryan Roberts <ryan.roberts@arm.com>
+Date: Tue, 3 Mar 2026 15:08:38 +0000
+Subject: randomize_kstack: Maintain kstack_offset per task
+
+From: Ryan Roberts <ryan.roberts@arm.com>
+
+commit 37beb42560165869838e7d91724f3e629db64129 upstream.
+
+kstack_offset was previously maintained per-cpu, but this caused a
+couple of issues. So let's instead make it per-task.
+
+Issue 1: add_random_kstack_offset() and choose_random_kstack_offset()
+expected and required to be called with interrupts and preemption
+disabled so that it could manipulate per-cpu state. But arm64, loongarch
+and risc-v are calling them with interrupts and preemption enabled. I
+don't _think_ this causes any functional issues, but it's certainly
+unexpected and could lead to manipulating the wrong cpu's state, which
+could cause a minor performance degradation due to bouncing the cache
+lines. By maintaining the state per-task those functions can safely be
+called in preemptible context.
+
+Issue 2: add_random_kstack_offset() is called before executing the
+syscall and expands the stack using a previously chosen random offset.
+choose_random_kstack_offset() is called after executing the syscall and
+chooses and stores a new random offset for the next syscall. With
+per-cpu storage for this offset, an attacker could force cpu migration
+during the execution of the syscall and prevent the offset from being
+updated for the original cpu such that it is predictable for the next
+syscall on that cpu. By maintaining the state per-task, this problem
+goes away because the per-task random offset is updated after the
+syscall regardless of which cpu it is executing on.
+
+Fixes: 39218ff4c625 ("stack: Optionally randomize kernel stack offset each syscall")
+Closes: https://lore.kernel.org/all/dd8c37bc-795f-4c7a-9086-69e584d8ab24@arm.com/
+Cc: stable@vger.kernel.org
+Acked-by: Mark Rutland <mark.rutland@arm.com>
+Signed-off-by: Ryan Roberts <ryan.roberts@arm.com>
+Link: https://patch.msgid.link/20260303150840.3789438-2-ryan.roberts@arm.com
+Signed-off-by: Kees Cook <kees@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/linux/randomize_kstack.h |   26 +++++++++++++++-----------
+ include/linux/sched.h            |    4 ++++
+ init/main.c                      |    1 -
+ kernel/fork.c                    |    2 ++
+ 4 files changed, 21 insertions(+), 12 deletions(-)
+
+--- a/include/linux/randomize_kstack.h
++++ b/include/linux/randomize_kstack.h
+@@ -9,7 +9,6 @@
+ DECLARE_STATIC_KEY_MAYBE(CONFIG_RANDOMIZE_KSTACK_OFFSET_DEFAULT,
+                        randomize_kstack_offset);
+-DECLARE_PER_CPU(u32, kstack_offset);
+ /*
+  * Do not use this anywhere else in the kernel. This is used here because
+@@ -44,15 +43,14 @@ DECLARE_PER_CPU(u32, kstack_offset);
+  * add_random_kstack_offset - Increase stack utilization by previously
+  *                          chosen random offset
+  *
+- * This should be used in the syscall entry path when interrupts and
+- * preempt are disabled, and after user registers have been stored to
+- * the stack. For testing the resulting entropy, please see:
+- * tools/testing/selftests/lkdtm/stack-entropy.sh
++ * This should be used in the syscall entry path after user registers have been
++ * stored to the stack. Preemption may be enabled. For testing the resulting
++ * entropy, please see: tools/testing/selftests/lkdtm/stack-entropy.sh
+  */
+ #define add_random_kstack_offset() do {                                       \
+       if (static_branch_maybe(CONFIG_RANDOMIZE_KSTACK_OFFSET_DEFAULT, \
+                               &randomize_kstack_offset)) {            \
+-              u32 offset = raw_cpu_read(kstack_offset);               \
++              u32 offset = current->kstack_offset;                    \
+               u8 *ptr = __kstack_alloca(KSTACK_OFFSET_MAX(offset));   \
+               /* Keep allocation even after "ptr" loses scope. */     \
+               asm volatile("" :: "r"(ptr) : "memory");                \
+@@ -63,9 +61,9 @@ DECLARE_PER_CPU(u32, kstack_offset);
+  * choose_random_kstack_offset - Choose the random offset for the next
+  *                             add_random_kstack_offset()
+  *
+- * This should only be used during syscall exit when interrupts and
+- * preempt are disabled. This position in the syscall flow is done to
+- * frustrate attacks from userspace attempting to learn the next offset:
++ * This should only be used during syscall exit. Preemption may be enabled. This
++ * position in the syscall flow is done to frustrate attacks from userspace
++ * attempting to learn the next offset:
+  * - Maximize the timing uncertainty visible from userspace: if the
+  *   offset is chosen at syscall entry, userspace has much more control
+  *   over the timing between choosing offsets. "How long will we be in
+@@ -79,14 +77,20 @@ DECLARE_PER_CPU(u32, kstack_offset);
+ #define choose_random_kstack_offset(rand) do {                                \
+       if (static_branch_maybe(CONFIG_RANDOMIZE_KSTACK_OFFSET_DEFAULT, \
+                               &randomize_kstack_offset)) {            \
+-              u32 offset = raw_cpu_read(kstack_offset);               \
++              u32 offset = current->kstack_offset;                    \
+               offset = ror32(offset, 5) ^ (rand);                     \
+-              raw_cpu_write(kstack_offset, offset);                   \
++              current->kstack_offset = offset;                        \
+       }                                                               \
+ } while (0)
++
++static inline void random_kstack_task_init(struct task_struct *tsk)
++{
++      tsk->kstack_offset = 0;
++}
+ #else /* CONFIG_RANDOMIZE_KSTACK_OFFSET */
+ #define add_random_kstack_offset()            do { } while (0)
+ #define choose_random_kstack_offset(rand)     do { } while (0)
++#define random_kstack_task_init(tsk)          do { } while (0)
+ #endif /* CONFIG_RANDOMIZE_KSTACK_OFFSET */
+ #endif
+--- a/include/linux/sched.h
++++ b/include/linux/sched.h
+@@ -1488,6 +1488,10 @@ struct task_struct {
+       unsigned long                   prev_lowest_stack;
+ #endif
++#ifdef CONFIG_RANDOMIZE_KSTACK_OFFSET
++      u32                             kstack_offset;
++#endif
++
+ #ifdef CONFIG_X86_MCE
+       void __user                     *mce_vaddr;
+       __u64                           mce_kflags;
+--- a/init/main.c
++++ b/init/main.c
+@@ -880,7 +880,6 @@ static void __init mm_init(void)
+ #ifdef CONFIG_RANDOMIZE_KSTACK_OFFSET
+ DEFINE_STATIC_KEY_MAYBE_RO(CONFIG_RANDOMIZE_KSTACK_OFFSET_DEFAULT,
+                          randomize_kstack_offset);
+-DEFINE_PER_CPU(u32, kstack_offset);
+ static int __init early_randomize_kstack_offset(char *buf)
+ {
+--- a/kernel/fork.c
++++ b/kernel/fork.c
+@@ -94,6 +94,7 @@
+ #include <linux/thread_info.h>
+ #include <linux/stackleak.h>
+ #include <linux/kasan.h>
++#include <linux/randomize_kstack.h>
+ #include <linux/scs.h>
+ #include <linux/io_uring.h>
+ #include <linux/bpf.h>
+@@ -2366,6 +2367,7 @@ static __latent_entropy struct task_stru
+       if (retval)
+               goto bad_fork_cleanup_io;
++      random_kstack_task_init(p);
+       stackleak_task_init(p);
+       if (pid != &init_struct_pid) {
diff --git a/queue-6.1/rtc-ntxec-fix-of-node-reference-imbalance.patch b/queue-6.1/rtc-ntxec-fix-of-node-reference-imbalance.patch
new file mode 100644 (file)
index 0000000..06f8ab7
--- /dev/null
@@ -0,0 +1,41 @@
+From 30c4d2f26bb3538c328035cea2e6265c8320539e Mon Sep 17 00:00:00 2001
+From: Johan Hovold <johan@kernel.org>
+Date: Tue, 7 Apr 2026 14:27:17 +0200
+Subject: rtc: ntxec: fix OF node reference imbalance
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Johan Hovold <johan@kernel.org>
+
+commit 30c4d2f26bb3538c328035cea2e6265c8320539e upstream.
+
+The driver reuses the OF node of the parent multi-function device but
+fails to take another reference to balance the one dropped by the
+platform bus code when unbinding the MFD and deregistering the child
+devices.
+
+Fix this by using the intended helper for reusing OF nodes.
+
+Fixes: 435af89786c6 ("rtc: New driver for RTC in Netronix embedded controller")
+Cc: stable@vger.kernel.org     # 5.13
+Cc: Jonathan Neuschäfer <j.neuschaefer@gmx.net>
+Signed-off-by: Johan Hovold <johan@kernel.org>
+Link: https://patch.msgid.link/20260407122717.2676774-1-johan@kernel.org
+Signed-off-by: Alexandre Belloni <alexandre.belloni@bootlin.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/rtc/rtc-ntxec.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/rtc/rtc-ntxec.c
++++ b/drivers/rtc/rtc-ntxec.c
+@@ -110,7 +110,7 @@ static int ntxec_rtc_probe(struct platfo
+       struct rtc_device *dev;
+       struct ntxec_rtc *rtc;
+-      pdev->dev.of_node = pdev->dev.parent->of_node;
++      device_set_of_node_from_dev(&pdev->dev, pdev->dev.parent);
+       rtc = devm_kzalloc(&pdev->dev, sizeof(*rtc), GFP_KERNEL);
+       if (!rtc)
index 155e766b70bca9dbc11dd04390480ca62711c550..f43c6670fca722b718d16eae524c04e34f38f527 100644 (file)
@@ -223,3 +223,21 @@ extract-cert-wrap-key_pass-with-ifdef-use_pkcs11_engine.patch
 tpm-avoid-wunused-but-set-variable.patch
 loongarch-show-cpu-vulnerabilites-correctly.patch
 power-supply-axp288_charger-do-not-cancel-work-before-initializing-it.patch
+randomize_kstack-maintain-kstack_offset-per-task.patch
+mmc-block-use-single-block-write-in-retry.patch
+mmc-sdhci-of-dwcmshc-disable-clock-before-dll-configuration.patch
+tpm-tpm_tis-add-error-logging-for-data-transfer.patch
+rtc-ntxec-fix-of-node-reference-imbalance.patch
+userfaultfd-allow-registration-of-ranges-below-mmap_min_addr.patch
+kvm-nsvm-mark-all-of-vmcb02-dirty-when-restoring-nested-state.patch
+kvm-nsvm-sync-nextrip-to-cached-vmcb12-after-vmrun-of-l2.patch
+kvm-nsvm-sync-interrupt-shadow-to-cached-vmcb12-after-vmrun-of-l2.patch
+kvm-svm-inject-ud-for-invlpga-if-efer.svme-0.patch
+kvm-svm-explicitly-mark-vmcb01-dirty-after-modifying-vmcb-intercepts.patch
+kvm-nsvm-ensure-avic-is-inhibited-when-restoring-a-vcpu-to-guest-mode.patch
+kvm-nsvm-use-vcpu-arch.cr2-when-updating-vmcb12-on-nested-vmexit.patch
+kvm-nsvm-always-inject-a-gp-if-mapping-vmcb12-fails-on-nested-vmrun.patch
+kvm-nsvm-clear-gif-on-nested-vmexit-invalid.patch
+kvm-nsvm-clear-tracking-of-l1-l2-nmi-and-soft-irq-on-nested-vmexit.patch
+kvm-nsvm-add-missing-consistency-check-for-efer-cr0-cr4-and-cs.patch
+kvm-nsvm-add-missing-consistency-check-for-ncr3-validity.patch
diff --git a/queue-6.1/tpm-tpm_tis-add-error-logging-for-data-transfer.patch b/queue-6.1/tpm-tpm_tis-add-error-logging-for-data-transfer.patch
new file mode 100644 (file)
index 0000000..ab30063
--- /dev/null
@@ -0,0 +1,42 @@
+From 0471921e2d1043dcc6de5cffb49dd37709521abe Mon Sep 17 00:00:00 2001
+From: Jacqueline Wong <jacqwong@google.com>
+Date: Wed, 15 Apr 2026 16:00:05 +0000
+Subject: tpm: tpm_tis: add error logging for data transfer
+
+From: Jacqueline Wong <jacqwong@google.com>
+
+commit 0471921e2d1043dcc6de5cffb49dd37709521abe upstream.
+
+Add logging to more easily determine reason for transmit failure
+
+Cc: stable@vger.kernel.org # v6.6+
+Fixes: 280db21e153d8 ("tpm_tis: Resend command to recover from data transfer errors")
+Signed-off-by: Jacqueline Wong <jacqwong@google.com>
+Signed-off-by: Jordan Hand <jhand@google.com>
+Link: https://lore.kernel.org/r/20260415160006.2275325-2-jacqwong@google.com
+Signed-off-by: Jarkko Sakkinen <jarkko@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/char/tpm/tpm_tis_core.c |    4 ++++
+ 1 file changed, 4 insertions(+)
+
+--- a/drivers/char/tpm/tpm_tis_core.c
++++ b/drivers/char/tpm/tpm_tis_core.c
+@@ -441,6 +441,8 @@ static int tpm_tis_send_data(struct tpm_
+               status = tpm_tis_status(chip);
+               if (!itpm && (status & TPM_STS_DATA_EXPECT) == 0) {
+                       rc = -EIO;
++                      dev_err(&chip->dev, "TPM_STS_DATA_EXPECT should be set. sts = 0x%08x\n",
++                              status);
+                       goto out_err;
+               }
+       }
+@@ -461,6 +463,8 @@ static int tpm_tis_send_data(struct tpm_
+       status = tpm_tis_status(chip);
+       if (!itpm && (status & TPM_STS_DATA_EXPECT) != 0) {
+               rc = -EIO;
++              dev_err(&chip->dev, "TPM_STS_DATA_EXPECT should be unset. sts = 0x%08x\n",
++                      status);
+               goto out_err;
+       }
diff --git a/queue-6.1/userfaultfd-allow-registration-of-ranges-below-mmap_min_addr.patch b/queue-6.1/userfaultfd-allow-registration-of-ranges-below-mmap_min_addr.patch
new file mode 100644 (file)
index 0000000..4ade83f
--- /dev/null
@@ -0,0 +1,60 @@
+From 161ce69c2c89781784b945d8e281ff2da9dede9c Mon Sep 17 00:00:00 2001
+From: "Denis M. Karpov" <komlomal@gmail.com>
+Date: Thu, 9 Apr 2026 13:33:45 +0300
+Subject: userfaultfd: allow registration of ranges below mmap_min_addr
+
+From: Denis M. Karpov <komlomal@gmail.com>
+
+commit 161ce69c2c89781784b945d8e281ff2da9dede9c upstream.
+
+The current implementation of validate_range() in fs/userfaultfd.c
+performs a hard check against mmap_min_addr.  This is redundant because
+UFFDIO_REGISTER operates on memory ranges that must already be backed by a
+VMA.
+
+Enforcing mmap_min_addr or capability checks again in userfaultfd is
+unnecessary and prevents applications like binary compilers from using
+UFFD for valid memory regions mapped by application.
+
+Remove the redundant check for mmap_min_addr.
+
+We started using UFFD instead of the classic mprotect approach in the
+binary translator to track application writes.  During development, we
+encountered this bug.  The translator cannot control where the translated
+application chooses to map its memory and if the app requires a
+low-address area, UFFD fails, whereas mprotect would work just fine.  I
+believe this is a genuine logic bug rather than an improvement, and I
+would appreciate including the fix in stable.
+
+Link: https://lore.kernel.org/20260409103345.15044-1-komlomal@gmail.com
+Fixes: 86039bd3b4e6 ("userfaultfd: add new syscall to provide memory externalization")
+Signed-off-by: Denis M. Karpov <komlomal@gmail.com>
+Reviewed-by: Lorenzo Stoakes <ljs@kernel.org>
+Acked-by: Harry Yoo (Oracle) <harry@kernel.org>
+Reviewed-by: Pedro Falcato <pfalcato@suse.de>
+Reviewed-by: Liam R. Howlett <Liam.Howlett@oracle.com>
+Reviewed-by: Mike Rapoport (Microsoft) <rppt@kernel.org>
+Cc: Alexander Viro <viro@zeniv.linux.org.uk>
+Cc: Al Viro <viro@zeniv.linux.org.uk>
+Cc: Christian Brauner <brauner@kernel.org>
+Cc: Jan Kara <jack@suse.cz>
+Cc: Jann Horn <jannh@google.com>
+Cc: Peter Xu <peterx@redhat.com>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/userfaultfd.c |    2 --
+ 1 file changed, 2 deletions(-)
+
+--- a/fs/userfaultfd.c
++++ b/fs/userfaultfd.c
+@@ -1271,8 +1271,6 @@ static __always_inline int validate_rang
+               return -EINVAL;
+       if (!len)
+               return -EINVAL;
+-      if (start < mmap_min_addr)
+-              return -EINVAL;
+       if (start >= task_size)
+               return -EINVAL;
+       if (len > task_size - start)