]> git.ipfire.org Git - thirdparty/kernel/linux.git/commitdiff
KVM: x86: Defer non-architectural delivery of exception payload to userspace read
authorSean Christopherson <seanjc@google.com>
Wed, 18 Feb 2026 00:54:38 +0000 (16:54 -0800)
committerSean Christopherson <seanjc@google.com>
Mon, 2 Mar 2026 17:53:50 +0000 (09:53 -0800)
When attempting to play nice with userspace that hasn't enabled
KVM_CAP_EXCEPTION_PAYLOAD, defer KVM's non-architectural delivery of the
payload until userspace actually reads relevant vCPU state, and more
importantly, force delivery of the payload in *all* paths where userspace
saves relevant vCPU state, not just KVM_GET_VCPU_EVENTS.

Ignoring userspace save/restore for the moment, delivering the payload
before the exception is injected is wrong regardless of whether L1 or L2
is running.  To make matters even more confusing, the flaw *currently*
being papered over by the !is_guest_mode() check isn't even the same bug
that commit da998b46d244 ("kvm: x86: Defer setting of CR2 until #PF
delivery") was trying to avoid.

At the time of commit da998b46d244, KVM didn't correctly handle exception
intercepts, as KVM would wait until VM-Entry into L2 was imminent to check
if the queued exception should morph to a nested VM-Exit.  I.e. KVM would
deliver the payload to L2 and then synthesize a VM-Exit into L1.  But the
payload was only the most blatant issue, e.g. waiting to check exception
intercepts would also lead to KVM incorrectly escalating a
should-be-intercepted #PF into a #DF.

That underlying bug was eventually fixed by commit 7709aba8f716 ("KVM: x86:
Morph pending exceptions to pending VM-Exits at queue time"), but in the
interim, commit a06230b62b89 ("KVM: x86: Deliver exception payload on
KVM_GET_VCPU_EVENTS") came along and subtly added another dependency on
the !is_guest_mode() check.

While not recorded in the changelog, the motivation for deferring the
!exception_payload_enabled delivery was to fix a flaw where a synthesized
MTF (Monitor Trap Flag) VM-Exit would drop a pending #DB and clobber DR6.
On a VM-Exit, VMX CPUs save pending #DB information into the VMCS, which
is emulated by KVM in nested_vmx_update_pending_dbg() by grabbing the
payload from the queue/pending exception.  I.e. prematurely delivering the
payload would cause the pending #DB to not be recorded in the VMCS, and of
course, clobber L2's DR6 as seen by L1.

Jumping back to save+restore, the quirked behavior of forcing delivery of
the payload only works if userspace does KVM_GET_VCPU_EVENTS *before*
CR2 or DR6 is saved, i.e. before KVM_GET_SREGS{,2} and KVM_GET_DEBUGREGS.
E.g. if userspace does KVM_GET_SREGS before KVM_GET_VCPU_EVENTS, then the
CR2 saved by userspace won't contain the payload for the exception saved by
KVM_GET_VCPU_EVENTS.

Deliberately deliver the payload in the store_regs() path, as it's the
least awful option even though userspace may not be doing save+restore,
because if userspace _is_ doing save+restore, it could elide KVM_GET_SREGS
knowing that SREGS were already saved when the vCPU exited.

Link: https://lore.kernel.org/all/20200207103608.110305-1-oupton@google.com
Cc: Yosry Ahmed <yosry.ahmed@linux.dev>
Cc: stable@vger.kernel.org
Reviewed-by: Yosry Ahmed <yosry.ahmed@linux.dev>
Tested-by: Yosry Ahmed <yosry.ahmed@linux.dev>
Link: https://patch.msgid.link/20260218005438.2619063-1-seanjc@google.com
Signed-off-by: Sean Christopherson <seanjc@google.com>
arch/x86/kvm/x86.c

index a03530795707797b73c701b0c952d46d27389e8d..6e87ec52fa0640622638c48d4707419542338392 100644 (file)
@@ -864,9 +864,6 @@ static void kvm_multiple_exception(struct kvm_vcpu *vcpu, unsigned int nr,
                vcpu->arch.exception.error_code = error_code;
                vcpu->arch.exception.has_payload = has_payload;
                vcpu->arch.exception.payload = payload;
-               if (!is_guest_mode(vcpu))
-                       kvm_deliver_exception_payload(vcpu,
-                                                     &vcpu->arch.exception);
                return;
        }
 
@@ -5531,18 +5528,8 @@ static int kvm_vcpu_ioctl_x86_set_mce(struct kvm_vcpu *vcpu,
        return 0;
 }
 
-static void kvm_vcpu_ioctl_x86_get_vcpu_events(struct kvm_vcpu *vcpu,
-                                              struct kvm_vcpu_events *events)
+static struct kvm_queued_exception *kvm_get_exception_to_save(struct kvm_vcpu *vcpu)
 {
-       struct kvm_queued_exception *ex;
-
-       process_nmi(vcpu);
-
-#ifdef CONFIG_KVM_SMM
-       if (kvm_check_request(KVM_REQ_SMI, vcpu))
-               process_smi(vcpu);
-#endif
-
        /*
         * KVM's ABI only allows for one exception to be migrated.  Luckily,
         * the only time there can be two queued exceptions is if there's a
@@ -5553,21 +5540,46 @@ static void kvm_vcpu_ioctl_x86_get_vcpu_events(struct kvm_vcpu *vcpu,
        if (vcpu->arch.exception_vmexit.pending &&
            !vcpu->arch.exception.pending &&
            !vcpu->arch.exception.injected)
-               ex = &vcpu->arch.exception_vmexit;
-       else
-               ex = &vcpu->arch.exception;
+               return &vcpu->arch.exception_vmexit;
+
+       return &vcpu->arch.exception;
+}
+
+static void kvm_handle_exception_payload_quirk(struct kvm_vcpu *vcpu)
+{
+       struct kvm_queued_exception *ex = kvm_get_exception_to_save(vcpu);
 
        /*
-        * In guest mode, payload delivery should be deferred if the exception
-        * will be intercepted by L1, e.g. KVM should not modifying CR2 if L1
-        * intercepts #PF, ditto for DR6 and #DBs.  If the per-VM capability,
-        * KVM_CAP_EXCEPTION_PAYLOAD, is not set, userspace may or may not
-        * propagate the payload and so it cannot be safely deferred.  Deliver
-        * the payload if the capability hasn't been requested.
+        * If KVM_CAP_EXCEPTION_PAYLOAD is disabled, then (prematurely) deliver
+        * the pending exception payload when userspace saves *any* vCPU state
+        * that interacts with exception payloads to avoid breaking userspace.
+        *
+        * Architecturally, KVM must not deliver an exception payload until the
+        * exception is actually injected, e.g. to avoid losing pending #DB
+        * information (which VMX tracks in the VMCS), and to avoid clobbering
+        * state if the exception is never injected for whatever reason.  But
+        * if KVM_CAP_EXCEPTION_PAYLOAD isn't enabled, then userspace may or
+        * may not propagate the payload across save+restore, and so KVM can't
+        * safely defer delivery of the payload.
         */
        if (!vcpu->kvm->arch.exception_payload_enabled &&
            ex->pending && ex->has_payload)
                kvm_deliver_exception_payload(vcpu, ex);
+}
+
+static void kvm_vcpu_ioctl_x86_get_vcpu_events(struct kvm_vcpu *vcpu,
+                                              struct kvm_vcpu_events *events)
+{
+       struct kvm_queued_exception *ex = kvm_get_exception_to_save(vcpu);
+
+       process_nmi(vcpu);
+
+#ifdef CONFIG_KVM_SMM
+       if (kvm_check_request(KVM_REQ_SMI, vcpu))
+               process_smi(vcpu);
+#endif
+
+       kvm_handle_exception_payload_quirk(vcpu);
 
        memset(events, 0, sizeof(*events));
 
@@ -5746,6 +5758,8 @@ static int kvm_vcpu_ioctl_x86_get_debugregs(struct kvm_vcpu *vcpu,
            vcpu->arch.guest_state_protected)
                return -EINVAL;
 
+       kvm_handle_exception_payload_quirk(vcpu);
+
        memset(dbgregs, 0, sizeof(*dbgregs));
 
        BUILD_BUG_ON(ARRAY_SIZE(vcpu->arch.db) != ARRAY_SIZE(dbgregs->db));
@@ -12136,6 +12150,8 @@ static void __get_sregs_common(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
        if (vcpu->arch.guest_state_protected)
                goto skip_protected_regs;
 
+       kvm_handle_exception_payload_quirk(vcpu);
+
        kvm_get_segment(vcpu, &sregs->cs, VCPU_SREG_CS);
        kvm_get_segment(vcpu, &sregs->ds, VCPU_SREG_DS);
        kvm_get_segment(vcpu, &sregs->es, VCPU_SREG_ES);