To end an ongoing game of whack-a-mole between KVM and syzkaller, WARN on
illegally cancelling a pending nested VM-Enter if and only if userspace
has NOT gained control of the vCPU since the nested run was initiated. As
proven time and time again by syzkaller, userspace can clobber vCPU state
so as to force a VM-Exit that violates KVM's architectural modelling of
VMRUN/VMLAUNCH/VMRESUME.
To detect that userspace has gained control, while minimizing the risk of
operating on stale data, convert nested_run_pending from a pure boolean to
a tri-state of sorts, where '0' is still "not pending", '1' is "pending",
and '2' is "pending but untrusted". Then on KVM_RUN, if the flag is in
the "trusted pending" state, move it to "untrusted pending".
Note, moving the state to "untrusted" even if KVM_RUN is ultimately
rejected is a-ok, because for the "untrusted" state to matter, KVM must
get past kvm_x86_vcpu_pre_run() at some point for the vCPU.
Reviewed-by: Yosry Ahmed <yosry@kernel.org>
Link: https://patch.msgid.link/20260312234823.3120658-3-seanjc@google.com
Signed-off-by: Sean Christopherson <seanjc@google.com>
* can only occur at instruction boundaries. The only exception is
* VMX's "notify" exits, which exist in large part to break the CPU out
* of infinite ucode loops, but can corrupt vCPU state in the process!
+ *
+ * For all intents and purposes, this is a boolean, but it's tracked as
+ * a u8 so that KVM can detect when userspace may have stuffed vCPU
+ * state and generated an architecturally-impossible VM-Exit.
*/
- bool nested_run_pending;
+#define KVM_NESTED_RUN_PENDING 1
+#define KVM_NESTED_RUN_PENDING_UNTRUSTED 2
+ u8 nested_run_pending;
#if IS_ENABLED(CONFIG_HYPERV)
hpa_t hv_root_tdp;
if (!npt_enabled)
vmcb01->save.cr3 = kvm_read_cr3(vcpu);
- vcpu->arch.nested_run_pending = 1;
+ vcpu->arch.nested_run_pending = KVM_NESTED_RUN_PENDING;
if (enter_svm_guest_mode(vcpu, vmcb12_gpa, true) ||
!nested_svm_merge_msrpm(vcpu)) {
/* Exit Guest-Mode */
leave_guest_mode(vcpu);
svm->nested.vmcb12_gpa = 0;
- WARN_ON_ONCE(vcpu->arch.nested_run_pending);
+
+ kvm_warn_on_nested_run_pending(vcpu);
kvm_clear_request(KVM_REQ_GET_NESTED_STATE_PAGES, vcpu);
svm_set_gif(svm, !!(kvm_state->flags & KVM_STATE_NESTED_GIF_SET));
- vcpu->arch.nested_run_pending =
- !!(kvm_state->flags & KVM_STATE_NESTED_RUN_PENDING);
+ if (kvm_state->flags & KVM_STATE_NESTED_RUN_PENDING)
+ vcpu->arch.nested_run_pending = KVM_NESTED_RUN_PENDING_UNTRUSTED;
+ else
+ vcpu->arch.nested_run_pending = 0;
svm->nested.vmcb12_gpa = kvm_state->hdr.svm.vmcb_pa;
if (ret)
goto unmap_save;
- vcpu->arch.nested_run_pending = 1;
+ vcpu->arch.nested_run_pending = KVM_NESTED_RUN_PENDING;
unmap_save:
kvm_vcpu_unmap(vcpu, &map_save);
* We're finally done with prerequisite checking, and can start with
* the nested entry.
*/
- vcpu->arch.nested_run_pending = 1;
+ vcpu->arch.nested_run_pending = KVM_NESTED_RUN_PENDING;
vmx->nested.has_preemption_timer_deadline = false;
status = nested_vmx_enter_non_root_mode(vcpu, true);
if (unlikely(status != NVMX_VMENTRY_SUCCESS))
vmx->nested.mtf_pending = false;
/* trying to cancel vmlaunch/vmresume is a bug */
- WARN_ON_ONCE(vcpu->arch.nested_run_pending);
+ kvm_warn_on_nested_run_pending(vcpu);
#ifdef CONFIG_KVM_HYPERV
if (kvm_check_request(KVM_REQ_GET_NESTED_STATE_PAGES, vcpu)) {
unsigned long exit_qual;
u32 exit_intr_info;
- WARN_ON_ONCE(vcpu->arch.nested_run_pending);
+ kvm_warn_on_nested_run_pending(vcpu);
/*
* Late nested VM-Fail shares the same flow as nested VM-Exit since KVM
if (!(kvm_state->flags & KVM_STATE_NESTED_GUEST_MODE))
return 0;
- vcpu->arch.nested_run_pending =
- !!(kvm_state->flags & KVM_STATE_NESTED_RUN_PENDING);
+ if (kvm_state->flags & KVM_STATE_NESTED_RUN_PENDING)
+ vcpu->arch.nested_run_pending = KVM_NESTED_RUN_PENDING_UNTRUSTED;
+ else
+ vcpu->arch.nested_run_pending = 0;
vmx->nested.mtf_pending =
!!(kvm_state->flags & KVM_STATE_NESTED_MTF_PENDING);
if (ret)
return ret;
- vcpu->arch.nested_run_pending = 1;
+ vcpu->arch.nested_run_pending = KVM_NESTED_RUN_PENDING;
vmx->nested.smm.guest_mode = false;
}
return 0;
static int kvm_x86_vcpu_pre_run(struct kvm_vcpu *vcpu)
{
+ /*
+ * Userspace may have modified vCPU state, mark nested_run_pending as
+ * "untrusted" to avoid triggering false-positive WARNs.
+ */
+ if (vcpu->arch.nested_run_pending == KVM_NESTED_RUN_PENDING)
+ vcpu->arch.nested_run_pending = KVM_NESTED_RUN_PENDING_UNTRUSTED;
+
/*
* SIPI_RECEIVED is obsolete; KVM leaves the vCPU in Wait-For-SIPI and
* tracks the pending SIPI separately. SIPI_RECEIVED is still accepted
return vcpu->arch.last_vmentry_cpu == -1 && !is_guest_mode(vcpu);
}
+/*
+ * WARN if a nested VM-Enter is pending completion, and userspace hasn't gained
+ * control since the nested VM-Enter was initiated (in which case, userspace
+ * may have modified vCPU state to induce an architecturally invalid VM-Exit).
+ *
+ * Deliberately don't WARN on KVM_NESTED_RUN_PENDING_UNTRUSTED: once userspace
+ * has had a chance to stuff vCPU state (the flag is demoted to "untrusted" in
+ * kvm_x86_vcpu_pre_run()), a cancelled nested VM-Enter may be userspace's
+ * doing rather than a KVM bug.
+ */
+static inline void kvm_warn_on_nested_run_pending(struct kvm_vcpu *vcpu)
+{
+ WARN_ON_ONCE(vcpu->arch.nested_run_pending == KVM_NESTED_RUN_PENDING);
+}
+
static inline void kvm_set_mp_state(struct kvm_vcpu *vcpu, int mp_state)
{
vcpu->arch.mp_state = mp_state;