git.ipfire.org Git - thirdparty/linux.git/commitdiff
KVM: VMX: Refresh GUEST_PENDING_DBG_EXCEPTIONS.BS on all injected #DBs
author Sean Christopherson <seanjc@google.com>
Fri, 15 May 2026 22:26:29 +0000 (15:26 -0700)
committer Sean Christopherson <seanjc@google.com>
Thu, 21 May 2026 21:21:27 +0000 (14:21 -0700)
Move KVM's stuffing of GUEST_PENDING_DBG_EXCEPTIONS.BS when RFLAGS.TF=1 and
MOV/POP SS or STI blocking is active into the exception injection code so
that KVM fixes up the VMCS for all injected #DBs, not only those that are
reflected back into the guest after #DB interception.  E.g. if KVM queues
a #DB in the emulator, or more importantly if userspace does save/restore
exactly on the #DB+shadow boundary, then KVM needs to massage the VMCS to
avoid the VM-Entry consistency check.

Opportunistically update the wording of the comment to describe the
behavior as a workaround of flawed CPU behavior/architecture, to make it
clear that the *only* thing KVM is doing is fudging around a consistency
check.  Per the SDM:

  There are no pending debug exceptions after VM entry if any of the
  following are true:

    * The VM entry is vectoring with one of the following interruption
      types: external interrupt, non-maskable interrupt (NMI), hardware
      exception, or privileged software exception.

I.e. forcing GUEST_PENDING_DBG_EXCEPTIONS.BS does *not* impact guest-
visible behavior.

Fixes: b9bed78e2fa9 ("KVM: VMX: Set vmcs.PENDING_DBG.BS on #DB in STI/MOVSS blocking shadow")
Cc: stable@vger.kernel.org
Reported-by: Hou Wenlong <houwenlong.hwl@antgroup.com>
Closes: https://lore.kernel.org/all/b1a294bc9ed4dae532474a5dc6c8cb6e5962de7c.1757416809.git.houwenlong.hwl@antgroup.com
Reviewed-by: Hou Wenlong <houwenlong.hwl@antgroup.com>
Link: https://patch.msgid.link/20260515222638.1949982-2-seanjc@google.com
Signed-off-by: Sean Christopherson <seanjc@google.com>
arch/x86/kvm/vmx/vmx.c

index d81b22359918aa52fcc03aca9c596881d40c3879..cf9f2f55f569de9cc950db6624fd70868d1ab9e6 100644 (file)
@@ -1909,6 +1909,24 @@ void vmx_inject_exception(struct kvm_vcpu *vcpu)
        u32 intr_info = ex->vector | INTR_INFO_VALID_MASK;
        struct vcpu_vmx *vmx = to_vmx(vcpu);
 
+       /*
+        * When injecting a #DB, single-stepping is enabled in RFLAGS, and STI
+        * or MOV-SS blocking is active, set vmcs.PENDING_DBG_EXCEPTIONS.BS to
+        * prevent a false positive from VM-Entry consistency check.  VM-Entry
+        * asserts that a single-step #DB _must_ be pending in this scenario,
+        * as the previous instruction cannot have toggled RFLAGS.TF 0=>1
+        * (because STI and POP/MOV don't modify RFLAGS), therefore the one
+        * instruction delay when activating single-step breakpoints must have
+        * already expired.  However, the CPU isn't smart enough to peek at
+        * vmcs.VM_ENTRY_INTR_INFO_FIELD and so doesn't realize that yes, there
+        * is indeed a #DB pending/imminent.
+        */
+       if (ex->vector == DB_VECTOR &&
+           (vmx_get_rflags(vcpu) & X86_EFLAGS_TF) &&
+           vmx_get_interrupt_shadow(vcpu))
+               vmcs_writel(GUEST_PENDING_DBG_EXCEPTIONS,
+                           vmcs_readl(GUEST_PENDING_DBG_EXCEPTIONS) | DR6_BS);
+
        kvm_deliver_exception_payload(vcpu, ex);
 
        if (ex->has_error_code) {
@@ -5485,26 +5503,9 @@ static int handle_exception_nmi(struct kvm_vcpu *vcpu)
                         * avoid single-step #DB and MTF updates, as ICEBP is
                         * higher priority.  Note, skipping ICEBP still clears
                         * STI and MOVSS blocking.
-                        *
-                        * For all other #DBs, set vmcs.PENDING_DBG_EXCEPTIONS.BS
-                        * if single-step is enabled in RFLAGS and STI or MOVSS
-                        * blocking is active, as the CPU doesn't set the bit
-                        * on VM-Exit due to #DB interception.  VM-Entry has a
-                        * consistency check that a single-step #DB is pending
-                        * in this scenario as the previous instruction cannot
-                        * have toggled RFLAGS.TF 0=>1 (because STI and POP/MOV
-                        * don't modify RFLAGS), therefore the one instruction
-                        * delay when activating single-step breakpoints must
-                        * have already expired.  Note, the CPU sets/clears BS
-                        * as appropriate for all other VM-Exits types.
                         */
                        if (is_icebp(intr_info))
                                WARN_ON(!skip_emulated_instruction(vcpu));
-                       else if ((vmx_get_rflags(vcpu) & X86_EFLAGS_TF) &&
-                                (vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) &
-                                 (GUEST_INTR_STATE_STI | GUEST_INTR_STATE_MOV_SS)))
-                               vmcs_writel(GUEST_PENDING_DBG_EXCEPTIONS,
-                                           vmcs_readl(GUEST_PENDING_DBG_EXCEPTIONS) | DR6_BS);
 
                        kvm_queue_exception_p(vcpu, DB_VECTOR, dr6);
                        return 1;