git.ipfire.org Git - thirdparty/linux.git/commitdiff
KVM: SVM: Fix nested NPF injection of PFERR_GUEST_{PAGE,FINAL}_MASK bits
author: Kevin Cheng <chengkev@google.com>
Fri, 22 May 2026 23:26:59 +0000 (16:26 -0700)
committer: Sean Christopherson <seanjc@google.com>
Tue, 26 May 2026 21:54:19 +0000 (14:54 -0700)
Fix KVM's generation of PFERR_GUEST_{PAGE,FINAL}_MASK bits when injecting a
Nested Page Fault into L1.  Currently, KVM blindly stuffs GUEST_FINAL into
L1, which is blatantly wrong given that KVM obviously generates NPFs for
page table accesses.

There are two paths that trigger NPF injection: hardware NPF exits (from
L2) and emulation-triggered faults, i.e. when KVM detects a NPF as part of
emulating an L2 GVA access.  For the hardware case, use the bits verbatim
from the VMCB, as KVM is simply forwarding a NPF to L1.  For the emulation
case, propagate the GUEST_{PAGE,FINAL} bits from the access field (which
were recently added for MBEC+GMET support).

To differentiate between the two cases, add "hardware_nested_page_fault"
to "struct x86_exception", and set it when injecting an NPF in response to
an NPF exit from L2.

To help guard against future goofs, assert that exactly one of GUEST_PAGE
or GUEST_FINAL is set when injecting a NPF.  Unlike VMX, there are no
(known) cases where hardware doesn't set either bit, and KVM should always
set one or the other when emulating a GVA access.

Signed-off-by: Kevin Cheng <chengkev@google.com>
[sean: use plumbed in @access bits, massage changelog]
Link: https://patch.msgid.link/20260522232701.3671446-4-seanjc@google.com
Signed-off-by: Sean Christopherson <seanjc@google.com>
arch/x86/include/asm/kvm_host.h
arch/x86/kvm/mmu/paging_tmpl.h
arch/x86/kvm/svm/nested.c

index 29fec7c59a6f47e79d69b4232f7f928c5c65b6f3..7114707b9856bfe091041aa8d7186de8b9a38a34 100644 (file)
@@ -284,6 +284,8 @@ enum x86_intercept_stage;
 #define PFERR_GUEST_RMP_MASK   BIT_ULL(31)
 #define PFERR_GUEST_FINAL_MASK BIT_ULL(32)
 #define PFERR_GUEST_PAGE_MASK  BIT_ULL(33)
+#define PFERR_GUEST_FAULT_STAGE_MASK \
+       (PFERR_GUEST_FINAL_MASK | PFERR_GUEST_PAGE_MASK)
 #define PFERR_GUEST_ENC_MASK   BIT_ULL(34)
 #define PFERR_GUEST_SIZEM_MASK BIT_ULL(35)
 #define PFERR_GUEST_VMPL_MASK  BIT_ULL(36)
index cc9c7deb34bcc001ae34f551c3dd9c0df932e61f..66eee6914234a08eae812b7bf7454cf5182ee4cc 100644 (file)
@@ -397,16 +397,6 @@ retry_walk:
                                             nested_access | PFERR_GUEST_PAGE_MASK,
                                             &walker->fault, 0);
 
-               /*
-                * FIXME: This can happen if emulation (for of an INS/OUTS
-                * instruction) triggers a nested page fault.  The exit
-                * qualification / exit info field will incorrectly have
-                * "guest page access" as the nested page fault's cause,
-                * instead of "guest page structure access".  To fix this,
-                * the x86_exception struct should be augmented with enough
-                * information to fix the exit_qualification or exit_info_1
-                * fields.
-                */
                if (unlikely(real_gpa == INVALID_GPA))
                        return 0;
 
@@ -548,6 +538,11 @@ error:
        walker->fault.nested_page_fault = mmu != vcpu->arch.walk_mmu;
        walker->fault.async_page_fault = false;
 
+#if PTTYPE != PTTYPE_EPT
+       if (walker->fault.nested_page_fault)
+               walker->fault.error_code |= access & PFERR_GUEST_FAULT_STAGE_MASK;
+#endif
+
        trace_kvm_mmu_walker_error(walker->fault.error_code);
        return 0;
 }
index edb15f9c6403e6fd99bf6e7f136defb606e55fb2..997a740c653d2db8cb2d43eaa8a37df66363de51 100644 (file)
@@ -39,19 +39,32 @@ static void nested_svm_inject_npf_exit(struct kvm_vcpu *vcpu,
 {
        struct vcpu_svm *svm = to_svm(vcpu);
        struct vmcb *vmcb = svm->vmcb;
+       u64 fault_stage;
 
-       if (vmcb->control.exit_code != SVM_EXIT_NPF) {
-               /*
-                * TODO: track the cause of the nested page fault, and
-                * correctly fill in the high bits of exit_info_1.
-                */
-               vmcb->control.exit_code = SVM_EXIT_NPF;
-               vmcb->control.exit_info_1 = (1ULL << 32);
-               vmcb->control.exit_info_2 = fault->address;
-       }
+       /*
+        * For hardware NPF exits, the GUEST_FAULT_STAGE bits are only
+        * available in the hardware exit_info_1, since the guest_mmu
+        * walker doesn't know whether the faulting GPA was a page table
+        * page or final page from L2's perspective.
+        */
+       if (from_hardware)
+               fault_stage = vmcb->control.exit_info_1 &
+                             PFERR_GUEST_FAULT_STAGE_MASK;
+       else
+               fault_stage = fault->error_code & PFERR_GUEST_FAULT_STAGE_MASK;
+
+       /*
+        * All nested page faults should be annotated as occurring on the
+        * final translation *or* the page walk. Arbitrarily choose "final"
+        * if KVM is buggy and enumerated both or neither.
+        */
+       if (WARN_ON_ONCE(hweight64(fault_stage) != 1))
+               fault_stage = PFERR_GUEST_FINAL_MASK;
 
-       vmcb->control.exit_info_1 &= ~0xffffffffULL;
-       vmcb->control.exit_info_1 |= fault->error_code;
+       vmcb->control.exit_code = SVM_EXIT_NPF;
+       vmcb->control.exit_info_1 = fault_stage |
+                                   (fault->error_code & ~PFERR_GUEST_FAULT_STAGE_MASK);
+       vmcb->control.exit_info_2 = fault->address;
 
        nested_svm_vmexit(svm);
 }