From: Yosry Ahmed Date: Thu, 12 Mar 2026 23:48:22 +0000 (-0700) Subject: KVM: x86: Move nested_run_pending to kvm_vcpu_arch X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=3d4470d71fbf70576636947aba1ae51adbad5225;p=thirdparty%2Flinux.git KVM: x86: Move nested_run_pending to kvm_vcpu_arch Move nested_run_pending field present in both svm_nested_state and nested_vmx to the common kvm_vcpu_arch. This allows for common code to use without plumbing it through per-vendor helpers. nested_run_pending remains zero-initialized, as the entire kvm_vcpu struct is, and all further accesses are done through vcpu->arch instead of svm->nested or vmx->nested. No functional change intended. Suggested-by: Sean Christopherson Signed-off-by: Yosry Ahmed [sean: expand the commend in the field declaration] Link: https://patch.msgid.link/20260312234823.3120658-2-seanjc@google.com Signed-off-by: Sean Christopherson --- diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index ff07c45e3c731..19b3790e5e99a 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1098,6 +1098,15 @@ struct kvm_vcpu_arch { */ bool pdptrs_from_userspace; + /* + * Set if an emulated nested VM-Enter to L2 is pending completion. KVM + * must not synthesize a VM-Exit to L1 before entering L2, as VM-Exits + * can only occur at instruction boundaries. The only exception is + * VMX's "notify" exits, which exist in large part to break the CPU out + * of infinite ucode loops, but can corrupt vCPU state in the process! + */ + bool nested_run_pending; + #if IS_ENABLED(CONFIG_HYPERV) hpa_t hv_root_tdp; #endif diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c index 3ffde1ff719bd..e24f5450f1219 100644 --- a/arch/x86/kvm/svm/nested.c +++ b/arch/x86/kvm/svm/nested.c @@ -914,7 +914,7 @@ static void nested_vmcb02_prepare_control(struct vcpu_svm *svm) * the CPU and/or KVM and should be used regardless of L1's support. */ if (guest_cpu_cap_has(vcpu, X86_FEATURE_NRIPS) || - !svm->nested.nested_run_pending) + !vcpu->arch.nested_run_pending) vmcb02->control.next_rip = vmcb12_ctrl->next_rip; svm->nmi_l1_to_l2 = is_evtinj_nmi(vmcb02->control.event_inj); @@ -926,7 +926,7 @@ static void nested_vmcb02_prepare_control(struct vcpu_svm *svm) if (is_evtinj_soft(vmcb02->control.event_inj)) { svm->soft_int_injected = true; if (guest_cpu_cap_has(vcpu, X86_FEATURE_NRIPS) || - !svm->nested.nested_run_pending) + !vcpu->arch.nested_run_pending) svm->soft_int_next_rip = vmcb12_ctrl->next_rip; } @@ -1132,11 +1132,11 @@ int nested_svm_vmrun(struct kvm_vcpu *vcpu) if (!npt_enabled) vmcb01->save.cr3 = kvm_read_cr3(vcpu); - svm->nested.nested_run_pending = 1; + vcpu->arch.nested_run_pending = 1; if (enter_svm_guest_mode(vcpu, vmcb12_gpa, true) || !nested_svm_merge_msrpm(vcpu)) { - svm->nested.nested_run_pending = 0; + vcpu->arch.nested_run_pending = 0; svm->nmi_l1_to_l2 = false; svm->soft_int_injected = false; @@ -1278,7 +1278,7 @@ void nested_svm_vmexit(struct vcpu_svm *svm) /* Exit Guest-Mode */ leave_guest_mode(vcpu); svm->nested.vmcb12_gpa = 0; - WARN_ON_ONCE(svm->nested.nested_run_pending); + WARN_ON_ONCE(vcpu->arch.nested_run_pending); kvm_clear_request(KVM_REQ_GET_NESTED_STATE_PAGES, vcpu); @@ -1488,7 +1488,7 @@ void svm_leave_nested(struct kvm_vcpu *vcpu) struct vcpu_svm *svm = to_svm(vcpu); if (is_guest_mode(vcpu)) { - svm->nested.nested_run_pending = 0; + vcpu->arch.nested_run_pending = 0; svm->nested.vmcb12_gpa = INVALID_GPA; leave_guest_mode(vcpu); @@ -1673,7 +1673,7 @@ static int svm_check_nested_events(struct kvm_vcpu *vcpu) * previously injected event, the pending exception occurred while said * event was being delivered and thus needs to be handled. */ - bool block_nested_exceptions = svm->nested.nested_run_pending; + bool block_nested_exceptions = vcpu->arch.nested_run_pending; /* * New events (not exceptions) are only recognized at instruction * boundaries. If an event needs reinjection, then KVM is handling a @@ -1848,7 +1848,7 @@ static int svm_get_nested_state(struct kvm_vcpu *vcpu, kvm_state.size += KVM_STATE_NESTED_SVM_VMCB_SIZE; kvm_state.flags |= KVM_STATE_NESTED_GUEST_MODE; - if (svm->nested.nested_run_pending) + if (vcpu->arch.nested_run_pending) kvm_state.flags |= KVM_STATE_NESTED_RUN_PENDING; } @@ -1985,7 +1985,7 @@ static int svm_set_nested_state(struct kvm_vcpu *vcpu, svm_set_gif(svm, !!(kvm_state->flags & KVM_STATE_NESTED_GIF_SET)); - svm->nested.nested_run_pending = + vcpu->arch.nested_run_pending = !!(kvm_state->flags & KVM_STATE_NESTED_RUN_PENDING); svm->nested.vmcb12_gpa = kvm_state->hdr.svm.vmcb_pa; diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index 5e6bd7fca2982..dbd35340e7b0b 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -3820,7 +3820,7 @@ static void svm_fixup_nested_rips(struct kvm_vcpu *vcpu) { struct vcpu_svm *svm = to_svm(vcpu); - if (!is_guest_mode(vcpu) || !svm->nested.nested_run_pending) + if (!is_guest_mode(vcpu) || !vcpu->arch.nested_run_pending) return; /* @@ -3968,7 +3968,7 @@ bool svm_nmi_blocked(struct kvm_vcpu *vcpu) static int svm_nmi_allowed(struct kvm_vcpu *vcpu, bool for_injection) { struct vcpu_svm *svm = to_svm(vcpu); - if (svm->nested.nested_run_pending) + if (vcpu->arch.nested_run_pending) return -EBUSY; if (svm_nmi_blocked(vcpu)) @@ -4010,7 +4010,7 @@ static int svm_interrupt_allowed(struct kvm_vcpu *vcpu, bool for_injection) { struct vcpu_svm *svm = to_svm(vcpu); - if (svm->nested.nested_run_pending) + if (vcpu->arch.nested_run_pending) return -EBUSY; if (svm_interrupt_blocked(vcpu)) @@ -4222,7 +4222,7 @@ static void svm_complete_soft_interrupt(struct kvm_vcpu *vcpu, u8 vector, * the soft int and will reinject it via the standard injection flow, * and so KVM needs to grab the state from the pending nested VMRUN. */ - if (is_guest_mode(vcpu) && svm->nested.nested_run_pending) + if (is_guest_mode(vcpu) && vcpu->arch.nested_run_pending) svm_set_nested_run_soft_int_state(vcpu); /* @@ -4525,11 +4525,11 @@ static __no_kcsan fastpath_t svm_vcpu_run(struct kvm_vcpu *vcpu, u64 run_flags) nested_sync_control_from_vmcb02(svm); /* Track VMRUNs that have made past consistency checking */ - if (svm->nested.nested_run_pending && + if (vcpu->arch.nested_run_pending && !svm_is_vmrun_failure(svm->vmcb->control.exit_code)) ++vcpu->stat.nested_run; - svm->nested.nested_run_pending = 0; + vcpu->arch.nested_run_pending = 0; } svm->vmcb->control.tlb_ctl = TLB_CONTROL_DO_NOTHING; @@ -4898,7 +4898,7 @@ bool svm_smi_blocked(struct kvm_vcpu *vcpu) static int svm_smi_allowed(struct kvm_vcpu *vcpu, bool for_injection) { struct vcpu_svm *svm = to_svm(vcpu); - if (svm->nested.nested_run_pending) + if (vcpu->arch.nested_run_pending) return -EBUSY; if (svm_smi_blocked(vcpu)) @@ -5013,7 +5013,7 @@ static int svm_leave_smm(struct kvm_vcpu *vcpu, const union kvm_smram *smram) if (ret) goto unmap_save; - svm->nested.nested_run_pending = 1; + vcpu->arch.nested_run_pending = 1; unmap_save: kvm_vcpu_unmap(vcpu, &map_save); diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h index c530688486289..5b287ad83b69f 100644 --- a/arch/x86/kvm/svm/svm.h +++ b/arch/x86/kvm/svm/svm.h @@ -215,10 +215,6 @@ struct svm_nested_state { */ void *msrpm; - /* A VMRUN has started but has not yet been performed, so - * we cannot inject a nested vmexit yet. */ - bool nested_run_pending; - /* cache for control fields of the guest */ struct vmcb_ctrl_area_cached ctl; diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c index 248635da67661..031075467a6dc 100644 --- a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c @@ -2273,7 +2273,7 @@ static void vmx_start_preemption_timer(struct kvm_vcpu *vcpu, static u64 nested_vmx_calc_efer(struct vcpu_vmx *vmx, struct vmcs12 *vmcs12) { - if (vmx->nested.nested_run_pending && + if (vmx->vcpu.arch.nested_run_pending && (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_IA32_EFER)) return vmcs12->guest_ia32_efer; else if (vmcs12->vm_entry_controls & VM_ENTRY_IA32E_MODE) @@ -2513,7 +2513,7 @@ static void prepare_vmcs02_early(struct vcpu_vmx *vmx, struct loaded_vmcs *vmcs0 /* * Interrupt/Exception Fields */ - if (vmx->nested.nested_run_pending) { + if (vmx->vcpu.arch.nested_run_pending) { vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, vmcs12->vm_entry_intr_info_field); vmcs_write32(VM_ENTRY_EXCEPTION_ERROR_CODE, @@ -2621,7 +2621,7 @@ static void prepare_vmcs02_rare(struct vcpu_vmx *vmx, struct vmcs12 *vmcs12) vmcs_write64(GUEST_PDPTR3, vmcs12->guest_pdptr3); } - if (kvm_mpx_supported() && vmx->nested.nested_run_pending && + if (kvm_mpx_supported() && vmx->vcpu.arch.nested_run_pending && (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_BNDCFGS)) vmcs_write64(GUEST_BNDCFGS, vmcs12->guest_bndcfgs); } @@ -2718,7 +2718,7 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, !(evmcs->hv_clean_fields & HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1); } - if (vmx->nested.nested_run_pending && + if (vcpu->arch.nested_run_pending && (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_DEBUG_CONTROLS)) { kvm_set_dr(vcpu, 7, vmcs12->guest_dr7); vmx_guest_debugctl_write(vcpu, vmcs12->guest_ia32_debugctl & @@ -2728,13 +2728,13 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, vmx_guest_debugctl_write(vcpu, vmx->nested.pre_vmenter_debugctl); } - if (!vmx->nested.nested_run_pending || + if (!vcpu->arch.nested_run_pending || !(vmcs12->vm_entry_controls & VM_ENTRY_LOAD_CET_STATE)) vmcs_write_cet_state(vcpu, vmx->nested.pre_vmenter_s_cet, vmx->nested.pre_vmenter_ssp, vmx->nested.pre_vmenter_ssp_tbl); - if (kvm_mpx_supported() && (!vmx->nested.nested_run_pending || + if (kvm_mpx_supported() && (!vcpu->arch.nested_run_pending || !(vmcs12->vm_entry_controls & VM_ENTRY_LOAD_BNDCFGS))) vmcs_write64(GUEST_BNDCFGS, vmx->nested.pre_vmenter_bndcfgs); vmx_set_rflags(vcpu, vmcs12->guest_rflags); @@ -2747,7 +2747,7 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, vcpu->arch.cr0_guest_owned_bits &= ~vmcs12->cr0_guest_host_mask; vmcs_writel(CR0_GUEST_HOST_MASK, ~vcpu->arch.cr0_guest_owned_bits); - if (vmx->nested.nested_run_pending && + if (vcpu->arch.nested_run_pending && (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_IA32_PAT)) { vmcs_write64(GUEST_IA32_PAT, vmcs12->guest_ia32_pat); vcpu->arch.pat = vmcs12->guest_ia32_pat; @@ -3335,7 +3335,7 @@ static int nested_vmx_check_guest_state(struct kvm_vcpu *vcpu, * to bit 8 (LME) if bit 31 in the CR0 field (corresponding to * CR0.PG) is 1. */ - if (to_vmx(vcpu)->nested.nested_run_pending && + if (vcpu->arch.nested_run_pending && (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_IA32_EFER)) { if (CC(!kvm_valid_efer(vcpu, vmcs12->guest_ia32_efer)) || CC(ia32e != !!(vmcs12->guest_ia32_efer & EFER_LMA)) || @@ -3613,15 +3613,15 @@ enum nvmx_vmentry_status nested_vmx_enter_non_root_mode(struct kvm_vcpu *vcpu, kvm_service_local_tlb_flush_requests(vcpu); - if (!vmx->nested.nested_run_pending || + if (!vcpu->arch.nested_run_pending || !(vmcs12->vm_entry_controls & VM_ENTRY_LOAD_DEBUG_CONTROLS)) vmx->nested.pre_vmenter_debugctl = vmx_guest_debugctl_read(); if (kvm_mpx_supported() && - (!vmx->nested.nested_run_pending || + (!vcpu->arch.nested_run_pending || !(vmcs12->vm_entry_controls & VM_ENTRY_LOAD_BNDCFGS))) vmx->nested.pre_vmenter_bndcfgs = vmcs_read64(GUEST_BNDCFGS); - if (!vmx->nested.nested_run_pending || + if (!vcpu->arch.nested_run_pending || !(vmcs12->vm_entry_controls & VM_ENTRY_LOAD_CET_STATE)) vmcs_read_cet_state(vcpu, &vmx->nested.pre_vmenter_s_cet, &vmx->nested.pre_vmenter_ssp, @@ -3830,7 +3830,7 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch) * We're finally done with prerequisite checking, and can start with * the nested entry. */ - vmx->nested.nested_run_pending = 1; + vcpu->arch.nested_run_pending = 1; vmx->nested.has_preemption_timer_deadline = false; status = nested_vmx_enter_non_root_mode(vcpu, true); if (unlikely(status != NVMX_VMENTRY_SUCCESS)) @@ -3862,12 +3862,12 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch) !nested_cpu_has(vmcs12, CPU_BASED_NMI_WINDOW_EXITING) && !(nested_cpu_has(vmcs12, CPU_BASED_INTR_WINDOW_EXITING) && (vmcs12->guest_rflags & X86_EFLAGS_IF))) { - vmx->nested.nested_run_pending = 0; + vcpu->arch.nested_run_pending = 0; return kvm_emulate_halt_noskip(vcpu); } break; case GUEST_ACTIVITY_WAIT_SIPI: - vmx->nested.nested_run_pending = 0; + vcpu->arch.nested_run_pending = 0; kvm_set_mp_state(vcpu, KVM_MP_STATE_INIT_RECEIVED); break; default: @@ -3877,7 +3877,7 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch) return 1; vmentry_failed: - vmx->nested.nested_run_pending = 0; + vcpu->arch.nested_run_pending = 0; if (status == NVMX_VMENTRY_KVM_INTERNAL_ERROR) return 0; if (status == NVMX_VMENTRY_VMEXIT) @@ -4274,7 +4274,7 @@ static int vmx_check_nested_events(struct kvm_vcpu *vcpu) * previously injected event, the pending exception occurred while said * event was being delivered and thus needs to be handled. */ - bool block_nested_exceptions = vmx->nested.nested_run_pending; + bool block_nested_exceptions = vcpu->arch.nested_run_pending; /* * Events that don't require injection, i.e. that are virtualized by * hardware, aren't blocked by a pending VM-Enter as KVM doesn't need @@ -4643,7 +4643,7 @@ static void sync_vmcs02_to_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) if (nested_cpu_has_preemption_timer(vmcs12) && vmcs12->vm_exit_controls & VM_EXIT_SAVE_VMX_PREEMPTION_TIMER && - !vmx->nested.nested_run_pending) + !vcpu->arch.nested_run_pending) vmcs12->vmx_preemption_timer_value = vmx_get_preemption_timer_value(vcpu); @@ -5042,7 +5042,7 @@ void __nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 vm_exit_reason, vmx->nested.mtf_pending = false; /* trying to cancel vmlaunch/vmresume is a bug */ - WARN_ON_ONCE(vmx->nested.nested_run_pending); + WARN_ON_ONCE(vcpu->arch.nested_run_pending); #ifdef CONFIG_KVM_HYPERV if (kvm_check_request(KVM_REQ_GET_NESTED_STATE_PAGES, vcpu)) { @@ -6665,7 +6665,7 @@ bool nested_vmx_reflect_vmexit(struct kvm_vcpu *vcpu) unsigned long exit_qual; u32 exit_intr_info; - WARN_ON_ONCE(vmx->nested.nested_run_pending); + WARN_ON_ONCE(vcpu->arch.nested_run_pending); /* * Late nested VM-Fail shares the same flow as nested VM-Exit since KVM @@ -6761,7 +6761,7 @@ static int vmx_get_nested_state(struct kvm_vcpu *vcpu, if (is_guest_mode(vcpu)) { kvm_state.flags |= KVM_STATE_NESTED_GUEST_MODE; - if (vmx->nested.nested_run_pending) + if (vcpu->arch.nested_run_pending) kvm_state.flags |= KVM_STATE_NESTED_RUN_PENDING; if (vmx->nested.mtf_pending) @@ -6836,7 +6836,7 @@ out: void vmx_leave_nested(struct kvm_vcpu *vcpu) { if (is_guest_mode(vcpu)) { - to_vmx(vcpu)->nested.nested_run_pending = 0; + vcpu->arch.nested_run_pending = 0; nested_vmx_vmexit(vcpu, -1, 0, 0); } free_nested(vcpu); @@ -6973,7 +6973,7 @@ static int vmx_set_nested_state(struct kvm_vcpu *vcpu, if (!(kvm_state->flags & KVM_STATE_NESTED_GUEST_MODE)) return 0; - vmx->nested.nested_run_pending = + vcpu->arch.nested_run_pending = !!(kvm_state->flags & KVM_STATE_NESTED_RUN_PENDING); vmx->nested.mtf_pending = @@ -7025,7 +7025,7 @@ static int vmx_set_nested_state(struct kvm_vcpu *vcpu, return 0; error_guest_mode: - vmx->nested.nested_run_pending = 0; + vcpu->arch.nested_run_pending = 0; return ret; } diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 967b58a8ab9d0..9ef3fb04403d2 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -5279,7 +5279,7 @@ bool vmx_nmi_blocked(struct kvm_vcpu *vcpu) int vmx_nmi_allowed(struct kvm_vcpu *vcpu, bool for_injection) { - if (to_vmx(vcpu)->nested.nested_run_pending) + if (vcpu->arch.nested_run_pending) return -EBUSY; /* An NMI must not be injected into L2 if it's supposed to VM-Exit. */ @@ -5306,7 +5306,7 @@ bool vmx_interrupt_blocked(struct kvm_vcpu *vcpu) int vmx_interrupt_allowed(struct kvm_vcpu *vcpu, bool for_injection) { - if (to_vmx(vcpu)->nested.nested_run_pending) + if (vcpu->arch.nested_run_pending) return -EBUSY; /* @@ -6118,7 +6118,7 @@ static bool vmx_unhandleable_emulation_required(struct kvm_vcpu *vcpu) * only reachable if userspace modifies L2 guest state after KVM has * performed the nested VM-Enter consistency checks. */ - if (vmx->nested.nested_run_pending) + if (vcpu->arch.nested_run_pending) return true; /* @@ -6802,7 +6802,7 @@ static int __vmx_handle_exit(struct kvm_vcpu *vcpu, fastpath_t exit_fastpath) * invalid guest state should never happen as that means KVM knowingly * allowed a nested VM-Enter with an invalid vmcs12. More below. */ - if (KVM_BUG_ON(vmx->nested.nested_run_pending, vcpu->kvm)) + if (KVM_BUG_ON(vcpu->arch.nested_run_pending, vcpu->kvm)) return -EIO; if (is_guest_mode(vcpu)) { @@ -7730,11 +7730,11 @@ fastpath_t vmx_vcpu_run(struct kvm_vcpu *vcpu, u64 run_flags) * Track VMLAUNCH/VMRESUME that have made past guest state * checking. */ - if (vmx->nested.nested_run_pending && + if (vcpu->arch.nested_run_pending && !vmx_get_exit_reason(vcpu).failed_vmentry) ++vcpu->stat.nested_run; - vmx->nested.nested_run_pending = 0; + vcpu->arch.nested_run_pending = 0; } if (unlikely(vmx->fail)) @@ -8491,7 +8491,7 @@ void vmx_setup_mce(struct kvm_vcpu *vcpu) int vmx_smi_allowed(struct kvm_vcpu *vcpu, bool for_injection) { /* we need a nested vmexit to enter SMM, postpone if run is pending */ - if (to_vmx(vcpu)->nested.nested_run_pending) + if (vcpu->arch.nested_run_pending) return -EBUSY; return !is_smm(vcpu); } @@ -8532,7 +8532,7 @@ int vmx_leave_smm(struct kvm_vcpu *vcpu, const union kvm_smram *smram) if (ret) return ret; - vmx->nested.nested_run_pending = 1; + vcpu->arch.nested_run_pending = 1; vmx->nested.smm.guest_mode = false; } return 0; diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h index 70bfe81dea540..db84e8001da58 100644 --- a/arch/x86/kvm/vmx/vmx.h +++ b/arch/x86/kvm/vmx/vmx.h @@ -138,9 +138,6 @@ struct nested_vmx { */ bool enlightened_vmcs_enabled; - /* L2 must run next, and mustn't decide to exit to L1. */ - bool nested_run_pending; - /* Pending MTF VM-exit into L1. */ bool mtf_pending;