From: Paolo Bonzini Date: Mon, 13 Apr 2026 11:01:24 +0000 (+0200) Subject: Merge tag 'kvm-x86-nested-7.1' of https://github.com/kvm-x86/linux into HEAD X-Git-Url: http://git.ipfire.org/index.cgi?a=commitdiff_plain;h=ea8bc95fbb75da215b7533c7c46f63423e84ff5e;p=thirdparty%2Fkernel%2Flinux.git Merge tag 'kvm-x86-nested-7.1' of https://github.com/kvm-x86/linux into HEAD KVM nested SVM changes for 7.1 (with one common x86 fix) - To minimize the probability of corrupting guest state, defer KVM's non-architectural delivery of exception payloads (e.g. CR2 and DR6) until consumption of the payload is imminent, and force delivery of the payload in all paths where userspace saves relevant state. - Use vcpu->arch.cr2 when updating vmcb12's CR2 on nested #VMEXIT to fix a bug where L2's CR2 can get corrupted after a save/restore, e.g. if the VM is migrated while L2 is faulting in memory. - Fix a class of nSVM bugs where some fields written by the CPU are not synchronized from vmcb02 to cached vmcb12 after VMRUN, and so are not up-to-date when saved by KVM_GET_NESTED_STATE. - Fix a class of bugs where the ordering between KVM_SET_NESTED_STATE and KVM_SET_{S}REGS could cause vmcb02 to be incorrectly initialized after save+restore. - Add a variety of missing nSVM consistency checks. - Fix several bugs where KVM failed to correctly update VMCB fields on nested #VMEXIT. - Fix several bugs where KVM failed to correctly synthesize #UD or #GP for SVM-related instructions. - Add support for save+restore of virtualized LBRs (on SVM). - Refactor various helpers and macros to improve clarity and (hopefully) make the code easier to maintain. - Aggressively sanitize fields when copying from vmcb12 to guard against unintentionally allowing L1 to utilize yet-to-be-defined features. - Fix several bugs where KVM botched rAX legality checks when emulating SVM instructions. 
Note, KVM is still flawed in that KVM doesn't address size prefix overrides for 64-bit guests; this should probably be documented as a KVM erratum. - Fail emulation of VMRUN/VMLOAD/VMSAVE if mapping vmcb12 fails instead of somewhat arbitrarily synthesizing #GP (i.e. don't bastardize AMD's already-sketchy behavior of generating #GP for "unsupported" addresses). - Cache all used vmcb12 fields to further harden against TOCTOU bugs. --- ea8bc95fbb75da215b7533c7c46f63423e84ff5e diff --cc arch/x86/kvm/svm/nested.c index b36c33255bed6,b42d95fc84990..961804df5f451 --- a/arch/x86/kvm/svm/nested.c +++ b/arch/x86/kvm/svm/nested.c @@@ -402,31 -448,6 +448,17 @@@ static bool nested_vmcb_check_save(stru return true; } - static bool nested_vmcb_check_save(struct kvm_vcpu *vcpu) - { - struct vcpu_svm *svm = to_svm(vcpu); - struct vmcb_save_area_cached *save = &svm->nested.save; - - return __nested_vmcb_check_save(vcpu, save); - } - - static bool nested_vmcb_check_controls(struct kvm_vcpu *vcpu) ++int nested_svm_check_cached_vmcb12(struct kvm_vcpu *vcpu) +{ + struct vcpu_svm *svm = to_svm(vcpu); - struct vmcb_ctrl_area_cached *ctl = &svm->nested.ctl; + - return __nested_vmcb_check_controls(vcpu, ctl); - } - - int nested_svm_check_cached_vmcb12(struct kvm_vcpu *vcpu) - { - if (!nested_vmcb_check_save(vcpu) || - !nested_vmcb_check_controls(vcpu)) ++ if (!nested_vmcb_check_save(vcpu, &svm->nested.save) || ++ !nested_vmcb_check_controls(vcpu, &svm->nested.ctl)) + return -EINVAL; + + return 0; +} + /* * If a feature is not advertised to L1, clear the corresponding vmcb12 * intercept. 
@@@ -992,6 -1047,35 +1058,34 @@@ int enter_svm_guest_mode(struct kvm_vcp return 0; } + static int nested_svm_copy_vmcb12_to_cache(struct kvm_vcpu *vcpu, u64 vmcb12_gpa) + { + struct vcpu_svm *svm = to_svm(vcpu); + struct kvm_host_map map; + struct vmcb *vmcb12; + int r = 0; + + if (kvm_vcpu_map(vcpu, gpa_to_gfn(vmcb12_gpa), &map)) + return -EFAULT; + + vmcb12 = map.hva; + nested_copy_vmcb_control_to_cache(svm, &vmcb12->control); + nested_copy_vmcb_save_to_cache(svm, &vmcb12->save); + - if (!nested_vmcb_check_save(vcpu, &svm->nested.save) || - !nested_vmcb_check_controls(vcpu, &svm->nested.ctl)) { ++ if (nested_svm_check_cached_vmcb12(vcpu) < 0) { + vmcb12->control.exit_code = SVM_EXIT_ERR; + vmcb12->control.exit_info_1 = 0; + vmcb12->control.exit_info_2 = 0; + vmcb12->control.event_inj = 0; + vmcb12->control.event_inj_err = 0; + svm_set_gif(svm, false); + r = -EINVAL; + } + + kvm_vcpu_unmap(vcpu, &map); + return r; + } + int nested_svm_vmrun(struct kvm_vcpu *vcpu) { struct vcpu_svm *svm = to_svm(vcpu); diff --cc arch/x86/kvm/svm/svm.c index d304568588c72,1e51cbb80e864..07ed964dacf57 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@@ -4880,16 -4999,12 +5000,15 @@@ static int svm_leave_smm(struct kvm_vcp vmcb12 = map.hva; nested_copy_vmcb_control_to_cache(svm, &vmcb12->control); nested_copy_vmcb_save_to_cache(svm, &vmcb12->save); - ret = enter_svm_guest_mode(vcpu, smram64->svm_guest_vmcb_gpa, false); - if (ret) + if (nested_svm_check_cached_vmcb12(vcpu) < 0) + goto unmap_save; + - if (enter_svm_guest_mode(vcpu, smram64->svm_guest_vmcb_gpa, - vmcb12, false) != 0) ++ if (enter_svm_guest_mode(vcpu, smram64->svm_guest_vmcb_gpa, false) != 0) goto unmap_save; + ret = 0; - svm->nested.nested_run_pending = 1; + vcpu->arch.nested_run_pending = KVM_NESTED_RUN_PENDING; unmap_save: kvm_vcpu_unmap(vcpu, &map_save); diff --cc arch/x86/kvm/vmx/vmx.c index d16427a079f69,d75f6b22d74cb..d76a21c385060 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@@ -8528,15 
-8528,11 +8528,15 @@@ int vmx_leave_smm(struct kvm_vcpu *vcpu } if (vmx->nested.smm.guest_mode) { + /* Triple fault if the state is invalid. */ + if (nested_vmx_check_restored_vmcs12(vcpu) < 0) + return 1; + ret = nested_vmx_enter_non_root_mode(vcpu, false); - if (ret) - return ret; + if (ret != NVMX_VMENTRY_SUCCESS) + return 1; - vmx->nested.nested_run_pending = 1; + vcpu->arch.nested_run_pending = KVM_NESTED_RUN_PENDING; vmx->nested.smm.guest_mode = false; } return 0;