static bool __read_mostly enable_shadow_vmcs = 1;
module_param_named(enable_shadow_vmcs, enable_shadow_vmcs, bool, S_IRUGO);
-static bool __read_mostly nested_early_check = 0;
-module_param(nested_early_check, bool, S_IRUGO);
-
#define CC KVM_NESTED_VMENTER_CONSISTENCY_CHECK
/*
return;
vmx->nested.vmcs02_initialized = true;
- /*
- * If early consistency checks are enabled, stuff the EPT Pointer with
- * a dummy *legal* value to avoid false positives on bad control state.
- */
- if (enable_ept && nested_early_check)
- vmcs_write64(EPT_POINTER, VMX_EPTP_MT_WB | VMX_EPTP_PWL_4);
-
if (vmx->ve_info)
vmcs_write64(VE_INFORMATION_ADDRESS, __pa(vmx->ve_info));
else
vmcs_write16(VIRTUAL_PROCESSOR_ID, vmx->vpid);
}
-
- if (kvm_caps.has_tsc_control && nested_early_check) {
- if (nested_cpu_has2(vmcs12, SECONDARY_EXEC_TSC_SCALING))
- vmcs_write64(TSC_MULTIPLIER, vmcs12->tsc_multiplier);
- else
- vmcs_write64(TSC_MULTIPLIER, 1);
- }
}
static void prepare_vmcs02_early(struct vcpu_vmx *vmx, struct loaded_vmcs *vmcs01,
return 0;
}
-static int nested_vmx_check_vmentry_hw(struct kvm_vcpu *vcpu)
-{
- struct vcpu_vmx *vmx = to_vmx(vcpu);
- unsigned long cr3, cr4;
- bool vm_fail;
-
- if (!nested_early_check)
- return 0;
-
- if (vmx->msr_autoload.host.nr)
- vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, 0);
- if (vmx->msr_autoload.guest.nr)
- vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, 0);
-
- preempt_disable();
-
- vmx_prepare_switch_to_guest(vcpu);
-
- /*
- * Induce a consistency check VMExit by clearing bit 1 in GUEST_RFLAGS,
- * which is reserved to '1' by hardware. GUEST_RFLAGS is guaranteed to
- * be written (by prepare_vmcs02()) before the "real" VMEnter, i.e.
- * there is no need to preserve other bits or save/restore the field.
- */
- vmcs_writel(GUEST_RFLAGS, 0);
-
- cr3 = __get_current_cr3_fast();
- if (unlikely(cr3 != vmx->loaded_vmcs->host_state.cr3)) {
- vmcs_writel(HOST_CR3, cr3);
- vmx->loaded_vmcs->host_state.cr3 = cr3;
- }
-
- cr4 = cr4_read_shadow();
- if (unlikely(cr4 != vmx->loaded_vmcs->host_state.cr4)) {
- vmcs_writel(HOST_CR4, cr4);
- vmx->loaded_vmcs->host_state.cr4 = cr4;
- }
-
- vm_fail = __vmx_vcpu_run(vmx, (unsigned long *)&vcpu->arch.regs,
- __vmx_vcpu_run_flags(vmx));
-
- if (vmx->msr_autoload.host.nr)
- vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, vmx->msr_autoload.host.nr);
- if (vmx->msr_autoload.guest.nr)
- vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, vmx->msr_autoload.guest.nr);
-
- if (vm_fail) {
- u32 error = vmcs_read32(VM_INSTRUCTION_ERROR);
-
- preempt_enable();
-
- trace_kvm_nested_vmenter_failed(
- "early hardware check VM-instruction error: ", error);
- WARN_ON_ONCE(error != VMXERR_ENTRY_INVALID_CONTROL_FIELD);
- return 1;
- }
-
- /*
- * VMExit clears RFLAGS.IF and DR7, even on a consistency check.
- */
- if (hw_breakpoint_active())
- set_debugreg(__this_cpu_read(cpu_dr7), 7);
- local_irq_enable();
- preempt_enable();
-
- /*
- * A non-failing VMEntry means we somehow entered guest mode with
- * an illegal RIP, and that's just the tip of the iceberg. There
- * is no telling what memory has been modified or what state has
- * been exposed to unknown code. Hitting this all but guarantees
- * a (very critical) hardware issue.
- */
- WARN_ON(!(vmcs_read32(VM_EXIT_REASON) &
- VMX_EXIT_REASONS_FAILED_VMENTRY));
-
- return 0;
-}
-
#ifdef CONFIG_KVM_HYPERV
static bool nested_get_evmcs_page(struct kvm_vcpu *vcpu)
{
&vmx->nested.pre_vmenter_ssp_tbl);
/*
- * Overwrite vmcs01.GUEST_CR3 with L1's CR3 if EPT is disabled *and*
- * nested early checks are disabled. In the event of a "late" VM-Fail,
- * i.e. a VM-Fail detected by hardware but not KVM, KVM must unwind its
- * software model to the pre-VMEntry host state. When EPT is disabled,
- * GUEST_CR3 holds KVM's shadow CR3, not L1's "real" CR3, which causes
- * nested_vmx_restore_host_state() to corrupt vcpu->arch.cr3. Stuffing
- * vmcs01.GUEST_CR3 results in the unwind naturally setting arch.cr3 to
- * the correct value. Smashing vmcs01.GUEST_CR3 is safe because nested
- * VM-Exits, and the unwind, reset KVM's MMU, i.e. vmcs01.GUEST_CR3 is
- * guaranteed to be overwritten with a shadow CR3 prior to re-entering
- * L1. Don't stuff vmcs01.GUEST_CR3 when using nested early checks as
- * KVM modifies vcpu->arch.cr3 if and only if the early hardware checks
- * pass, and early VM-Fails do not reset KVM's MMU, i.e. the VM-Fail
- * path would need to manually save/restore vmcs01.GUEST_CR3.
+ * Overwrite vmcs01.GUEST_CR3 with L1's CR3 if EPT is disabled. In the
+ * event of a "late" VM-Fail, i.e. a VM-Fail detected by hardware but
+ * not KVM, KVM must unwind its software model to the pre-VM-Entry host
+ * state. When EPT is disabled, GUEST_CR3 holds KVM's shadow CR3, not
+ * L1's "real" CR3, which causes nested_vmx_restore_host_state() to
+ * corrupt vcpu->arch.cr3. Stuffing vmcs01.GUEST_CR3 results in the
+ * unwind naturally setting arch.cr3 to the correct value. Smashing
+ * vmcs01.GUEST_CR3 is safe because nested VM-Exits, and the unwind,
+ * reset KVM's MMU, i.e. vmcs01.GUEST_CR3 is guaranteed to be
+ * overwritten with a shadow CR3 prior to re-entering L1.
*/
- if (!enable_ept && !nested_early_check)
+ if (!enable_ept)
vmcs_writel(GUEST_CR3, vcpu->arch.cr3);
vmx_switch_vmcs(vcpu, &vmx->nested.vmcs02);
return NVMX_VMENTRY_KVM_INTERNAL_ERROR;
}
- if (nested_vmx_check_vmentry_hw(vcpu)) {
- vmx_switch_vmcs(vcpu, &vmx->vmcs01);
- return NVMX_VMENTRY_VMFAIL;
- }
-
if (nested_vmx_check_guest_state(vcpu, vmcs12,
&entry_failure_code)) {
exit_reason.basic = EXIT_REASON_INVALID_STATE;
/*
* The only expected VM-instruction error is "VM entry with
* invalid control field(s)." Anything else indicates a
- * problem with L0. And we should never get here with a
- * VMFail of any type if early consistency checks are enabled.
+ * problem with L0.
*/
WARN_ON_ONCE(vmcs_read32(VM_INSTRUCTION_ERROR) !=
VMXERR_ENTRY_INVALID_CONTROL_FIELD);
- WARN_ON_ONCE(nested_early_check);
}
/*