Merge tag 'kvm-x86-misc-6.6' of https://github.com/kvm-x86/linux into HEAD
arch/x86/kvm/vmx/vmx.c
index e9386afd1521fc250ebb14085b16d22004f4f7b4..af73d5d54ec8e8a6a07e8e5f700ce1ee94da9a39 100644
@@ -421,13 +421,23 @@ do {                                      \
        pr_warn_ratelimited(fmt);       \
 } while (0)
 
-void vmread_error(unsigned long field, bool fault)
+noinline void vmread_error(unsigned long field)
 {
-       if (fault)
+       vmx_insn_failed("vmread failed: field=%lx\n", field);
+}
+
+#ifndef CONFIG_CC_HAS_ASM_GOTO_OUTPUT
+noinstr void vmread_error_trampoline2(unsigned long field, bool fault)
+{
+       if (fault) {
                kvm_spurious_fault();
-       else
-               vmx_insn_failed("vmread failed: field=%lx\n", field);
+       } else {
+               instrumentation_begin();
+               vmread_error(field);
+               instrumentation_end();
+       }
 }
+#endif
 
 noinline void vmwrite_error(unsigned long field, unsigned long value)
 {
@@ -1517,6 +1527,11 @@ void vmx_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags)
        struct vcpu_vmx *vmx = to_vmx(vcpu);
        unsigned long old_rflags;
 
+       /*
+        * Unlike CR0 and CR4, RFLAGS handling requires checking if the vCPU
+        * is an unrestricted guest in order to mark L2 as needing emulation
+        * if L1 runs L2 as a restricted guest.
+        */
        if (is_unrestricted_guest(vcpu)) {
                kvm_register_mark_available(vcpu, VCPU_EXREG_RFLAGS);
                vmx->rflags = rflags;
@@ -3051,6 +3066,15 @@ static void enter_rmode(struct kvm_vcpu *vcpu)
        struct vcpu_vmx *vmx = to_vmx(vcpu);
        struct kvm_vmx *kvm_vmx = to_kvm_vmx(vcpu->kvm);
 
+       /*
+        * KVM should never use VM86 to virtualize Real Mode when L2 is active,
+        * as using VM86 is unnecessary if unrestricted guest is enabled, and
+        * if unrestricted guest is disabled, VM-Enter (from L1) with CR0.PG=0
+        * should VM-Fail and KVM should reject userspace attempts to stuff
+        * CR0.PG=0 when L2 is active.
+        */
+       WARN_ON_ONCE(is_guest_mode(vcpu));
+
        vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_TR], VCPU_SREG_TR);
        vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_ES], VCPU_SREG_ES);
        vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_DS], VCPU_SREG_DS);
@@ -3061,13 +3085,6 @@ static void enter_rmode(struct kvm_vcpu *vcpu)
 
        vmx->rmode.vm86_active = 1;
 
-       /*
-        * Very old userspace does not call KVM_SET_TSS_ADDR before entering
-        * vcpu. Warn the user that an update is overdue.
-        */
-       if (!kvm_vmx->tss_addr)
-               pr_warn_once("KVM_SET_TSS_ADDR needs to be called before running vCPU\n");
-
        vmx_segment_cache_clear(vmx);
 
        vmcs_writel(GUEST_TR_BASE, kvm_vmx->tss_addr);
@@ -3240,6 +3257,17 @@ void ept_save_pdptrs(struct kvm_vcpu *vcpu)
 #define CR3_EXITING_BITS (CPU_BASED_CR3_LOAD_EXITING | \
                          CPU_BASED_CR3_STORE_EXITING)
 
+static bool vmx_is_valid_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
+{
+       if (is_guest_mode(vcpu))
+               return nested_guest_cr0_valid(vcpu, cr0);
+
+       if (to_vmx(vcpu)->nested.vmxon)
+               return nested_host_cr0_valid(vcpu, cr0);
+
+       return true;
+}
+
 void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
 {
        struct vcpu_vmx *vmx = to_vmx(vcpu);
@@ -3249,7 +3277,7 @@ void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
        old_cr0_pg = kvm_read_cr0_bits(vcpu, X86_CR0_PG);
 
        hw_cr0 = (cr0 & ~KVM_VM_CR0_ALWAYS_OFF);
-       if (is_unrestricted_guest(vcpu))
+       if (enable_unrestricted_guest)
                hw_cr0 |= KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST;
        else {
                hw_cr0 |= KVM_VM_CR0_ALWAYS_ON;
@@ -3277,7 +3305,7 @@ void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
        }
 #endif
 
-       if (enable_ept && !is_unrestricted_guest(vcpu)) {
+       if (enable_ept && !enable_unrestricted_guest) {
                /*
                 * Ensure KVM has an up-to-date snapshot of the guest's CR3.  If
                 * the below code _enables_ CR3 exiting, vmx_cache_reg() will
@@ -3329,7 +3357,7 @@ void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
        vmx->emulation_required = vmx_emulation_required(vcpu);
 }
 
-static int vmx_get_max_tdp_level(void)
+static int vmx_get_max_ept_level(void)
 {
        if (cpu_has_vmx_ept_5levels())
                return 5;
@@ -3408,7 +3436,7 @@ void vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
         * this bit, even if host CR4.MCE == 0.
         */
        hw_cr4 = (cr4_read_shadow() & X86_CR4_MCE) | (cr4 & ~X86_CR4_MCE);
-       if (is_unrestricted_guest(vcpu))
+       if (enable_unrestricted_guest)
                hw_cr4 |= KVM_VM_CR4_ALWAYS_ON_UNRESTRICTED_GUEST;
        else if (vmx->rmode.vm86_active)
                hw_cr4 |= KVM_RMODE_VM_CR4_ALWAYS_ON;
@@ -3428,7 +3456,7 @@ void vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
        vcpu->arch.cr4 = cr4;
        kvm_register_mark_available(vcpu, VCPU_EXREG_CR4);
 
-       if (!is_unrestricted_guest(vcpu)) {
+       if (!enable_unrestricted_guest) {
                if (enable_ept) {
                        if (!is_paging(vcpu)) {
                                hw_cr4 &= ~X86_CR4_PAE;
@@ -4657,7 +4685,8 @@ static int vmx_alloc_ipiv_pid_table(struct kvm *kvm)
        if (kvm_vmx->pid_table)
                return 0;
 
-       pages = alloc_pages(GFP_KERNEL | __GFP_ZERO, vmx_get_pid_table_order(kvm));
+       pages = alloc_pages(GFP_KERNEL_ACCOUNT | __GFP_ZERO,
+                           vmx_get_pid_table_order(kvm));
        if (!pages)
                return -ENOMEM;
 
@@ -5370,18 +5399,11 @@ static int handle_set_cr0(struct kvm_vcpu *vcpu, unsigned long val)
                val = (val & ~vmcs12->cr0_guest_host_mask) |
                        (vmcs12->guest_cr0 & vmcs12->cr0_guest_host_mask);
 
-               if (!nested_guest_cr0_valid(vcpu, val))
-                       return 1;
-
                if (kvm_set_cr0(vcpu, val))
                        return 1;
                vmcs_writel(CR0_READ_SHADOW, orig_val);
                return 0;
        } else {
-               if (to_vmx(vcpu)->nested.vmxon &&
-                   !nested_host_cr0_valid(vcpu, val))
-                       return 1;
-
                return kvm_set_cr0(vcpu, val);
        }
 }
@@ -6773,8 +6795,10 @@ static void vmx_set_apic_access_page_addr(struct kvm_vcpu *vcpu)
        vmcs_write64(APIC_ACCESS_ADDR, pfn_to_hpa(pfn));
        read_unlock(&vcpu->kvm->mmu_lock);
 
-       vmx_flush_tlb_current(vcpu);
-
+       /*
+        * No need for a manual TLB flush at this point, KVM has already done a
+        * flush if there were SPTEs pointing at the previous page.
+        */
 out:
        /*
         * Do not pin apic access page in memory, the MMU notifier
@@ -7220,13 +7244,20 @@ static noinstr void vmx_vcpu_enter_exit(struct kvm_vcpu *vcpu,
                                   flags);
 
        vcpu->arch.cr2 = native_read_cr2();
+       vcpu->arch.regs_avail &= ~VMX_REGS_LAZY_LOAD_SET;
+
+       vmx->idt_vectoring_info = 0;
 
        vmx_enable_fb_clear(vmx);
 
-       if (unlikely(vmx->fail))
+       if (unlikely(vmx->fail)) {
                vmx->exit_reason.full = 0xdead;
-       else
-               vmx->exit_reason.full = vmcs_read32(VM_EXIT_REASON);
+               goto out;
+       }
+
+       vmx->exit_reason.full = vmcs_read32(VM_EXIT_REASON);
+       if (likely(!vmx->exit_reason.failed_vmentry))
+               vmx->idt_vectoring_info = vmcs_read32(IDT_VECTORING_INFO_FIELD);
 
        if ((u16)vmx->exit_reason.basic == EXIT_REASON_EXCEPTION_NMI &&
            is_nmi(vmx_get_intr_info(vcpu))) {
@@ -7235,6 +7266,7 @@ static noinstr void vmx_vcpu_enter_exit(struct kvm_vcpu *vcpu,
                kvm_after_interrupt(vcpu);
        }
 
+out:
        guest_state_exit_irqoff();
 }
 
@@ -7356,8 +7388,6 @@ static fastpath_t vmx_vcpu_run(struct kvm_vcpu *vcpu)
        loadsegment(es, __USER_DS);
 #endif
 
-       vcpu->arch.regs_avail &= ~VMX_REGS_LAZY_LOAD_SET;
-
        pt_guest_exit(vmx);
 
        kvm_load_host_xsave_state(vcpu);
@@ -7374,17 +7404,12 @@ static fastpath_t vmx_vcpu_run(struct kvm_vcpu *vcpu)
                vmx->nested.nested_run_pending = 0;
        }
 
-       vmx->idt_vectoring_info = 0;
-
        if (unlikely(vmx->fail))
                return EXIT_FASTPATH_NONE;
 
        if (unlikely((u16)vmx->exit_reason.basic == EXIT_REASON_MCE_DURING_VMENTRY))
                kvm_machine_check();
 
-       if (likely(!vmx->exit_reason.failed_vmentry))
-               vmx->idt_vectoring_info = vmcs_read32(IDT_VECTORING_INFO_FIELD);
-
        trace_kvm_exit(vcpu, KVM_ISA_VMX);
 
        if (unlikely(vmx->exit_reason.failed_vmentry))
@@ -8217,6 +8242,7 @@ static struct kvm_x86_ops vmx_x86_ops __initdata = {
        .set_segment = vmx_set_segment,
        .get_cpl = vmx_get_cpl,
        .get_cs_db_l_bits = vmx_get_cs_db_l_bits,
+       .is_valid_cr0 = vmx_is_valid_cr0,
        .set_cr0 = vmx_set_cr0,
        .is_valid_cr4 = vmx_is_valid_cr4,
        .set_cr4 = vmx_set_cr4,
@@ -8510,7 +8536,7 @@ static __init int hardware_setup(void)
         */
        vmx_setup_me_spte_mask();
 
-       kvm_configure_mmu(enable_ept, 0, vmx_get_max_tdp_level(),
+       kvm_configure_mmu(enable_ept, 0, vmx_get_max_ept_level(),
                          ept_caps_to_lpage_level(vmx_capability.ept));
 
        /*