Merge tag 'kvm-x86-mmu-6.9' of https://github.com/kvm-x86/linux into HEAD

author Paolo Bonzini <pbonzini@redhat.com>

Mon, 11 Mar 2024 14:29:22 +0000 (10:29 -0400)

committer Paolo Bonzini <pbonzini@redhat.com>

Mon, 11 Mar 2024 14:29:22 +0000 (10:29 -0400)
author Paolo Bonzini <pbonzini@redhat.com>
Mon, 11 Mar 2024 14:29:22 +0000 (10:29 -0400)
committer Paolo Bonzini <pbonzini@redhat.com>
Mon, 11 Mar 2024 14:29:22 +0000 (10:29 -0400)
diff --combined arch/x86/include/asm/kvm_host.h

index 6b87770ea34d5e2e6301b2374929abe875029754,7d33a2605ad50341d228ef980691a708fb684158..8116839cb263b56609e5319aaa1ef6944984054b
--- 1/arch/x86/include/asm/kvm_host.h
--- 2/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@@ -1145,8 -1145,6 +1145,8 @@@ struct kvm_hv 
         unsigned int synic_auto_eoi_used;
   
         struct kvm_hv_syndbg hv_syndbg;
+ +
+ +      bool xsaves_xsavec_checked;
   };
   #endif
   
@@@ -1468,6 -1466,15 +1468,15 @@@ struct kvm_arch 
          */
         bool shadow_root_allocated;
   
+ #ifdef CONFIG_KVM_EXTERNAL_WRITE_TRACKING
+       /*
+        * If set, the VM has (or had) an external write tracking user, and
+        * thus all write tracking metadata has been allocated, even if KVM
+        * itself isn't using write tracking.
+        */
+       bool external_write_tracking_enabled;
+ #endif
+ 
   #if IS_ENABLED(CONFIG_HYPERV)
         hpa_t   hv_root_tdp;
         spinlock_t hv_root_tdp_lock;
@@@ -1665,8 -1672,7 +1674,8 @@@ struct kvm_x86_ops 
         void (*flush_tlb_guest)(struct kvm_vcpu *vcpu);
   
         int (*vcpu_pre_run)(struct kvm_vcpu *vcpu);
- -      enum exit_fastpath_completion (*vcpu_run)(struct kvm_vcpu *vcpu);
+ +      enum exit_fastpath_completion (*vcpu_run)(struct kvm_vcpu *vcpu,
+ +                                                bool force_immediate_exit);
         int (*handle_exit)(struct kvm_vcpu *vcpu,
                 enum exit_fastpath_completion exit_fastpath);
         int (*skip_emulated_instruction)(struct kvm_vcpu *vcpu);
@@@ -1734,6 -1740,8 +1743,6 @@@
                                struct x86_exception *exception);
         void (*handle_exit_irqoff)(struct kvm_vcpu *vcpu);
   
- -      void (*request_immediate_exit)(struct kvm_vcpu *vcpu);
- -
         void (*sched_in)(struct kvm_vcpu *vcpu, int cpu);
   
         /*
@@@ -2046,7 -2054,7 +2055,7 @@@ int kvm_set_cr3(struct kvm_vcpu *vcpu, 
   int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4);
   int kvm_set_cr8(struct kvm_vcpu *vcpu, unsigned long cr8);
   int kvm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long val);
- -void kvm_get_dr(struct kvm_vcpu *vcpu, int dr, unsigned long *val);
+ +unsigned long kvm_get_dr(struct kvm_vcpu *vcpu, int dr);
   unsigned long kvm_get_cr8(struct kvm_vcpu *vcpu);
   void kvm_lmsw(struct kvm_vcpu *vcpu, unsigned long msw);
   int kvm_emulate_xsetbv(struct kvm_vcpu *vcpu);
@@@ -2239,6 -2247,7 +2248,6 @@@ extern bool kvm_find_async_pf_gfn(struc
   
   int kvm_skip_emulated_instruction(struct kvm_vcpu *vcpu);
   int kvm_complete_insn_gp(struct kvm_vcpu *vcpu, int err);
- -void __kvm_request_immediate_exit(struct kvm_vcpu *vcpu);
   
   void __user *__x86_set_memory_region(struct kvm *kvm, int id, gpa_t gpa,
                                      u32 size);
diff --combined arch/x86/kvm/x86.c

index e6b1b85dca8ae253f1d82a5eda1f9ddeca07d4cb,48ec889452e2b7aadb41def283fe87e9b552cbff..064862d87b9e17a09b1b8cfb7bde633f5e940437
--- 1/arch/x86/kvm/x86.c
--- 2/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@@ -1399,19 -1399,22 +1399,19 @@@ int kvm_set_dr(struct kvm_vcpu *vcpu, i
   }
   EXPORT_SYMBOL_GPL(kvm_set_dr);
   
- -void kvm_get_dr(struct kvm_vcpu *vcpu, int dr, unsigned long *val)
+ +unsigned long kvm_get_dr(struct kvm_vcpu *vcpu, int dr)
   {
         size_t size = ARRAY_SIZE(vcpu->arch.db);
   
         switch (dr) {
         case 0 ... 3:
- -              *val = vcpu->arch.db[array_index_nospec(dr, size)];
- -              break;
+ +              return vcpu->arch.db[array_index_nospec(dr, size)];
         case 4:
         case 6:
- -              *val = vcpu->arch.dr6;
- -              break;
+ +              return vcpu->arch.dr6;
         case 5:
         default: /* 7 */
- -              *val = vcpu->arch.dr7;
- -              break;
+ +              return vcpu->arch.dr7;
         }
   }
   EXPORT_SYMBOL_GPL(kvm_get_dr);
@@@ -1701,17 -1704,22 +1701,17 @@@ static int do_get_msr_feature(struct kv
         struct kvm_msr_entry msr;
         int r;
   
+ +      /* Unconditionally clear the output for simplicity */
+ +      msr.data = 0;
         msr.index = index;
         r = kvm_get_msr_feature(&msr);
   
- -      if (r == KVM_MSR_RET_INVALID) {
- -              /* Unconditionally clear the output for simplicity */
- -              *data = 0;
- -              if (kvm_msr_ignored_check(index, 0, false))
- -                      r = 0;
- -      }
- -
- -      if (r)
- -              return r;
+ +      if (r == KVM_MSR_RET_INVALID && kvm_msr_ignored_check(index, 0, false))
+ +              r = 0;
   
         *data = msr.data;
   
- -      return 0;
+ +      return r;
   }
   
   static bool __kvm_valid_efer(struct kvm_vcpu *vcpu, u64 efer)
@@@ -1774,10 -1782,6 +1774,10 @@@ static int set_efer(struct kvm_vcpu *vc
         if ((efer ^ old_efer) & KVM_MMU_EFER_ROLE_BITS)
                 kvm_mmu_reset_context(vcpu);
   
+ +      if (!static_cpu_has(X86_FEATURE_XSAVES) &&
+ +          (efer & EFER_SVME))
+ +              kvm_hv_xsaves_xsavec_maybe_warn(vcpu);
+ +
         return 0;
   }
   
@@@ -2503,7 -2507,7 +2503,7 @@@ static u64 compute_guest_tsc(struct kvm
   }
   
   #ifdef CONFIG_X86_64
- -static inline int gtod_is_based_on_tsc(int mode)
+ +static inline bool gtod_is_based_on_tsc(int mode)
   {
         return mode == VDSO_CLOCKMODE_TSC || mode == VDSO_CLOCKMODE_HVCLOCK;
   }
@@@ -4577,7 -4581,7 +4577,7 @@@ static bool kvm_is_vm_type_supported(un
   {
         return type == KVM_X86_DEFAULT_VM ||
                (type == KVM_X86_SW_PROTECTED_VM &&
- -              IS_ENABLED(CONFIG_KVM_SW_PROTECTED_VM) && tdp_enabled);
+ +              IS_ENABLED(CONFIG_KVM_SW_PROTECTED_VM) && tdp_mmu_enabled);
   }
   
   int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
@@@ -5058,7 -5062,8 +5058,7 @@@ void kvm_arch_vcpu_put(struct kvm_vcpu 
         int idx;
   
         if (vcpu->preempted) {
- -              if (!vcpu->arch.guest_state_protected)
- -                      vcpu->arch.preempted_in_kernel = !static_call(kvm_x86_get_cpl)(vcpu);
+ +              vcpu->arch.preempted_in_kernel = kvm_arch_vcpu_in_kernel(vcpu);
   
                 /*
                  * Take the srcu lock as memslots will be accessed to check the gfn
@@@ -5449,8 -5454,7 +5449,8 @@@ static int kvm_vcpu_ioctl_x86_set_vcpu_
         if (events->flags & KVM_VCPUEVENT_VALID_NMI_PENDING) {
                 vcpu->arch.nmi_pending = 0;
                 atomic_set(&vcpu->arch.nmi_queued, events->nmi.pending);
- -              kvm_make_request(KVM_REQ_NMI, vcpu);
+ +              if (events->nmi.pending)
+ +                      kvm_make_request(KVM_REQ_NMI, vcpu);
         }
         static_call(kvm_x86_set_nmi_mask)(vcpu, events->nmi.masked);
   
@@@ -5505,23 -5509,18 +5505,23 @@@
   static void kvm_vcpu_ioctl_x86_get_debugregs(struct kvm_vcpu *vcpu,
                                              struct kvm_debugregs *dbgregs)
   {
- -      unsigned long val;
+ +      unsigned int i;
   
         memset(dbgregs, 0, sizeof(*dbgregs));
- -      memcpy(dbgregs->db, vcpu->arch.db, sizeof(vcpu->arch.db));
- -      kvm_get_dr(vcpu, 6, &val);
- -      dbgregs->dr6 = val;
+ +
+ +      BUILD_BUG_ON(ARRAY_SIZE(vcpu->arch.db) != ARRAY_SIZE(dbgregs->db));
+ +      for (i = 0; i < ARRAY_SIZE(vcpu->arch.db); i++)
+ +              dbgregs->db[i] = vcpu->arch.db[i];
+ +
+ +      dbgregs->dr6 = vcpu->arch.dr6;
         dbgregs->dr7 = vcpu->arch.dr7;
   }
   
   static int kvm_vcpu_ioctl_x86_set_debugregs(struct kvm_vcpu *vcpu,
                                             struct kvm_debugregs *dbgregs)
   {
+ +      unsigned int i;
+ +
         if (dbgregs->flags)
                 return -EINVAL;
   
@@@ -5530,9 -5529,7 +5530,9 @@@
         if (!kvm_dr7_valid(dbgregs->dr7))
                 return -EINVAL;
   
- -      memcpy(vcpu->arch.db, dbgregs->db, sizeof(vcpu->arch.db));
+ +      for (i = 0; i < ARRAY_SIZE(vcpu->arch.db); i++)
+ +              vcpu->arch.db[i] = dbgregs->db[i];
+ +
         kvm_update_dr0123(vcpu);
         vcpu->arch.dr6 = dbgregs->dr6;
         vcpu->arch.dr7 = dbgregs->dr7;
@@@ -7019,9 -7016,6 +7019,9 @@@ set_identity_unlock
                 r = -EEXIST;
                 if (kvm->arch.vpit)
                         goto create_pit_unlock;
+ +              r = -ENOENT;
+ +              if (!pic_in_kernel(kvm))
+ +                      goto create_pit_unlock;
                 r = -ENOMEM;
                 kvm->arch.vpit = kvm_create_pit(kvm, u.pit_config.flags);
                 if (kvm->arch.vpit)
@@@ -8170,9 -8164,10 +8170,9 @@@ static void emulator_wbinvd(struct x86_
         kvm_emulate_wbinvd_noskip(emul_to_vcpu(ctxt));
   }
   
- -static void emulator_get_dr(struct x86_emulate_ctxt *ctxt, int dr,
- -                          unsigned long *dest)
+ +static unsigned long emulator_get_dr(struct x86_emulate_ctxt *ctxt, int dr)
   {
- -      kvm_get_dr(emul_to_vcpu(ctxt), dr, dest);
+ +      return kvm_get_dr(emul_to_vcpu(ctxt), dr);
   }
   
   static int emulator_set_dr(struct x86_emulate_ctxt *ctxt, int dr,
@@@ -8792,31 -8787,24 +8792,24 @@@ static bool reexecute_instruction(struc
   
         kvm_release_pfn_clean(pfn);
   
-       /* The instructions are well-emulated on direct mmu. */
-       if (vcpu->arch.mmu->root_role.direct) {
-               unsigned int indirect_shadow_pages;
- 
-               write_lock(&vcpu->kvm->mmu_lock);
-               indirect_shadow_pages = vcpu->kvm->arch.indirect_shadow_pages;
-               write_unlock(&vcpu->kvm->mmu_lock);
- 
-               if (indirect_shadow_pages)
-                       kvm_mmu_unprotect_page(vcpu->kvm, gpa_to_gfn(gpa));
- 
-               return true;
-       }
- 
         /*
-        * if emulation was due to access to shadowed page table
-        * and it failed try to unshadow page and re-enter the
-        * guest to let CPU execute the instruction.
+        * If emulation may have been triggered by a write to a shadowed page
+        * table, unprotect the gfn (zap any relevant SPTEs) and re-enter the
+        * guest to let the CPU re-execute the instruction in the hope that the
+        * CPU can cleanly execute the instruction that KVM failed to emulate.
          */
-       kvm_mmu_unprotect_page(vcpu->kvm, gpa_to_gfn(gpa));
+       if (vcpu->kvm->arch.indirect_shadow_pages)
+               kvm_mmu_unprotect_page(vcpu->kvm, gpa_to_gfn(gpa));
   
         /*
-        * If the access faults on its page table, it can not
-        * be fixed by unprotecting shadow page and it should
-        * be reported to userspace.
+        * If the failed instruction faulted on an access to page tables that
+        * are used to translate any part of the instruction, KVM can't resolve
+        * the issue by unprotecting the gfn, as zapping the shadow page will
+        * result in the instruction taking a !PRESENT page fault and thus put
+        * the vCPU into an infinite loop of page faults.  E.g. KVM will create
+        * a SPTE and write-protect the gfn to resolve the !PRESENT fault, and
+        * then zap the SPTE to unprotect the gfn, and then do it all over
+        * again.  Report the error to userspace.
          */
         return !(emulation_type & EMULTYPE_WRITE_PF_TO_SP);
   }
@@@ -9637,13 -9625,11 +9630,13 @@@ static void kvm_x86_check_cpu_compat(vo
         *(int *)ret = kvm_x86_check_processor_compatibility();
   }
   
- -static int __kvm_x86_vendor_init(struct kvm_x86_init_ops *ops)
+ +int kvm_x86_vendor_init(struct kvm_x86_init_ops *ops)
   {
         u64 host_pat;
         int r, cpu;
   
+ +      guard(mutex)(&vendor_module_lock);
+ +
         if (kvm_x86_ops.hardware_enable) {
                 pr_err("already loaded vendor module '%s'\n", kvm_x86_ops.name);
                 return -EEXIST;
@@@ -9773,6 -9759,17 +9766,6 @@@ out_free_x86_emulator_cache
         kmem_cache_destroy(x86_emulator_cache);
         return r;
   }
- -
- -int kvm_x86_vendor_init(struct kvm_x86_init_ops *ops)
- -{
- -      int r;
- -
- -      mutex_lock(&vendor_module_lock);
- -      r = __kvm_x86_vendor_init(ops);
- -      mutex_unlock(&vendor_module_lock);
- -
- -      return r;
- -}
   EXPORT_SYMBOL_GPL(kvm_x86_vendor_init);
   
   void kvm_x86_vendor_exit(void)
@@@ -10669,6 -10666,12 +10662,6 @@@ static void kvm_vcpu_reload_apic_access
         static_call_cond(kvm_x86_set_apic_access_page_addr)(vcpu);
   }
   
- -void __kvm_request_immediate_exit(struct kvm_vcpu *vcpu)
- -{
- -      smp_send_reschedule(vcpu->cpu);
- -}
- -EXPORT_SYMBOL_GPL(__kvm_request_immediate_exit);
- -
   /*
    * Called within kvm->srcu read side.
    * Returns 1 to let vcpu_run() continue the guest execution loop without
@@@ -10918,8 -10921,10 +10911,8 @@@ static int vcpu_enter_guest(struct kvm_
                 goto cancel_injection;
         }
   
- -      if (req_immediate_exit) {
+ +      if (req_immediate_exit)
                 kvm_make_request(KVM_REQ_EVENT, vcpu);
- -              static_call(kvm_x86_request_immediate_exit)(vcpu);
- -      }
   
         fpregs_assert_state_consistent();
         if (test_thread_flag(TIF_NEED_FPU_LOAD))
@@@ -10950,7 -10955,7 +10943,7 @@@
                 WARN_ON_ONCE((kvm_vcpu_apicv_activated(vcpu) != kvm_vcpu_apicv_active(vcpu)) &&
                              (kvm_get_apic_mode(vcpu) != LAPIC_MODE_DISABLED));
   
- -              exit_fastpath = static_call(kvm_x86_vcpu_run)(vcpu);
+ +              exit_fastpath = static_call(kvm_x86_vcpu_run)(vcpu, req_immediate_exit);
                 if (likely(exit_fastpath != EXIT_FASTPATH_REENTER_GUEST))
                         break;
   
@@@ -12048,9 -12053,27 +12041,9 @@@ int kvm_arch_vcpu_create(struct kvm_vcp
         if (r < 0)
                 return r;
   
- -      if (irqchip_in_kernel(vcpu->kvm)) {
- -              r = kvm_create_lapic(vcpu, lapic_timer_advance_ns);
- -              if (r < 0)
- -                      goto fail_mmu_destroy;
- -
- -              /*
- -               * Defer evaluating inhibits until the vCPU is first run, as
- -               * this vCPU will not get notified of any changes until this
- -               * vCPU is visible to other vCPUs (marked online and added to
- -               * the set of vCPUs).  Opportunistically mark APICv active as
- -               * VMX in particularly is highly unlikely to have inhibits.
- -               * Ignore the current per-VM APICv state so that vCPU creation
- -               * is guaranteed to run with a deterministic value, the request
- -               * will ensure the vCPU gets the correct state before VM-Entry.
- -               */
- -              if (enable_apicv) {
- -                      vcpu->arch.apic->apicv_active = true;
- -                      kvm_make_request(KVM_REQ_APICV_UPDATE, vcpu);
- -              }
- -      } else
- -              static_branch_inc(&kvm_has_noapic_vcpu);
+ +      r = kvm_create_lapic(vcpu, lapic_timer_advance_ns);
+ +      if (r < 0)
+ +              goto fail_mmu_destroy;
   
         r = -ENOMEM;
   
@@@ -12171,6 -12194,8 +12164,6 @@@ void kvm_arch_vcpu_destroy(struct kvm_v
         srcu_read_unlock(&vcpu->kvm->srcu, idx);
         free_page((unsigned long)vcpu->arch.pio_data);
         kvfree(vcpu->arch.cpuid_entries);
- -      if (!lapic_in_kernel(vcpu))
- -              static_branch_dec(&kvm_has_noapic_vcpu);
   }
   
   void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
@@@ -12447,6 -12472,9 +12440,6 @@@ bool kvm_vcpu_is_bsp(struct kvm_vcpu *v
         return (vcpu->arch.apic_base & MSR_IA32_APICBASE_BSP) != 0;
   }
   
- -__read_mostly DEFINE_STATIC_KEY_FALSE(kvm_has_noapic_vcpu);
- -EXPORT_SYMBOL_GPL(kvm_has_noapic_vcpu);
- -
   void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu)
   {
         struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
@@@ -13049,13 -13077,11 +13042,13 @@@ int kvm_arch_vcpu_runnable(struct kvm_v
   
   bool kvm_arch_dy_has_pending_interrupt(struct kvm_vcpu *vcpu)
   {
- -      if (kvm_vcpu_apicv_active(vcpu) &&
- -          static_call(kvm_x86_dy_apicv_has_pending_interrupt)(vcpu))
- -              return true;
+ +      return kvm_vcpu_apicv_active(vcpu) &&
+ +             static_call(kvm_x86_dy_apicv_has_pending_interrupt)(vcpu);
+ +}
   
- -      return false;
+ +bool kvm_arch_vcpu_preempted_in_kernel(struct kvm_vcpu *vcpu)
+ +{
+ +      return vcpu->arch.preempted_in_kernel;
   }
   
   bool kvm_arch_dy_runnable(struct kvm_vcpu *vcpu)
@@@ -13078,6 -13104,9 +13071,6 @@@ bool kvm_arch_vcpu_in_kernel(struct kvm
         if (vcpu->arch.guest_state_protected)
                 return true;
   
- -      if (vcpu != kvm_get_running_vcpu())
- -              return vcpu->arch.preempted_in_kernel;
- -
         return static_call(kvm_x86_get_cpl)(vcpu) == 0;
   }
   
@@@ -13872,6 -13901,9 +13865,6 @@@ module_init(kvm_x86_init)
   
   static void __exit kvm_x86_exit(void)
   {
- -      /*
- -       * If module_init() is implemented, module_exit() must also be
- -       * implemented to allow module unload.
- -       */
+ +      WARN_ON_ONCE(static_branch_unlikely(&kvm_has_noapic_vcpu));
   }
   module_exit(kvm_x86_exit);
author	Paolo Bonzini <pbonzini@redhat.com>
	Mon, 11 Mar 2024 14:29:22 +0000 (10:29 -0400)
committer	Paolo Bonzini <pbonzini@redhat.com>
	Mon, 11 Mar 2024 14:29:22 +0000 (10:29 -0400)
		1	2
arch/x86/include/asm/kvm_host.h	patch \|	diff1 \|	diff2 \|	blob \| history
arch/x86/kvm/x86.c	patch \|	diff1 \|	diff2 \|	blob \| history