KVM: VMX: Tell the nested hypervisor to skip L1D flush on vmentry

author Paolo Bonzini <pbonzini@redhat.com>

Sun, 5 Aug 2018 14:07:47 +0000 (16:07 +0200)

committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>

Wed, 15 Aug 2018 16:13:00 +0000 (18:13 +0200)
author Paolo Bonzini <pbonzini@redhat.com>
Sun, 5 Aug 2018 14:07:47 +0000 (16:07 +0200)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Wed, 15 Aug 2018 16:13:00 +0000 (18:13 +0200)
diff --git a/Documentation/admin-guide/l1tf.rst b/Documentation/admin-guide/l1tf.rst

index 5dadb4503ec991860e70efb878869fb9957c71af..bae52b845de0b93af644ea55103d5a912dfca753 100644 (file)
--- a/Documentation/admin-guide/l1tf.rst
+++ b/Documentation/admin-guide/l1tf.rst
@@ -546,6 +546,27 @@ available:
      EPT can be disabled in the hypervisor via the 'kvm-intel.ept'
      parameter.
  
+3.4. Nested virtual machines
+""""""""""""""""""""""""""""
+
+When nested virtualization is in use, three operating systems are involved:
+the bare metal hypervisor, the nested hypervisor and the nested virtual
+machine.  VMENTER operations from the nested hypervisor into the nested
+guest will always be processed by the bare metal hypervisor. If KVM is the
+bare metal hypervisor it will:
+
+ - Flush the L1D cache on every switch from the nested hypervisor to the
+   nested virtual machine, so that the nested hypervisor's secrets are not
+   exposed to the nested virtual machine;
+
+ - Flush the L1D cache on every switch from the nested virtual machine to
+   the nested hypervisor; this is a complex operation, and flushing the L1D
+   cache prevents the bare metal hypervisor's secrets from being exposed to
+   the nested virtual machine;
+
+ - Instruct the nested hypervisor to not perform any L1D cache flush. This
+   is an optimization to avoid double L1D flushing.
+
  
  .. _default_mitigations:
  
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h

index d6ff1b132b4749ee2e0d665344fecebacdbbb9f1..4015b88383ce98ad5cbc828f098bb0d3b8b16598 100644 (file)
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1374,6 +1374,7 @@ int kvm_cpu_get_interrupt(struct kvm_vcpu *v);
  void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event);
  void kvm_vcpu_reload_apic_access_page(struct kvm_vcpu *vcpu);
  
+u64 kvm_get_arch_capabilities(void);
  void kvm_define_shared_msr(unsigned index, u32 msr);
  int kvm_set_shared_msr(unsigned index, u64 val, u64 mask);
  
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c

index 63ec03108b5de0bfab3072d360e54ae12c0f3ce9..f015ca3997d9216c28c1db88b1e7e7cf0e6de64f 100644 (file)
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -5910,8 +5910,7 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
                 ++vmx->nmsrs;
         }
  
-       if (boot_cpu_has(X86_FEATURE_ARCH_CAPABILITIES))
-               rdmsrl(MSR_IA32_ARCH_CAPABILITIES, vmx->arch_capabilities);
+       vmx->arch_capabilities = kvm_get_arch_capabilities();
  
         vm_exit_controls_init(vmx, vmcs_config.vmexit_ctrl);
  
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c

index fcc058edfabaf9fca49aa1c26c292aeaaec4065b..5c2c09f6c1c31310ad0e8ab9c124bb29540fb377 100644 (file)
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1054,11 +1054,35 @@ static u32 msr_based_features[] = {
  
  static unsigned int num_msr_based_features;
  
+u64 kvm_get_arch_capabilities(void)
+{
+       u64 data;
+
+       rdmsrl_safe(MSR_IA32_ARCH_CAPABILITIES, &data);
+
+       /*
+        * If we're doing cache flushes (either "always" or "cond")
+        * we will do one whenever the guest does a vmlaunch/vmresume.
+        * If an outer hypervisor is doing the cache flush for us
+        * (VMENTER_L1D_FLUSH_NESTED_VM), we can safely pass that
+        * capability to the guest too, and if EPT is disabled we're not
+        * vulnerable.  Overall, only VMENTER_L1D_FLUSH_NEVER will
+        * require a nested hypervisor to do a flush of its own.
+        */
+       if (l1tf_vmx_mitigation != VMENTER_L1D_FLUSH_NEVER)
+               data |= ARCH_CAP_SKIP_VMENTRY_L1DFLUSH;
+
+       return data;
+}
+EXPORT_SYMBOL_GPL(kvm_get_arch_capabilities);
+
  static int kvm_get_msr_feature(struct kvm_msr_entry *msr)
  {
         switch (msr->index) {
-       case MSR_IA32_UCODE_REV:
         case MSR_IA32_ARCH_CAPABILITIES:
+               msr->data = kvm_get_arch_capabilities();
+               break;
+       case MSR_IA32_UCODE_REV:
                 rdmsrl_safe(msr->index, &msr->data);
                 break;
         default:
author	Paolo Bonzini <pbonzini@redhat.com>
	Sun, 5 Aug 2018 14:07:47 +0000 (16:07 +0200)
committer	Greg Kroah-Hartman <gregkh@linuxfoundation.org>
	Wed, 15 Aug 2018 16:13:00 +0000 (18:13 +0200)
Documentation/admin-guide/l1tf.rst		patch \| blob \| blame \| history
arch/x86/include/asm/kvm_host.h		patch \| blob \| blame \| history
arch/x86/kvm/vmx.c		patch \| blob \| blame \| history
arch/x86/kvm/x86.c		patch \| blob \| blame \| history