git.ipfire.org Git - thirdparty/linux.git/commitdiff
KVM: VMX: Bundle all L1 data cache flush mitigation code together
author    Sean Christopherson <seanjc@google.com>
          Thu, 13 Nov 2025 23:37:44 +0000 (15:37 -0800)
committer Sean Christopherson <seanjc@google.com>
          Wed, 19 Nov 2025 00:22:44 +0000 (16:22 -0800)
Move vmx_l1d_flush(), vmx_cleanup_l1d_flush(), and the vmentry_l1d_flush
param code up in vmx.c so that all of the L1 data cache flushing code is
bundled together.  This will allow conditioning the mitigation code on
CONFIG_CPU_MITIGATIONS=y with minimal #ifdefs.

No functional change intended.
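
For illustration only (not part of this patch): with the helpers adjacent, a
follow-up can wrap the whole region in a single #ifdef and supply trivial
stubs for kernels built with CONFIG_CPU_MITIGATIONS=n. The sketch below is a
rough outline under that assumption; the stub bodies are guesses, not the
actual follow-up change.

    #ifdef CONFIG_CPU_MITIGATIONS
    /*
     * vmx_setup_l1d_flush(), vmx_cleanup_l1d_flush(), vmx_l1d_flush(), and
     * the vmentry_l1d_flush module param all sit here in one contiguous
     * block, so a single #ifdef covers the entire mitigation.
     */
    #else
    /* Hypothetical stubs so callers compile unchanged when mitigations are off. */
    static int vmx_setup_l1d_flush(enum vmx_l1d_flush_state l1tf) { return 0; }
    static void vmx_cleanup_l1d_flush(void) { }
    static void vmx_l1d_flush(struct kvm_vcpu *vcpu) { }
    #endif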

Reviewed-by: Brendan Jackman <jackmanb@google.com>
Reviewed-by: Pawan Gupta <pawan.kumar.gupta@linux.intel.com>
Link: https://patch.msgid.link/20251113233746.1703361-8-seanjc@google.com
Signed-off-by: Sean Christopherson <seanjc@google.com>
arch/x86/kvm/vmx/vmx.c

index 4bb5408e68bffbf3334fe04e0c1326deef32a7ec..d4de7ca2bc4be59e235fce691bf89e655421abaf 100644
@@ -302,6 +302,16 @@ static int vmx_setup_l1d_flush(enum vmx_l1d_flush_state l1tf)
        return 0;
 }
 
+static void vmx_cleanup_l1d_flush(void)
+{
+       if (vmx_l1d_flush_pages) {
+               free_pages((unsigned long)vmx_l1d_flush_pages, L1D_CACHE_ORDER);
+               vmx_l1d_flush_pages = NULL;
+       }
+       /* Restore state so sysfs ignores VMX */
+       l1tf_vmx_mitigation = VMENTER_L1D_FLUSH_AUTO;
+}
+
 static int vmentry_l1d_flush_parse(const char *s)
 {
        unsigned int i;
@@ -352,6 +362,83 @@ static int vmentry_l1d_flush_get(char *s, const struct kernel_param *kp)
        return sysfs_emit(s, "%s\n", vmentry_l1d_param[l1tf_vmx_mitigation].option);
 }
 
+/*
+ * Software based L1D cache flush which is used when microcode providing
+ * the cache control MSR is not loaded.
+ *
+ * The L1D cache is 32 KiB on Nehalem and later microarchitectures, but to
+ * flush it is required to read in 64 KiB because the replacement algorithm
+ * is not exactly LRU. This could be sized at runtime via topology
+ * information but as all relevant affected CPUs have 32KiB L1D cache size
+ * there is no point in doing so.
+ */
+static noinstr void vmx_l1d_flush(struct kvm_vcpu *vcpu)
+{
+       int size = PAGE_SIZE << L1D_CACHE_ORDER;
+
+       /*
+        * This code is only executed when the flush mode is 'cond' or
+        * 'always'
+        */
+       if (static_branch_likely(&vmx_l1d_flush_cond)) {
+               bool flush_l1d;
+
+               /*
+                * Clear the per-vcpu flush bit, it gets set again if the vCPU
+                * is reloaded, i.e. if the vCPU is scheduled out or if KVM
+                * exits to userspace, or if KVM reaches one of the unsafe
+                * VMEXIT handlers, e.g. if KVM calls into the emulator.
+                */
+               flush_l1d = vcpu->arch.l1tf_flush_l1d;
+               vcpu->arch.l1tf_flush_l1d = false;
+
+               /*
+                * Clear the per-cpu flush bit, it gets set again from
+                * the interrupt handlers.
+                */
+               flush_l1d |= kvm_get_cpu_l1tf_flush_l1d();
+               kvm_clear_cpu_l1tf_flush_l1d();
+
+               if (!flush_l1d)
+                       return;
+       }
+
+       vcpu->stat.l1d_flush++;
+
+       if (static_cpu_has(X86_FEATURE_FLUSH_L1D)) {
+               native_wrmsrq(MSR_IA32_FLUSH_CMD, L1D_FLUSH);
+               return;
+       }
+
+       asm volatile(
+               /* First ensure the pages are in the TLB */
+               "xorl   %%eax, %%eax\n"
+               ".Lpopulate_tlb:\n\t"
+               "movzbl (%[flush_pages], %%" _ASM_AX "), %%ecx\n\t"
+               "addl   $4096, %%eax\n\t"
+               "cmpl   %%eax, %[size]\n\t"
+               "jne    .Lpopulate_tlb\n\t"
+               "xorl   %%eax, %%eax\n\t"
+               "cpuid\n\t"
+               /* Now fill the cache */
+               "xorl   %%eax, %%eax\n"
+               ".Lfill_cache:\n"
+               "movzbl (%[flush_pages], %%" _ASM_AX "), %%ecx\n\t"
+               "addl   $64, %%eax\n\t"
+               "cmpl   %%eax, %[size]\n\t"
+               "jne    .Lfill_cache\n\t"
+               "lfence\n"
+               :: [flush_pages] "r" (vmx_l1d_flush_pages),
+                   [size] "r" (size)
+               : "eax", "ebx", "ecx", "edx");
+}
+
+static const struct kernel_param_ops vmentry_l1d_flush_ops = {
+       .set = vmentry_l1d_flush_set,
+       .get = vmentry_l1d_flush_get,
+};
+module_param_cb(vmentry_l1d_flush, &vmentry_l1d_flush_ops, NULL, 0644);
+
 static __always_inline void vmx_disable_fb_clear(struct vcpu_vmx *vmx)
 {
        u64 msr;
@@ -404,12 +491,6 @@ static void vmx_update_fb_clear_dis(struct kvm_vcpu *vcpu, struct vcpu_vmx *vmx)
                vmx->disable_fb_clear = false;
 }
 
-static const struct kernel_param_ops vmentry_l1d_flush_ops = {
-       .set = vmentry_l1d_flush_set,
-       .get = vmentry_l1d_flush_get,
-};
-module_param_cb(vmentry_l1d_flush, &vmentry_l1d_flush_ops, NULL, 0644);
-
 static u32 vmx_segment_access_rights(struct kvm_segment *var);
 
 void vmx_vmexit(void);
@@ -6646,77 +6727,6 @@ int vmx_handle_exit(struct kvm_vcpu *vcpu, fastpath_t exit_fastpath)
        return ret;
 }
 
-/*
- * Software based L1D cache flush which is used when microcode providing
- * the cache control MSR is not loaded.
- *
- * The L1D cache is 32 KiB on Nehalem and later microarchitectures, but to
- * flush it is required to read in 64 KiB because the replacement algorithm
- * is not exactly LRU. This could be sized at runtime via topology
- * information but as all relevant affected CPUs have 32KiB L1D cache size
- * there is no point in doing so.
- */
-static noinstr void vmx_l1d_flush(struct kvm_vcpu *vcpu)
-{
-       int size = PAGE_SIZE << L1D_CACHE_ORDER;
-
-       /*
-        * This code is only executed when the flush mode is 'cond' or
-        * 'always'
-        */
-       if (static_branch_likely(&vmx_l1d_flush_cond)) {
-               bool flush_l1d;
-
-               /*
-                * Clear the per-vcpu flush bit, it gets set again if the vCPU
-                * is reloaded, i.e. if the vCPU is scheduled out or if KVM
-                * exits to userspace, or if KVM reaches one of the unsafe
-                * VMEXIT handlers, e.g. if KVM calls into the emulator.
-                */
-               flush_l1d = vcpu->arch.l1tf_flush_l1d;
-               vcpu->arch.l1tf_flush_l1d = false;
-
-               /*
-                * Clear the per-cpu flush bit, it gets set again from
-                * the interrupt handlers.
-                */
-               flush_l1d |= kvm_get_cpu_l1tf_flush_l1d();
-               kvm_clear_cpu_l1tf_flush_l1d();
-
-               if (!flush_l1d)
-                       return;
-       }
-
-       vcpu->stat.l1d_flush++;
-
-       if (static_cpu_has(X86_FEATURE_FLUSH_L1D)) {
-               native_wrmsrq(MSR_IA32_FLUSH_CMD, L1D_FLUSH);
-               return;
-       }
-
-       asm volatile(
-               /* First ensure the pages are in the TLB */
-               "xorl   %%eax, %%eax\n"
-               ".Lpopulate_tlb:\n\t"
-               "movzbl (%[flush_pages], %%" _ASM_AX "), %%ecx\n\t"
-               "addl   $4096, %%eax\n\t"
-               "cmpl   %%eax, %[size]\n\t"
-               "jne    .Lpopulate_tlb\n\t"
-               "xorl   %%eax, %%eax\n\t"
-               "cpuid\n\t"
-               /* Now fill the cache */
-               "xorl   %%eax, %%eax\n"
-               ".Lfill_cache:\n"
-               "movzbl (%[flush_pages], %%" _ASM_AX "), %%ecx\n\t"
-               "addl   $64, %%eax\n\t"
-               "cmpl   %%eax, %[size]\n\t"
-               "jne    .Lfill_cache\n\t"
-               "lfence\n"
-               :: [flush_pages] "r" (vmx_l1d_flush_pages),
-                   [size] "r" (size)
-               : "eax", "ebx", "ecx", "edx");
-}
-
 void vmx_update_cr8_intercept(struct kvm_vcpu *vcpu, int tpr, int irr)
 {
        struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
@@ -8651,16 +8661,6 @@ __init int vmx_hardware_setup(void)
        return r;
 }
 
-static void vmx_cleanup_l1d_flush(void)
-{
-       if (vmx_l1d_flush_pages) {
-               free_pages((unsigned long)vmx_l1d_flush_pages, L1D_CACHE_ORDER);
-               vmx_l1d_flush_pages = NULL;
-       }
-       /* Restore state so sysfs ignores VMX */
-       l1tf_vmx_mitigation = VMENTER_L1D_FLUSH_AUTO;
-}
-
 void vmx_exit(void)
 {
        allow_smaller_maxphyaddr = false;