KVM: VMX: Manually recalc all MSR intercepts on userspace MSR filter change
author    Sean Christopherson <seanjc@google.com>
          Tue, 10 Jun 2025 22:57:23 +0000 (15:57 -0700)
committer Sean Christopherson <seanjc@google.com>
          Fri, 20 Jun 2025 20:07:28 +0000 (13:07 -0700)
On a userspace MSR filter change, recalculate all MSR intercepts using the
filter-agnostic logic instead of maintaining a "shadow copy" of KVM's
desired intercepts.  The shadow bitmaps add yet another point of failure,
are confusing (e.g. what does "handled specially" mean!?!?), an eyesore,
and a maintenance burden.

Given that KVM *must* be able to recalculate the correct intercepts at any
given time, and that MSR filter updates are not hot paths, there is zero
benefit to maintaining the shadow bitmaps.
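
A minimal, self-contained C sketch of the filter-agnostic pattern. The
names filter_allows(), set_intercept() and recalc_intercepts() are
invented stand-ins for kvm_msr_allowed(), vmx_set_intercept_for_msr()
and vmx_recalc_msr_intercepts(); this models the idea, not KVM's
internals:

  #include <stdbool.h>
  #include <stdio.h>

  /* Toy stand-in for kvm_msr_allowed(): does the userspace MSR filter
   * permit non-intercepted guest access to this MSR? */
  static bool filter_allows(unsigned int msr)
  {
      return msr != 0x48;    /* pretend userspace filters IA32_SPEC_CTRL */
  }

  /* Models vmx_set_intercept_for_msr(): the pass-through path re-checks
   * the filter, so recomputing from scratch is always filter-correct. */
  static void set_intercept(unsigned int msr, bool want_intercept)
  {
      if (want_intercept || !filter_allows(msr))
          printf("MSR %#x: intercept\n", msr);
      else
          printf("MSR %#x: pass through\n", msr);
  }

  /* Filter-agnostic recalculation: every decision is derived from
   * current vCPU/feature state, never from a saved shadow bitmap. */
  static void recalc_intercepts(bool guest_has_spec_ctrl)
  {
      set_intercept(0x10, false);                  /* IA32_TSC reads */
      set_intercept(0x48, !guest_has_spec_ctrl);   /* IA32_SPEC_CTRL */
  }

  int main(void)
  {
      recalc_intercepts(true);    /* vCPU creation */
      recalc_intercepts(true);    /* filter change: same single path */
      return 0;
  }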

Opportunistically switch from boot_cpu_has() to cpu_feature_enabled() as
appropriate.
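
For context, both predicates test the boot CPU's capability bits, but
cpu_feature_enabled() additionally folds in the compile-time
disabled-features mask, so a feature that is configured out compiles
down to a constant false. A toy model (the mask and names are
illustrative; the real kernel version also uses alternatives-based
patching):

  #include <stdbool.h>

  #define FEATURE_IBPB   0
  #define DISABLED_MASK  0ULL    /* set bit 0 to "configure out" IBPB */

  static unsigned long long boot_cpu_caps;    /* filled in at boot */

  /* Models boot_cpu_has(): a pure runtime bit test. */
  static bool has_feature(int bit)
  {
      return boot_cpu_caps & (1ULL << bit);
  }

  /* Models cpu_feature_enabled(): check the compile-time mask first so
   * the compiler can fold the whole call to false for disabled bits. */
  static bool feature_enabled(int bit)
  {
      if (DISABLED_MASK & (1ULL << bit))
          return false;
      return has_feature(bit);
  }

  int main(void)
  {
      boot_cpu_caps = 1ULL << FEATURE_IBPB;    /* "CPU" reports IBPB */
      return feature_enabled(FEATURE_IBPB) ? 0 : 1;
  }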

Link: https://lore.kernel.org/all/aCdPbZiYmtni4Bjs@google.com
Link: https://lore.kernel.org/all/20241126180253.GAZ0YNTdXH1UGeqsu6@fat_crate.local
Cc: Borislav Petkov <bp@alien8.de>
Reviewed-by: Chao Gao <chao.gao@intel.com>
Reviewed-by: Xin Li (Intel) <xin@zytor.com>
Reviewed-by: Dapeng Mi <dapeng1.mi@linux.intel.com>
Reviewed-by: Binbin Wu <binbin.wu@linux.intel.com>
Link: https://lore.kernel.org/r/20250610225737.156318-19-seanjc@google.com
Signed-off-by: Sean Christopherson <seanjc@google.com>
arch/x86/kvm/vmx/vmx.c
arch/x86/kvm/vmx/vmx.h

diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 90c45f35938ca620c4303b6df794a19892b744d8..0ca9f5de37cefaf92bd966c0e869a82c8fb92dc7 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -167,31 +167,6 @@ module_param(allow_smaller_maxphyaddr, bool, S_IRUGO);
        RTIT_STATUS_ERROR | RTIT_STATUS_STOPPED | \
        RTIT_STATUS_BYTECNT))
 
-/*
- * List of MSRs that can be directly passed to the guest.
- * In addition to these x2apic, PT and LBR MSRs are handled specially.
- */
-static u32 vmx_possible_passthrough_msrs[MAX_POSSIBLE_PASSTHROUGH_MSRS] = {
-       MSR_IA32_SPEC_CTRL,
-       MSR_IA32_PRED_CMD,
-       MSR_IA32_FLUSH_CMD,
-       MSR_IA32_TSC,
-#ifdef CONFIG_X86_64
-       MSR_FS_BASE,
-       MSR_GS_BASE,
-       MSR_KERNEL_GS_BASE,
-       MSR_IA32_XFD,
-       MSR_IA32_XFD_ERR,
-#endif
-       MSR_IA32_SYSENTER_CS,
-       MSR_IA32_SYSENTER_ESP,
-       MSR_IA32_SYSENTER_EIP,
-       MSR_CORE_C1_RES,
-       MSR_CORE_C3_RESIDENCY,
-       MSR_CORE_C6_RESIDENCY,
-       MSR_CORE_C7_RESIDENCY,
-};
-
 /*
  * These 2 parameters are used to config the controls for Pause-Loop Exiting:
  * ple_gap:    upper bound on the amount of time between two successive
@@ -674,40 +649,6 @@ static inline bool cpu_need_virtualize_apic_accesses(struct kvm_vcpu *vcpu)
        return flexpriority_enabled && lapic_in_kernel(vcpu);
 }
 
-static int vmx_get_passthrough_msr_slot(u32 msr)
-{
-       int i;
-
-       switch (msr) {
-       case 0x800 ... 0x8ff:
-               /* x2APIC MSRs. These are handled in vmx_update_msr_bitmap_x2apic() */
-               return -ENOENT;
-       case MSR_IA32_RTIT_STATUS:
-       case MSR_IA32_RTIT_OUTPUT_BASE:
-       case MSR_IA32_RTIT_OUTPUT_MASK:
-       case MSR_IA32_RTIT_CR3_MATCH:
-       case MSR_IA32_RTIT_ADDR0_A ... MSR_IA32_RTIT_ADDR3_B:
-               /* PT MSRs. These are handled in pt_update_intercept_for_msr() */
-       case MSR_LBR_SELECT:
-       case MSR_LBR_TOS:
-       case MSR_LBR_INFO_0 ... MSR_LBR_INFO_0 + 31:
-       case MSR_LBR_NHM_FROM ... MSR_LBR_NHM_FROM + 31:
-       case MSR_LBR_NHM_TO ... MSR_LBR_NHM_TO + 31:
-       case MSR_LBR_CORE_FROM ... MSR_LBR_CORE_FROM + 8:
-       case MSR_LBR_CORE_TO ... MSR_LBR_CORE_TO + 8:
-               /* LBR MSRs. These are handled in vmx_update_intercept_for_lbr_msrs() */
-               return -ENOENT;
-       }
-
-       for (i = 0; i < ARRAY_SIZE(vmx_possible_passthrough_msrs); i++) {
-               if (vmx_possible_passthrough_msrs[i] == msr)
-                       return i;
-       }
-
-       WARN(1, "Invalid MSR %x, please adapt vmx_possible_passthrough_msrs[]", msr);
-       return -ENOENT;
-}
-
 struct vmx_uret_msr *vmx_find_uret_msr(struct vcpu_vmx *vmx, u32 msr)
 {
        int i;
@@ -4026,25 +3967,12 @@ void vmx_disable_intercept_for_msr(struct kvm_vcpu *vcpu, u32 msr, int type)
 {
        struct vcpu_vmx *vmx = to_vmx(vcpu);
        unsigned long *msr_bitmap = vmx->vmcs01.msr_bitmap;
-       int idx;
 
        if (!cpu_has_vmx_msr_bitmap())
                return;
 
        vmx_msr_bitmap_l01_changed(vmx);
 
-       /*
-        * Mark the desired intercept state in shadow bitmap, this is needed
-        * for resync when the MSR filters change.
-        */
-       idx = vmx_get_passthrough_msr_slot(msr);
-       if (idx >= 0) {
-               if (type & MSR_TYPE_R)
-                       __clear_bit(idx, vmx->shadow_msr_intercept.read);
-               if (type & MSR_TYPE_W)
-                       __clear_bit(idx, vmx->shadow_msr_intercept.write);
-       }
-
        if ((type & MSR_TYPE_R) &&
            !kvm_msr_allowed(vcpu, msr, KVM_MSR_FILTER_READ)) {
                vmx_set_msr_bitmap_read(msr_bitmap, msr);
@@ -4068,25 +3996,12 @@ void vmx_enable_intercept_for_msr(struct kvm_vcpu *vcpu, u32 msr, int type)
 {
        struct vcpu_vmx *vmx = to_vmx(vcpu);
        unsigned long *msr_bitmap = vmx->vmcs01.msr_bitmap;
-       int idx;
 
        if (!cpu_has_vmx_msr_bitmap())
                return;
 
        vmx_msr_bitmap_l01_changed(vmx);
 
-       /*
-        * Mark the desired intercept state in shadow bitmap, this is needed
-        * for resync when the MSR filter changes.
-        */
-       idx = vmx_get_passthrough_msr_slot(msr);
-       if (idx >= 0) {
-               if (type & MSR_TYPE_R)
-                       __set_bit(idx, vmx->shadow_msr_intercept.read);
-               if (type & MSR_TYPE_W)
-                       __set_bit(idx, vmx->shadow_msr_intercept.write);
-       }
-
        if (type & MSR_TYPE_R)
                vmx_set_msr_bitmap_read(msr_bitmap, msr);
 
@@ -4170,35 +4085,58 @@ void pt_update_intercept_for_msr(struct kvm_vcpu *vcpu)
        }
 }
 
-void vmx_msr_filter_changed(struct kvm_vcpu *vcpu)
+static void vmx_recalc_msr_intercepts(struct kvm_vcpu *vcpu)
 {
-       struct vcpu_vmx *vmx = to_vmx(vcpu);
-       u32 i;
-
        if (!cpu_has_vmx_msr_bitmap())
                return;
 
-       /*
-        * Redo intercept permissions for MSRs that KVM is passing through to
-        * the guest.  Disabling interception will check the new MSR filter and
-        * ensure that KVM enables interception if usersepace wants to filter
-        * the MSR.  MSRs that KVM is already intercepting don't need to be
-        * refreshed since KVM is going to intercept them regardless of what
-        * userspace wants.
-        */
-       for (i = 0; i < ARRAY_SIZE(vmx_possible_passthrough_msrs); i++) {
-               u32 msr = vmx_possible_passthrough_msrs[i];
-
-               if (!test_bit(i, vmx->shadow_msr_intercept.read))
-                       vmx_disable_intercept_for_msr(vcpu, msr, MSR_TYPE_R);
-
-               if (!test_bit(i, vmx->shadow_msr_intercept.write))
-                       vmx_disable_intercept_for_msr(vcpu, msr, MSR_TYPE_W);
+       vmx_disable_intercept_for_msr(vcpu, MSR_IA32_TSC, MSR_TYPE_R);
+#ifdef CONFIG_X86_64
+       vmx_disable_intercept_for_msr(vcpu, MSR_FS_BASE, MSR_TYPE_RW);
+       vmx_disable_intercept_for_msr(vcpu, MSR_GS_BASE, MSR_TYPE_RW);
+       vmx_disable_intercept_for_msr(vcpu, MSR_KERNEL_GS_BASE, MSR_TYPE_RW);
+#endif
+       vmx_disable_intercept_for_msr(vcpu, MSR_IA32_SYSENTER_CS, MSR_TYPE_RW);
+       vmx_disable_intercept_for_msr(vcpu, MSR_IA32_SYSENTER_ESP, MSR_TYPE_RW);
+       vmx_disable_intercept_for_msr(vcpu, MSR_IA32_SYSENTER_EIP, MSR_TYPE_RW);
+       if (kvm_cstate_in_guest(vcpu->kvm)) {
+               vmx_disable_intercept_for_msr(vcpu, MSR_CORE_C1_RES, MSR_TYPE_R);
+               vmx_disable_intercept_for_msr(vcpu, MSR_CORE_C3_RESIDENCY, MSR_TYPE_R);
+               vmx_disable_intercept_for_msr(vcpu, MSR_CORE_C6_RESIDENCY, MSR_TYPE_R);
+               vmx_disable_intercept_for_msr(vcpu, MSR_CORE_C7_RESIDENCY, MSR_TYPE_R);
        }
 
        /* PT MSRs can be passed through iff PT is exposed to the guest. */
        if (vmx_pt_mode_is_host_guest())
                pt_update_intercept_for_msr(vcpu);
+
+       if (vcpu->arch.xfd_no_write_intercept)
+               vmx_disable_intercept_for_msr(vcpu, MSR_IA32_XFD, MSR_TYPE_RW);
+
+       vmx_set_intercept_for_msr(vcpu, MSR_IA32_SPEC_CTRL, MSR_TYPE_RW,
+                                 !to_vmx(vcpu)->spec_ctrl);
+
+       if (kvm_cpu_cap_has(X86_FEATURE_XFD))
+               vmx_set_intercept_for_msr(vcpu, MSR_IA32_XFD_ERR, MSR_TYPE_R,
+                                         !guest_cpu_cap_has(vcpu, X86_FEATURE_XFD));
+
+       if (cpu_feature_enabled(X86_FEATURE_IBPB))
+               vmx_set_intercept_for_msr(vcpu, MSR_IA32_PRED_CMD, MSR_TYPE_W,
+                                         !guest_has_pred_cmd_msr(vcpu));
+
+       if (cpu_feature_enabled(X86_FEATURE_FLUSH_L1D))
+               vmx_set_intercept_for_msr(vcpu, MSR_IA32_FLUSH_CMD, MSR_TYPE_W,
+                                         !guest_cpu_cap_has(vcpu, X86_FEATURE_FLUSH_L1D));
+
+       /*
+        * x2APIC and LBR MSR intercepts are modified on-demand and cannot be
+        * filtered by userspace.
+        */
+}
+
+void vmx_msr_filter_changed(struct kvm_vcpu *vcpu)
+{
+       vmx_recalc_msr_intercepts(vcpu);
 }
 
 static int vmx_deliver_nested_posted_interrupt(struct kvm_vcpu *vcpu,
@@ -7554,26 +7492,6 @@ int vmx_vcpu_create(struct kvm_vcpu *vcpu)
                evmcs->hv_enlightenments_control.msr_bitmap = 1;
        }
 
-       /* The MSR bitmap starts with all ones */
-       bitmap_fill(vmx->shadow_msr_intercept.read, MAX_POSSIBLE_PASSTHROUGH_MSRS);
-       bitmap_fill(vmx->shadow_msr_intercept.write, MAX_POSSIBLE_PASSTHROUGH_MSRS);
-
-       vmx_disable_intercept_for_msr(vcpu, MSR_IA32_TSC, MSR_TYPE_R);
-#ifdef CONFIG_X86_64
-       vmx_disable_intercept_for_msr(vcpu, MSR_FS_BASE, MSR_TYPE_RW);
-       vmx_disable_intercept_for_msr(vcpu, MSR_GS_BASE, MSR_TYPE_RW);
-       vmx_disable_intercept_for_msr(vcpu, MSR_KERNEL_GS_BASE, MSR_TYPE_RW);
-#endif
-       vmx_disable_intercept_for_msr(vcpu, MSR_IA32_SYSENTER_CS, MSR_TYPE_RW);
-       vmx_disable_intercept_for_msr(vcpu, MSR_IA32_SYSENTER_ESP, MSR_TYPE_RW);
-       vmx_disable_intercept_for_msr(vcpu, MSR_IA32_SYSENTER_EIP, MSR_TYPE_RW);
-       if (kvm_cstate_in_guest(vcpu->kvm)) {
-               vmx_disable_intercept_for_msr(vcpu, MSR_CORE_C1_RES, MSR_TYPE_R);
-               vmx_disable_intercept_for_msr(vcpu, MSR_CORE_C3_RESIDENCY, MSR_TYPE_R);
-               vmx_disable_intercept_for_msr(vcpu, MSR_CORE_C6_RESIDENCY, MSR_TYPE_R);
-               vmx_disable_intercept_for_msr(vcpu, MSR_CORE_C7_RESIDENCY, MSR_TYPE_R);
-       }
-
        vmx->loaded_vmcs = &vmx->vmcs01;
 
        if (cpu_need_virtualize_apic_accesses(vcpu)) {
@@ -7860,18 +7778,6 @@ void vmx_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu)
                }
        }
 
-       if (kvm_cpu_cap_has(X86_FEATURE_XFD))
-               vmx_set_intercept_for_msr(vcpu, MSR_IA32_XFD_ERR, MSR_TYPE_R,
-                                         !guest_cpu_cap_has(vcpu, X86_FEATURE_XFD));
-
-       if (boot_cpu_has(X86_FEATURE_IBPB))
-               vmx_set_intercept_for_msr(vcpu, MSR_IA32_PRED_CMD, MSR_TYPE_W,
-                                         !guest_has_pred_cmd_msr(vcpu));
-
-       if (boot_cpu_has(X86_FEATURE_FLUSH_L1D))
-               vmx_set_intercept_for_msr(vcpu, MSR_IA32_FLUSH_CMD, MSR_TYPE_W,
-                                         !guest_cpu_cap_has(vcpu, X86_FEATURE_FLUSH_L1D));
-
        set_cr4_guest_host_mask(vmx);
 
        vmx_write_encls_bitmap(vcpu, NULL);
@@ -7887,6 +7793,9 @@ void vmx_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu)
                vmx->msr_ia32_feature_control_valid_bits &=
                        ~FEAT_CTL_SGX_LC_ENABLED;
 
+       /* Recalc MSR interception to account for feature changes. */
+       vmx_recalc_msr_intercepts(vcpu);
+
        /* Refresh #PF interception to account for MAXPHYADDR changes. */
        vmx_update_exception_bitmap(vcpu);
 }
diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h
index 32053ece7797b952f232bcecc6c9c40cbb777e1d..5a87be8854f346f52310be2e087e24b09d416157 100644
--- a/arch/x86/kvm/vmx/vmx.h
+++ b/arch/x86/kvm/vmx/vmx.h
@@ -294,13 +294,6 @@ struct vcpu_vmx {
        struct pt_desc pt_desc;
        struct lbr_desc lbr_desc;
 
-       /* Save desired MSR intercept (read: pass-through) state */
-#define MAX_POSSIBLE_PASSTHROUGH_MSRS  16
-       struct {
-               DECLARE_BITMAP(read, MAX_POSSIBLE_PASSTHROUGH_MSRS);
-               DECLARE_BITMAP(write, MAX_POSSIBLE_PASSTHROUGH_MSRS);
-       } shadow_msr_intercept;
-
        /* ve_info must be page aligned. */
        struct vmx_ve_information *ve_info;
 };