From 2904df6692f429853cf99b4b47c8592b2f49edaa Mon Sep 17 00:00:00 2001
From: Dapeng Mi
Date: Fri, 5 Dec 2025 16:16:58 -0800
Subject: [PATCH] KVM: x86/pmu: Disable interception of select PMU MSRs for
 mediated vPMUs

For vCPUs with a mediated vPMU, disable interception of counter MSRs for
PMCs that are exposed to the guest, and for GLOBAL_CTRL and related MSRs
if they are fully supported according to the vCPU model, i.e. if the MSRs
and all bits supported by hardware exist from the guest's point of view.

Do NOT passthrough event selector or fixed counter control MSRs, so that
KVM can enforce userspace-defined event filters, e.g. to prevent use of
AnyThread events (which is unfortunately a setting in the fixed counter
control MSR).

Defer support for nested passthrough of mediated PMU MSRs to the future,
as the logic for nested MSR interception is unfortunately vendor specific.

Suggested-by: Sean Christopherson
Co-developed-by: Mingwei Zhang
Signed-off-by: Mingwei Zhang
Co-developed-by: Sandipan Das
Signed-off-by: Sandipan Das
Signed-off-by: Dapeng Mi
[sean: squash patches, massage changelog, refresh VMX MSRs on filter change]
Tested-by: Xudong Hao
Tested-by: Manali Shukla
Link: https://patch.msgid.link/20251206001720.468579-23-seanjc@google.com
Signed-off-by: Sean Christopherson
---
 arch/x86/kvm/pmu.c           | 39 ++++++++++++++++--------
 arch/x86/kvm/pmu.h           |  1 +
 arch/x86/kvm/svm/svm.c       | 36 ++++++++++++++++++++++
 arch/x86/kvm/vmx/pmu_intel.c | 13 --------
 arch/x86/kvm/vmx/pmu_intel.h | 15 +++++++++
 arch/x86/kvm/vmx/vmx.c       | 59 +++++++++++++++++++++++++++++-------
 6 files changed, 127 insertions(+), 36 deletions(-)

diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c
index 4b896cbb3d536..3e048c170b975 100644
--- a/arch/x86/kvm/pmu.c
+++ b/arch/x86/kvm/pmu.c
@@ -717,13 +717,33 @@ int kvm_pmu_rdpmc(struct kvm_vcpu *vcpu, unsigned idx, u64 *data)
 	return 0;
 }
 
-bool kvm_need_rdpmc_intercept(struct kvm_vcpu *vcpu)
+static bool kvm_need_any_pmc_intercept(struct kvm_vcpu *vcpu)
 {
 	struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
 
 	if (!kvm_vcpu_has_mediated_pmu(vcpu))
 		return true;
 
+	/*
+	 * Note! Check *host* PMU capabilities, not KVM's PMU capabilities, as
+	 * KVM's capabilities are constrained based on KVM support, i.e. KVM's
+	 * capabilities themselves may be a subset of hardware capabilities.
+	 */
+	return pmu->nr_arch_gp_counters != kvm_host_pmu.num_counters_gp ||
+	       pmu->nr_arch_fixed_counters != kvm_host_pmu.num_counters_fixed;
+}
+
+bool kvm_need_perf_global_ctrl_intercept(struct kvm_vcpu *vcpu)
+{
+	return kvm_need_any_pmc_intercept(vcpu) ||
+	       !kvm_pmu_has_perf_global_ctrl(vcpu_to_pmu(vcpu));
+}
+EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_need_perf_global_ctrl_intercept);
+
+bool kvm_need_rdpmc_intercept(struct kvm_vcpu *vcpu)
+{
+	struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
+
 	/*
 	 * VMware allows access to these Pseduo-PMCs even when read via RDPMC
 	 * in Ring3 when CR4.PCE=0.
@@ -731,13 +751,7 @@ bool kvm_need_rdpmc_intercept(struct kvm_vcpu *vcpu)
 	if (enable_vmware_backdoor)
 		return true;
 
-	/*
-	 * Note! Check *host* PMU capabilities, not KVM's PMU capabilities, as
-	 * KVM's capabilities are constrained based on KVM support, i.e. KVM's
-	 * capabilities themselves may be a subset of hardware capabilities.
-	 */
-	return pmu->nr_arch_gp_counters != kvm_host_pmu.num_counters_gp ||
-	       pmu->nr_arch_fixed_counters != kvm_host_pmu.num_counters_fixed ||
+	return kvm_need_any_pmc_intercept(vcpu) ||
 	       pmu->counter_bitmask[KVM_PMC_GP] != (BIT_ULL(kvm_host_pmu.bit_width_gp) - 1) ||
 	       pmu->counter_bitmask[KVM_PMC_FIXED] != (BIT_ULL(kvm_host_pmu.bit_width_fixed) - 1);
 }
@@ -934,11 +948,12 @@ void kvm_pmu_refresh(struct kvm_vcpu *vcpu)
 	 * in the global controls). Emulate that behavior when refreshing the
 	 * PMU so that userspace doesn't need to manually set PERF_GLOBAL_CTRL.
 	 */
-	if (kvm_pmu_has_perf_global_ctrl(pmu) && pmu->nr_arch_gp_counters) {
+	if (pmu->nr_arch_gp_counters &&
+	    (kvm_pmu_has_perf_global_ctrl(pmu) || kvm_vcpu_has_mediated_pmu(vcpu)))
 		pmu->global_ctrl = GENMASK_ULL(pmu->nr_arch_gp_counters - 1, 0);
-		if (kvm_vcpu_has_mediated_pmu(vcpu))
-			kvm_pmu_call(write_global_ctrl)(pmu->global_ctrl);
-	}
+
+	if (kvm_vcpu_has_mediated_pmu(vcpu))
+		kvm_pmu_call(write_global_ctrl)(pmu->global_ctrl);
 
 	bitmap_set(pmu->all_valid_pmc_idx, 0, pmu->nr_arch_gp_counters);
 	bitmap_set(pmu->all_valid_pmc_idx, KVM_FIXED_PMC_BASE_IDX,
diff --git a/arch/x86/kvm/pmu.h b/arch/x86/kvm/pmu.h
index 2ff469334c1af..356b08e92bc95 100644
--- a/arch/x86/kvm/pmu.h
+++ b/arch/x86/kvm/pmu.h
@@ -239,6 +239,7 @@ void kvm_pmu_instruction_retired(struct kvm_vcpu *vcpu);
 void kvm_pmu_branch_retired(struct kvm_vcpu *vcpu);
 
 bool is_vmware_backdoor_pmc(u32 pmc_idx);
+bool kvm_need_perf_global_ctrl_intercept(struct kvm_vcpu *vcpu);
 bool kvm_need_rdpmc_intercept(struct kvm_vcpu *vcpu);
 
 extern struct kvm_pmu_ops intel_pmu_ops;
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index ef43360b9282c..dca45f5151f9e 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -730,6 +730,40 @@ void svm_vcpu_free_msrpm(void *msrpm)
 	__free_pages(virt_to_page(msrpm), get_order(MSRPM_SIZE));
 }
 
+static void svm_recalc_pmu_msr_intercepts(struct kvm_vcpu *vcpu)
+{
+	bool intercept = !kvm_vcpu_has_mediated_pmu(vcpu);
+	struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
+	int i;
+
+	if (!enable_mediated_pmu)
+		return;
+
+	/* Legacy counters are always available for AMD CPUs with a PMU. */
+	for (i = 0; i < min(pmu->nr_arch_gp_counters, AMD64_NUM_COUNTERS); i++)
+		svm_set_intercept_for_msr(vcpu, MSR_K7_PERFCTR0 + i,
+					  MSR_TYPE_RW, intercept);
+
+	intercept |= !guest_cpu_cap_has(vcpu, X86_FEATURE_PERFCTR_CORE);
+	for (i = 0; i < pmu->nr_arch_gp_counters; i++)
+		svm_set_intercept_for_msr(vcpu, MSR_F15H_PERF_CTR + 2 * i,
+					  MSR_TYPE_RW, intercept);
+
+	for ( ; i < kvm_pmu_cap.num_counters_gp; i++)
+		svm_enable_intercept_for_msr(vcpu, MSR_F15H_PERF_CTR + 2 * i,
+					     MSR_TYPE_RW);
+
+	intercept = kvm_need_perf_global_ctrl_intercept(vcpu);
+	svm_set_intercept_for_msr(vcpu, MSR_AMD64_PERF_CNTR_GLOBAL_CTL,
+				  MSR_TYPE_RW, intercept);
+	svm_set_intercept_for_msr(vcpu, MSR_AMD64_PERF_CNTR_GLOBAL_STATUS,
+				  MSR_TYPE_RW, intercept);
+	svm_set_intercept_for_msr(vcpu, MSR_AMD64_PERF_CNTR_GLOBAL_STATUS_CLR,
+				  MSR_TYPE_RW, intercept);
+	svm_set_intercept_for_msr(vcpu, MSR_AMD64_PERF_CNTR_GLOBAL_STATUS_SET,
+				  MSR_TYPE_RW, intercept);
+}
+
 static void svm_recalc_msr_intercepts(struct kvm_vcpu *vcpu)
 {
 	struct vcpu_svm *svm = to_svm(vcpu);
@@ -798,6 +832,8 @@ static void svm_recalc_msr_intercepts(struct kvm_vcpu *vcpu)
 	if (sev_es_guest(vcpu->kvm))
 		sev_es_recalc_msr_intercepts(vcpu);
 
+	svm_recalc_pmu_msr_intercepts(vcpu);
+
 	/*
 	 * x2APIC intercepts are modified on-demand and cannot be filtered by
 	 * userspace.
diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c
index dbab7cca7a622..820da47454d79 100644
--- a/arch/x86/kvm/vmx/pmu_intel.c
+++ b/arch/x86/kvm/vmx/pmu_intel.c
@@ -128,19 +128,6 @@ static struct kvm_pmc *intel_rdpmc_ecx_to_pmc(struct kvm_vcpu *vcpu,
 	return &counters[array_index_nospec(idx, num_counters)];
 }
 
-static inline u64 vcpu_get_perf_capabilities(struct kvm_vcpu *vcpu)
-{
-	if (!guest_cpu_cap_has(vcpu, X86_FEATURE_PDCM))
-		return 0;
-
-	return vcpu->arch.perf_capabilities;
-}
-
-static inline bool fw_writes_is_enabled(struct kvm_vcpu *vcpu)
-{
-	return (vcpu_get_perf_capabilities(vcpu) & PERF_CAP_FW_WRITES) != 0;
-}
-
 static inline struct kvm_pmc *get_fw_gp_pmc(struct kvm_pmu *pmu, u32 msr)
 {
 	if (!fw_writes_is_enabled(pmu_to_vcpu(pmu)))
diff --git a/arch/x86/kvm/vmx/pmu_intel.h b/arch/x86/kvm/vmx/pmu_intel.h
index 5620d0882cdc6..5d9357640aa18 100644
--- a/arch/x86/kvm/vmx/pmu_intel.h
+++ b/arch/x86/kvm/vmx/pmu_intel.h
@@ -4,6 +4,21 @@
 
 #include <linux/kvm_host.h>
 
+#include "cpuid.h"
+
+static inline u64 vcpu_get_perf_capabilities(struct kvm_vcpu *vcpu)
+{
+	if (!guest_cpu_cap_has(vcpu, X86_FEATURE_PDCM))
+		return 0;
+
+	return vcpu->arch.perf_capabilities;
+}
+
+static inline bool fw_writes_is_enabled(struct kvm_vcpu *vcpu)
+{
+	return (vcpu_get_perf_capabilities(vcpu) & PERF_CAP_FW_WRITES) != 0;
+}
+
 bool intel_pmu_lbr_is_enabled(struct kvm_vcpu *vcpu);
 int intel_pmu_create_guest_lbr_event(struct kvm_vcpu *vcpu);
 
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 72b92cea9d727..f0a20ff2a941a 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -4228,6 +4228,53 @@ void pt_update_intercept_for_msr(struct kvm_vcpu *vcpu)
 	}
 }
 
+static void vmx_recalc_pmu_msr_intercepts(struct kvm_vcpu *vcpu)
+{
+	bool has_mediated_pmu = kvm_vcpu_has_mediated_pmu(vcpu);
+	struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
+	struct vcpu_vmx *vmx = to_vmx(vcpu);
+	bool intercept = !has_mediated_pmu;
+	int i;
+
+	if (!enable_mediated_pmu)
+		return;
+
+	vm_entry_controls_changebit(vmx, VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL,
+				    has_mediated_pmu);
+
+	vm_exit_controls_changebit(vmx, VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL |
+				   VM_EXIT_SAVE_IA32_PERF_GLOBAL_CTRL,
+				   has_mediated_pmu);
+
+	for (i = 0; i < pmu->nr_arch_gp_counters; i++) {
+		vmx_set_intercept_for_msr(vcpu, MSR_IA32_PERFCTR0 + i,
+					  MSR_TYPE_RW, intercept);
+		vmx_set_intercept_for_msr(vcpu, MSR_IA32_PMC0 + i, MSR_TYPE_RW,
+					  intercept || !fw_writes_is_enabled(vcpu));
+	}
+	for ( ; i < kvm_pmu_cap.num_counters_gp; i++) {
+		vmx_set_intercept_for_msr(vcpu, MSR_IA32_PERFCTR0 + i,
+					  MSR_TYPE_RW, true);
+		vmx_set_intercept_for_msr(vcpu, MSR_IA32_PMC0 + i,
+					  MSR_TYPE_RW, true);
+	}
+
+	for (i = 0; i < pmu->nr_arch_fixed_counters; i++)
+		vmx_set_intercept_for_msr(vcpu, MSR_CORE_PERF_FIXED_CTR0 + i,
+					  MSR_TYPE_RW, intercept);
+	for ( ; i < kvm_pmu_cap.num_counters_fixed; i++)
+		vmx_set_intercept_for_msr(vcpu, MSR_CORE_PERF_FIXED_CTR0 + i,
+					  MSR_TYPE_RW, true);
+
+	intercept = kvm_need_perf_global_ctrl_intercept(vcpu);
+	vmx_set_intercept_for_msr(vcpu, MSR_CORE_PERF_GLOBAL_STATUS,
+				  MSR_TYPE_RW, intercept);
+	vmx_set_intercept_for_msr(vcpu, MSR_CORE_PERF_GLOBAL_CTRL,
+				  MSR_TYPE_RW, intercept);
+	vmx_set_intercept_for_msr(vcpu, MSR_CORE_PERF_GLOBAL_OVF_CTRL,
+				  MSR_TYPE_RW, intercept);
+}
+
 static void vmx_recalc_msr_intercepts(struct kvm_vcpu *vcpu)
 {
 	bool intercept;
@@ -4294,17 +4341,7 @@ static void vmx_recalc_msr_intercepts(struct kvm_vcpu *vcpu)
 		vmx_set_intercept_for_msr(vcpu, MSR_IA32_S_CET, MSR_TYPE_RW, intercept);
 	}
 
-	if (enable_mediated_pmu) {
-		bool is_mediated_pmu = kvm_vcpu_has_mediated_pmu(vcpu);
-		struct vcpu_vmx *vmx = to_vmx(vcpu);
-
-		vm_entry_controls_changebit(vmx,
-			VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL, is_mediated_pmu);
-
-		vm_exit_controls_changebit(vmx,
-			VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL |
-			VM_EXIT_SAVE_IA32_PERF_GLOBAL_CTRL, is_mediated_pmu);
-	}
+	vmx_recalc_pmu_msr_intercepts(vcpu);
 
 	/*
 	 * x2APIC and LBR MSR intercepts are modified on-demand and cannot be
-- 
2.47.3