KVM: x86/pmu: Load/save GLOBAL_CTRL via entry/exit fields for mediated PMU
author    Dapeng Mi <dapeng1.mi@linux.intel.com>
          Sat, 6 Dec 2025 00:16:57 +0000 (16:16 -0800)
committer Sean Christopherson <seanjc@google.com>
          Thu, 8 Jan 2026 19:52:07 +0000 (11:52 -0800)
When running a guest with a mediated PMU, context switch PERF_GLOBAL_CTRL
via the dedicated VMCS fields for both host and guest.  For the host,
always zero GLOBAL_CTRL on exit as the guest's state will still be loaded
in hardware (KVM will context switch the bulk of PMU state outside of the
inner run loop).  For the guest, use the dedicated fields to atomically
load and save PERF_GLOBAL_CTRL on all entry/exits.
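
A condensed sketch of that host/guest split, pulled from the
vmx_set_constant_host_state() and vmx_recalc_msr_intercepts() hunks below:

        /* Host: guest PMU state stays loaded after VM-Exit, so force
         * PERF_GLOBAL_CTRL to 0 on exit to keep host activity out of
         * the guest's counters. */
        if (enable_mediated_pmu)
                vmcs_write64(HOST_IA32_PERF_GLOBAL_CTRL, 0);

        /* Guest: atomically load the guest value on VM-Entry and save
         * it back to the VMCS on VM-Exit via the dedicated controls. */
        vm_entry_controls_changebit(vmx, VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL,
                                    is_mediated_pmu);
        vm_exit_controls_changebit(vmx, VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL |
                                         VM_EXIT_SAVE_IA32_PERF_GLOBAL_CTRL,
                                   is_mediated_pmu);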

For now, require VM_EXIT_SAVE_IA32_PERF_GLOBAL_CTRL support (introduced by
Sapphire Rapids).  KVM could support CPUs that lack the control by saving
PERF_GLOBAL_CTRL via the MSR save list, a.k.a. the MSR auto-store list, but
defer that support as it adds a small amount of complexity and is somewhat
unique.
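
A rough illustration of that deferred alternative (not part of this patch):
the guest value would be stashed via the VM-Exit MSR-store area instead of
the dedicated save control.  store_area and nr_store_msrs below are
hypothetical placeholders, not existing KVM helpers:

        /* Hypothetical: append PERF_GLOBAL_CTRL to the VM-Exit MSR-store
         * area so the CPU saves the guest value automatically on exit. */
        store_area[nr_store_msrs].index = MSR_CORE_PERF_GLOBAL_CTRL;
        vmcs_write64(VM_EXIT_MSR_STORE_ADDR, __pa(store_area));
        vmcs_write32(VM_EXIT_MSR_STORE_COUNT, ++nr_store_msrs);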

To minimize VM-Entry latency, propagate IA32_PERF_GLOBAL_CTRL to the VMCS
on-demand.  But to minimize complexity, read IA32_PERF_GLOBAL_CTRL out of
the VMCS on all non-failing VM-Exits.  I.e. partially cache the MSR.
KVM could track GLOBAL_CTRL as an EXREG and defer all reads, but writes
are rare, i.e. the dirty tracking for an EXREG is unnecessary, and it's
not obvious that shaving ~15-20 cycles per exit is meaningful given the
total overhead associated with mediated PMU context switches.
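
Condensed from the pmu.c and vmx.c hunks below, the resulting flow is: guest
writes are pushed to the VMCS immediately, and the cached value is refreshed
from the VMCS after every successful VM-Exit:

        /* Write path (kvm_pmu_set_msr()): propagate the new value so the
         * next VM-Entry loads it; write_global_ctrl() is a vmcs_write64()
         * of GUEST_IA32_PERF_GLOBAL_CTRL. */
        if (kvm_vcpu_has_mediated_pmu(vcpu))
                kvm_pmu_call(write_global_ctrl)(data);

        /* Exit path (vmx_vcpu_run()): if the guest can write the MSR
         * without being intercepted, re-read what the CPU saved on exit. */
        if (!msr_write_intercepted(vmx, MSR_CORE_PERF_GLOBAL_CTRL))
                vcpu_to_pmu(vcpu)->global_ctrl =
                        vmcs_read64(GUEST_IA32_PERF_GLOBAL_CTRL);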

Suggested-by: Sean Christopherson <seanjc@google.com>
Signed-off-by: Dapeng Mi <dapeng1.mi@linux.intel.com>
Co-developed-by: Mingwei Zhang <mizhang@google.com>
Signed-off-by: Mingwei Zhang <mizhang@google.com>
Tested-by: Xudong Hao <xudong.hao@intel.com>
Co-developed-by: Sean Christopherson <seanjc@google.com>
Tested-by: Manali Shukla <manali.shukla@amd.com>
Link: https://patch.msgid.link/20251206001720.468579-22-seanjc@google.com
Signed-off-by: Sean Christopherson <seanjc@google.com>
arch/x86/include/asm/kvm-x86-pmu-ops.h
arch/x86/include/asm/vmx.h
arch/x86/kvm/pmu.c
arch/x86/kvm/pmu.h
arch/x86/kvm/vmx/capabilities.h
arch/x86/kvm/vmx/pmu_intel.c
arch/x86/kvm/vmx/vmx.c
arch/x86/kvm/vmx/vmx.h

diff --git a/arch/x86/include/asm/kvm-x86-pmu-ops.h b/arch/x86/include/asm/kvm-x86-pmu-ops.h
index 9159bf1a473074a644e595e4c4c52267429f202a..ad2cc82abf794493f1cbda5bac1eaf1ce71fafd8 100644
@@ -23,5 +23,7 @@ KVM_X86_PMU_OP_OPTIONAL(reset)
 KVM_X86_PMU_OP_OPTIONAL(deliver_pmi)
 KVM_X86_PMU_OP_OPTIONAL(cleanup)
 
+KVM_X86_PMU_OP_OPTIONAL(write_global_ctrl)
+
 #undef KVM_X86_PMU_OP
 #undef KVM_X86_PMU_OP_OPTIONAL
diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h
index c85c500195239b2eb4fe08fe090d4b6a816f25f0..b92ff87e35605d62b252aa84ccbd34f8be78731a 100644
 #define VM_EXIT_PT_CONCEAL_PIP                 0x01000000
 #define VM_EXIT_CLEAR_IA32_RTIT_CTL            0x02000000
 #define VM_EXIT_LOAD_CET_STATE                  0x10000000
+#define VM_EXIT_SAVE_IA32_PERF_GLOBAL_CTRL     0x40000000
 
 #define VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR      0x00036dff
 
diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c
index fcecb4c21599cd6ee80b57fc9fdc0b75724eaa99..4b896cbb3d5368b6c3ba3144ffe37cedff70da84 100644
@@ -103,7 +103,7 @@ void kvm_pmu_ops_update(const struct kvm_pmu_ops *pmu_ops)
 #undef __KVM_X86_PMU_OP
 }
 
-void kvm_init_pmu_capability(const struct kvm_pmu_ops *pmu_ops)
+void kvm_init_pmu_capability(struct kvm_pmu_ops *pmu_ops)
 {
        bool is_intel = boot_cpu_data.x86_vendor == X86_VENDOR_INTEL;
        int min_nr_gp_ctrs = pmu_ops->MIN_NR_GP_COUNTERS;
@@ -139,6 +139,9 @@ void kvm_init_pmu_capability(const struct kvm_pmu_ops *pmu_ops)
            !pmu_ops->is_mediated_pmu_supported(&kvm_host_pmu))
                enable_mediated_pmu = false;
 
+       if (!enable_mediated_pmu)
+               pmu_ops->write_global_ctrl = NULL;
+
        if (!enable_pmu) {
                memset(&kvm_pmu_cap, 0, sizeof(kvm_pmu_cap));
                return;
@@ -834,6 +837,9 @@ int kvm_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
                        diff = pmu->global_ctrl ^ data;
                        pmu->global_ctrl = data;
                        reprogram_counters(pmu, diff);
+
+                       if (kvm_vcpu_has_mediated_pmu(vcpu))
+                               kvm_pmu_call(write_global_ctrl)(data);
                }
                break;
        case MSR_CORE_PERF_GLOBAL_OVF_CTRL:
@@ -928,8 +934,11 @@ void kvm_pmu_refresh(struct kvm_vcpu *vcpu)
         * in the global controls).  Emulate that behavior when refreshing the
         * PMU so that userspace doesn't need to manually set PERF_GLOBAL_CTRL.
         */
-       if (kvm_pmu_has_perf_global_ctrl(pmu) && pmu->nr_arch_gp_counters)
+       if (kvm_pmu_has_perf_global_ctrl(pmu) && pmu->nr_arch_gp_counters) {
                pmu->global_ctrl = GENMASK_ULL(pmu->nr_arch_gp_counters - 1, 0);
+               if (kvm_vcpu_has_mediated_pmu(vcpu))
+                       kvm_pmu_call(write_global_ctrl)(pmu->global_ctrl);
+       }
 
        bitmap_set(pmu->all_valid_pmc_idx, 0, pmu->nr_arch_gp_counters);
        bitmap_set(pmu->all_valid_pmc_idx, KVM_FIXED_PMC_BASE_IDX,
diff --git a/arch/x86/kvm/pmu.h b/arch/x86/kvm/pmu.h
index 506c203587eac4619b4eb2d0716e78e802c995d6..2ff469334c1afeca25a66dd19b88c03825daf5c2 100644
@@ -38,6 +38,7 @@ struct kvm_pmu_ops {
        void (*cleanup)(struct kvm_vcpu *vcpu);
 
        bool (*is_mediated_pmu_supported)(struct x86_pmu_capability *host_pmu);
+       void (*write_global_ctrl)(u64 global_ctrl);
 
        const u64 EVENTSEL_EVENT;
        const int MAX_NR_GP_COUNTERS;
@@ -183,7 +184,7 @@ static inline bool pmc_is_locally_enabled(struct kvm_pmc *pmc)
 
 extern struct x86_pmu_capability kvm_pmu_cap;
 
-void kvm_init_pmu_capability(const struct kvm_pmu_ops *pmu_ops);
+void kvm_init_pmu_capability(struct kvm_pmu_ops *pmu_ops);
 
 void kvm_pmu_recalc_pmc_emulation(struct kvm_pmu *pmu, struct kvm_pmc *pmc);
 
diff --git a/arch/x86/kvm/vmx/capabilities.h b/arch/x86/kvm/vmx/capabilities.h
index 26302fd6dd9c91d2f605b52d8a62e85e317d07a3..4e371c93ae168034b6e0ed073a535576bdd15534 100644
@@ -109,6 +109,12 @@ static inline bool cpu_has_load_cet_ctrl(void)
 {
        return (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_CET_STATE);
 }
+
+static inline bool cpu_has_save_perf_global_ctrl(void)
+{
+       return vmcs_config.vmexit_ctrl & VM_EXIT_SAVE_IA32_PERF_GLOBAL_CTRL;
+}
+
 static inline bool cpu_has_vmx_mpx(void)
 {
        return vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_BNDCFGS;
diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c
index 050c21298213013f509ac396fd5329dfe8f628fb..dbab7cca7a622f4c74a90d5d41febc1ded019e5b 100644
@@ -778,7 +778,29 @@ static bool intel_pmu_is_mediated_pmu_supported(struct x86_pmu_capability *host_
         * Require v4+ for MSR_CORE_PERF_GLOBAL_STATUS_SET, and full-width
         * writes so that KVM can precisely load guest counter values.
         */
-       return host_pmu->version >= 4 && host_perf_cap & PERF_CAP_FW_WRITES;
+       if (host_pmu->version < 4 || !(host_perf_cap & PERF_CAP_FW_WRITES))
+               return false;
+
+       /*
+        * All CPUs that support a mediated PMU are expected to support loading
+        * PERF_GLOBAL_CTRL via dedicated VMCS fields.
+        */
+       if (WARN_ON_ONCE(!cpu_has_load_perf_global_ctrl()))
+               return false;
+
+       /*
+        * KVM doesn't yet support mediated PMU on CPUs without support for
+        * saving PERF_GLOBAL_CTRL via a dedicated VMCS field.
+        */
+       if (!cpu_has_save_perf_global_ctrl())
+               return false;
+
+       return true;
+}
+
+static void intel_pmu_write_global_ctrl(u64 global_ctrl)
+{
+       vmcs_write64(GUEST_IA32_PERF_GLOBAL_CTRL, global_ctrl);
 }
 
 struct kvm_pmu_ops intel_pmu_ops __initdata = {
@@ -794,6 +816,7 @@ struct kvm_pmu_ops intel_pmu_ops __initdata = {
        .cleanup = intel_pmu_cleanup,
 
        .is_mediated_pmu_supported = intel_pmu_is_mediated_pmu_supported,
+       .write_global_ctrl = intel_pmu_write_global_ctrl,
 
        .EVENTSEL_EVENT = ARCH_PERFMON_EVENTSEL_EVENT,
        .MAX_NR_GP_COUNTERS = KVM_MAX_NR_INTEL_GP_COUNTERS,
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 9f71ba99cf700c881b30915c729ede09f75390aa..72b92cea9d7279adb5fcd50ca57f4d34cd99805b 100644
@@ -4294,6 +4294,18 @@ static void vmx_recalc_msr_intercepts(struct kvm_vcpu *vcpu)
                vmx_set_intercept_for_msr(vcpu, MSR_IA32_S_CET, MSR_TYPE_RW, intercept);
        }
 
+       if (enable_mediated_pmu) {
+               bool is_mediated_pmu = kvm_vcpu_has_mediated_pmu(vcpu);
+               struct vcpu_vmx *vmx = to_vmx(vcpu);
+
+               vm_entry_controls_changebit(vmx,
+                                           VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL, is_mediated_pmu);
+
+               vm_exit_controls_changebit(vmx,
+                                          VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL |
+                                          VM_EXIT_SAVE_IA32_PERF_GLOBAL_CTRL, is_mediated_pmu);
+       }
+
        /*
         * x2APIC and LBR MSR intercepts are modified on-demand and cannot be
         * filtered by userspace.
@@ -4476,6 +4488,16 @@ void vmx_set_constant_host_state(struct vcpu_vmx *vmx)
                vmcs_writel(HOST_SSP, 0);
                vmcs_writel(HOST_INTR_SSP_TABLE, 0);
        }
+
+       /*
+        * When running a guest with a mediated PMU, guest state is resident in
+        * hardware after VM-Exit.  Zero PERF_GLOBAL_CTRL on exit so that host
+        * activity doesn't bleed into the guest counters.  When running with
+        * an emulated PMU, PERF_GLOBAL_CTRL is dynamically computed on every
+        * entry/exit to merge guest and host PMU usage.
+        */
+       if (enable_mediated_pmu)
+               vmcs_write64(HOST_IA32_PERF_GLOBAL_CTRL, 0);
 }
 
 void set_cr4_guest_host_mask(struct vcpu_vmx *vmx)
@@ -4543,7 +4565,8 @@ static u32 vmx_get_initial_vmexit_ctrl(void)
                                 VM_EXIT_CLEAR_IA32_RTIT_CTL);
        /* Loading of EFER and PERF_GLOBAL_CTRL are toggled dynamically */
        return vmexit_ctrl &
-               ~(VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL | VM_EXIT_LOAD_IA32_EFER);
+               ~(VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL | VM_EXIT_LOAD_IA32_EFER |
+                 VM_EXIT_SAVE_IA32_PERF_GLOBAL_CTRL);
 }
 
 void vmx_refresh_apicv_exec_ctrl(struct kvm_vcpu *vcpu)
@@ -7270,6 +7293,9 @@ static void atomic_switch_perf_msrs(struct vcpu_vmx *vmx)
        struct perf_guest_switch_msr *msrs;
        struct kvm_pmu *pmu = vcpu_to_pmu(&vmx->vcpu);
 
+       if (kvm_vcpu_has_mediated_pmu(&vmx->vcpu))
+               return;
+
        pmu->host_cross_mapped_mask = 0;
        if (pmu->pebs_enable & pmu->global_ctrl)
                intel_pmu_cross_mapped_check(pmu);
@@ -7572,6 +7598,9 @@ fastpath_t vmx_vcpu_run(struct kvm_vcpu *vcpu, u64 run_flags)
 
        vmx->loaded_vmcs->launched = 1;
 
+       if (!msr_write_intercepted(vmx, MSR_CORE_PERF_GLOBAL_CTRL))
+               vcpu_to_pmu(vcpu)->global_ctrl = vmcs_read64(GUEST_IA32_PERF_GLOBAL_CTRL);
+
        vmx_recover_nmi_blocking(vmx);
        vmx_complete_interrupts(vmx);
 
diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h
index bc3ed3145d7e837f1eac90275652e61579b44ab5..d7a96c84371fbc75ca76323d8c424eaabdd850ef 100644
@@ -510,7 +510,8 @@ static inline u8 vmx_get_rvi(void)
               VM_EXIT_CLEAR_BNDCFGS |                                  \
               VM_EXIT_PT_CONCEAL_PIP |                                 \
               VM_EXIT_CLEAR_IA32_RTIT_CTL |                            \
-              VM_EXIT_LOAD_CET_STATE)
+              VM_EXIT_LOAD_CET_STATE |                                 \
+              VM_EXIT_SAVE_IA32_PERF_GLOBAL_CTRL)
 
 #define KVM_REQUIRED_VMX_PIN_BASED_VM_EXEC_CONTROL                     \
        (PIN_BASED_EXT_INTR_MASK |                                      \