KVM: x86: Provide a capability to disable APERF/MPERF read intercepts
Author:     Jim Mattson <jmattson@google.com>
            Thu, 26 Jun 2025 00:12:22 +0000 (17:12 -0700)
Committer:  Sean Christopherson <seanjc@google.com>
            Wed, 9 Jul 2025 16:33:37 +0000 (09:33 -0700)
Allow a guest to read the physical IA32_APERF and IA32_MPERF MSRs
without interception.

The IA32_APERF and IA32_MPERF MSRs are not virtualized. Writes are not
handled at all. The MSR values are not zeroed on vCPU creation, saved
on suspend, or restored on resume. No accommodation is made for
processor migration or for sharing a logical processor with other
tasks. No adjustments are made for non-unit TSC multipliers. The MSRs
do not account for time the same way as the comparable PMU events,
whether the PMU is virtualized by the traditional emulation method or
the new mediated pass-through approach.

Nonetheless, in a properly constrained environment, this capability
can be combined with a guest CPUID table that advertises support for
CPUID.6:ECX.APERFMPERF[bit 0] to induce a Linux guest to report the
effective physical CPU frequency in /proc/cpuinfo. Moreover, there is
no performance cost for this capability.

Signed-off-by: Jim Mattson <jmattson@google.com>
Link: https://lore.kernel.org/r/20250530185239.2335185-3-jmattson@google.com
Reviewed-by: Xiaoyao Li <xiaoyao.li@intel.com>
Link: https://lore.kernel.org/r/20250626001225.744268-3-seanjc@google.com
Signed-off-by: Sean Christopherson <seanjc@google.com>
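
[Editor's note: to make the intended usage concrete, here is a minimal,
hypothetical VMM-side sketch, not part of this commit, of the two steps the
commit message describes: enabling the new exit-disable bit on the VM, then
advertising CPUID.6:ECX.APERFMPERF[bit 0] in the guest CPUID table. The
function names, the vm_fd handle, and the elided error handling are
assumptions of this sketch.]

#include <sys/ioctl.h>
#include <linux/kvm.h>

/*
 * Enable APERF/MPERF read passthrough. Per the API documentation,
 * disable-exits capabilities should be enabled before vCPUs are created.
 */
static int enable_aperfmperf_passthrough(int vm_fd)
{
	struct kvm_enable_cap cap = {
		.cap = KVM_CAP_X86_DISABLE_EXITS,
		.args = { KVM_X86_DISABLE_EXITS_APERFMPERF },
	};

	/*
	 * KVM_CHECK_EXTENSION on this capability returns the allowed bit
	 * mask (kvm_get_allowed_disable_exits() in the diff below).
	 */
	int allowed = ioctl(vm_fd, KVM_CHECK_EXTENSION,
			    KVM_CAP_X86_DISABLE_EXITS);

	if (!(allowed & KVM_X86_DISABLE_EXITS_APERFMPERF))
		return -1;	/* host lacks X86_FEATURE_APERFMPERF */

	return ioctl(vm_fd, KVM_ENABLE_CAP, &cap);
}

/*
 * KVM deliberately does not couple the capability to the CPUID bit, so
 * the VMM must set CPUID.6:ECX[0] itself in each vCPU's CPUID table
 * before calling KVM_SET_CPUID2.
 */
static void advertise_aperfmperf(struct kvm_cpuid2 *cpuid)
{
	for (__u32 i = 0; i < cpuid->nent; i++) {
		if (cpuid->entries[i].function == 6)
			cpuid->entries[i].ecx |= 1u << 0;
	}
}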
Documentation/virt/kvm/api.rst
arch/x86/kvm/svm/nested.c
arch/x86/kvm/svm/svm.c
arch/x86/kvm/vmx/nested.c
arch/x86/kvm/vmx/vmx.c
arch/x86/kvm/x86.c
arch/x86/kvm/x86.h
include/uapi/linux/kvm.h
tools/include/uapi/linux/kvm.h

diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst
index f0d961436d0f383599d8c1e721e2f838d428ae00..6be1ddedec492097fc941b27050928c82988139d 100644
@@ -7844,6 +7844,7 @@ Valid bits in args[0] are::
   #define KVM_X86_DISABLE_EXITS_HLT              (1 << 1)
   #define KVM_X86_DISABLE_EXITS_PAUSE            (1 << 2)
   #define KVM_X86_DISABLE_EXITS_CSTATE           (1 << 3)
+  #define KVM_X86_DISABLE_EXITS_APERFMPERF       (1 << 4)
 
 Enabling this capability on a VM provides userspace with a way to no
 longer intercept some instructions for improved latency in some
@@ -7854,6 +7855,28 @@ all such vmexits.
 
 Do not enable KVM_FEATURE_PV_UNHALT if you disable HLT exits.
 
+Virtualizing the ``IA32_APERF`` and ``IA32_MPERF`` MSRs requires more
+than just disabling APERF/MPERF exits. While both Intel and AMD
+document strict usage conditions for these MSRs--emphasizing that only
+the ratio of their deltas over a time interval (T0 to T1) is
+architecturally defined--simply passing through the MSRs can still
+produce an incorrect ratio.
+
+This erroneous ratio can occur if, between T0 and T1:
+
+1. The vCPU thread migrates between logical processors.
+2. Live migration or suspend/resume operations take place.
+3. Another task shares the vCPU's logical processor.
+4. C-states lower than C0 are emulated (e.g., via HLT interception).
+5. The guest TSC frequency doesn't match the host TSC frequency.
+
+Due to these complexities, KVM does not automatically associate this
+passthrough capability with the guest CPUID bit,
+``CPUID.6:ECX.APERFMPERF[bit 0]``. Userspace VMMs that deem this
+mechanism adequate for virtualizing the ``IA32_APERF`` and
+``IA32_MPERF`` MSRs must set the guest CPUID bit explicitly.
+
+
 7.14 KVM_CAP_S390_HPAGE_1M
 --------------------------
 
diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c
index 749f7b866ac808f9d12b2a8ab1402553a0498f12..b7fd2e869998751fbd6accf3b4b060f907ff9019 100644
@@ -194,7 +194,7 @@ void recalc_intercepts(struct vcpu_svm *svm)
  * Hardcode the capacity of the array based on the maximum number of _offsets_.
  * MSRs are batched together, so there are fewer offsets than MSRs.
  */
-static int nested_svm_msrpm_merge_offsets[6] __ro_after_init;
+static int nested_svm_msrpm_merge_offsets[7] __ro_after_init;
 static int nested_svm_nr_msrpm_merge_offsets __ro_after_init;
 typedef unsigned long nsvm_msrpm_merge_t;
 
@@ -216,6 +216,8 @@ int __init nested_svm_init_msrpm_merge_offsets(void)
                MSR_IA32_SPEC_CTRL,
                MSR_IA32_PRED_CMD,
                MSR_IA32_FLUSH_CMD,
+               MSR_IA32_APERF,
+               MSR_IA32_MPERF,
                MSR_IA32_LASTBRANCHFROMIP,
                MSR_IA32_LASTBRANCHTOIP,
                MSR_IA32_LASTINTFROMIP,
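
[Editor's note: a hedged aside on why two new MSRs cost only one new array
slot above (7, up from 6). Assuming AMD's documented MSRPM geometry for the
0x0-0x1fff MSR range (two permission bits, read and write intercept, per
MSR), the adjacent indices of IA32_MPERF (0xe7) and IA32_APERF (0xe8) fall in
the same unsigned-long word of the bitmap, which the merge walks one
nsvm_msrpm_merge_t at a time. The helper name and arithmetic below are this
editor's reading, not code from the patch.]

/* Assumed geometry: two permission bits per MSR in the first range. */
static unsigned int msrpm_merge_offset(unsigned int msr)
{
	unsigned int bit = msr * 2;	/* first of the MSR's two bits */

	return bit / (8 * sizeof(unsigned long));	/* word index */
}

/*
 * msrpm_merge_offset(0xe7) == msrpm_merge_offset(0xe8) == 7, so APERF
 * and MPERF share a single merge offset.
 */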
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index 1261447ffcdd787baaf94bb8aa526daf594baefb..fedf68c63318948c19d202309787b9ad595558d2 100644
@@ -838,6 +838,11 @@ static void svm_recalc_msr_intercepts(struct kvm_vcpu *vcpu)
        svm_set_intercept_for_msr(vcpu, MSR_IA32_SYSENTER_ESP, MSR_TYPE_RW,
                                  guest_cpuid_is_intel_compatible(vcpu));
 
+       if (kvm_aperfmperf_in_guest(vcpu->kvm)) {
+               svm_disable_intercept_for_msr(vcpu, MSR_IA32_APERF, MSR_TYPE_R);
+               svm_disable_intercept_for_msr(vcpu, MSR_IA32_MPERF, MSR_TYPE_R);
+       }
+
        if (sev_es_guest(vcpu->kvm))
                sev_es_recalc_msr_intercepts(vcpu);
 
diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
index c69df3aba8d1f9ec62f8b23d5d763bea56d4dd00..b8ea1969113df0e4bb8c67cd740be7e9659574ed 100644
@@ -715,6 +715,12 @@ static inline bool nested_vmx_prepare_msr_bitmap(struct kvm_vcpu *vcpu,
        nested_vmx_set_intercept_for_msr(vmx, msr_bitmap_l1, msr_bitmap_l0,
                                         MSR_IA32_FLUSH_CMD, MSR_TYPE_W);
 
+       nested_vmx_set_intercept_for_msr(vmx, msr_bitmap_l1, msr_bitmap_l0,
+                                        MSR_IA32_APERF, MSR_TYPE_R);
+
+       nested_vmx_set_intercept_for_msr(vmx, msr_bitmap_l1, msr_bitmap_l0,
+                                        MSR_IA32_MPERF, MSR_TYPE_R);
+
        kvm_vcpu_unmap(vcpu, &map);
 
        vmx->nested.force_msr_bitmap_recalc = false;
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index b064e50c6e64e177a672daa76005090e427d24c4..77bbb2b93418398c1e93266be0468bb80703e6f5 100644
@@ -4084,6 +4084,10 @@ void vmx_recalc_msr_intercepts(struct kvm_vcpu *vcpu)
                vmx_disable_intercept_for_msr(vcpu, MSR_CORE_C6_RESIDENCY, MSR_TYPE_R);
                vmx_disable_intercept_for_msr(vcpu, MSR_CORE_C7_RESIDENCY, MSR_TYPE_R);
        }
+       if (kvm_aperfmperf_in_guest(vcpu->kvm)) {
+               vmx_disable_intercept_for_msr(vcpu, MSR_IA32_APERF, MSR_TYPE_R);
+               vmx_disable_intercept_for_msr(vcpu, MSR_IA32_MPERF, MSR_TYPE_R);
+       }
 
        /* PT MSRs can be passed through iff PT is exposed to the guest. */
        if (vmx_pt_mode_is_host_guest())
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 6dda7bf4c44c8b83a0b4269f172f86216a8c0e9a..912260e3725d54df42b12f868b2557b0a324db58 100644
@@ -4577,6 +4577,9 @@ static u64 kvm_get_allowed_disable_exits(void)
 {
        u64 r = KVM_X86_DISABLE_EXITS_PAUSE;
 
+       if (boot_cpu_has(X86_FEATURE_APERFMPERF))
+               r |= KVM_X86_DISABLE_EXITS_APERFMPERF;
+
        if (!mitigate_smt_rsb) {
                r |= KVM_X86_DISABLE_EXITS_HLT |
                        KVM_X86_DISABLE_EXITS_CSTATE;
@@ -6613,7 +6616,8 @@ split_irqchip_unlock:
 
                if (!mitigate_smt_rsb && boot_cpu_has_bug(X86_BUG_SMT_RSB) &&
                    cpu_smt_possible() &&
-                   (cap->args[0] & ~KVM_X86_DISABLE_EXITS_PAUSE))
+                   (cap->args[0] & ~(KVM_X86_DISABLE_EXITS_PAUSE |
+                                     KVM_X86_DISABLE_EXITS_APERFMPERF)))
                        pr_warn_once(SMT_RSB_MSG);
 
                kvm_disable_exits(kvm, cap->args[0]);
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
index 17ec8436e56502fd24249b4d979299699a8ea296..e77281b6e2b2983a136740e48896120ba6def981 100644
@@ -524,6 +524,11 @@ static inline bool kvm_cstate_in_guest(struct kvm *kvm)
        return kvm->arch.disabled_exits & KVM_X86_DISABLE_EXITS_CSTATE;
 }
 
+static inline bool kvm_aperfmperf_in_guest(struct kvm *kvm)
+{
+       return kvm->arch.disabled_exits & KVM_X86_DISABLE_EXITS_APERFMPERF;
+}
+
 static inline bool kvm_notify_vmexit_enabled(struct kvm *kvm)
 {
        return kvm->arch.notify_vmexit_flags & KVM_X86_NOTIFY_VMEXIT_ENABLED;
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index 7a4c35ff03fecde6032eb984b0d0e000089063c8..aeb2ca10b1909c3b3512e902d91b22434bc11004 100644
@@ -644,6 +644,7 @@ struct kvm_ioeventfd {
 #define KVM_X86_DISABLE_EXITS_HLT            (1 << 1)
 #define KVM_X86_DISABLE_EXITS_PAUSE          (1 << 2)
 #define KVM_X86_DISABLE_EXITS_CSTATE         (1 << 3)
+#define KVM_X86_DISABLE_EXITS_APERFMPERF     (1 << 4)
 
 /* for KVM_ENABLE_CAP */
 struct kvm_enable_cap {
diff --git a/tools/include/uapi/linux/kvm.h b/tools/include/uapi/linux/kvm.h
index b6ae8ad8934b52c7e779cd2e84b8330eb1de68bc..eef57c1171401512dbb5210aa731df9eec8d4cd6 100644
@@ -617,6 +617,7 @@ struct kvm_ioeventfd {
 #define KVM_X86_DISABLE_EXITS_HLT            (1 << 1)
 #define KVM_X86_DISABLE_EXITS_PAUSE          (1 << 2)
 #define KVM_X86_DISABLE_EXITS_CSTATE         (1 << 3)
+#define KVM_X86_DISABLE_EXITS_APERFMPERF     (1 << 4)
 
 /* for KVM_ENABLE_CAP */
 struct kvm_enable_cap {