]> git.ipfire.org Git - thirdparty/linux.git/commitdiff
KVM: x86/pmu: Start stubbing in mediated PMU support
author: Dapeng Mi <dapeng1.mi@linux.intel.com>
Sat, 6 Dec 2025 00:16:52 +0000 (16:16 -0800)
committer: Sean Christopherson <seanjc@google.com>
Thu, 8 Jan 2026 19:52:04 +0000 (11:52 -0800)
Introduce enable_mediated_pmu as a global variable, with the intent of
exposing it to userspace as a vendor module parameter, to control and reflect
mediated vPMU support.  Wire up the perf plumbing to create+release a
mediated PMU, but defer exposing the parameter to userspace until KVM
support for mediated PMUs has fully landed.

To (a) minimize compatibility issues, (b) give userspace a chance to
opt out of the restrictive side-effects of perf_create_mediated_pmu(),
and (c) avoid adding new dependencies between enabling an in-kernel
irqchip and a mediated vPMU, defer "creating" a mediated PMU in perf
until the first vCPU is created.

Regarding userspace compatibility, an alternative solution would be to
make the mediated PMU fully opt-in, e.g. to avoid unexpected failure due
to perf_create_mediated_pmu() failing.  Ironically, that approach creates
an even bigger compatibility issue, as turning on enable_mediated_pmu
would silently break VMMs that don't utilize KVM_CAP_PMU_CAPABILITY (well,
silently until the guest tried to access PMU assets).

Regarding an in-kernel irqchip, create a mediated PMU if and only if the
VM has an in-kernel local APIC, as the mediated PMU will take a hard
dependency on forwarding PMIs to the guest without bouncing through host
userspace.  Silently "drop" the PMU instead of rejecting KVM_CREATE_VCPU,
as KVM's existing vPMU support doesn't function correctly if the local
APIC is emulated by userspace, e.g. PMIs will never be delivered.  I.e.
it's far, far more likely that rejecting KVM_CREATE_VCPU would cause
problems, e.g. for tests or userspace daemons that just want to probe
basic KVM functionality.

Note!  Deliberately make mediated PMU creation "sticky", i.e. don't unwind
it on failure to create a vCPU.  Practically speaking, there's no harm to
having a VM with a mediated PMU and no vCPUs.  To avoid an "impossible" VM
setup, reject KVM_CAP_PMU_CAPABILITY if a mediated PMU has been created,
i.e. don't let userspace disable PMU support after failed vCPU creation
(with PMU support enabled).

Defer vendor specific requirements and constraints to the future.

Suggested-by: Sean Christopherson <seanjc@google.com>
Signed-off-by: Dapeng Mi <dapeng1.mi@linux.intel.com>
Co-developed-by: Mingwei Zhang <mizhang@google.com>
Signed-off-by: Mingwei Zhang <mizhang@google.com>
Tested-by: Xudong Hao <xudong.hao@intel.com>
Co-developed-by: Sean Christopherson <seanjc@google.com>
Tested-by: Manali Shukla <manali.shukla@amd.com>
Link: https://patch.msgid.link/20251206001720.468579-17-seanjc@google.com
Signed-off-by: Sean Christopherson <seanjc@google.com>
arch/x86/include/asm/kvm_host.h
arch/x86/kvm/pmu.c
arch/x86/kvm/pmu.h
arch/x86/kvm/x86.c
arch/x86/kvm/x86.h

index 5a3bfa293e8b1acfa30ee44cc5e42a11486424be..defd979003beb5a2dc13ddb907276e44a0dbfc18 100644 (file)
@@ -1484,6 +1484,7 @@ struct kvm_arch {
 
        bool bus_lock_detection_enabled;
        bool enable_pmu;
+       bool created_mediated_pmu;
 
        u32 notify_window;
        u32 notify_vmexit_flags;
index 487ad19a236e30fe2a8cb61c067036ce6f9f892b..131e24246b09e6e5f7ff35a93e9f7217e200e461 100644 (file)
@@ -135,6 +135,10 @@ void kvm_init_pmu_capability(const struct kvm_pmu_ops *pmu_ops)
                        enable_pmu = false;
        }
 
+       if (!enable_pmu || !enable_mediated_pmu || !kvm_host_pmu.mediated ||
+           !pmu_ops->is_mediated_pmu_supported(&kvm_host_pmu))
+               enable_mediated_pmu = false;
+
        if (!enable_pmu) {
                memset(&kvm_pmu_cap, 0, sizeof(kvm_pmu_cap));
                return;
index 5c3939e91f1da481830538875fe87296a018f0db..a5c7c026b919a1717d8d3dfdba15f6acd0e1dfcf 100644 (file)
@@ -37,6 +37,8 @@ struct kvm_pmu_ops {
        void (*deliver_pmi)(struct kvm_vcpu *vcpu);
        void (*cleanup)(struct kvm_vcpu *vcpu);
 
+       bool (*is_mediated_pmu_supported)(struct x86_pmu_capability *host_pmu);
+
        const u64 EVENTSEL_EVENT;
        const int MAX_NR_GP_COUNTERS;
        const int MIN_NR_GP_COUNTERS;
@@ -58,6 +60,11 @@ static inline bool kvm_pmu_has_perf_global_ctrl(struct kvm_pmu *pmu)
        return pmu->version > 1;
 }
 
+static inline bool kvm_vcpu_has_mediated_pmu(struct kvm_vcpu *vcpu)
+{
+       return enable_mediated_pmu && vcpu_to_pmu(vcpu)->version;
+}
+
 /*
  * KVM tracks all counters in 64-bit bitmaps, with general purpose counters
  * mapped to bits 31:0 and fixed counters mapped to 63:32, e.g. fixed counter 0
index 1b2827cecf38a3726537a264a0819df9d757c944..fb3a5e861553855c6c5fb52296dcbbac538e2761 100644 (file)
@@ -183,6 +183,10 @@ bool __read_mostly enable_pmu = true;
 EXPORT_SYMBOL_FOR_KVM_INTERNAL(enable_pmu);
 module_param(enable_pmu, bool, 0444);
 
+/* Enable/disable mediated PMU virtualization. */
+bool __read_mostly enable_mediated_pmu;
+EXPORT_SYMBOL_FOR_KVM_INTERNAL(enable_mediated_pmu);
+
 bool __read_mostly eager_page_split = true;
 module_param(eager_page_split, bool, 0644);
 
@@ -6854,7 +6858,7 @@ disable_exits_unlock:
                        break;
 
                mutex_lock(&kvm->lock);
-               if (!kvm->created_vcpus) {
+               if (!kvm->created_vcpus && !kvm->arch.created_mediated_pmu) {
                        kvm->arch.enable_pmu = !(cap->args[0] & KVM_PMU_CAP_DISABLE);
                        r = 0;
                }
@@ -12641,8 +12645,13 @@ static int sync_regs(struct kvm_vcpu *vcpu)
        return 0;
 }
 
+#define PERF_MEDIATED_PMU_MSG \
+       "Failed to enable mediated vPMU, try disabling system wide perf events and nmi_watchdog.\n"
+
 int kvm_arch_vcpu_precreate(struct kvm *kvm, unsigned int id)
 {
+       int r;
+
        if (kvm_check_tsc_unstable() && kvm->created_vcpus)
                pr_warn_once("SMP vm created on host with unstable TSC; "
                             "guest TSC will not be reliable\n");
@@ -12653,7 +12662,29 @@ int kvm_arch_vcpu_precreate(struct kvm *kvm, unsigned int id)
        if (id >= kvm->arch.max_vcpu_ids)
                return -EINVAL;
 
-       return kvm_x86_call(vcpu_precreate)(kvm);
+       /*
+        * Note, any actions done by .vcpu_precreate() must be idempotent with
+        * respect to creating multiple vCPUs, and therefore are not undone if
+        * creating a vCPU fails (including failure during pre-create).
+        */
+       r = kvm_x86_call(vcpu_precreate)(kvm);
+       if (r)
+               return r;
+
+       if (enable_mediated_pmu && kvm->arch.enable_pmu &&
+           !kvm->arch.created_mediated_pmu) {
+               if (irqchip_in_kernel(kvm)) {
+                       r = perf_create_mediated_pmu();
+                       if (r) {
+                               pr_warn_ratelimited(PERF_MEDIATED_PMU_MSG);
+                               return r;
+                       }
+                       kvm->arch.created_mediated_pmu = true;
+               } else {
+                       kvm->arch.enable_pmu = false;
+               }
+       }
+       return 0;
 }
 
 int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
@@ -13319,6 +13350,8 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
                __x86_set_memory_region(kvm, TSS_PRIVATE_MEMSLOT, 0, 0);
                mutex_unlock(&kvm->slots_lock);
        }
+       if (kvm->arch.created_mediated_pmu)
+               perf_release_mediated_pmu();
        kvm_destroy_vcpus(kvm);
        kvm_free_msr_filter(srcu_dereference_check(kvm->arch.msr_filter, &kvm->srcu, 1));
 #ifdef CONFIG_KVM_IOAPIC
index fdab0ad490988e297a370c85bf4fa54b3dd5ee68..6e1fb1680c0a3df6176ca3e2d7318c99efd7fb3d 100644 (file)
@@ -470,6 +470,7 @@ extern struct kvm_caps kvm_caps;
 extern struct kvm_host_values kvm_host;
 
 extern bool enable_pmu;
+extern bool enable_mediated_pmu;
 
 /*
  * Get a filtered version of KVM's supported XCR0 that strips out dynamic