From 3e51822b2fdf695bda50c2c3f88d6ab022a9e6f3 Mon Sep 17 00:00:00 2001 From: Dapeng Mi Date: Fri, 5 Dec 2025 16:16:52 -0800 Subject: [PATCH] KVM: x86/pmu: Start stubbing in mediated PMU support Introduce enable_mediated_pmu as a global variable, with the intent of exposing it to userspace a vendor module parameter, to control and reflect mediated vPMU support. Wire up the perf plumbing to create+release a mediated PMU, but defer exposing the parameter to userspace until KVM support for a mediated PMUs is fully landed. To (a) minimize compatibility issues, (b) to give userspace a chance to opt out of the restrictive side-effects of perf_create_mediated_pmu(), and (c) to avoid adding new dependencies between enabling an in-kernel irqchip and a mediated vPMU, defer "creating" a mediated PMU in perf until the first vCPU is created. Regarding userspace compatibility, an alternative solution would be to make the mediated PMU fully opt-in, e.g. to avoid unexpected failure due to perf_create_mediated_pmu() failing. Ironically, that approach creates an even bigger compatibility issue, as turning on enable_mediated_pmu would silently break VMMs that don't utilize KVM_CAP_PMU_CAPABILITY (well, silently until the guest tried to access PMU assets). Regarding an in-kernel irqchip, create a mediated PMU if and only if the VM has an in-kernel local APIC, as the mediated PMU will take a hard dependency on forwarding PMIs to the guest without bouncing through host userspace. Silently "drop" the PMU instead of rejecting KVM_CREATE_VCPU, as KVM's existing vPMU support doesn't function correctly if the local APIC is emulated by userspace, e.g. PMIs will never be delivered. I.e. it's far, far more likely that rejecting KVM_CREATE_VCPU would cause problems, e.g. for tests or userspace daemons that just want to probe basic KVM functionality. Note! Deliberately make mediated PMU creation "sticky", i.e. don't unwind it on failure to create a vCPU. Practically speaking, there's no harm to having a VM with a mediated PMU and no vCPUs. To avoid an "impossible" VM setup, reject KVM_CAP_PMU_CAPABILITY if a mediated PMU has been created, i.e. don't let userspace disable PMU support after failed vCPU creation (with PMU support enabled). Defer vendor specific requirements and constraints to the future. Suggested-by: Sean Christopherson Signed-off-by: Dapeng Mi Co-developed-by: Mingwei Zhang Signed-off-by: Mingwei Zhang Tested-by: Xudong Hao Co-developed-by: Sean Christopherson Tested-by: Manali Shukla Link: https://patch.msgid.link/20251206001720.468579-17-seanjc@google.com Signed-off-by: Sean Christopherson --- arch/x86/include/asm/kvm_host.h | 1 + arch/x86/kvm/pmu.c | 4 ++++ arch/x86/kvm/pmu.h | 7 +++++++ arch/x86/kvm/x86.c | 37 +++++++++++++++++++++++++++++++-- arch/x86/kvm/x86.h | 1 + 5 files changed, 48 insertions(+), 2 deletions(-) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 5a3bfa293e8b1..defd979003beb 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1484,6 +1484,7 @@ struct kvm_arch { bool bus_lock_detection_enabled; bool enable_pmu; + bool created_mediated_pmu; u32 notify_window; u32 notify_vmexit_flags; diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c index 487ad19a236e3..131e24246b09e 100644 --- a/arch/x86/kvm/pmu.c +++ b/arch/x86/kvm/pmu.c @@ -135,6 +135,10 @@ void kvm_init_pmu_capability(const struct kvm_pmu_ops *pmu_ops) enable_pmu = false; } + if (!enable_pmu || !enable_mediated_pmu || !kvm_host_pmu.mediated || + !pmu_ops->is_mediated_pmu_supported(&kvm_host_pmu)) + enable_mediated_pmu = false; + if (!enable_pmu) { memset(&kvm_pmu_cap, 0, sizeof(kvm_pmu_cap)); return; diff --git a/arch/x86/kvm/pmu.h b/arch/x86/kvm/pmu.h index 5c3939e91f1da..a5c7c026b919a 100644 --- a/arch/x86/kvm/pmu.h +++ b/arch/x86/kvm/pmu.h @@ -37,6 +37,8 @@ struct kvm_pmu_ops { void (*deliver_pmi)(struct kvm_vcpu *vcpu); void (*cleanup)(struct kvm_vcpu *vcpu); + bool (*is_mediated_pmu_supported)(struct x86_pmu_capability *host_pmu); + const u64 EVENTSEL_EVENT; const int MAX_NR_GP_COUNTERS; const int MIN_NR_GP_COUNTERS; @@ -58,6 +60,11 @@ static inline bool kvm_pmu_has_perf_global_ctrl(struct kvm_pmu *pmu) return pmu->version > 1; } +static inline bool kvm_vcpu_has_mediated_pmu(struct kvm_vcpu *vcpu) +{ + return enable_mediated_pmu && vcpu_to_pmu(vcpu)->version; +} + /* * KVM tracks all counters in 64-bit bitmaps, with general purpose counters * mapped to bits 31:0 and fixed counters mapped to 63:32, e.g. fixed counter 0 diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 1b2827cecf38a..fb3a5e8615538 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -183,6 +183,10 @@ bool __read_mostly enable_pmu = true; EXPORT_SYMBOL_FOR_KVM_INTERNAL(enable_pmu); module_param(enable_pmu, bool, 0444); +/* Enable/disabled mediated PMU virtualization. */ +bool __read_mostly enable_mediated_pmu; +EXPORT_SYMBOL_FOR_KVM_INTERNAL(enable_mediated_pmu); + bool __read_mostly eager_page_split = true; module_param(eager_page_split, bool, 0644); @@ -6854,7 +6858,7 @@ disable_exits_unlock: break; mutex_lock(&kvm->lock); - if (!kvm->created_vcpus) { + if (!kvm->created_vcpus && !kvm->arch.created_mediated_pmu) { kvm->arch.enable_pmu = !(cap->args[0] & KVM_PMU_CAP_DISABLE); r = 0; } @@ -12641,8 +12645,13 @@ static int sync_regs(struct kvm_vcpu *vcpu) return 0; } +#define PERF_MEDIATED_PMU_MSG \ + "Failed to enable mediated vPMU, try disabling system wide perf events and nmi_watchdog.\n" + int kvm_arch_vcpu_precreate(struct kvm *kvm, unsigned int id) { + int r; + if (kvm_check_tsc_unstable() && kvm->created_vcpus) pr_warn_once("SMP vm created on host with unstable TSC; " "guest TSC will not be reliable\n"); @@ -12653,7 +12662,29 @@ int kvm_arch_vcpu_precreate(struct kvm *kvm, unsigned int id) if (id >= kvm->arch.max_vcpu_ids) return -EINVAL; - return kvm_x86_call(vcpu_precreate)(kvm); + /* + * Note, any actions done by .vcpu_create() must be idempotent with + * respect to creating multiple vCPUs, and therefore are not undone if + * creating a vCPU fails (including failure during pre-create). + */ + r = kvm_x86_call(vcpu_precreate)(kvm); + if (r) + return r; + + if (enable_mediated_pmu && kvm->arch.enable_pmu && + !kvm->arch.created_mediated_pmu) { + if (irqchip_in_kernel(kvm)) { + r = perf_create_mediated_pmu(); + if (r) { + pr_warn_ratelimited(PERF_MEDIATED_PMU_MSG); + return r; + } + kvm->arch.created_mediated_pmu = true; + } else { + kvm->arch.enable_pmu = false; + } + } + return 0; } int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu) @@ -13319,6 +13350,8 @@ void kvm_arch_destroy_vm(struct kvm *kvm) __x86_set_memory_region(kvm, TSS_PRIVATE_MEMSLOT, 0, 0); mutex_unlock(&kvm->slots_lock); } + if (kvm->arch.created_mediated_pmu) + perf_release_mediated_pmu(); kvm_destroy_vcpus(kvm); kvm_free_msr_filter(srcu_dereference_check(kvm->arch.msr_filter, &kvm->srcu, 1)); #ifdef CONFIG_KVM_IOAPIC diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h index fdab0ad490988..6e1fb1680c0a3 100644 --- a/arch/x86/kvm/x86.h +++ b/arch/x86/kvm/x86.h @@ -470,6 +470,7 @@ extern struct kvm_caps kvm_caps; extern struct kvm_host_values kvm_host; extern bool enable_pmu; +extern bool enable_mediated_pmu; /* * Get a filtered version of KVM's supported XCR0 that strips out dynamic -- 2.47.3