Wire up system vector 0xf5 for handling PMIs (i.e. interrupts delivered
through the LVTPC) while running KVM guests with a mediated PMU. Perf
currently delivers all PMIs as NMIs, e.g. so that events that trigger while
IRQs are disabled aren't delayed and generate useless records, but due to
the multiplexing of NMIs throughout the system, correctly identifying NMIs
for a mediated PMU is practically infeasible.
To (greatly) simplify identifying guest mediated PMU PMIs, perf will
switch the CPU's LVTPC between PERF_GUEST_MEDIATED_PMI_VECTOR and NMI when
guest PMU context is loaded/put. I.e. PMIs that are generated by the CPU
while the guest is active will be identified purely based on the IRQ
vector.
Route the vector through perf, e.g. as opposed to letting KVM attach a
handler directly a la posted interrupt notification vectors, as perf owns
the LVTPC and thus is the rightful owner of PERF_GUEST_MEDIATED_PMI_VECTOR.
Functionally, having KVM directly own the vector would be fine (both KVM
and perf will be completely aware of when a mediated PMU is active), but
would lead to an undesirable split in ownership: perf would be responsible
for installing the vector, but not handling the resulting IRQs.
Add a new perf_guest_info_callbacks hook (and static call) to allow KVM to
register its handler with perf when running guests with mediated PMUs.
Note, because KVM always runs guests with host IRQs enabled, there is no
danger of a PMI being delayed from the guest's perspective due to using a
regular IRQ instead of an NMI.
Signed-off-by: Sean Christopherson <seanjc@google.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Tested-by: Xudong Hao <xudong.hao@intel.com>
Link: https://patch.msgid.link/20251206001720.468579-9-seanjc@google.com
SYSVEC(IRQ_WORK_VECTOR, irq_work),
+ SYSVEC(PERF_GUEST_MEDIATED_PMI_VECTOR, perf_guest_mediated_pmi_handler),
SYSVEC(POSTED_INTR_VECTOR, kvm_posted_intr_ipi),
SYSVEC(POSTED_INTR_WAKEUP_VECTOR, kvm_posted_intr_wakeup_ipi),
SYSVEC(POSTED_INTR_NESTED_VECTOR, kvm_posted_intr_nested_ipi),
unsigned int kvm_posted_intr_ipis;
unsigned int kvm_posted_intr_wakeup_ipis;
unsigned int kvm_posted_intr_nested_ipis;
+#endif
+#ifdef CONFIG_GUEST_PERF_EVENTS
+ unsigned int perf_guest_mediated_pmis;
#endif
unsigned int x86_platform_ipis; /* arch dependent */
unsigned int apic_perf_irqs;
# define fred_sysvec_kvm_posted_intr_nested_ipi NULL
#endif
+# ifdef CONFIG_GUEST_PERF_EVENTS
+DECLARE_IDTENTRY_SYSVEC(PERF_GUEST_MEDIATED_PMI_VECTOR, sysvec_perf_guest_mediated_pmi_handler);
+#else
+# define fred_sysvec_perf_guest_mediated_pmi_handler NULL
+#endif
+
# ifdef CONFIG_X86_POSTED_MSI
DECLARE_IDTENTRY_SYSVEC(POSTED_MSI_NOTIFICATION_VECTOR, sysvec_posted_msi_notification);
#else
*/
#define IRQ_WORK_VECTOR 0xf6
-/* 0xf5 - unused, was UV_BAU_MESSAGE */
+/* IRQ vector for PMIs when running a guest with a mediated PMU. */
+#define PERF_GUEST_MEDIATED_PMI_VECTOR 0xf5
+
#define DEFERRED_ERROR_VECTOR 0xf4
/* Vector on which hypervisor callbacks will be delivered */
INTG(POSTED_INTR_WAKEUP_VECTOR, asm_sysvec_kvm_posted_intr_wakeup_ipi),
INTG(POSTED_INTR_NESTED_VECTOR, asm_sysvec_kvm_posted_intr_nested_ipi),
# endif
+#ifdef CONFIG_GUEST_PERF_EVENTS
+ INTG(PERF_GUEST_MEDIATED_PMI_VECTOR, asm_sysvec_perf_guest_mediated_pmi_handler),
+#endif
# ifdef CONFIG_IRQ_WORK
INTG(IRQ_WORK_VECTOR, asm_sysvec_irq_work),
# endif
irq_stats(j)->kvm_posted_intr_wakeup_ipis);
seq_puts(p, " Posted-interrupt wakeup event\n");
#endif
+#ifdef CONFIG_GUEST_PERF_EVENTS
+ seq_printf(p, "%*s: ", prec, "VPMI");
+ for_each_online_cpu(j)
+ seq_printf(p, "%10u ",
+ irq_stats(j)->perf_guest_mediated_pmis);
+ seq_puts(p, " Perf Guest Mediated PMI\n");
+#endif
#ifdef CONFIG_X86_POSTED_MSI
seq_printf(p, "%*s: ", prec, "PMN");
for_each_online_cpu(j)
}
#endif
+#ifdef CONFIG_GUEST_PERF_EVENTS
+/*
+ * Handler for PERF_GUEST_MEDIATED_PMI_VECTOR.
+ */
+DEFINE_IDTENTRY_SYSVEC(sysvec_perf_guest_mediated_pmi_handler)
+{
+ apic_eoi();
+ inc_irq_stat(perf_guest_mediated_pmis);
+ perf_guest_handle_mediated_pmi();
+}
+#endif
+
#if IS_ENABLED(CONFIG_KVM)
static void dummy_handler(void) {}
static void (*kvm_posted_intr_wakeup_handler)(void) = dummy_handler;
unsigned int (*state)(void);
unsigned long (*get_ip)(void);
unsigned int (*handle_intel_pt_intr)(void);
+
+ void (*handle_mediated_pmi)(void);
};
#ifdef CONFIG_GUEST_PERF_EVENTS
DECLARE_STATIC_CALL(__perf_guest_state, *perf_guest_cbs->state);
DECLARE_STATIC_CALL(__perf_guest_get_ip, *perf_guest_cbs->get_ip);
DECLARE_STATIC_CALL(__perf_guest_handle_intel_pt_intr, *perf_guest_cbs->handle_intel_pt_intr);
+DECLARE_STATIC_CALL(__perf_guest_handle_mediated_pmi, *perf_guest_cbs->handle_mediated_pmi);
static inline unsigned int perf_guest_state(void)
{
return static_call(__perf_guest_handle_intel_pt_intr)();
}
+static inline void perf_guest_handle_mediated_pmi(void)
+{
+ static_call(__perf_guest_handle_mediated_pmi)();
+}
+
extern void perf_register_guest_info_callbacks(struct perf_guest_info_callbacks *cbs);
extern void perf_unregister_guest_info_callbacks(struct perf_guest_info_callbacks *cbs);
DEFINE_STATIC_CALL_RET0(__perf_guest_state, *perf_guest_cbs->state);
DEFINE_STATIC_CALL_RET0(__perf_guest_get_ip, *perf_guest_cbs->get_ip);
DEFINE_STATIC_CALL_RET0(__perf_guest_handle_intel_pt_intr, *perf_guest_cbs->handle_intel_pt_intr);
+DEFINE_STATIC_CALL_RET0(__perf_guest_handle_mediated_pmi, *perf_guest_cbs->handle_mediated_pmi);
void perf_register_guest_info_callbacks(struct perf_guest_info_callbacks *cbs)
{
if (cbs->handle_intel_pt_intr)
static_call_update(__perf_guest_handle_intel_pt_intr,
cbs->handle_intel_pt_intr);
+
+ if (cbs->handle_mediated_pmi)
+ static_call_update(__perf_guest_handle_mediated_pmi,
+ cbs->handle_mediated_pmi);
}
EXPORT_SYMBOL_GPL(perf_register_guest_info_callbacks);
rcu_assign_pointer(perf_guest_cbs, NULL);
static_call_update(__perf_guest_state, (void *)&__static_call_return0);
static_call_update(__perf_guest_get_ip, (void *)&__static_call_return0);
- static_call_update(__perf_guest_handle_intel_pt_intr,
- (void *)&__static_call_return0);
+ static_call_update(__perf_guest_handle_intel_pt_intr, (void *)&__static_call_return0);
+ static_call_update(__perf_guest_handle_mediated_pmi, (void *)&__static_call_return0);
synchronize_rcu();
}
EXPORT_SYMBOL_GPL(perf_unregister_guest_info_callbacks);
*/
#define IRQ_WORK_VECTOR 0xf6
-/* 0xf5 - unused, was UV_BAU_MESSAGE */
+#define PERF_GUEST_MEDIATED_PMI_VECTOR 0xf5
+
#define DEFERRED_ERROR_VECTOR 0xf4
/* Vector on which hypervisor callbacks will be delivered */
.state = kvm_guest_state,
.get_ip = kvm_guest_get_ip,
.handle_intel_pt_intr = NULL,
+ .handle_mediated_pmi = NULL,
};
void kvm_register_perf_callbacks(unsigned int (*pt_intr_handler)(void))
{
kvm_guest_cbs.handle_intel_pt_intr = pt_intr_handler;
+ kvm_guest_cbs.handle_mediated_pmi = NULL;
+
perf_register_guest_info_callbacks(&kvm_guest_cbs);
}
void kvm_unregister_perf_callbacks(void)