git.ipfire.org Git - thirdparty/kernel/linux.git/commitdiff
perf/x86/core: Register a new vector for handling mediated guest PMIs
author: Sean Christopherson <seanjc@google.com>
Sat, 6 Dec 2025 00:16:44 +0000 (16:16 -0800)
committer: Peter Zijlstra <peterz@infradead.org>
Wed, 17 Dec 2025 12:31:05 +0000 (13:31 +0100)
Wire up system vector 0xf5 for handling PMIs (i.e. interrupts delivered
through the LVTPC) while running KVM guests with a mediated PMU.  Perf
currently delivers all PMIs as NMIs, e.g. so that events that trigger while
IRQs are disabled aren't delayed and thus don't generate useless records, but due to
the multiplexing of NMIs throughout the system, correctly identifying NMIs
for a mediated PMU is practically infeasible.

To (greatly) simplify identifying guest mediated PMU PMIs, perf will
switch the CPU's LVTPC between PERF_GUEST_MEDIATED_PMI_VECTOR and NMI when
guest PMU context is loaded/put.  I.e. PMIs that are generated by the CPU
while the guest is active will be identified purely based on the IRQ
vector.

Route the vector through perf, e.g. as opposed to letting KVM attach a
handler directly a la posted interrupt notification vectors, as perf owns
the LVTPC and thus is the rightful owner of PERF_GUEST_MEDIATED_PMI_VECTOR.
Functionally, having KVM directly own the vector would be fine (both KVM
and perf will be completely aware of when a mediated PMU is active), but
would lead to an undesirable split in ownership: perf would be responsible
for installing the vector, but not handling the resulting IRQs.

Add a new perf_guest_info_callbacks hook (and static call) to allow KVM to
register its handler with perf when running guests with mediated PMUs.

Note, because KVM always runs guests with host IRQs enabled, there is no
danger of a PMI being delayed from the guest's perspective due to using a
regular IRQ instead of an NMI.

Signed-off-by: Sean Christopherson <seanjc@google.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Tested-by: Xudong Hao <xudong.hao@intel.com>
Link: https://patch.msgid.link/20251206001720.468579-9-seanjc@google.com
arch/x86/entry/entry_fred.c
arch/x86/include/asm/hardirq.h
arch/x86/include/asm/idtentry.h
arch/x86/include/asm/irq_vectors.h
arch/x86/kernel/idt.c
arch/x86/kernel/irq.c
include/linux/perf_event.h
kernel/events/core.c
tools/perf/trace/beauty/arch/x86/include/asm/irq_vectors.h
virt/kvm/kvm_main.c

index 94e626cc6a0742973c31e22def1aac982c3f6758..a9b72997103d271a11dc511002fb3f73b5727d7d 100644 (file)
@@ -114,6 +114,7 @@ static idtentry_t sysvec_table[NR_SYSTEM_VECTORS] __ro_after_init = {
 
        SYSVEC(IRQ_WORK_VECTOR,                 irq_work),
 
+       SYSVEC(PERF_GUEST_MEDIATED_PMI_VECTOR,  perf_guest_mediated_pmi_handler),
        SYSVEC(POSTED_INTR_VECTOR,              kvm_posted_intr_ipi),
        SYSVEC(POSTED_INTR_WAKEUP_VECTOR,       kvm_posted_intr_wakeup_ipi),
        SYSVEC(POSTED_INTR_NESTED_VECTOR,       kvm_posted_intr_nested_ipi),
index 6b6d472baa0bb66974fc6b43919ea6d75befdc08..9314642ae93cf885669928c6a89d74e73ca56dc6 100644 (file)
@@ -18,6 +18,9 @@ typedef struct {
        unsigned int kvm_posted_intr_ipis;
        unsigned int kvm_posted_intr_wakeup_ipis;
        unsigned int kvm_posted_intr_nested_ipis;
+#endif
+#ifdef CONFIG_GUEST_PERF_EVENTS
+       unsigned int perf_guest_mediated_pmis;
 #endif
        unsigned int x86_platform_ipis; /* arch dependent */
        unsigned int apic_perf_irqs;
index 3218770670d38319b930f93e3ff0b7d7a01b6074..42bf6a58ec3688116a9b9fd2cc5bedcc8551fd0e 100644 (file)
@@ -746,6 +746,12 @@ DECLARE_IDTENTRY_SYSVEC(POSTED_INTR_NESTED_VECTOR, sysvec_kvm_posted_intr_nested
 # define fred_sysvec_kvm_posted_intr_nested_ipi                NULL
 #endif
 
+# ifdef CONFIG_GUEST_PERF_EVENTS
+DECLARE_IDTENTRY_SYSVEC(PERF_GUEST_MEDIATED_PMI_VECTOR,        sysvec_perf_guest_mediated_pmi_handler);
+#else
+# define fred_sysvec_perf_guest_mediated_pmi_handler   NULL
+#endif
+
 # ifdef CONFIG_X86_POSTED_MSI
 DECLARE_IDTENTRY_SYSVEC(POSTED_MSI_NOTIFICATION_VECTOR,        sysvec_posted_msi_notification);
 #else
index 47051871b436182f69c0995109290ceec267b99a..85253fc8e3845d53364764dd0e9cc8d6c5e3cc2c 100644 (file)
@@ -77,7 +77,9 @@
  */
 #define IRQ_WORK_VECTOR                        0xf6
 
-/* 0xf5 - unused, was UV_BAU_MESSAGE */
+/* IRQ vector for PMIs when running a guest with a mediated PMU. */
+#define PERF_GUEST_MEDIATED_PMI_VECTOR 0xf5
+
 #define DEFERRED_ERROR_VECTOR          0xf4
 
 /* Vector on which hypervisor callbacks will be delivered */
index f445bec516a0b897b72314bf806f61293ed97ad5..2604565887564a3207f3a6513a974ac36391eac9 100644 (file)
@@ -158,6 +158,9 @@ static const __initconst struct idt_data apic_idts[] = {
        INTG(POSTED_INTR_WAKEUP_VECTOR,         asm_sysvec_kvm_posted_intr_wakeup_ipi),
        INTG(POSTED_INTR_NESTED_VECTOR,         asm_sysvec_kvm_posted_intr_nested_ipi),
 # endif
+#ifdef CONFIG_GUEST_PERF_EVENTS
+       INTG(PERF_GUEST_MEDIATED_PMI_VECTOR,    asm_sysvec_perf_guest_mediated_pmi_handler),
+#endif
 # ifdef CONFIG_IRQ_WORK
        INTG(IRQ_WORK_VECTOR,                   asm_sysvec_irq_work),
 # endif
index 86f4e574de026722d687469a2ef16eb7bfc7472f..d56185b49a0e97cc8ea082e3859098cd38ebea83 100644 (file)
@@ -192,6 +192,13 @@ int arch_show_interrupts(struct seq_file *p, int prec)
                           irq_stats(j)->kvm_posted_intr_wakeup_ipis);
        seq_puts(p, "  Posted-interrupt wakeup event\n");
 #endif
+#ifdef CONFIG_GUEST_PERF_EVENTS
+       seq_printf(p, "%*s: ", prec, "VPMI");
+       for_each_online_cpu(j)
+               seq_printf(p, "%10u ",
+                          irq_stats(j)->perf_guest_mediated_pmis);
+       seq_puts(p, " Perf Guest Mediated PMI\n");
+#endif
 #ifdef CONFIG_X86_POSTED_MSI
        seq_printf(p, "%*s: ", prec, "PMN");
        for_each_online_cpu(j)
@@ -349,6 +356,18 @@ DEFINE_IDTENTRY_SYSVEC(sysvec_x86_platform_ipi)
 }
 #endif
 
+#ifdef CONFIG_GUEST_PERF_EVENTS
+/*
+ * Handler for PERF_GUEST_MEDIATED_PMI_VECTOR.
+ */
+DEFINE_IDTENTRY_SYSVEC(sysvec_perf_guest_mediated_pmi_handler)
+{
+        apic_eoi();
+        inc_irq_stat(perf_guest_mediated_pmis);
+        perf_guest_handle_mediated_pmi();
+}
+#endif
+
 #if IS_ENABLED(CONFIG_KVM)
 static void dummy_handler(void) {}
 static void (*kvm_posted_intr_wakeup_handler)(void) = dummy_handler;
index 322cfa9f3d481f9e6d44a69b78fdb260e73bfaea..82e617fad165565b810b9d134f59759282fd569e 100644 (file)
@@ -1677,6 +1677,8 @@ struct perf_guest_info_callbacks {
        unsigned int                    (*state)(void);
        unsigned long                   (*get_ip)(void);
        unsigned int                    (*handle_intel_pt_intr)(void);
+
+       void                            (*handle_mediated_pmi)(void);
 };
 
 #ifdef CONFIG_GUEST_PERF_EVENTS
@@ -1686,6 +1688,7 @@ extern struct perf_guest_info_callbacks __rcu *perf_guest_cbs;
 DECLARE_STATIC_CALL(__perf_guest_state, *perf_guest_cbs->state);
 DECLARE_STATIC_CALL(__perf_guest_get_ip, *perf_guest_cbs->get_ip);
 DECLARE_STATIC_CALL(__perf_guest_handle_intel_pt_intr, *perf_guest_cbs->handle_intel_pt_intr);
+DECLARE_STATIC_CALL(__perf_guest_handle_mediated_pmi, *perf_guest_cbs->handle_mediated_pmi);
 
 static inline unsigned int perf_guest_state(void)
 {
@@ -1702,6 +1705,11 @@ static inline unsigned int perf_guest_handle_intel_pt_intr(void)
        return static_call(__perf_guest_handle_intel_pt_intr)();
 }
 
+static inline void perf_guest_handle_mediated_pmi(void)
+{
+       static_call(__perf_guest_handle_mediated_pmi)();
+}
+
 extern void perf_register_guest_info_callbacks(struct perf_guest_info_callbacks *cbs);
 extern void perf_unregister_guest_info_callbacks(struct perf_guest_info_callbacks *cbs);
 
index bbb81a4a31965d8190bc045fb0c09b7558faf272..dd842a4ca7895c381db6bb1c59bade76ead48d30 100644 (file)
@@ -7644,6 +7644,7 @@ struct perf_guest_info_callbacks __rcu *perf_guest_cbs;
 DEFINE_STATIC_CALL_RET0(__perf_guest_state, *perf_guest_cbs->state);
 DEFINE_STATIC_CALL_RET0(__perf_guest_get_ip, *perf_guest_cbs->get_ip);
 DEFINE_STATIC_CALL_RET0(__perf_guest_handle_intel_pt_intr, *perf_guest_cbs->handle_intel_pt_intr);
+DEFINE_STATIC_CALL_RET0(__perf_guest_handle_mediated_pmi, *perf_guest_cbs->handle_mediated_pmi);
 
 void perf_register_guest_info_callbacks(struct perf_guest_info_callbacks *cbs)
 {
@@ -7658,6 +7659,10 @@ void perf_register_guest_info_callbacks(struct perf_guest_info_callbacks *cbs)
        if (cbs->handle_intel_pt_intr)
                static_call_update(__perf_guest_handle_intel_pt_intr,
                                   cbs->handle_intel_pt_intr);
+
+       if (cbs->handle_mediated_pmi)
+               static_call_update(__perf_guest_handle_mediated_pmi,
+                                  cbs->handle_mediated_pmi);
 }
 EXPORT_SYMBOL_GPL(perf_register_guest_info_callbacks);
 
@@ -7669,8 +7674,8 @@ void perf_unregister_guest_info_callbacks(struct perf_guest_info_callbacks *cbs)
        rcu_assign_pointer(perf_guest_cbs, NULL);
        static_call_update(__perf_guest_state, (void *)&__static_call_return0);
        static_call_update(__perf_guest_get_ip, (void *)&__static_call_return0);
-       static_call_update(__perf_guest_handle_intel_pt_intr,
-                          (void *)&__static_call_return0);
+       static_call_update(__perf_guest_handle_intel_pt_intr, (void *)&__static_call_return0);
+       static_call_update(__perf_guest_handle_mediated_pmi, (void *)&__static_call_return0);
        synchronize_rcu();
 }
 EXPORT_SYMBOL_GPL(perf_unregister_guest_info_callbacks);
index 47051871b436182f69c0995109290ceec267b99a..6e1d5b955aae474a9b41d204b49a001ec65d732e 100644 (file)
@@ -77,7 +77,8 @@
  */
 #define IRQ_WORK_VECTOR                        0xf6
 
-/* 0xf5 - unused, was UV_BAU_MESSAGE */
+#define PERF_GUEST_MEDIATED_PMI_VECTOR 0xf5
+
 #define DEFERRED_ERROR_VECTOR          0xf4
 
 /* Vector on which hypervisor callbacks will be delivered */
index 5fcd401a58973d645661fe84188f34ee559c06a6..21a0d226d63f8dae8b285537a2c67c0a2d06db96 100644 (file)
@@ -6467,11 +6467,14 @@ static struct perf_guest_info_callbacks kvm_guest_cbs = {
        .state                  = kvm_guest_state,
        .get_ip                 = kvm_guest_get_ip,
        .handle_intel_pt_intr   = NULL,
+       .handle_mediated_pmi    = NULL,
 };
 
 void kvm_register_perf_callbacks(unsigned int (*pt_intr_handler)(void))
 {
        kvm_guest_cbs.handle_intel_pt_intr = pt_intr_handler;
+       kvm_guest_cbs.handle_mediated_pmi = NULL;
+
        perf_register_guest_info_callbacks(&kvm_guest_cbs);
 }
 void kvm_unregister_perf_callbacks(void)