]> git.ipfire.org Git - thirdparty/kernel/linux.git/commitdiff
KVM: selftests: Handle Intel Atom errata that leads to PMU event overcount
authordongsheng <dongsheng.x.zhang@intel.com>
Fri, 19 Sep 2025 21:46:48 +0000 (14:46 -0700)
committerSean Christopherson <seanjc@google.com>
Tue, 23 Sep 2025 15:38:59 +0000 (08:38 -0700)
Add a PMU errata framework and use it to relax precise event counts on
Atom platforms that overcount "Instruction Retired" and "Branch Instruction
Retired" events, as the overcount issues on VM-Exit/VM-Entry are impossible
to prevent from userspace, e.g. the test can't prevent host IRQs.

Set up errata during early initialization and automatically sync the mask
to VMs so that tests can check for errata without having to manually
manage host=>guest variables.

For Intel Atom CPUs, the PMU events "Instruction Retired" and/or
"Branch Instruction Retired" may be overcounted for certain
instructions, like FAR CALL/JMP, RETF, IRET, VMENTRY/VMEXIT/VMPTRLD
and complex SGX/SMX/CSTATE instructions/flows.

The detailed information can be found in the errata (section SRF7):
https://edc.intel.com/content/www/us/en/design/products-and-solutions/processors-and-chipsets/sierra-forest/xeon-6700-series-processor-with-e-cores-specification-update/errata-details/

For the Atom platforms up to and including Sierra Forest, both the
"Instruction Retired" and "Branch Instruction Retired" events are
overcounted on these instructions, but for Clearwater Forest only the
"Instruction Retired" event is overcounted on these instructions.

Signed-off-by: dongsheng <dongsheng.x.zhang@intel.com>
Co-developed-by: Dapeng Mi <dapeng1.mi@linux.intel.com>
Signed-off-by: Dapeng Mi <dapeng1.mi@linux.intel.com>
Tested-by: Yi Lai <yi1.lai@intel.com>
Co-developed-by: Sean Christopherson <seanjc@google.com>
Reviewed-by: Dapeng Mi <dapeng1.mi@linux.intel.com>
Tested-by: Dapeng Mi <dapeng1.mi@linux.intel.com>
Link: https://lore.kernel.org/r/20250919214648.1585683-6-seanjc@google.com
Signed-off-by: Sean Christopherson <seanjc@google.com>
tools/testing/selftests/kvm/include/x86/pmu.h
tools/testing/selftests/kvm/lib/x86/pmu.c
tools/testing/selftests/kvm/lib/x86/processor.c
tools/testing/selftests/kvm/x86/pmu_counters_test.c
tools/testing/selftests/kvm/x86/pmu_event_filter_test.c

index 2aabda2da00256fb3509f7b9d85a5d181d1a5ab6..72575eadb63a0818259b28a1b8380de8a92db93f 100644 (file)
@@ -5,8 +5,11 @@
 #ifndef SELFTEST_KVM_PMU_H
 #define SELFTEST_KVM_PMU_H
 
+#include <stdbool.h>
 #include <stdint.h>
 
+#include <linux/bits.h>
+
 #define KVM_PMU_EVENT_FILTER_MAX_EVENTS                        300
 
 /*
@@ -104,4 +107,17 @@ enum amd_pmu_zen_events {
 extern const uint64_t intel_pmu_arch_events[];
 extern const uint64_t amd_pmu_zen_events[];
 
+enum pmu_errata {
+       INSTRUCTIONS_RETIRED_OVERCOUNT, /* "Instruction Retired" may overcount */
+       BRANCHES_RETIRED_OVERCOUNT,     /* "Branch Instruction Retired" may overcount */
+};
+extern uint64_t pmu_errata_mask;       /* bit N set => enum pmu_errata value N applies */
+
+void kvm_init_pmu_errata(void);
+
+static inline bool this_pmu_has_errata(enum pmu_errata errata)
+{
+       return !!(pmu_errata_mask & BIT_ULL(errata)); /* errata is a bit index */
+}
+
 #endif /* SELFTEST_KVM_PMU_H */
index 5ab44bf54773722ec66a8691000c6b84d2f5a435..34cb57d1d6718917edb6f73c6ac6bbac38256eb6 100644 (file)
@@ -8,6 +8,7 @@
 #include <linux/kernel.h>
 
 #include "kvm_util.h"
+#include "processor.h"
 #include "pmu.h"
 
 const uint64_t intel_pmu_arch_events[] = {
@@ -34,3 +35,46 @@ const uint64_t amd_pmu_zen_events[] = {
        AMD_ZEN_BRANCHES_MISPREDICTED,
 };
 kvm_static_assert(ARRAY_SIZE(amd_pmu_zen_events) == NR_AMD_ZEN_EVENTS);
+
+/*
+ * Return this CPU's PMU errata mask.  On Intel Atom CPUs, "Instruction
+ * Retired" and/or "Branch Instruction Retired" may be overcounted for
+ * certain instructions, like FAR CALL/JMP, RETF, IRET, VMENTRY/VMEXIT/VMPTRLD
+ * and complex SGX/SMX/CSTATE instructions/flows.
+ *
+ * The detailed information can be found in the errata (section SRF7):
+ * https://edc.intel.com/content/www/us/en/design/products-and-solutions/processors-and-chipsets/sierra-forest/xeon-6700-series-processor-with-e-cores-specification-update/errata-details/
+ *
+ * For the Atom platforms up to and including Sierra Forest, both the
+ * "Instruction Retired" and "Branch Instruction Retired" events are
+ * overcounted on these instructions, but for Clearwater Forest only
+ * the "Instruction Retired" event is overcounted on these instructions.
+ */
+static uint64_t get_pmu_errata(void)
+{
+       if (!this_cpu_is_intel())
+               return 0; /* only Intel errata are tracked here */
+
+       if (this_cpu_family() != 0x6)
+               return 0; /* all models listed below are family 6 */
+
+       switch (this_cpu_model()) {
+       case 0xDD: /* Clearwater Forest */
+               return BIT_ULL(INSTRUCTIONS_RETIRED_OVERCOUNT);
+       case 0xAF: /* Sierra Forest */
+       case 0x4D: /* Avoton, Rangeley */
+       case 0x5F: /* Denverton */
+       case 0x86: /* Jacobsville */
+               return BIT_ULL(INSTRUCTIONS_RETIRED_OVERCOUNT) |
+                      BIT_ULL(BRANCHES_RETIRED_OVERCOUNT);
+       default:
+               return 0; /* no errata recorded for any other model */
+       }
+}
+
+uint64_t pmu_errata_mask; /* one bit per enum pmu_errata value */
+/* Compute the errata mask once and cache it in pmu_errata_mask. */
+void kvm_init_pmu_errata(void)
+{
+       pmu_errata_mask = get_pmu_errata();
+}
index 3b63c99f7b9639e0de8e629ae9db7f5cb6d78d90..4402d2e1ea69244b80dd9dc65147a00deeeb7544 100644 (file)
@@ -6,6 +6,7 @@
 #include "linux/bitmap.h"
 #include "test_util.h"
 #include "kvm_util.h"
+#include "pmu.h"
 #include "processor.h"
 #include "sev.h"
 
@@ -638,6 +639,7 @@ void kvm_arch_vm_post_create(struct kvm_vm *vm)
        sync_global_to_guest(vm, host_cpu_is_intel);
        sync_global_to_guest(vm, host_cpu_is_amd);
        sync_global_to_guest(vm, is_forced_emulation_enabled);
+       sync_global_to_guest(vm, pmu_errata_mask);
 
        if (is_sev_vm(vm)) {
                struct kvm_sev_init init = { 0 };
@@ -1269,6 +1271,8 @@ void kvm_selftest_arch_init(void)
        host_cpu_is_intel = this_cpu_is_intel();
        host_cpu_is_amd = this_cpu_is_amd();
        is_forced_emulation_enabled = kvm_is_forced_emulation_enabled();
+
+       kvm_init_pmu_errata();
 }
 
 bool sys_clocksource_is_based_on_tsc(void)
index 24599d98f898d18e7b0cd2a8fdb9b00f65eeec21..eb6c12a2cdd48d71c2f79fc83e2c0d205b033f28 100644 (file)
@@ -163,10 +163,18 @@ static void guest_assert_event_count(uint8_t idx, uint32_t pmc, uint32_t pmc_msr
 
        switch (idx) {
        case INTEL_ARCH_INSTRUCTIONS_RETIRED_INDEX:
-               GUEST_ASSERT_EQ(count, NUM_INSNS_RETIRED);
+               /* Relax precise count check due to VM-EXIT/VM-ENTRY overcount issue */
+               if (this_pmu_has_errata(INSTRUCTIONS_RETIRED_OVERCOUNT))
+                       GUEST_ASSERT(count >= NUM_INSNS_RETIRED);
+               else
+                       GUEST_ASSERT_EQ(count, NUM_INSNS_RETIRED);
                break;
        case INTEL_ARCH_BRANCHES_RETIRED_INDEX:
-               GUEST_ASSERT_EQ(count, NUM_BRANCH_INSNS_RETIRED);
+               /* Relax precise count check due to VM-EXIT/VM-ENTRY overcount issue */
+               if (this_pmu_has_errata(BRANCHES_RETIRED_OVERCOUNT))
+                       GUEST_ASSERT(count >= NUM_BRANCH_INSNS_RETIRED);
+               else
+                       GUEST_ASSERT_EQ(count, NUM_BRANCH_INSNS_RETIRED);
                break;
        case INTEL_ARCH_LLC_REFERENCES_INDEX:
        case INTEL_ARCH_LLC_MISSES_INDEX:
index c15513cd74d11807781301d36b5a22b8397bdfdf..1c5b7611db2418b4a8255ff11b52bf35e290a454 100644 (file)
@@ -214,8 +214,10 @@ static void remove_event(struct __kvm_pmu_event_filter *f, uint64_t event)
 do {                                                                                   \
        uint64_t br = pmc_results.branches_retired;                                     \
        uint64_t ir = pmc_results.instructions_retired;                                 \
+       bool br_matched = this_pmu_has_errata(BRANCHES_RETIRED_OVERCOUNT) ?             \
+                         br >= NUM_BRANCHES : br == NUM_BRANCHES;                      \
                                                                                        \
-       if (br && br != NUM_BRANCHES)                                                   \
+       if (br && !br_matched)                                                          \
                pr_info("%s: Branch instructions retired = %lu (expected %u)\n",        \
                        __func__, br, NUM_BRANCHES);                                    \
        TEST_ASSERT(br, "%s: Branch instructions retired = %lu (expected > 0)",         \