git.ipfire.org Git - thirdparty/linux.git/commitdiff
KVM: x86: Introduce kvm_cpu_caps to replace runtime CPUID masking
author Sean Christopherson <sean.j.christopherson@intel.com>
Mon, 2 Mar 2020 23:56:41 +0000 (15:56 -0800)
committer Paolo Bonzini <pbonzini@redhat.com>
Mon, 16 Mar 2020 16:58:23 +0000 (17:58 +0100)
Calculate the CPUID masks for KVM_GET_SUPPORTED_CPUID at load time using
what is effectively a KVM-adjusted copy of boot_cpu_data, or more
precisely, the x86_capability array in boot_cpu_data.

In terms of KVM support, the vast majority of CPUID feature bits are
constant, and *all* feature support is known at KVM load time.  Rather
than apply boot_cpu_data, which is effectively read-only after init,
at runtime, copy it into a KVM-specific array and use *that* to mask
CPUID registers.

In addition to consolidating the masking, kvm_cpu_caps can be adjusted
by SVM/VMX at load time and thus eliminate all feature bit manipulation
in ->set_supported_cpuid().

Opportunistically clean up a few warts:

  - Replace bare "unsigned" with "unsigned int" when a feature flag is
    captured in a local variable, e.g. f_nx.

  - Sort the CPUID masks by function, index and register (alphabetically
    for registers, i.e. EBX comes before ECX/EDX).

  - Remove the superfluous /* cpuid 7.0.ecx */ comments.

No functional change intended.

Signed-off-by: Sean Christopherson <sean.j.christopherson@intel.com>
[Call kvm_set_cpu_caps from kvm_x86_ops->hardware_setup due to fixed
 GBPAGES patch. - Paolo]
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
arch/x86/kvm/cpuid.c
arch/x86/kvm/cpuid.h
arch/x86/kvm/svm.c
arch/x86/kvm/vmx/vmx.c

index 3e4b03c8ec12fdfd8a0882c9b7c9c66bb1f3115b..31ea934d9b4998f6eea95397405c0603fa54500a 100644 (file)
 #include "trace.h"
 #include "pmu.h"
 
+/*
+ * Unlike "struct cpuinfo_x86.x86_capability", kvm_cpu_caps doesn't need to be
+ * aligned to sizeof(unsigned long) because it's not accessed via bitops.
+ */
+u32 kvm_cpu_caps[NCAPINTS] __read_mostly;
+EXPORT_SYMBOL_GPL(kvm_cpu_caps);
+
 static u32 xstate_required_size(u64 xstate_bv, bool compacted)
 {
        int feature_bit = 0;
@@ -254,6 +261,123 @@ out:
        return r;
 }
 
+static __always_inline void kvm_cpu_cap_mask(enum cpuid_leafs leaf, u32 mask)
+{
+       reverse_cpuid_check(leaf);
+       kvm_cpu_caps[leaf] &= mask;
+}
+
+void kvm_set_cpu_caps(void)
+{
+       unsigned int f_nx = is_efer_nx() ? F(NX) : 0;
+#ifdef CONFIG_X86_64
+       unsigned int f_gbpages = F(GBPAGES);
+       unsigned int f_lm = F(LM);
+#else
+       unsigned int f_gbpages = 0;
+       unsigned int f_lm = 0;
+#endif
+
+       BUILD_BUG_ON(sizeof(kvm_cpu_caps) >
+                    sizeof(boot_cpu_data.x86_capability));
+
+       memcpy(&kvm_cpu_caps, &boot_cpu_data.x86_capability,
+              sizeof(kvm_cpu_caps));
+
+       kvm_cpu_cap_mask(CPUID_1_ECX,
+               /*
+                * NOTE: MONITOR (and MWAIT) are emulated as NOP, but *not*
+                * advertised to guests via CPUID!
+                */
+               F(XMM3) | F(PCLMULQDQ) | 0 /* DTES64, MONITOR */ |
+               0 /* DS-CPL, VMX, SMX, EST */ |
+               0 /* TM2 */ | F(SSSE3) | 0 /* CNXT-ID */ | 0 /* Reserved */ |
+               F(FMA) | F(CX16) | 0 /* xTPR Update, PDCM */ |
+               F(PCID) | 0 /* Reserved, DCA */ | F(XMM4_1) |
+               F(XMM4_2) | F(X2APIC) | F(MOVBE) | F(POPCNT) |
+               0 /* Reserved*/ | F(AES) | F(XSAVE) | 0 /* OSXSAVE */ | F(AVX) |
+               F(F16C) | F(RDRAND)
+       );
+
+       kvm_cpu_cap_mask(CPUID_1_EDX,
+               F(FPU) | F(VME) | F(DE) | F(PSE) |
+               F(TSC) | F(MSR) | F(PAE) | F(MCE) |
+               F(CX8) | F(APIC) | 0 /* Reserved */ | F(SEP) |
+               F(MTRR) | F(PGE) | F(MCA) | F(CMOV) |
+               F(PAT) | F(PSE36) | 0 /* PSN */ | F(CLFLUSH) |
+               0 /* Reserved, DS, ACPI */ | F(MMX) |
+               F(FXSR) | F(XMM) | F(XMM2) | F(SELFSNOOP) |
+               0 /* HTT, TM, Reserved, PBE */
+       );
+
+       kvm_cpu_cap_mask(CPUID_7_0_EBX,
+               F(FSGSBASE) | F(BMI1) | F(HLE) | F(AVX2) | F(SMEP) |
+               F(BMI2) | F(ERMS) | 0 /*INVPCID*/ | F(RTM) | 0 /*MPX*/ | F(RDSEED) |
+               F(ADX) | F(SMAP) | F(AVX512IFMA) | F(AVX512F) | F(AVX512PF) |
+               F(AVX512ER) | F(AVX512CD) | F(CLFLUSHOPT) | F(CLWB) | F(AVX512DQ) |
+               F(SHA_NI) | F(AVX512BW) | F(AVX512VL) | 0 /*INTEL_PT*/
+       );
+
+       kvm_cpu_cap_mask(CPUID_7_ECX,
+               F(AVX512VBMI) | F(LA57) | 0 /*PKU*/ | 0 /*OSPKE*/ | F(RDPID) |
+               F(AVX512_VPOPCNTDQ) | F(UMIP) | F(AVX512_VBMI2) | F(GFNI) |
+               F(VAES) | F(VPCLMULQDQ) | F(AVX512_VNNI) | F(AVX512_BITALG) |
+               F(CLDEMOTE) | F(MOVDIRI) | F(MOVDIR64B) | 0 /*WAITPKG*/
+       );
+       /* Set LA57 based on hardware capability. */
+       if (cpuid_ecx(7) & F(LA57))
+               kvm_cpu_cap_set(X86_FEATURE_LA57);
+
+       kvm_cpu_cap_mask(CPUID_7_EDX,
+               F(AVX512_4VNNIW) | F(AVX512_4FMAPS) | F(SPEC_CTRL) |
+               F(SPEC_CTRL_SSBD) | F(ARCH_CAPABILITIES) | F(INTEL_STIBP) |
+               F(MD_CLEAR)
+       );
+
+       kvm_cpu_cap_mask(CPUID_7_1_EAX,
+               F(AVX512_BF16)
+       );
+
+       kvm_cpu_cap_mask(CPUID_D_1_EAX,
+               F(XSAVEOPT) | F(XSAVEC) | F(XGETBV1) | F(XSAVES)
+       );
+
+       kvm_cpu_cap_mask(CPUID_8000_0001_ECX,
+               F(LAHF_LM) | F(CMP_LEGACY) | 0 /*SVM*/ | 0 /* ExtApicSpace */ |
+               F(CR8_LEGACY) | F(ABM) | F(SSE4A) | F(MISALIGNSSE) |
+               F(3DNOWPREFETCH) | F(OSVW) | 0 /* IBS */ | F(XOP) |
+               0 /* SKINIT, WDT, LWP */ | F(FMA4) | F(TBM) |
+               F(TOPOEXT) | F(PERFCTR_CORE)
+       );
+
+       kvm_cpu_cap_mask(CPUID_8000_0001_EDX,
+               F(FPU) | F(VME) | F(DE) | F(PSE) |
+               F(TSC) | F(MSR) | F(PAE) | F(MCE) |
+               F(CX8) | F(APIC) | 0 /* Reserved */ | F(SYSCALL) |
+               F(MTRR) | F(PGE) | F(MCA) | F(CMOV) |
+               F(PAT) | F(PSE36) | 0 /* Reserved */ |
+               f_nx | 0 /* Reserved */ | F(MMXEXT) | F(MMX) |
+               F(FXSR) | F(FXSR_OPT) | f_gbpages | F(RDTSCP) |
+               0 /* Reserved */ | f_lm | F(3DNOWEXT) | F(3DNOW)
+       );
+
+       if (!tdp_enabled && IS_ENABLED(CONFIG_X86_64))
+               kvm_cpu_cap_set(X86_FEATURE_GBPAGES);
+
+       kvm_cpu_cap_mask(CPUID_8000_0008_EBX,
+               F(CLZERO) | F(XSAVEERPTR) |
+               F(WBNOINVD) | F(AMD_IBPB) | F(AMD_IBRS) | F(AMD_SSBD) | F(VIRT_SSBD) |
+               F(AMD_SSB_NO) | F(AMD_STIBP) | F(AMD_STIBP_ALWAYS_ON)
+       );
+
+       kvm_cpu_cap_mask(CPUID_C000_0001_EDX,
+               F(XSTORE) | F(XSTORE_EN) | F(XCRYPT) | F(XCRYPT_EN) |
+               F(ACE2) | F(ACE2_EN) | F(PHE) | F(PHE_EN) |
+               F(PMM) | F(PMM_EN)
+       );
+}
+EXPORT_SYMBOL_GPL(kvm_set_cpu_caps);
+
 struct kvm_cpuid_array {
        struct kvm_cpuid_entry2 *entries;
        const int maxnent;
@@ -331,48 +455,13 @@ static int __do_cpuid_func_emulated(struct kvm_cpuid_array *array, u32 func)
 
 static inline void do_cpuid_7_mask(struct kvm_cpuid_entry2 *entry)
 {
-       unsigned f_la57;
-
-       /* cpuid 7.0.ebx */
-       const u32 kvm_cpuid_7_0_ebx_x86_features =
-               F(FSGSBASE) | F(BMI1) | F(HLE) | F(AVX2) | F(SMEP) |
-               F(BMI2) | F(ERMS) | 0 /*INVPCID*/ | F(RTM) | 0 /*MPX*/ | F(RDSEED) |
-               F(ADX) | F(SMAP) | F(AVX512IFMA) | F(AVX512F) | F(AVX512PF) |
-               F(AVX512ER) | F(AVX512CD) | F(CLFLUSHOPT) | F(CLWB) | F(AVX512DQ) |
-               F(SHA_NI) | F(AVX512BW) | F(AVX512VL) | 0 /*INTEL_PT*/;
-
-       /* cpuid 7.0.ecx*/
-       const u32 kvm_cpuid_7_0_ecx_x86_features =
-               F(AVX512VBMI) | F(LA57) | 0 /*PKU*/ | 0 /*OSPKE*/ | F(RDPID) |
-               F(AVX512_VPOPCNTDQ) | F(UMIP) | F(AVX512_VBMI2) | F(GFNI) |
-               F(VAES) | F(VPCLMULQDQ) | F(AVX512_VNNI) | F(AVX512_BITALG) |
-               F(CLDEMOTE) | F(MOVDIRI) | F(MOVDIR64B) | 0 /*WAITPKG*/;
-
-       /* cpuid 7.0.edx*/
-       const u32 kvm_cpuid_7_0_edx_x86_features =
-               F(AVX512_4VNNIW) | F(AVX512_4FMAPS) | F(SPEC_CTRL) |
-               F(SPEC_CTRL_SSBD) | F(ARCH_CAPABILITIES) | F(INTEL_STIBP) |
-               F(MD_CLEAR);
-
-       /* cpuid 7.1.eax */
-       const u32 kvm_cpuid_7_1_eax_x86_features =
-               F(AVX512_BF16);
-
        switch (entry->index) {
        case 0:
                entry->eax = min(entry->eax, 1u);
-               entry->ebx &= kvm_cpuid_7_0_ebx_x86_features;
                cpuid_entry_mask(entry, CPUID_7_0_EBX);
                /* TSC_ADJUST is emulated */
                cpuid_entry_set(entry, X86_FEATURE_TSC_ADJUST);
-
-               entry->ecx &= kvm_cpuid_7_0_ecx_x86_features;
-               f_la57 = cpuid_entry_get(entry, X86_FEATURE_LA57);
                cpuid_entry_mask(entry, CPUID_7_ECX);
-               /* Set LA57 based on hardware capability. */
-               entry->ecx |= f_la57;
-
-               entry->edx &= kvm_cpuid_7_0_edx_x86_features;
                cpuid_entry_mask(entry, CPUID_7_EDX);
                if (boot_cpu_has(X86_FEATURE_IBPB) && boot_cpu_has(X86_FEATURE_IBRS))
                        cpuid_entry_set(entry, X86_FEATURE_SPEC_CTRL);
@@ -387,7 +476,7 @@ static inline void do_cpuid_7_mask(struct kvm_cpuid_entry2 *entry)
                cpuid_entry_set(entry, X86_FEATURE_ARCH_CAPABILITIES);
                break;
        case 1:
-               entry->eax &= kvm_cpuid_7_1_eax_x86_features;
+               cpuid_entry_mask(entry, CPUID_7_1_EAX);
                entry->ebx = 0;
                entry->ecx = 0;
                entry->edx = 0;
@@ -406,72 +495,8 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function)
 {
        struct kvm_cpuid_entry2 *entry;
        int r, i, max_idx;
-       unsigned f_nx = is_efer_nx() ? F(NX) : 0;
-#ifdef CONFIG_X86_64
-       unsigned f_gbpages = F(GBPAGES);
-       unsigned f_lm = F(LM);
-#else
-       unsigned f_gbpages = 0;
-       unsigned f_lm = 0;
-#endif
        unsigned f_intel_pt = kvm_x86_ops->pt_supported() ? F(INTEL_PT) : 0;
 
-       /* cpuid 1.edx */
-       const u32 kvm_cpuid_1_edx_x86_features =
-               F(FPU) | F(VME) | F(DE) | F(PSE) |
-               F(TSC) | F(MSR) | F(PAE) | F(MCE) |
-               F(CX8) | F(APIC) | 0 /* Reserved */ | F(SEP) |
-               F(MTRR) | F(PGE) | F(MCA) | F(CMOV) |
-               F(PAT) | F(PSE36) | 0 /* PSN */ | F(CLFLUSH) |
-               0 /* Reserved, DS, ACPI */ | F(MMX) |
-               F(FXSR) | F(XMM) | F(XMM2) | F(SELFSNOOP) |
-               0 /* HTT, TM, Reserved, PBE */;
-       /* cpuid 0x80000001.edx */
-       const u32 kvm_cpuid_8000_0001_edx_x86_features =
-               F(FPU) | F(VME) | F(DE) | F(PSE) |
-               F(TSC) | F(MSR) | F(PAE) | F(MCE) |
-               F(CX8) | F(APIC) | 0 /* Reserved */ | F(SYSCALL) |
-               F(MTRR) | F(PGE) | F(MCA) | F(CMOV) |
-               F(PAT) | F(PSE36) | 0 /* Reserved */ |
-               f_nx | 0 /* Reserved */ | F(MMXEXT) | F(MMX) |
-               F(FXSR) | F(FXSR_OPT) | f_gbpages | F(RDTSCP) |
-               0 /* Reserved */ | f_lm | F(3DNOWEXT) | F(3DNOW);
-       /* cpuid 1.ecx */
-       const u32 kvm_cpuid_1_ecx_x86_features =
-               /* NOTE: MONITOR (and MWAIT) are emulated as NOP,
-                * but *not* advertised to guests via CPUID ! */
-               F(XMM3) | F(PCLMULQDQ) | 0 /* DTES64, MONITOR */ |
-               0 /* DS-CPL, VMX, SMX, EST */ |
-               0 /* TM2 */ | F(SSSE3) | 0 /* CNXT-ID */ | 0 /* Reserved */ |
-               F(FMA) | F(CX16) | 0 /* xTPR Update, PDCM */ |
-               F(PCID) | 0 /* Reserved, DCA */ | F(XMM4_1) |
-               F(XMM4_2) | F(X2APIC) | F(MOVBE) | F(POPCNT) |
-               0 /* Reserved*/ | F(AES) | F(XSAVE) | 0 /* OSXSAVE */ | F(AVX) |
-               F(F16C) | F(RDRAND);
-       /* cpuid 0x80000001.ecx */
-       const u32 kvm_cpuid_8000_0001_ecx_x86_features =
-               F(LAHF_LM) | F(CMP_LEGACY) | 0 /*SVM*/ | 0 /* ExtApicSpace */ |
-               F(CR8_LEGACY) | F(ABM) | F(SSE4A) | F(MISALIGNSSE) |
-               F(3DNOWPREFETCH) | F(OSVW) | 0 /* IBS */ | F(XOP) |
-               0 /* SKINIT, WDT, LWP */ | F(FMA4) | F(TBM) |
-               F(TOPOEXT) | F(PERFCTR_CORE);
-
-       /* cpuid 0x80000008.ebx */
-       const u32 kvm_cpuid_8000_0008_ebx_x86_features =
-               F(CLZERO) | F(XSAVEERPTR) |
-               F(WBNOINVD) | F(AMD_IBPB) | F(AMD_IBRS) | F(AMD_SSBD) | F(VIRT_SSBD) |
-               F(AMD_SSB_NO) | F(AMD_STIBP) | F(AMD_STIBP_ALWAYS_ON);
-
-       /* cpuid 0xC0000001.edx */
-       const u32 kvm_cpuid_C000_0001_edx_x86_features =
-               F(XSTORE) | F(XSTORE_EN) | F(XCRYPT) | F(XCRYPT_EN) |
-               F(ACE2) | F(ACE2_EN) | F(PHE) | F(PHE_EN) |
-               F(PMM) | F(PMM_EN);
-
-       /* cpuid 0xD.1.eax */
-       const u32 kvm_cpuid_D_1_eax_x86_features =
-               F(XSAVEOPT) | F(XSAVEC) | F(XGETBV1) | F(XSAVES);
-
        /* all calls to cpuid_count() should be made on the same cpu */
        get_cpu();
 
@@ -487,9 +512,7 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function)
                entry->eax = min(entry->eax, 0x1fU);
                break;
        case 1:
-               entry->edx &= kvm_cpuid_1_edx_x86_features;
                cpuid_entry_mask(entry, CPUID_1_EDX);
-               entry->ecx &= kvm_cpuid_1_ecx_x86_features;
                cpuid_entry_mask(entry, CPUID_1_ECX);
                /* we support x2apic emulation even if host does not support
                 * it since we emulate x2apic in software */
@@ -599,7 +622,6 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function)
                if (!entry)
                        goto out;
 
-               entry->eax &= kvm_cpuid_D_1_eax_x86_features;
                cpuid_entry_mask(entry, CPUID_D_1_EAX);
 
                if (!kvm_x86_ops->xsaves_supported())
@@ -683,11 +705,10 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function)
                entry->eax = min(entry->eax, 0x8000001f);
                break;
        case 0x80000001:
-               entry->edx &= kvm_cpuid_8000_0001_edx_x86_features;
                cpuid_entry_mask(entry, CPUID_8000_0001_EDX);
+               /* Add it manually because it may not be in host CPUID.  */
                if (!tdp_enabled)
                        cpuid_entry_set(entry, X86_FEATURE_GBPAGES);
-               entry->ecx &= kvm_cpuid_8000_0001_ecx_x86_features;
                cpuid_entry_mask(entry, CPUID_8000_0001_ECX);
                break;
        case 0x80000007: /* Advanced power management */
@@ -706,7 +727,6 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function)
                        g_phys_as = phys_as;
                entry->eax = g_phys_as | (virt_as << 8);
                entry->edx = 0;
-               entry->ebx &= kvm_cpuid_8000_0008_ebx_x86_features;
                cpuid_entry_mask(entry, CPUID_8000_0008_EBX);
                /*
                 * AMD has separate bits for each SPEC_CTRL bit.
@@ -749,7 +769,6 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function)
                entry->eax = min(entry->eax, 0xC0000004);
                break;
        case 0xC0000001:
-               entry->edx &= kvm_cpuid_C000_0001_edx_x86_features;
                cpuid_entry_mask(entry, CPUID_C000_0001_EDX);
                break;
        case 3: /* Processor serial number */
index 407dc26c06337844c46f98cdd227bb168c5119fc..13374f885c81d667028dc55818b214aa3c845a3f 100644 (file)
@@ -6,6 +6,9 @@
 #include <asm/cpu.h>
 #include <asm/processor.h>
 
+extern u32 kvm_cpu_caps[NCAPINTS] __read_mostly;
+void kvm_set_cpu_caps(void);
+
 int kvm_update_cpuid(struct kvm_vcpu *vcpu);
 struct kvm_cpuid_entry2 *kvm_find_cpuid_entry(struct kvm_vcpu *vcpu,
                                              u32 function, u32 index);
@@ -172,7 +175,8 @@ static __always_inline void cpuid_entry_mask(struct kvm_cpuid_entry2 *entry,
 {
        u32 *reg = cpuid_entry_get_reg(entry, leaf * 32);
 
-       *reg &= boot_cpu_data.x86_capability[leaf];
+       BUILD_BUG_ON(leaf >= ARRAY_SIZE(kvm_cpu_caps));
+       *reg &= kvm_cpu_caps[leaf];
 }
 
 static __always_inline u32 *guest_cpuid_get_register(struct kvm_vcpu *vcpu,
@@ -262,4 +266,20 @@ static inline bool cpuid_fault_enabled(struct kvm_vcpu *vcpu)
                  MSR_MISC_FEATURES_ENABLES_CPUID_FAULT;
 }
 
+static __always_inline void kvm_cpu_cap_clear(unsigned int x86_feature)
+{
+       unsigned int x86_leaf = x86_feature / 32;
+
+       reverse_cpuid_check(x86_leaf);
+       kvm_cpu_caps[x86_leaf] &= ~__feature_bit(x86_feature);
+}
+
+static __always_inline void kvm_cpu_cap_set(unsigned int x86_feature)
+{
+       unsigned int x86_leaf = x86_feature / 32;
+
+       reverse_cpuid_check(x86_leaf);
+       kvm_cpu_caps[x86_leaf] |= __feature_bit(x86_feature);
+}
+
 #endif
index 8984ae140689dc739c22f71d67c9306497527831..aae5e3eff48dcdbf1c46e7964e5a9e24821c6d44 100644 (file)
@@ -1479,6 +1479,8 @@ static __init int svm_hardware_setup(void)
                        pr_info("Virtual GIF supported\n");
        }
 
+       kvm_set_cpu_caps();
+
        return 0;
 
 err:
index bf27cb8ac3fc87722ed855208373c9f1da2aac8c..ae482f4f567801cd4abc444076de96dadd905933 100644 (file)
@@ -7818,6 +7818,8 @@ static __init int hardware_setup(void)
                        return r;
        }
 
+       kvm_set_cpu_caps();
+
        r = alloc_kvm_area();
        if (r)
                nested_vmx_hardware_unsetup();