]> git.ipfire.org Git - thirdparty/linux.git/commitdiff
KVM: x86: Enable CET virtualization for VMX and advertise to userspace
author: Yang Weijiang <weijiang.yang@intel.com>
Fri, 19 Sep 2025 22:32:35 +0000 (15:32 -0700)
committer: Sean Christopherson <seanjc@google.com>
Tue, 23 Sep 2025 16:22:32 +0000 (09:22 -0700)
Add support for the LOAD_CET_STATE VM-Enter and VM-Exit controls, the
CET XFEATURE bits in XSS, and advertise support for IBT and SHSTK to
userspace.  Explicitly clear IBT and SHSTK on SVM, as additional work is
needed to enable CET on SVM, e.g. to context switch S_CET and other state.

Disable CET in KVM if unrestricted_guest is unsupported/disabled: KVM does
not support emulating CET, and running without Unrestricted Guest can
result in KVM emulating large swaths of guest code.  While it's highly
unlikely any guest will trigger emulation while also utilizing IBT or
SHSTK, there's zero reason to allow CET without Unrestricted Guest as that
combination should only be possible when explicitly disabling
unrestricted_guest for testing purposes.

Disable CET if VMX_BASIC[bit56] == 0, i.e. if hardware strictly enforces
the presence of an Error Code based on exception vector, as attempting to
inject a #CP with an Error Code (#CP architecturally has an Error Code)
will fail due to the #CP vector historically not having an Error Code.

Clear S_CET and SSP-related VMCS fields on "reset" to emulate the
architectural behavior of CET MSRs and SSP being reset to 0 after RESET,
power-up and INIT.  Note,
KVM already clears guest CET state that is managed via XSTATE in
kvm_xstate_reset().

Signed-off-by: Yang Weijiang <weijiang.yang@intel.com>
Signed-off-by: Mathias Krause <minipli@grsecurity.net>
Tested-by: Mathias Krause <minipli@grsecurity.net>
Tested-by: John Allen <john.allen@amd.com>
Tested-by: Rick Edgecombe <rick.p.edgecombe@intel.com>
Signed-off-by: Chao Gao <chao.gao@intel.com>
[sean: move some bits to separate patches, massage changelog]
Reviewed-by: Binbin Wu <binbin.wu@linux.intel.com>
Reviewed-by: Xiaoyao Li <xiaoyao.li@intel.com>
Link: https://lore.kernel.org/r/20250919223258.1604852-29-seanjc@google.com
Signed-off-by: Sean Christopherson <seanjc@google.com>
arch/x86/include/asm/vmx.h
arch/x86/kvm/cpuid.c
arch/x86/kvm/svm/svm.c
arch/x86/kvm/vmx/capabilities.h
arch/x86/kvm/vmx/vmx.c
arch/x86/kvm/vmx/vmx.h

index ce10a7e2d3d96fa4b4e8a0cbfb5a8a02cafefbec..c85c500195239b2eb4fe08fe090d4b6a816f25f0 100644 (file)
 #define VMX_BASIC_DUAL_MONITOR_TREATMENT       BIT_ULL(49)
 #define VMX_BASIC_INOUT                                BIT_ULL(54)
 #define VMX_BASIC_TRUE_CTLS                    BIT_ULL(55)
+#define VMX_BASIC_NO_HW_ERROR_CODE_CC          BIT_ULL(56)
 
 static inline u32 vmx_basic_vmcs_revision_id(u64 vmx_basic)
 {
index b0731304bd79375e0d948df7ef3b1203f66a3cf4..d290dbc968318b9deeb6fe1aa864d3f1770998e7 100644 (file)
@@ -946,6 +946,7 @@ void kvm_set_cpu_caps(void)
                VENDOR_F(WAITPKG),
                F(SGX_LC),
                F(BUS_LOCK_DETECT),
+               X86_64_F(SHSTK),
        );
 
        /*
@@ -980,6 +981,7 @@ void kvm_set_cpu_caps(void)
                F(AMX_INT8),
                F(AMX_BF16),
                F(FLUSH_L1D),
+               F(IBT),
        );
 
        /*
index d20e5917b0fe666e33fe013180a07a7ce09f2b75..ff4925a7bf96fd06fba289754f48c63808c6b78f 100644 (file)
@@ -5222,6 +5222,10 @@ static __init void svm_set_cpu_caps(void)
        kvm_caps.supported_perf_cap = 0;
        kvm_caps.supported_xss = 0;
 
+       /* KVM doesn't yet support CET virtualization for SVM. */
+       kvm_cpu_cap_clear(X86_FEATURE_SHSTK);
+       kvm_cpu_cap_clear(X86_FEATURE_IBT);
+
        /* CPUID 0x80000001 and 0x8000000A (SVM features) */
        if (nested) {
                kvm_cpu_cap_set(X86_FEATURE_SVM);
index 59c83888bdc01ba6d6cb1dfbc4678433346fae8e..02aadb9d730e79cfc5eb6fc79e4d00acfe49a3de 100644 (file)
@@ -73,6 +73,11 @@ static inline bool cpu_has_vmx_basic_inout(void)
        return  vmcs_config.basic & VMX_BASIC_INOUT;
 }
 
+static inline bool cpu_has_vmx_basic_no_hw_errcode_cc(void)
+{
+       return  vmcs_config.basic & VMX_BASIC_NO_HW_ERROR_CODE_CC;
+}
+
 static inline bool cpu_has_virtual_nmis(void)
 {
        return vmcs_config.pin_based_exec_ctrl & PIN_BASED_VIRTUAL_NMIS &&
index 4d775983506e84dfb2a65ad572cf0fbed8bcd01d..dfa6c5cd2d5c51b34b7167fbfba4eda0510faf1e 100644 (file)
@@ -2602,6 +2602,7 @@ static int setup_vmcs_config(struct vmcs_config *vmcs_conf,
                { VM_ENTRY_LOAD_IA32_EFER,              VM_EXIT_LOAD_IA32_EFER },
                { VM_ENTRY_LOAD_BNDCFGS,                VM_EXIT_CLEAR_BNDCFGS },
                { VM_ENTRY_LOAD_IA32_RTIT_CTL,          VM_EXIT_CLEAR_IA32_RTIT_CTL },
+               { VM_ENTRY_LOAD_CET_STATE,              VM_EXIT_LOAD_CET_STATE },
        };
 
        memset(vmcs_conf, 0, sizeof(*vmcs_conf));
@@ -4868,6 +4869,14 @@ void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
 
        vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, 0);  /* 22.2.1 */
 
+       if (kvm_cpu_cap_has(X86_FEATURE_SHSTK)) {
+               vmcs_writel(GUEST_SSP, 0);
+               vmcs_writel(GUEST_INTR_SSP_TABLE, 0);
+       }
+       if (kvm_cpu_cap_has(X86_FEATURE_IBT) ||
+           kvm_cpu_cap_has(X86_FEATURE_SHSTK))
+               vmcs_writel(GUEST_S_CET, 0);
+
        kvm_make_request(KVM_REQ_APIC_PAGE_RELOAD, vcpu);
 
        vpid_sync_context(vmx->vpid);
@@ -6335,6 +6344,10 @@ void dump_vmcs(struct kvm_vcpu *vcpu)
        if (vmcs_read32(VM_EXIT_MSR_STORE_COUNT) > 0)
                vmx_dump_msrs("guest autostore", &vmx->msr_autostore.guest);
 
+       if (vmentry_ctl & VM_ENTRY_LOAD_CET_STATE)
+               pr_err("S_CET = 0x%016lx, SSP = 0x%016lx, SSP TABLE = 0x%016lx\n",
+                      vmcs_readl(GUEST_S_CET), vmcs_readl(GUEST_SSP),
+                      vmcs_readl(GUEST_INTR_SSP_TABLE));
        pr_err("*** Host State ***\n");
        pr_err("RIP = 0x%016lx  RSP = 0x%016lx\n",
               vmcs_readl(HOST_RIP), vmcs_readl(HOST_RSP));
@@ -6365,6 +6378,10 @@ void dump_vmcs(struct kvm_vcpu *vcpu)
                       vmcs_read64(HOST_IA32_PERF_GLOBAL_CTRL));
        if (vmcs_read32(VM_EXIT_MSR_LOAD_COUNT) > 0)
                vmx_dump_msrs("host autoload", &vmx->msr_autoload.host);
+       if (vmexit_ctl & VM_EXIT_LOAD_CET_STATE)
+               pr_err("S_CET = 0x%016lx, SSP = 0x%016lx, SSP TABLE = 0x%016lx\n",
+                      vmcs_readl(HOST_S_CET), vmcs_readl(HOST_SSP),
+                      vmcs_readl(HOST_INTR_SSP_TABLE));
 
        pr_err("*** Control State ***\n");
        pr_err("CPUBased=0x%08x SecondaryExec=0x%08x TertiaryExec=0x%016llx\n",
@@ -7946,7 +7963,6 @@ static __init void vmx_set_cpu_caps(void)
                kvm_cpu_cap_set(X86_FEATURE_UMIP);
 
        /* CPUID 0xD.1 */
-       kvm_caps.supported_xss = 0;
        if (!cpu_has_vmx_xsaves())
                kvm_cpu_cap_clear(X86_FEATURE_XSAVES);
 
@@ -7958,6 +7974,18 @@ static __init void vmx_set_cpu_caps(void)
 
        if (cpu_has_vmx_waitpkg())
                kvm_cpu_cap_check_and_set(X86_FEATURE_WAITPKG);
+
+       /*
+        * Disable CET if unrestricted_guest is unsupported as KVM doesn't
+        * enforce CET HW behaviors in emulator. On platforms with
+        * VMX_BASIC[bit56] == 0, inject #CP at VMX entry with error code
+        * fails, so disable CET in this case too.
+        */
+       if (!cpu_has_load_cet_ctrl() || !enable_unrestricted_guest ||
+           !cpu_has_vmx_basic_no_hw_errcode_cc()) {
+               kvm_cpu_cap_clear(X86_FEATURE_SHSTK);
+               kvm_cpu_cap_clear(X86_FEATURE_IBT);
+       }
 }
 
 static bool vmx_is_io_intercepted(struct kvm_vcpu *vcpu,
index 23d6e89b96f26a614784afbb2dc1074c153cab2b..af8224e074eef849bee87426294e84a57c932f48 100644 (file)
@@ -484,7 +484,8 @@ static inline u8 vmx_get_rvi(void)
         VM_ENTRY_LOAD_IA32_EFER |                                      \
         VM_ENTRY_LOAD_BNDCFGS |                                        \
         VM_ENTRY_PT_CONCEAL_PIP |                                      \
-        VM_ENTRY_LOAD_IA32_RTIT_CTL)
+        VM_ENTRY_LOAD_IA32_RTIT_CTL |                                  \
+        VM_ENTRY_LOAD_CET_STATE)
 
 #define __KVM_REQUIRED_VMX_VM_EXIT_CONTROLS                            \
        (VM_EXIT_SAVE_DEBUG_CONTROLS |                                  \
@@ -506,7 +507,8 @@ static inline u8 vmx_get_rvi(void)
               VM_EXIT_LOAD_IA32_EFER |                                 \
               VM_EXIT_CLEAR_BNDCFGS |                                  \
               VM_EXIT_PT_CONCEAL_PIP |                                 \
-              VM_EXIT_CLEAR_IA32_RTIT_CTL)
+              VM_EXIT_CLEAR_IA32_RTIT_CTL |                            \
+              VM_EXIT_LOAD_CET_STATE)
 
 #define KVM_REQUIRED_VMX_PIN_BASED_VM_EXEC_CONTROL                     \
        (PIN_BASED_EXT_INTR_MASK |                                      \