]> git.ipfire.org Git - thirdparty/linux.git/commitdiff
KVM: nVMX: Prepare for enabling CET support for nested guest
author Yang Weijiang <weijiang.yang@intel.com>
Fri, 19 Sep 2025 22:32:38 +0000 (15:32 -0700)
committer Sean Christopherson <seanjc@google.com>
Tue, 23 Sep 2025 16:24:30 +0000 (09:24 -0700)
Set up CET MSRs, related VM_ENTRY/EXIT control bits and fixed CR4 setting
to enable CET for nested VM.

vmcs12 and vmcs02 need to be synced when L2 exits to L1 or when L1 wants
to resume L2, so that the correct CET states can be observed by one another.

Please note that consistency checks regarding CET state during VM-Entry
will be added later to prevent this patch from becoming too large.
Advertising the new CET VM_ENTRY/EXIT control bits is also deferred
until after the consistency checks are added.

Signed-off-by: Yang Weijiang <weijiang.yang@intel.com>
Tested-by: Mathias Krause <minipli@grsecurity.net>
Tested-by: John Allen <john.allen@amd.com>
Tested-by: Rick Edgecombe <rick.p.edgecombe@intel.com>
Signed-off-by: Chao Gao <chao.gao@intel.com>
Reviewed-by: Xin Li (Intel) <xin@zytor.com>
Tested-by: Xin Li (Intel) <xin@zytor.com>
Link: https://lore.kernel.org/r/20250919223258.1604852-32-seanjc@google.com
Signed-off-by: Sean Christopherson <seanjc@google.com>
arch/x86/kvm/vmx/nested.c
arch/x86/kvm/vmx/vmcs12.c
arch/x86/kvm/vmx/vmcs12.h
arch/x86/kvm/vmx/vmx.c
arch/x86/kvm/vmx/vmx.h

index b644f4599f70b953d16fbc3c34f4083a32fd3c97..11e5d3569933a8dac9a9c4476f528fd0d98fdd4f 100644 (file)
@@ -721,6 +721,24 @@ static inline bool nested_vmx_prepare_msr_bitmap(struct kvm_vcpu *vcpu,
        nested_vmx_set_intercept_for_msr(vmx, msr_bitmap_l1, msr_bitmap_l0,
                                         MSR_IA32_MPERF, MSR_TYPE_R);
 
+       nested_vmx_set_intercept_for_msr(vmx, msr_bitmap_l1, msr_bitmap_l0,
+                                        MSR_IA32_U_CET, MSR_TYPE_RW);
+
+       nested_vmx_set_intercept_for_msr(vmx, msr_bitmap_l1, msr_bitmap_l0,
+                                        MSR_IA32_S_CET, MSR_TYPE_RW);
+
+       nested_vmx_set_intercept_for_msr(vmx, msr_bitmap_l1, msr_bitmap_l0,
+                                        MSR_IA32_PL0_SSP, MSR_TYPE_RW);
+
+       nested_vmx_set_intercept_for_msr(vmx, msr_bitmap_l1, msr_bitmap_l0,
+                                        MSR_IA32_PL1_SSP, MSR_TYPE_RW);
+
+       nested_vmx_set_intercept_for_msr(vmx, msr_bitmap_l1, msr_bitmap_l0,
+                                        MSR_IA32_PL2_SSP, MSR_TYPE_RW);
+
+       nested_vmx_set_intercept_for_msr(vmx, msr_bitmap_l1, msr_bitmap_l0,
+                                        MSR_IA32_PL3_SSP, MSR_TYPE_RW);
+
        kvm_vcpu_unmap(vcpu, &map);
 
        vmx->nested.force_msr_bitmap_recalc = false;
@@ -2521,6 +2539,32 @@ static void prepare_vmcs02_early(struct vcpu_vmx *vmx, struct loaded_vmcs *vmcs0
        }
 }
 
+/*
+ * Fetch CET state with vmcs_readl() into the caller-supplied outputs,
+ * gated on the CET features reported by guest_cpu_cap_has() for @vcpu:
+ *
+ *   @s_cet   - receives GUEST_S_CET when either IBT or SHSTK is present.
+ *   @ssp     - receives GUEST_SSP when SHSTK is present.
+ *   @ssp_tbl - receives GUEST_INTR_SSP_TABLE when SHSTK is present.
+ *
+ * An output whose gating feature is absent is left unmodified, so callers
+ * must not rely on it having been written.
+ */
+static void vmcs_read_cet_state(struct kvm_vcpu *vcpu, u64 *s_cet,
+                               u64 *ssp, u64 *ssp_tbl)
+{
+       const bool has_shstk = guest_cpu_cap_has(vcpu, X86_FEATURE_SHSTK);
+
+       /* S_CET is read if at least one of the two CET features is there. */
+       if (has_shstk || guest_cpu_cap_has(vcpu, X86_FEATURE_IBT))
+               *s_cet = vmcs_readl(GUEST_S_CET);
+
+       /* The SSP and the interrupt SSP table are read only with SHSTK. */
+       if (!has_shstk)
+               return;
+
+       *ssp = vmcs_readl(GUEST_SSP);
+       *ssp_tbl = vmcs_readl(GUEST_INTR_SSP_TABLE);
+}
+
+/*
+ * Store the supplied CET values with vmcs_writel(), gated on the CET
+ * features reported by guest_cpu_cap_has() for @vcpu:
+ *
+ *   @s_cet   - written to GUEST_S_CET when either IBT or SHSTK is present.
+ *   @ssp     - written to GUEST_SSP when SHSTK is present.
+ *   @ssp_tbl - written to GUEST_INTR_SSP_TABLE when SHSTK is present.
+ *
+ * A field whose gating feature is absent is not written at all.
+ */
+static void vmcs_write_cet_state(struct kvm_vcpu *vcpu, u64 s_cet,
+                                u64 ssp, u64 ssp_tbl)
+{
+       const bool shstk = guest_cpu_cap_has(vcpu, X86_FEATURE_SHSTK);
+       const bool any_cet = shstk || guest_cpu_cap_has(vcpu, X86_FEATURE_IBT);
+
+       if (any_cet)
+               vmcs_writel(GUEST_S_CET, s_cet);
+
+       /* Nothing further to write unless shadow stacks are exposed. */
+       if (!shstk)
+               return;
+
+       vmcs_writel(GUEST_SSP, ssp);
+       vmcs_writel(GUEST_INTR_SSP_TABLE, ssp_tbl);
+}
+
 static void prepare_vmcs02_rare(struct vcpu_vmx *vmx, struct vmcs12 *vmcs12)
 {
        struct hv_enlightened_vmcs *hv_evmcs = nested_vmx_evmcs(vmx);
@@ -2637,6 +2681,10 @@ static void prepare_vmcs02_rare(struct vcpu_vmx *vmx, struct vmcs12 *vmcs12)
        vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, vmx->msr_autoload.host.nr);
        vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, vmx->msr_autoload.guest.nr);
 
+       if (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_CET_STATE)
+               vmcs_write_cet_state(&vmx->vcpu, vmcs12->guest_s_cet,
+                                    vmcs12->guest_ssp, vmcs12->guest_ssp_tbl);
+
        set_cr4_guest_host_mask(vmx);
 }
 
@@ -2676,6 +2724,13 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
                kvm_set_dr(vcpu, 7, vcpu->arch.dr7);
                vmx_guest_debugctl_write(vcpu, vmx->nested.pre_vmenter_debugctl);
        }
+
+       if (!vmx->nested.nested_run_pending ||
+           !(vmcs12->vm_entry_controls & VM_ENTRY_LOAD_CET_STATE))
+               vmcs_write_cet_state(vcpu, vmx->nested.pre_vmenter_s_cet,
+                                    vmx->nested.pre_vmenter_ssp,
+                                    vmx->nested.pre_vmenter_ssp_tbl);
+
        if (kvm_mpx_supported() && (!vmx->nested.nested_run_pending ||
            !(vmcs12->vm_entry_controls & VM_ENTRY_LOAD_BNDCFGS)))
                vmcs_write64(GUEST_BNDCFGS, vmx->nested.pre_vmenter_bndcfgs);
@@ -3551,6 +3606,12 @@ enum nvmx_vmentry_status nested_vmx_enter_non_root_mode(struct kvm_vcpu *vcpu,
             !(vmcs12->vm_entry_controls & VM_ENTRY_LOAD_BNDCFGS)))
                vmx->nested.pre_vmenter_bndcfgs = vmcs_read64(GUEST_BNDCFGS);
 
+       if (!vmx->nested.nested_run_pending ||
+           !(vmcs12->vm_entry_controls & VM_ENTRY_LOAD_CET_STATE))
+               vmcs_read_cet_state(vcpu, &vmx->nested.pre_vmenter_s_cet,
+                                   &vmx->nested.pre_vmenter_ssp,
+                                   &vmx->nested.pre_vmenter_ssp_tbl);
+
        /*
         * Overwrite vmcs01.GUEST_CR3 with L1's CR3 if EPT is disabled *and*
         * nested early checks are disabled.  In the event of a "late" VM-Fail,
@@ -4634,6 +4695,10 @@ static void sync_vmcs02_to_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
 
        if (vmcs12->vm_exit_controls & VM_EXIT_SAVE_IA32_EFER)
                vmcs12->guest_ia32_efer = vcpu->arch.efer;
+
+       vmcs_read_cet_state(&vmx->vcpu, &vmcs12->guest_s_cet,
+                           &vmcs12->guest_ssp,
+                           &vmcs12->guest_ssp_tbl);
 }
 
 /*
@@ -4759,6 +4824,18 @@ static void load_vmcs12_host_state(struct kvm_vcpu *vcpu,
        if (vmcs12->vm_exit_controls & VM_EXIT_CLEAR_BNDCFGS)
                vmcs_write64(GUEST_BNDCFGS, 0);
 
+       /*
+        * Load CET state from host state if VM_EXIT_LOAD_CET_STATE is set;
+        * otherwise CET state should be retained across VM-exit, i.e.,
+        * guest values should be propagated from vmcs12 to vmcs01.
+        */
+       if (vmcs12->vm_exit_controls & VM_EXIT_LOAD_CET_STATE)
+               vmcs_write_cet_state(vcpu, vmcs12->host_s_cet, vmcs12->host_ssp,
+                                    vmcs12->host_ssp_tbl);
+       else
+               vmcs_write_cet_state(vcpu, vmcs12->guest_s_cet, vmcs12->guest_ssp,
+                                    vmcs12->guest_ssp_tbl);
+
        if (vmcs12->vm_exit_controls & VM_EXIT_LOAD_IA32_PAT) {
                vmcs_write64(GUEST_IA32_PAT, vmcs12->host_ia32_pat);
                vcpu->arch.pat = vmcs12->host_ia32_pat;
index 106a72c923ca9e089a93f7c08f613c976be9a69b..4233b5ca9461afb2208d3584519e119bfb3fad2b 100644 (file)
@@ -139,6 +139,9 @@ const unsigned short vmcs12_field_offsets[] = {
        FIELD(GUEST_PENDING_DBG_EXCEPTIONS, guest_pending_dbg_exceptions),
        FIELD(GUEST_SYSENTER_ESP, guest_sysenter_esp),
        FIELD(GUEST_SYSENTER_EIP, guest_sysenter_eip),
+       FIELD(GUEST_S_CET, guest_s_cet),
+       FIELD(GUEST_SSP, guest_ssp),
+       FIELD(GUEST_INTR_SSP_TABLE, guest_ssp_tbl),
        FIELD(HOST_CR0, host_cr0),
        FIELD(HOST_CR3, host_cr3),
        FIELD(HOST_CR4, host_cr4),
@@ -151,5 +154,8 @@ const unsigned short vmcs12_field_offsets[] = {
        FIELD(HOST_IA32_SYSENTER_EIP, host_ia32_sysenter_eip),
        FIELD(HOST_RSP, host_rsp),
        FIELD(HOST_RIP, host_rip),
+       FIELD(HOST_S_CET, host_s_cet),
+       FIELD(HOST_SSP, host_ssp),
+       FIELD(HOST_INTR_SSP_TABLE, host_ssp_tbl),
 };
 const unsigned int nr_vmcs12_fields = ARRAY_SIZE(vmcs12_field_offsets);
index 56fd150a6f243dee3841b1bdd5ee34a5628a97fb..4ad6b16525b93e89821484a02f9d78b05ab33594 100644 (file)
@@ -117,7 +117,13 @@ struct __packed vmcs12 {
        natural_width host_ia32_sysenter_eip;
        natural_width host_rsp;
        natural_width host_rip;
-       natural_width paddingl[8]; /* room for future expansion */
+       natural_width host_s_cet;
+       natural_width host_ssp;
+       natural_width host_ssp_tbl;
+       natural_width guest_s_cet;
+       natural_width guest_ssp;
+       natural_width guest_ssp_tbl;
+       natural_width paddingl[2]; /* room for future expansion */
        u32 pin_based_vm_exec_control;
        u32 cpu_based_vm_exec_control;
        u32 exception_bitmap;
@@ -294,6 +300,12 @@ static inline void vmx_check_vmcs12_offsets(void)
        CHECK_OFFSET(host_ia32_sysenter_eip, 656);
        CHECK_OFFSET(host_rsp, 664);
        CHECK_OFFSET(host_rip, 672);
+       CHECK_OFFSET(host_s_cet, 680);
+       CHECK_OFFSET(host_ssp, 688);
+       CHECK_OFFSET(host_ssp_tbl, 696);
+       CHECK_OFFSET(guest_s_cet, 704);
+       CHECK_OFFSET(guest_ssp, 712);
+       CHECK_OFFSET(guest_ssp_tbl, 720);
        CHECK_OFFSET(pin_based_vm_exec_control, 744);
        CHECK_OFFSET(cpu_based_vm_exec_control, 748);
        CHECK_OFFSET(exception_bitmap, 752);
index c4b07124689dfaa8c0f091790276d5b636c9dcf3..ad5981ff709740b9e7afa2061f63f7baae85cdbc 100644 (file)
@@ -7735,6 +7735,8 @@ static void nested_vmx_cr_fixed1_bits_update(struct kvm_vcpu *vcpu)
        cr4_fixed1_update(X86_CR4_PKE,        ecx, feature_bit(PKU));
        cr4_fixed1_update(X86_CR4_UMIP,       ecx, feature_bit(UMIP));
        cr4_fixed1_update(X86_CR4_LA57,       ecx, feature_bit(LA57));
+       cr4_fixed1_update(X86_CR4_CET,        ecx, feature_bit(SHSTK));
+       cr4_fixed1_update(X86_CR4_CET,        edx, feature_bit(IBT));
 
        entry = kvm_find_cpuid_entry_index(vcpu, 0x7, 1);
        cr4_fixed1_update(X86_CR4_LAM_SUP,    eax, feature_bit(LAM));
index af8224e074eef849bee87426294e84a57c932f48..ea93121029f913fdb8b64505f4807745b41b36ca 100644 (file)
@@ -181,6 +181,9 @@ struct nested_vmx {
         */
        u64 pre_vmenter_debugctl;
        u64 pre_vmenter_bndcfgs;
+       u64 pre_vmenter_s_cet;
+       u64 pre_vmenter_ssp;
+       u64 pre_vmenter_ssp_tbl;
 
        /* to migrate it to L1 if L2 writes to L1's CR8 directly */
        int l1_tpr_threshold;