git.ipfire.org Git - thirdparty/kernel/linux.git/commitdiff
KVM: TDX: Drop kvm_x86_ops.link_external_spt()
author Sean Christopherson <seanjc@google.com>
Sat, 9 May 2026 07:53:57 +0000 (15:53 +0800)
committer Sean Christopherson <seanjc@google.com>
Wed, 27 May 2026 22:35:10 +0000 (15:35 -0700)
Drop the dedicated .link_external_spt() for linking S-EPT pages, and
instead funnel everything through .set_external_spte() for mapping S-EPT
entries. Using separate hooks doesn't help prevent TDP MMU details from
bleeding into TDX, and vice versa; to the contrary, dedicated callbacks
will result in _more_ pollution when hugepage support is added, e.g. will
require the TDP MMU to know details about the splitting rules for TDX that
aren't all that relevant to the TDP MMU.

Ideally, KVM would provide a single pair of hooks to set S-EPT entries,
one hook for setting SPTEs under write-lock and another for setting SPTEs
under read-lock (e.g. to ensure the entire operation is "atomic", to allow
for failure, etc.).  Sadly, TDX's requirement that all child S-EPT entries
are removed before the parent makes that impractical: the TDP MMU
deliberately prunes non-leaf SPTEs and _then_ processes their children, thus
making it quite important for the TDP MMU to differentiate between zapping
leaf and non-leaf S-EPT entries.

However, that's the _only_ case that's truly special, and even that case
could be shoehorned into a single hook; it just wouldn't be a net positive.

Signed-off-by: Rick Edgecombe <rick.p.edgecombe@intel.com>
Signed-off-by: Yan Zhao <yan.y.zhao@intel.com>
Link: https://patch.msgid.link/20260509075357.4113-1-yan.y.zhao@intel.com
Signed-off-by: Sean Christopherson <seanjc@google.com>
arch/x86/include/asm/kvm-x86-ops.h
arch/x86/include/asm/kvm_host.h
arch/x86/kvm/mmu/tdp_mmu.c
arch/x86/kvm/vmx/tdx.c

index b0269325646cbc194f89409a03fa28ef8220d23b..2cb393000ee92725f2e295ee73687335d2259ed8 100644 (file)
@@ -96,7 +96,6 @@ KVM_X86_OP_OPTIONAL_RET0(set_identity_map_addr)
 KVM_X86_OP_OPTIONAL_RET0(get_mt_mask)
 KVM_X86_OP_OPTIONAL_RET0(tdp_has_smep)
 KVM_X86_OP(load_mmu_pgd)
-KVM_X86_OP_OPTIONAL_RET0(link_external_spt)
 KVM_X86_OP_OPTIONAL_RET0(set_external_spte)
 KVM_X86_OP_OPTIONAL_RET0(free_external_spt)
 KVM_X86_OP_OPTIONAL(remove_external_spte)
index 8a53ca619570175e99ef6452296ac4f863c3de34..85339d43a9ff6c59a54988a1d1067d0b318cda9f 100644 (file)
@@ -1911,9 +1911,6 @@ struct kvm_x86_ops {
        void (*load_mmu_pgd)(struct kvm_vcpu *vcpu, hpa_t root_hpa,
                             int root_level);
 
-       /* Update external mapping with page table link. */
-       int (*link_external_spt)(struct kvm *kvm, gfn_t gfn, enum pg_level level,
-                               void *external_spt);
        /* Update the external page table from spte getting set. */
        int (*set_external_spte)(struct kvm *kvm, gfn_t gfn, enum pg_level level,
                                 u64 mirror_spte);
index f98afc3422ce50ccacd037dac3e5e9dfbb7956d5..0dc1b0597f8aceaa2ece2833dbd40cc1fb61031b 100644 (file)
@@ -495,27 +495,12 @@ static void handle_removed_pt(struct kvm *kvm, tdp_ptep_t pt, bool shared)
        call_rcu(&sp->rcu_head, tdp_mmu_free_sp_rcu_callback);
 }
 
-static void *get_external_spt(gfn_t gfn, u64 new_spte, int level)
-{
-       if (is_shadow_present_pte(new_spte) && !is_last_spte(new_spte, level)) {
-               struct kvm_mmu_page *sp = spte_to_child_sp(new_spte);
-
-               WARN_ON_ONCE(sp->role.level + 1 != level);
-               WARN_ON_ONCE(sp->gfn != gfn);
-               return sp->external_spt;
-       }
-
-       return NULL;
-}
-
 static int __must_check set_external_spte_present(struct kvm *kvm, tdp_ptep_t sptep,
                                                 gfn_t gfn, u64 *old_spte,
                                                 u64 new_spte, int level)
 {
        bool was_present = is_shadow_present_pte(*old_spte);
-       bool is_present = is_shadow_present_pte(new_spte);
-       bool is_leaf = is_present && is_last_spte(new_spte, level);
-       int ret = 0;
+       int ret;
 
        KVM_BUG_ON(was_present, kvm);
 
@@ -528,18 +513,8 @@ static int __must_check set_external_spte_present(struct kvm *kvm, tdp_ptep_t sp
        if (!try_cmpxchg64(rcu_dereference(sptep), old_spte, FROZEN_SPTE))
                return -EBUSY;
 
-       /*
-        * Use different call to either set up middle level
-        * external page table, or leaf.
-        */
-       if (is_leaf) {
-               ret = kvm_x86_call(set_external_spte)(kvm, gfn, level, new_spte);
-       } else {
-               void *external_spt = get_external_spt(gfn, new_spte, level);
+       ret = kvm_x86_call(set_external_spte)(kvm, gfn, level, new_spte);
 
-               KVM_BUG_ON(!external_spt, kvm);
-               ret = kvm_x86_call(link_external_spt)(kvm, gfn, level, external_spt);
-       }
        if (ret)
                __kvm_tdp_mmu_write_spte(sptep, *old_spte);
        else
index 3f956dde4a516be86e581353177f44a78869d3aa..2dfc90c449a77237e4a6b9dc0b88c55105d7fe31 100644 (file)
@@ -1653,18 +1653,58 @@ static int tdx_mem_page_aug(struct kvm *kvm, gfn_t gfn,
        return 0;
 }
 
+static struct page *tdx_spte_to_sept_pt(struct kvm *kvm, gfn_t gfn,
+                                       u64 new_spte, enum pg_level level)
+{
+       struct kvm_mmu_page *sp = spte_to_child_sp(new_spte);
+
+       if (KVM_BUG_ON(!sp->external_spt, kvm) ||
+           KVM_BUG_ON(sp->role.level + 1 != level, kvm) ||
+           KVM_BUG_ON(sp->gfn != gfn, kvm))
+               return NULL;
+
+       return virt_to_page(sp->external_spt);
+}
+
+static int tdx_sept_link_private_spt(struct kvm *kvm, gfn_t gfn,
+                                    enum pg_level level, u64 mirror_spte)
+{
+       gpa_t gpa = gfn_to_gpa(gfn);
+       u64 err, entry, level_state;
+       struct page *sept_pt;
+
+       sept_pt = tdx_spte_to_sept_pt(kvm, gfn, mirror_spte, level);
+       if (!sept_pt)
+               return -EIO;
+
+       err = tdh_mem_sept_add(&to_kvm_tdx(kvm)->td, gpa, level, sept_pt,
+                              &entry, &level_state);
+       if (unlikely(tdx_operand_busy(err)))
+               return -EBUSY;
+
+       if (TDX_BUG_ON_2(err, TDH_MEM_SEPT_ADD, entry, level_state, kvm))
+               return -EIO;
+
+       return 0;
+}
+
 static int tdx_sept_set_private_spte(struct kvm *kvm, gfn_t gfn,
                                     enum pg_level level, u64 mirror_spte)
 {
        struct kvm_tdx *kvm_tdx = to_kvm_tdx(kvm);
        kvm_pfn_t pfn = spte_to_pfn(mirror_spte);
 
+       if (KVM_BUG_ON(!is_shadow_present_pte(mirror_spte), kvm))
+               return -EIO;
+
+       if (!is_last_spte(mirror_spte, level))
+               return tdx_sept_link_private_spt(kvm, gfn, level, mirror_spte);
+
        /* TODO: handle large pages. */
        if (KVM_BUG_ON(level != PG_LEVEL_4K, kvm))
                return -EIO;
 
-       WARN_ON_ONCE(!is_shadow_present_pte(mirror_spte) ||
-                    (mirror_spte & VMX_EPT_RWX_MASK) != VMX_EPT_RWX_MASK);
+       WARN_ON_ONCE((mirror_spte & VMX_EPT_RWX_MASK) != VMX_EPT_RWX_MASK);
 
        /*
         * Ensure pre_fault_allowed is read by kvm_arch_vcpu_pre_fault_memory()
@@ -1684,24 +1724,6 @@ static int tdx_sept_set_private_spte(struct kvm *kvm, gfn_t gfn,
        return tdx_mem_page_aug(kvm, gfn, level, pfn);
 }
 
-static int tdx_sept_link_private_spt(struct kvm *kvm, gfn_t gfn,
-                                    enum pg_level level, void *private_spt)
-{
-       gpa_t gpa = gfn_to_gpa(gfn);
-       struct page *page = virt_to_page(private_spt);
-       u64 err, entry, level_state;
-
-       err = tdh_mem_sept_add(&to_kvm_tdx(kvm)->td, gpa, level, page, &entry,
-                              &level_state);
-       if (unlikely(tdx_operand_busy(err)))
-               return -EBUSY;
-
-       if (TDX_BUG_ON_2(err, TDH_MEM_SEPT_ADD, entry, level_state, kvm))
-               return -EIO;
-
-       return 0;
-}
-
 /*
  * Ensure shared and private EPTs to be flushed on all vCPUs.
  * tdh_mem_track() is the only caller that increases TD epoch. An increase in
@@ -3411,7 +3433,6 @@ int __init tdx_hardware_setup(void)
 
        vt_x86_ops.vm_size = max_t(unsigned int, vt_x86_ops.vm_size, sizeof(struct kvm_tdx));
 
-       vt_x86_ops.link_external_spt = tdx_sept_link_private_spt;
        vt_x86_ops.set_external_spte = tdx_sept_set_private_spte;
        vt_x86_ops.free_external_spt = tdx_sept_free_private_spt;
        vt_x86_ops.remove_external_spte = tdx_sept_remove_private_spte;