#define PTE_PRESENT_MASK(mmu) ((mmu)->arch.pte_masks.present)
#define PTE_WRITABLE_MASK(mmu) ((mmu)->arch.pte_masks.writable)
#define PTE_USER_MASK(mmu) ((mmu)->arch.pte_masks.user)
+#define PTE_READABLE_MASK(mmu) ((mmu)->arch.pte_masks.readable)
+#define PTE_EXECUTABLE_MASK(mmu) ((mmu)->arch.pte_masks.executable)
#define PTE_ACCESSED_MASK(mmu) ((mmu)->arch.pte_masks.accessed)
#define PTE_DIRTY_MASK(mmu) ((mmu)->arch.pte_masks.dirty)
#define PTE_HUGE_MASK(mmu) ((mmu)->arch.pte_masks.huge)
#define PTE_C_BIT_MASK(mmu) ((mmu)->arch.pte_masks.c)
#define PTE_S_BIT_MASK(mmu) ((mmu)->arch.pte_masks.s)
-#define is_present_pte(mmu, pte) (!!(*(pte) & PTE_PRESENT_MASK(mmu)))
+/*
+ * For PTEs without a PRESENT bit (i.e. EPT entries), treat the PTE as present
+ * if it's executable or readable, as EPT supports execute-only PTEs, but not
+ * write-only PTEs.
+ */
+#define is_present_pte(mmu, pte) \
+ (PTE_PRESENT_MASK(mmu) ? \
+ !!(*(pte) & PTE_PRESENT_MASK(mmu)) : \
+ !!(*(pte) & (PTE_READABLE_MASK(mmu) | PTE_EXECUTABLE_MASK(mmu))))
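+/*
+ * A PTE is executable if its X bit is set, or, for formats that have an NX
+ * bit instead of an X bit (e.g. non-EPT 64-bit paging), if its NX bit is
+ * clear.  At most one of the two masks may be non-zero, e.g. an EPT
+ * execute-only PTE with only bit 2 set is present and executable, but not
+ * readable or writable.
+ */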
+#define is_executable_pte(mmu, pte) \
+ ((*(pte) & (PTE_EXECUTABLE_MASK(mmu) | PTE_NX_MASK(mmu))) == PTE_EXECUTABLE_MASK(mmu))
#define is_writable_pte(mmu, pte) (!!(*(pte) & PTE_WRITABLE_MASK(mmu)))
#define is_user_pte(mmu, pte) (!!(*(pte) & PTE_USER_MASK(mmu)))
#define is_accessed_pte(mmu, pte) (!!(*(pte) & PTE_ACCESSED_MASK(mmu)))
#define is_dirty_pte(mmu, pte) (!!(*(pte) & PTE_DIRTY_MASK(mmu)))
#define is_huge_pte(mmu, pte) (!!(*(pte) & PTE_HUGE_MASK(mmu)))
-#define is_nx_pte(mmu, pte) (!!(*(pte) & PTE_NX_MASK(mmu)))
+#define is_nx_pte(mmu, pte) (!is_executable_pte(mmu, pte))
void tdp_mmu_init(struct kvm_vm *vm, int pgtable_levels,
struct pte_masks *pte_masks);
mmu->pgd_created = true;
mmu->arch.pte_masks = *pte_masks;
}
+
+ TEST_ASSERT(mmu->pgtable_levels == 4 || mmu->pgtable_levels == 5,
+ "Selftests MMU only supports 4-level and 5-level paging, not %u-level paging",
+ mmu->pgtable_levels);
}
void virt_arch_pgd_alloc(struct kvm_vm *vm)
.dirty = BIT_ULL(6),
.huge = BIT_ULL(7),
.nx = BIT_ULL(63),
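+ /* Non-EPT 64-bit paging has no X bit; executability is governed by NX. */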
+ .executable = 0,
.c = vm->arch.c_bit,
.s = vm->arch.s_bit,
};
void tdp_mmu_init(struct kvm_vm *vm, int pgtable_levels,
struct pte_masks *pte_masks)
{
- TEST_ASSERT(!vm->arch.tdp_mmu, "TDP MMU already initialized");
+ TEST_ASSERT(!vm->stage2_mmu.pgtable_levels, "TDP MMU already initialized");
- vm->arch.tdp_mmu = calloc(1, sizeof(*vm->arch.tdp_mmu));
- virt_mmu_init(vm, vm->arch.tdp_mmu, pte_masks);
+ vm->stage2_mmu.pgtable_levels = pgtable_levels;
+ virt_mmu_init(vm, &vm->stage2_mmu, pte_masks);
}
static void *virt_get_pte(struct kvm_vm *vm, struct kvm_mmu *mmu,
paddr = vm_untag_gpa(vm, paddr);
if (!is_present_pte(mmu, pte)) {
- *pte = PTE_PRESENT_MASK(mmu) | PTE_WRITABLE_MASK(mmu);
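+ /*
+ * Give the new entry the full set of permissions defined by the
+ * MMU's masks, whether it maps a (huge)page or points at a lower
+ * level page table.
+ */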
+ *pte = PTE_PRESENT_MASK(mmu) | PTE_READABLE_MASK(mmu) |
+ PTE_WRITABLE_MASK(mmu) | PTE_EXECUTABLE_MASK(mmu);
if (current_level == target_level)
*pte |= PTE_HUGE_MASK(mmu) | (paddr & PHYSICAL_PAGE_MASK);
else
TEST_ASSERT(vm_untag_gpa(vm, paddr) == paddr,
"Unexpected bits in paddr: %lx", paddr);
+ TEST_ASSERT(!PTE_EXECUTABLE_MASK(mmu) || !PTE_NX_MASK(mmu),
+ "X and NX bit masks cannot be used simultaneously");
+
/*
* Allocate upper level page tables, if not already present. Return
* early if a hugepage was created.
pte = virt_get_pte(vm, mmu, pte, vaddr, PG_LEVEL_4K);
TEST_ASSERT(!is_present_pte(mmu, pte),
"PTE already present for 4k page at vaddr: 0x%lx", vaddr);
- *pte = PTE_PRESENT_MASK(mmu) | PTE_WRITABLE_MASK(mmu) | (paddr & PHYSICAL_PAGE_MASK);
+ *pte = PTE_PRESENT_MASK(mmu) | PTE_READABLE_MASK(mmu) |
+ PTE_WRITABLE_MASK(mmu) | PTE_EXECUTABLE_MASK(mmu) |
+ (paddr & PHYSICAL_PAGE_MASK);
/*
* Neither SEV nor TDX supports shared page tables, so only the final
struct hv_enlightened_vmcs *current_evmcs;
struct hv_vp_assist_page *current_vp_assist;
-struct eptPageTableEntry {
- uint64_t readable:1;
- uint64_t writable:1;
- uint64_t executable:1;
- uint64_t memory_type:3;
- uint64_t ignore_pat:1;
- uint64_t page_size:1;
- uint64_t accessed:1;
- uint64_t dirty:1;
- uint64_t ignored_11_10:2;
- uint64_t address:40;
- uint64_t ignored_62_52:11;
- uint64_t suppress_ve:1;
-};
-
int vcpu_enable_evmcs(struct kvm_vcpu *vcpu)
{
uint16_t evmcs_ver;
void vm_enable_ept(struct kvm_vm *vm)
{
- TEST_ASSERT(kvm_cpu_has_ept(), "KVM doesn't support nested EPT");
- if (vm->arch.tdp_mmu)
- return;
+ struct pte_masks pte_masks;
- /* TODO: Drop eptPageTableEntry in favor of PTE masks. */
- struct pte_masks pte_masks = (struct pte_masks) {
+ TEST_ASSERT(kvm_cpu_has_ept(), "KVM doesn't support nested EPT");
+ /*
+ * EPT entries do not have 'present' or 'user' bits; instead, bit 0 is
+ * the 'readable' bit.
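+ * With a zero 'present' mask, is_present_pte() falls back to checking the
+ * readable and executable bits, which allows execute-only EPT mappings to
+ * be treated as present.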
+ */
+ pte_masks = (struct pte_masks) {
+ .present = 0,
+ .user = 0,
+ .readable = BIT_ULL(0),
+ .writable = BIT_ULL(1),
+ .executable = BIT_ULL(2),
+ .huge = BIT_ULL(7),
+ .accessed = BIT_ULL(8),
+ .dirty = BIT_ULL(9),
+ .nx = 0,
};
/* TODO: Add support for 5-level EPT. */
vmx->vmwrite_gpa = addr_gva2gpa(vm, (uintptr_t)vmx->vmwrite);
memset(vmx->vmwrite_hva, 0, getpagesize());
- if (vm->arch.tdp_mmu)
- vmx->eptp_gpa = vm->arch.tdp_mmu->pgd;
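+ /* Use the stage-2 (TDP) MMU's root as the EPTP, if one has been created. */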
+ if (vm->stage2_mmu.pgd_created)
+ vmx->eptp_gpa = vm->stage2_mmu.pgd;
*p_vmx_gva = vmx_gva;
return vmx;
init_vmcs_guest_state(guest_rip, guest_rsp);
}
-static void tdp_create_pte(struct kvm_vm *vm,
- struct eptPageTableEntry *pte,
- uint64_t nested_paddr,
- uint64_t paddr,
- int current_level,
- int target_level)
-{
- if (!pte->readable) {
- pte->writable = true;
- pte->readable = true;
- pte->executable = true;
- pte->page_size = (current_level == target_level);
- if (pte->page_size)
- pte->address = paddr >> vm->page_shift;
- else
- pte->address = vm_alloc_page_table(vm) >> vm->page_shift;
- } else {
- /*
- * Entry already present. Assert that the caller doesn't want
- * a hugepage at this level, and that there isn't a hugepage at
- * this level.
- */
- TEST_ASSERT(current_level != target_level,
- "Cannot create hugepage at level: %u, nested_paddr: 0x%lx",
- current_level, nested_paddr);
- TEST_ASSERT(!pte->page_size,
- "Cannot create page table at level: %u, nested_paddr: 0x%lx",
- current_level, nested_paddr);
- }
-}
-
-
-void __tdp_pg_map(struct kvm_vm *vm, uint64_t nested_paddr, uint64_t paddr,
- int target_level)
-{
- const uint64_t page_size = PG_LEVEL_SIZE(target_level);
- void *eptp_hva = addr_gpa2hva(vm, vm->arch.tdp_mmu->pgd);
- struct eptPageTableEntry *pt = eptp_hva, *pte;
- uint16_t index;
-
- TEST_ASSERT(vm->mode == VM_MODE_PXXVYY_4K,
- "Unknown or unsupported guest mode: 0x%x", vm->mode);
-
- TEST_ASSERT((nested_paddr >> 48) == 0,
- "Nested physical address 0x%lx is > 48-bits and requires 5-level EPT",
- nested_paddr);
- TEST_ASSERT((nested_paddr % page_size) == 0,
- "Nested physical address not on page boundary,\n"
- " nested_paddr: 0x%lx page_size: 0x%lx",
- nested_paddr, page_size);
- TEST_ASSERT((nested_paddr >> vm->page_shift) <= vm->max_gfn,
- "Physical address beyond beyond maximum supported,\n"
- " nested_paddr: 0x%lx vm->max_gfn: 0x%lx vm->page_size: 0x%x",
- paddr, vm->max_gfn, vm->page_size);
- TEST_ASSERT((paddr % page_size) == 0,
- "Physical address not on page boundary,\n"
- " paddr: 0x%lx page_size: 0x%lx",
- paddr, page_size);
- TEST_ASSERT((paddr >> vm->page_shift) <= vm->max_gfn,
- "Physical address beyond beyond maximum supported,\n"
- " paddr: 0x%lx vm->max_gfn: 0x%lx vm->page_size: 0x%x",
- paddr, vm->max_gfn, vm->page_size);
-
- for (int level = PG_LEVEL_512G; level >= PG_LEVEL_4K; level--) {
- index = (nested_paddr >> PG_LEVEL_SHIFT(level)) & 0x1ffu;
- pte = &pt[index];
-
- tdp_create_pte(vm, pte, nested_paddr, paddr, level, target_level);
-
- if (pte->page_size)
- break;
-
- pt = addr_gpa2hva(vm, pte->address * vm->page_size);
- }
-}
-
/*
* Map a range of EPT guest physical addresses to the VM's physical address
*
void __tdp_map(struct kvm_vm *vm, uint64_t nested_paddr, uint64_t paddr,
uint64_t size, int level)
{
+ struct kvm_mmu *mmu = &vm->stage2_mmu;
size_t page_size = PG_LEVEL_SIZE(level);
size_t npages = size / page_size;
TEST_ASSERT(paddr + size > paddr, "Paddr overflow");
while (npages--) {
- __tdp_pg_map(vm, nested_paddr, paddr, level);
+ __virt_pg_map(vm, mmu, nested_paddr, paddr, level);
nested_paddr += page_size;
paddr += page_size;
}