* the number of unique SPs that can theoretically be created is 2^n, where n
* is the number of bits that are used to compute the role.
*
- * But, even though there are 20 bits in the mask below, not all combinations
+ * But, even though there are 21 bits in the mask below, not all combinations
* of modes and flags are possible:
*
* - invalid shadow pages are not accounted, mirror pages are not shadowed,
- * so the bits are effectively 18.
+ * so the bits are effectively 19.
*
* - quadrant will only be used if has_4_byte_gpte=1 (non-PAE paging);
* execonly and ad_disabled are only used for nested EPT which has
* cr0_wp=0, therefore these three bits only give rise to 5 possibilities.
*
* Therefore, the maximum number of possible upper-level shadow pages for a
- * single gfn is a bit less than 2^13.
+ * single gfn is a bit less than 2^14.
*/
union kvm_mmu_page_role {
u32 word;
unsigned has_4_byte_gpte:1;
unsigned quadrant:2;
unsigned direct:1;
- unsigned access:3;
+ unsigned access:4;
unsigned invalid:1;
unsigned efer_nx:1;
unsigned cr0_wp:1;
unsigned guest_mode:1;
unsigned passthrough:1;
unsigned is_mirror:1;
- unsigned :4;
+ unsigned:3;
/*
* This is left at the top of the word so that
* Byte index: page fault error code [4:1]
* Bit index: pte permissions in ACC_* format
*/
- u8 permissions[16];
+ u16 permissions[16];
u64 *pae_root;
u64 *pml4_root;
void kvm_mmu_set_mmio_spte_mask(u64 mmio_value, u64 mmio_mask, u64 access_mask);
void kvm_mmu_set_mmio_spte_value(struct kvm *kvm, u64 mmio_value);
void kvm_mmu_set_me_spte_mask(u64 me_value, u64 me_mask);
-void kvm_mmu_set_ept_masks(bool has_ad_bits, bool has_exec_only);
+void kvm_mmu_set_ept_masks(bool has_ad_bits);
void kvm_init_mmu(struct kvm_vcpu *vcpu);
void kvm_init_shadow_npt_mmu(struct kvm_vcpu *vcpu, unsigned long cr0,
*/
const union kvm_mmu_page_role sync_role_ign = {
.level = 0xf,
- .access = 0x7,
+ .access = ACC_ALL,
.quadrant = 0x3,
.passthrough = 0x1,
};
* update_permission_bitmask() builds what is effectively a
* two-dimensional array of bools. The second dimension is
* provided by individual bits of permissions[pfec >> 1], and
- * logical &, | and ~ operations operate on all the 8 possible
+ * logical &, | and ~ operations operate on all the 16 possible
* combinations of ACC_* bits.
*/
#define ACC_BITS_MASK(access) \
(4 & (access) ? 1 << 4 : 0) | \
(5 & (access) ? 1 << 5 : 0) | \
(6 & (access) ? 1 << 6 : 0) | \
- (7 & (access) ? 1 << 7 : 0))
+ (7 & (access) ? 1 << 7 : 0) | \
+ (8 & (access) ? 1 << 8 : 0) | \
+ (9 & (access) ? 1 << 9 : 0) | \
+ (10 & (access) ? 1 << 10 : 0) | \
+ (11 & (access) ? 1 << 11 : 0) | \
+ (12 & (access) ? 1 << 12 : 0) | \
+ (13 & (access) ? 1 << 13 : 0) | \
+ (14 & (access) ? 1 << 14 : 0) | \
+ (15 & (access) ? 1 << 15 : 0))
static void update_permission_bitmask(struct kvm_mmu *mmu, bool ept)
{
unsigned index;
- const u8 x = ACC_BITS_MASK(ACC_EXEC_MASK);
- const u8 w = ACC_BITS_MASK(ACC_WRITE_MASK);
- const u8 u = ACC_BITS_MASK(ACC_USER_MASK);
+ const u16 x = ACC_BITS_MASK(ACC_EXEC_MASK);
+ const u16 w = ACC_BITS_MASK(ACC_WRITE_MASK);
+ const u16 r = ACC_BITS_MASK(ACC_READ_MASK);
bool cr4_smep = is_cr4_smep(mmu);
bool cr4_smap = is_cr4_smap(mmu);
unsigned pfec = index << 1;
/*
- * Each "*f" variable has a 1 bit for each UWX value
+ * Each "*f" variable has a 1 bit for each ACC_* combo
* that causes a fault with the given PFEC.
*/
/* Faults from reads to non-readable pages */
- u8 rf = 0;
+ u16 rf = (pfec & (PFERR_WRITE_MASK|PFERR_FETCH_MASK)) ? 0 : (u16)~r;
/* Faults from writes to non-writable pages */
- u8 wf = (pfec & PFERR_WRITE_MASK) ? (u8)~w : 0;
+ u16 wf = (pfec & PFERR_WRITE_MASK) ? (u16)~w : 0;
/* Faults from user mode accesses to supervisor pages */
- u8 uf = 0;
+ u16 uf = 0;
/* Faults from fetches of non-executable pages */
- u8 ff = 0;
+ u16 ff = 0;
/* Faults from kernel mode accesses of user pages */
- u8 smapf = 0;
+ u16 smapf = 0;
if (ept) {
- rf = (pfec & PFERR_USER_MASK) ? (u8)~u : 0;
- ff = (pfec & PFERR_FETCH_MASK) ? (u8)~x : 0;
+ ff = (pfec & PFERR_FETCH_MASK) ? (u16)~x : 0;
} else {
+ const u16 u = ACC_BITS_MASK(ACC_USER_MASK);
+
/* Faults from kernel mode accesses to user pages */
- u8 kf = (pfec & PFERR_USER_MASK) ? 0 : u;
+ u16 kf = (pfec & PFERR_USER_MASK) ? 0 : u;
- uf = (pfec & PFERR_USER_MASK) ? (u8)~u : 0;
+ uf = (pfec & PFERR_USER_MASK) ? (u16)~u : 0;
if (efer_nx)
- ff |= (pfec & PFERR_FETCH_MASK) ? (u8)~x : 0;
+ ff |= (pfec & PFERR_FETCH_MASK) ? (u16)~x : 0;
/* Allow supervisor writes if !cr0.wp */
if (!cr0_wp)
#define KVM_MMU_PAGE_PRINTK() ({ \
const char *saved_ptr = trace_seq_buffer_ptr(p); \
static const char *access_str[] = { \
- "---", "--x", "w--", "w-x", "-u-", "-ux", "wu-", "wux" \
+ "----", "r---", "-w--", "rw--", "--u-", "r-u-", "-wu-", "rwu-", \
+ "---x", "r--x", "-w-x", "rw-x", "--ux", "r-ux", "-wux", "rwux" \
}; \
union kvm_mmu_page_role role; \
\
return true;
}
-/*
- * For PTTYPE_EPT, a page table can be executable but not readable
- * on supported processors. Therefore, set_spte does not automatically
- * set bit 0 if execute only is supported. Here, we repurpose ACC_USER_MASK
- * to signify readability since it isn't used in the EPT case
- */
static inline unsigned FNAME(gpte_access)(u64 gpte)
{
unsigned access;
#if PTTYPE == PTTYPE_EPT
access = ((gpte & VMX_EPT_WRITABLE_MASK) ? ACC_WRITE_MASK : 0) |
((gpte & VMX_EPT_EXECUTABLE_MASK) ? ACC_EXEC_MASK : 0) |
- ((gpte & VMX_EPT_READABLE_MASK) ? ACC_USER_MASK : 0);
+ ((gpte & VMX_EPT_READABLE_MASK) ? ACC_READ_MASK : 0);
#else
- BUILD_BUG_ON(ACC_EXEC_MASK != PT_PRESENT_MASK);
- BUILD_BUG_ON(ACC_EXEC_MASK != 1);
+ /*
+ * P is set here, so the page is always readable and W/U/!NX represent
+ * allowed accesses.
+ */
+ BUILD_BUG_ON(ACC_READ_MASK != PT_PRESENT_MASK);
+ BUILD_BUG_ON(ACC_WRITE_MASK != PT_WRITABLE_MASK);
+ BUILD_BUG_ON(ACC_USER_MASK != PT_USER_MASK);
+ BUILD_BUG_ON(ACC_EXEC_MASK & (PT_WRITABLE_MASK | PT_USER_MASK | PT_PRESENT_MASK));
access = gpte & (PT_WRITABLE_MASK | PT_USER_MASK | PT_PRESENT_MASK);
- /* Combine NX with P (which is set here) to get ACC_EXEC_MASK. */
- access ^= (gpte >> PT64_NX_SHIFT);
+ access |= gpte & PT64_NX_MASK ? 0 : ACC_EXEC_MASK;
#endif
return access;
if (write_fault)
walker->fault.exit_qualification |= EPT_VIOLATION_ACC_WRITE;
- if (user_fault)
- walker->fault.exit_qualification |= EPT_VIOLATION_ACC_READ;
- if (fetch_fault)
+ else if (fetch_fault)
walker->fault.exit_qualification |= EPT_VIOLATION_ACC_INSTR;
+ else
+ walker->fault.exit_qualification |= EPT_VIOLATION_ACC_READ;
+
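+ /*
+ * Accesses to guest paging structures are either "reads" or
+ * "read+write" accesses, never write-only.  Always report the read
+ * here; combined with EPT_VIOLATION_ACC_WRITE, which is set above
+ * when write_fault is true, this yields "read+write".
+ */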
+ if (access & PFERR_GUEST_PAGE_MASK)
+ walker->fault.exit_qualification |= EPT_VIOLATION_ACC_READ;
/*
* Note, pte_access holds the raw RWX bits from the EPTE, not
int is_host_mmio = -1;
bool wrprot = false;
- /*
- * For the EPT case, shadow_present_mask has no RWX bits set if
- * exec-only page table entries are supported. In that case,
- * ACC_USER_MASK and shadow_user_mask are used to represent
- * read access. See FNAME(gpte_access) in paging_tmpl.h.
- */
WARN_ON_ONCE((pte_access | shadow_present_mask) == SHADOW_NONPRESENT_VALUE);
if (sp->role.ad_disabled)
pte_access &= ~ACC_EXEC_MASK;
}
+ if (pte_access & ACC_READ_MASK)
+ spte |= PT_PRESENT_MASK; /* or VMX_EPT_READABLE_MASK */
+
if (pte_access & ACC_EXEC_MASK)
spte |= shadow_x_mask;
else
u64 spte = SPTE_MMU_PRESENT_MASK;
spte |= __pa(child_pt) | shadow_present_mask | PT_WRITABLE_MASK |
+ PT_PRESENT_MASK /* or VMX_EPT_READABLE_MASK */ |
shadow_user_mask | shadow_x_mask | shadow_me_value;
if (ad_disabled)
}
EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_mmu_set_me_spte_mask);
-void kvm_mmu_set_ept_masks(bool has_ad_bits, bool has_exec_only)
+void kvm_mmu_set_ept_masks(bool has_ad_bits)
{
kvm_ad_enabled = has_ad_bits;
- shadow_user_mask = VMX_EPT_READABLE_MASK;
+ shadow_user_mask = 0;
shadow_accessed_mask = VMX_EPT_ACCESS_BIT;
shadow_dirty_mask = VMX_EPT_DIRTY_BIT;
shadow_nx_mask = 0ull;
shadow_x_mask = VMX_EPT_EXECUTABLE_MASK;
- /* VMX_EPT_SUPPRESS_VE_BIT is needed for W or X violation. */
- shadow_present_mask =
- (has_exec_only ? 0ull : VMX_EPT_READABLE_MASK) | VMX_EPT_SUPPRESS_VE_BIT;
+ shadow_present_mask = VMX_EPT_SUPPRESS_VE_BIT;
shadow_acc_track_mask = VMX_EPT_RWX_MASK;
shadow_host_writable_mask = EPT_SPTE_HOST_WRITABLE;
#define SPTE_BASE_ADDR_MASK (((1ULL << 52) - 1) & ~(u64)(PAGE_SIZE-1))
#endif
-#define ACC_EXEC_MASK 1
+#define ACC_READ_MASK PT_PRESENT_MASK
#define ACC_WRITE_MASK PT_WRITABLE_MASK
#define ACC_USER_MASK PT_USER_MASK
-#define ACC_ALL (ACC_EXEC_MASK | ACC_WRITE_MASK | ACC_USER_MASK)
+#define ACC_EXEC_MASK 8
+#define ACC_ALL (ACC_EXEC_MASK | ACC_WRITE_MASK | ACC_USER_MASK | ACC_READ_MASK)
#define SPTE_LEVEL_BITS 9
#define SPTE_LEVEL_SHIFT(level) __PT_LEVEL_SHIFT(level, SPTE_LEVEL_BITS)
cpu_has_vmx_virtualize_apic_accesses();
}
-static inline bool cpu_has_vmx_ept_execute_only(void)
-{
- return vmx_capability.ept & VMX_EPT_EXECUTE_ONLY_BIT;
-}
-
static inline bool cpu_has_vmx_ept_4levels(void)
{
return vmx_capability.ept & VMX_EPT_PAGE_WALK_4_BIT;
{
u64 error_code;
- /* Is it a read fault? */
- error_code = (exit_qualification & EPT_VIOLATION_ACC_READ)
- ? PFERR_USER_MASK : 0;
/* Is it a write fault? */
- error_code |= (exit_qualification & EPT_VIOLATION_ACC_WRITE)
+ error_code = (exit_qualification & EPT_VIOLATION_ACC_WRITE)
? PFERR_WRITE_MASK : 0;
/* Is it a fetch fault? */
error_code |= (exit_qualification & EPT_VIOLATION_ACC_INSTR)
set_bit(0, vmx_vpid_bitmap); /* 0 is reserved for host */
if (enable_ept)
- kvm_mmu_set_ept_masks(enable_ept_ad_bits,
- cpu_has_vmx_ept_execute_only());
+ kvm_mmu_set_ept_masks(enable_ept_ad_bits);
else
vt_x86_ops.get_mt_mask = NULL;