return ret != -EAGAIN ? ret : 0;
}
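+/*
+ * Snapshot of the VMA-derived state needed to service a stage-2 fault,
+ * captured under mmap_read_lock() in kvm_s2_fault_get_vma_info(). Keeping
+ * it separate from the mutable per-fault state in struct kvm_s2_fault
+ * lets it be passed around as const once the mmap lock is dropped.
+ */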
+struct kvm_s2_fault_vma_info {
+ unsigned long mmu_seq;
+ long vma_pagesize;
+ vm_flags_t vm_flags;
+ gfn_t gfn;
+ bool mte_allowed;
+ bool is_vma_cacheable;
+};
+
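+/*
+ * Returns the shift of the largest stage-2 mapping size the VMA allows;
+ * may downgrade to PTE granularity by setting @force_pte.
+ */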
static short kvm_s2_resolve_vma_size(const struct kvm_s2_fault_desc *s2fd,
struct vm_area_struct *vma, bool *force_pte)
{
struct kvm_s2_fault {
bool writable;
- bool mte_allowed;
- bool is_vma_cacheable;
bool s2_force_noncacheable;
- unsigned long mmu_seq;
- gfn_t gfn;
kvm_pfn_t pfn;
bool logging_active;
bool force_pte;
- long vma_pagesize;
enum kvm_pgtable_prot prot;
struct page *page;
- vm_flags_t vm_flags;
};
static bool kvm_s2_fault_is_perm(const struct kvm_s2_fault_desc *s2fd)
}
static int kvm_s2_fault_get_vma_info(const struct kvm_s2_fault_desc *s2fd,
- struct kvm_s2_fault *fault)
+ struct kvm_s2_fault *fault,
+ struct kvm_s2_fault_vma_info *s2vi)
{
struct vm_area_struct *vma;
struct kvm *kvm = s2fd->vcpu->kvm;
return -EFAULT;
}
- fault->vma_pagesize = BIT(kvm_s2_resolve_vma_size(s2fd, vma, &fault->force_pte));
+ s2vi->vma_pagesize = BIT(kvm_s2_resolve_vma_size(s2fd, vma, &fault->force_pte));
/*
* Both the canonical IPA and fault IPA must be aligned to the
* mapping size to ensure we find the right PFN and lay down the
* mapping in the right place.
*/
- fault->gfn = ALIGN_DOWN(s2fd->fault_ipa, fault->vma_pagesize) >> PAGE_SHIFT;
+ s2vi->gfn = ALIGN_DOWN(s2fd->fault_ipa, s2vi->vma_pagesize) >> PAGE_SHIFT;
- fault->mte_allowed = kvm_vma_mte_allowed(vma);
+ s2vi->mte_allowed = kvm_vma_mte_allowed(vma);
- fault->vm_flags = vma->vm_flags;
+ s2vi->vm_flags = vma->vm_flags;
- fault->is_vma_cacheable = kvm_vma_is_cacheable(vma);
+ s2vi->is_vma_cacheable = kvm_vma_is_cacheable(vma);
/*
* Read mmu_invalidate_seq so that KVM can detect if the results of
* vma_lookup() or __kvm_faultin_pfn() become stale prior to
* acquiring kvm->mmu_lock.
*
* Rely on mmap_read_unlock() for an implicit smp_rmb(), which pairs
* with the smp_wmb() in kvm_mmu_invalidate_end().
*/
- fault->mmu_seq = kvm->mmu_invalidate_seq;
+ s2vi->mmu_seq = kvm->mmu_invalidate_seq;
mmap_read_unlock(current->mm);
return 0;
}
static gfn_t get_canonical_gfn(const struct kvm_s2_fault_desc *s2fd,
- const struct kvm_s2_fault *fault)
+ const struct kvm_s2_fault_vma_info *s2vi)
{
phys_addr_t ipa;
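+ /*
+ * With nested virt, the faulting IPA belongs to the shadow stage-2
+ * address space; derive the canonical gfn from the guest's own
+ * stage-2 translation output instead.
+ */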
if (!s2fd->nested)
- return fault->gfn;
+ return s2vi->gfn;
ipa = kvm_s2_trans_output(s2fd->nested);
- return ALIGN_DOWN(ipa, fault->vma_pagesize) >> PAGE_SHIFT;
+ return ALIGN_DOWN(ipa, s2vi->vma_pagesize) >> PAGE_SHIFT;
}
static int kvm_s2_fault_pin_pfn(const struct kvm_s2_fault_desc *s2fd,
- struct kvm_s2_fault *fault)
+ struct kvm_s2_fault *fault,
+ struct kvm_s2_fault_vma_info *s2vi)
{
int ret;
- ret = kvm_s2_fault_get_vma_info(s2fd, fault);
+ ret = kvm_s2_fault_get_vma_info(s2fd, fault, s2vi);
if (ret)
return ret;
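+ /*
+ * Pin the backing page, requesting write access for write faults so
+ * that fault->writable reflects the access actually granted.
+ */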
- fault->pfn = __kvm_faultin_pfn(s2fd->memslot, get_canonical_gfn(s2fd, fault),
+ fault->pfn = __kvm_faultin_pfn(s2fd->memslot, get_canonical_gfn(s2fd, s2vi),
kvm_is_write_fault(s2fd->vcpu) ? FOLL_WRITE : 0,
&fault->writable, &fault->page);
if (unlikely(is_error_noslot_pfn(fault->pfn))) {
if (fault->pfn == KVM_PFN_ERR_HWPOISON) {
- kvm_send_hwpoison_signal(s2fd->hva, __ffs(fault->vma_pagesize));
+ kvm_send_hwpoison_signal(s2fd->hva, __ffs(s2vi->vma_pagesize));
return 0;
}
return -EFAULT;
}
static int kvm_s2_fault_compute_prot(const struct kvm_s2_fault_desc *s2fd,
- struct kvm_s2_fault *fault)
+ struct kvm_s2_fault *fault,
+ const struct kvm_s2_fault_vma_info *s2vi)
{
struct kvm *kvm = s2fd->vcpu->kvm;
/*
* Check if this PFN is non-struct-page memory that cannot support
* CMOs. It could potentially be unsafe to access as cacheable.
*/
- if (fault->vm_flags & (VM_PFNMAP | VM_MIXEDMAP) && !pfn_is_map_memory(fault->pfn)) {
- if (fault->is_vma_cacheable) {
+ if (s2vi->vm_flags & (VM_PFNMAP | VM_MIXEDMAP) && !pfn_is_map_memory(fault->pfn)) {
+ if (s2vi->is_vma_cacheable) {
/*
* Whilst the VMA owner expects cacheable mapping to this
* PFN, hardware also has to support the FWB and CACHE DIC
* features: CMOs operate on virtual addresses, and a PFNMAP
* region is not necessarily mapped to a kernel VA, so these
* features are needed to avoid any cache maintenance.
*/
fault->prot |= KVM_PGTABLE_PROT_X;
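+ /*
+ * When the mapping is forced non-cacheable, the VMA owner (e.g. VFIO)
+ * may opt in to Normal-NC via VM_ALLOW_ANY_UNCACHED; otherwise fall
+ * back to Device memory.
+ */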
if (fault->s2_force_noncacheable)
- fault->prot |= (fault->vm_flags & VM_ALLOW_ANY_UNCACHED) ?
+ fault->prot |= (s2vi->vm_flags & VM_ALLOW_ANY_UNCACHED) ?
KVM_PGTABLE_PROT_NORMAL_NC : KVM_PGTABLE_PROT_DEVICE;
else if (cpus_have_final_cap(ARM64_HAS_CACHE_DIC))
fault->prot |= KVM_PGTABLE_PROT_X;
if (!kvm_s2_fault_is_perm(s2fd) && !fault->s2_force_noncacheable && kvm_has_mte(kvm)) {
/* Check the VMM hasn't introduced a new disallowed VMA */
- if (!fault->mte_allowed)
+ if (!s2vi->mte_allowed)
return -EFAULT;
}
return 0;
}
-static phys_addr_t get_ipa(const struct kvm_s2_fault *fault)
-{
- return gfn_to_gpa(fault->gfn);
-}
-
static int kvm_s2_fault_map(const struct kvm_s2_fault_desc *s2fd,
- struct kvm_s2_fault *fault, void *memcache)
+ struct kvm_s2_fault *fault,
+ const struct kvm_s2_fault_vma_info *s2vi, void *memcache)
{
+ enum kvm_pgtable_walk_flags flags = KVM_PGTABLE_WALK_SHARED;
struct kvm *kvm = s2fd->vcpu->kvm;
struct kvm_pgtable *pgt;
long perm_fault_granule;
+ long mapping_size;
+ gfn_t gfn;
int ret;
- enum kvm_pgtable_walk_flags flags = KVM_PGTABLE_WALK_SHARED;
kvm_fault_lock(kvm);
pgt = s2fd->vcpu->arch.hw_mmu->pgt;
ret = -EAGAIN;
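+ /*
+ * Bail out if the notifier sequence has moved on since s2vi->mmu_seq
+ * was sampled: the pinned pfn may be stale, so have the fault
+ * replayed instead.
+ */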
- if (mmu_invalidate_retry(kvm, fault->mmu_seq))
+ if (mmu_invalidate_retry(kvm, s2vi->mmu_seq))
goto out_unlock;
perm_fault_granule = (kvm_s2_fault_is_perm(s2fd) ?
kvm_vcpu_trap_get_perm_fault_granule(s2fd->vcpu) : 0);
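+ /*
+ * Work on local copies: the THP adjustment below may grow the mapping
+ * size and realign the base gfn, while the VMA info captured in s2vi
+ * stays const.
+ */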
+ mapping_size = s2vi->vma_pagesize;
+ gfn = s2vi->gfn;
/*
* If we are not forced to use page mapping, check if we are
* backed by a THP and thus use block mapping if possible.
*/
- if (fault->vma_pagesize == PAGE_SIZE &&
+ if (mapping_size == PAGE_SIZE &&
!(fault->force_pte || fault->s2_force_noncacheable)) {
if (perm_fault_granule > PAGE_SIZE) {
- fault->vma_pagesize = perm_fault_granule;
+ mapping_size = perm_fault_granule;
} else {
- fault->vma_pagesize = transparent_hugepage_adjust(kvm, s2fd->memslot,
- s2fd->hva, &fault->pfn,
- &fault->gfn);
-
- if (fault->vma_pagesize < 0) {
- ret = fault->vma_pagesize;
+ mapping_size = transparent_hugepage_adjust(kvm, s2fd->memslot,
+ s2fd->hva, &fault->pfn,
+ &gfn);
+ if (mapping_size < 0) {
+ ret = mapping_size;
goto out_unlock;
}
}
}
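+ /*
+ * Tags only need sanitising when a new mapping is established; a
+ * permission fault leaves the existing contents and tags in place.
+ */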
if (!perm_fault_granule && !fault->s2_force_noncacheable && kvm_has_mte(kvm))
- sanitise_mte_tags(kvm, fault->pfn, fault->vma_pagesize);
+ sanitise_mte_tags(kvm, fault->pfn, mapping_size);
/*
* When handling an FSC_PERM fault, we need to relax
- * permissions only if vma_pagesize equals perm_fault_granule. Otherwise,
+ * permissions only if mapping_size equals perm_fault_granule. Otherwise,
* kvm_pgtable_stage2_map() should be called to change block size.
*/
- if (fault->vma_pagesize == perm_fault_granule) {
+ if (mapping_size == perm_fault_granule) {
/*
* Drop the SW bits in favour of those stored in the
* PTE, which will be preserved.
*/
fault->prot &= ~KVM_NV_GUEST_MAP_SZ;
- ret = KVM_PGT_FN(kvm_pgtable_stage2_relax_perms)(pgt, get_ipa(fault),
+ ret = KVM_PGT_FN(kvm_pgtable_stage2_relax_perms)(pgt, gfn_to_gpa(gfn),
fault->prot, flags);
} else {
- ret = KVM_PGT_FN(kvm_pgtable_stage2_map)(pgt, get_ipa(fault), fault->vma_pagesize,
+ ret = KVM_PGT_FN(kvm_pgtable_stage2_map)(pgt, gfn_to_gpa(gfn), mapping_size,
__pfn_to_phys(fault->pfn), fault->prot,
memcache, flags);
}
kvm_release_faultin_page(kvm, fault->page, !!ret, fault->writable);
kvm_fault_unlock(kvm);
- /* Mark the page dirty only if the fault is handled successfully */
- if (fault->writable && !ret)
- mark_page_dirty_in_slot(kvm, s2fd->memslot, get_canonical_gfn(s2fd, fault));
+ /*
+ * Mark the page dirty only if the fault is handled successfully,
+ * making sure we adjust the canonical IPA if the mapping size has
+ * been updated (via a THP upgrade, for example).
+ */
+ if (fault->writable && !ret) {
+ phys_addr_t ipa = gfn_to_gpa(get_canonical_gfn(s2fd, s2vi));
+ ipa = ALIGN_DOWN(ipa, mapping_size);
+ mark_page_dirty_in_slot(kvm, s2fd->memslot, gpa_to_gfn(ipa));
+ }
if (ret != -EAGAIN)
return ret;
{
bool perm_fault = kvm_vcpu_trap_is_permission_fault(s2fd->vcpu);
bool logging_active = memslot_is_logging(s2fd->memslot);
+ struct kvm_s2_fault_vma_info s2vi = {};
struct kvm_s2_fault fault = {
.logging_active = logging_active,
.force_pte = logging_active,
/*
* Let's check if we will get back a huge page backed by hugetlbfs, or
* get a block mapping for a device MMIO region.
*/
- ret = kvm_s2_fault_pin_pfn(s2fd, &fault);
+ ret = kvm_s2_fault_pin_pfn(s2fd, &fault, &s2vi);
if (ret != 1)
return ret;
- ret = kvm_s2_fault_compute_prot(s2fd, &fault);
+ ret = kvm_s2_fault_compute_prot(s2fd, &fault, &s2vi);
if (ret) {
kvm_release_page_unused(fault.page);
return ret;
}
- return kvm_s2_fault_map(s2fd, &fault, memcache);
+ return kvm_s2_fault_map(s2fd, &fault, &s2vi, memcache);
}
/* Resolve the access fault by making the page young again. */