From: Will Deacon Date: Mon, 30 Mar 2026 14:48:15 +0000 (+0100) Subject: KVM: arm64: Handle aborts from protected VMs X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=ea03466e806fea942841a41cfaab8db8c851aa71;p=thirdparty%2Fkernel%2Fstable.git KVM: arm64: Handle aborts from protected VMs Introduce a new abort handler for resolving stage-2 page faults from protected VMs by pinning and donating anonymous memory. This is considerably simpler than the infamous user_mem_abort() as we only have to deal with translation faults at the pte level. Reviewed-by: Fuad Tabba Tested-by: Fuad Tabba Tested-by: Mostafa Saleh Signed-off-by: Will Deacon Link: https://patch.msgid.link/20260330144841.26181-15-will@kernel.org Signed-off-by: Marc Zyngier --- diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c index b3cc5dfe5723..6a4151e3e4a3 100644 --- a/arch/arm64/kvm/mmu.c +++ b/arch/arm64/kvm/mmu.c @@ -1642,6 +1642,74 @@ out_unlock: return ret != -EAGAIN ? ret : 0; } +static int pkvm_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, + struct kvm_memory_slot *memslot, unsigned long hva) +{ + unsigned int flags = FOLL_HWPOISON | FOLL_LONGTERM | FOLL_WRITE; + struct kvm_pgtable *pgt = vcpu->arch.hw_mmu->pgt; + struct mm_struct *mm = current->mm; + struct kvm *kvm = vcpu->kvm; + void *hyp_memcache; + struct page *page; + int ret; + + ret = prepare_mmu_memcache(vcpu, true, &hyp_memcache); + if (ret) + return -ENOMEM; + + ret = account_locked_vm(mm, 1, true); + if (ret) + return ret; + + mmap_read_lock(mm); + ret = pin_user_pages(hva, 1, flags, &page); + mmap_read_unlock(mm); + + if (ret == -EHWPOISON) { + kvm_send_hwpoison_signal(hva, PAGE_SHIFT); + ret = 0; + goto dec_account; + } else if (ret != 1) { + ret = -EFAULT; + goto dec_account; + } else if (!folio_test_swapbacked(page_folio(page))) { + /* + * We really can't deal with page-cache pages returned by GUP + * because (a) we may trigger writeback of a page for which we + * no longer have access and (b) page_mkclean() won't find the + * stage-2 mapping in the rmap so we can get out-of-whack with + * the filesystem when marking the page dirty during unpinning + * (see cc5095747edf ("ext4: don't BUG if someone dirty pages + * without asking ext4 first")). + * + * Ideally we'd just restrict ourselves to anonymous pages, but + * we also want to allow memfd (i.e. shmem) pages, so check for + * pages backed by swap in the knowledge that the GUP pin will + * prevent try_to_unmap() from succeeding. + */ + ret = -EIO; + goto unpin; + } + + write_lock(&kvm->mmu_lock); + ret = pkvm_pgtable_stage2_map(pgt, fault_ipa, PAGE_SIZE, + page_to_phys(page), KVM_PGTABLE_PROT_RWX, + hyp_memcache, 0); + write_unlock(&kvm->mmu_lock); + if (ret) { + if (ret == -EAGAIN) + ret = 0; + goto unpin; + } + + return 0; +unpin: + unpin_user_pages(&page, 1); +dec_account: + account_locked_vm(mm, 1, false); + return ret; +} + static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, struct kvm_s2_trans *nested, struct kvm_memory_slot *memslot, unsigned long hva, @@ -2205,15 +2273,20 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu) goto out_unlock; } - VM_WARN_ON_ONCE(kvm_vcpu_trap_is_permission_fault(vcpu) && - !write_fault && !kvm_vcpu_trap_is_exec_fault(vcpu)); + if (kvm_vm_is_protected(vcpu->kvm)) { + ret = pkvm_mem_abort(vcpu, fault_ipa, memslot, hva); + } else { + VM_WARN_ON_ONCE(kvm_vcpu_trap_is_permission_fault(vcpu) && + !write_fault && + !kvm_vcpu_trap_is_exec_fault(vcpu)); - if (kvm_slot_has_gmem(memslot)) - ret = gmem_abort(vcpu, fault_ipa, nested, memslot, - esr_fsc_is_permission_fault(esr)); - else - ret = user_mem_abort(vcpu, fault_ipa, nested, memslot, hva, - esr_fsc_is_permission_fault(esr)); + if (kvm_slot_has_gmem(memslot)) + ret = gmem_abort(vcpu, fault_ipa, nested, memslot, + esr_fsc_is_permission_fault(esr)); + else + ret = user_mem_abort(vcpu, fault_ipa, nested, memslot, hva, + esr_fsc_is_permission_fault(esr)); + } if (ret == 0) ret = 1; out: