git.ipfire.org Git - thirdparty/kernel/stable.git/commitdiff
KVM: arm64: Handle aborts from protected VMs
author: Will Deacon <will@kernel.org>
	Mon, 30 Mar 2026 14:48:15 +0000 (15:48 +0100)
committer: Marc Zyngier <maz@kernel.org>
	Mon, 30 Mar 2026 15:58:08 +0000 (16:58 +0100)
Introduce a new abort handler for resolving stage-2 page faults from
protected VMs by pinning and donating anonymous memory. This is
considerably simpler than the infamous user_mem_abort() as we only have
to deal with translation faults at the pte level.

Reviewed-by: Fuad Tabba <tabba@google.com>
Tested-by: Fuad Tabba <tabba@google.com>
Tested-by: Mostafa Saleh <smostafa@google.com>
Signed-off-by: Will Deacon <will@kernel.org>
Link: https://patch.msgid.link/20260330144841.26181-15-will@kernel.org
Signed-off-by: Marc Zyngier <maz@kernel.org>
arch/arm64/kvm/mmu.c

index b3cc5dfe572378ddb2b96458a6f147e3a8de159a..6a4151e3e4a336d2468e42607a18ea6bd61979e8 100644 (file)
@@ -1642,6 +1642,74 @@ out_unlock:
        return ret != -EAGAIN ? ret : 0;
 }
 
+/*
+ * pkvm_mem_abort() - resolve a stage-2 translation fault for a protected VM.
+ * @vcpu:      the faulting vCPU
+ * @fault_ipa: intermediate physical address that faulted and must be mapped
+ * @memslot:   memslot covering @fault_ipa (unused in the body; presumably
+ *             kept for signature symmetry with user_mem_abort())
+ * @hva:       userspace virtual address backing @fault_ipa
+ *
+ * Longterm-pin a single writable page at @hva and install it RWX at
+ * @fault_ipa in the VM's stage-2 page table, donating it to the guest.
+ * Only pte-level translation faults are handled, so there is no huge-page
+ * or permission-fault logic here.
+ *
+ * Return: 0 on success (or when the fault should simply be retried by the
+ * guest), a negative error code otherwise.
+ */
+static int pkvm_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
+               struct kvm_memory_slot *memslot, unsigned long hva)
+{
+       unsigned int flags = FOLL_HWPOISON | FOLL_LONGTERM | FOLL_WRITE;
+       struct kvm_pgtable *pgt = vcpu->arch.hw_mmu->pgt;
+       struct mm_struct *mm = current->mm;
+       struct kvm *kvm = vcpu->kvm;
+       void *hyp_memcache;
+       struct page *page;
+       int ret;
+
+       /*
+        * Top up the memcache used for stage-2 table allocations. Note that
+        * any error from prepare_mmu_memcache() is collapsed to -ENOMEM.
+        */
+       ret = prepare_mmu_memcache(vcpu, true, &hyp_memcache);
+       if (ret)
+               return -ENOMEM;
+
+       /* Charge one page against the mm's locked-vm limit before pinning. */
+       ret = account_locked_vm(mm, 1, true);
+       if (ret)
+               return ret;
+
+       mmap_read_lock(mm);
+       ret = pin_user_pages(hva, 1, flags, &page);
+       mmap_read_unlock(mm);
+
+       if (ret == -EHWPOISON) {
+               /* Poisoned page: signal userspace and treat the abort as handled. */
+               kvm_send_hwpoison_signal(hva, PAGE_SHIFT);
+               ret = 0;
+               goto dec_account;
+       } else if (ret != 1) {
+               /* Pin failed or pinned fewer than requested: nothing to unpin. */
+               ret = -EFAULT;
+               goto dec_account;
+       } else if (!folio_test_swapbacked(page_folio(page))) {
+               /*
+                * We really can't deal with page-cache pages returned by GUP
+                * because (a) we may trigger writeback of a page for which we
+                * no longer have access and (b) page_mkclean() won't find the
+                * stage-2 mapping in the rmap so we can get out-of-whack with
+                * the filesystem when marking the page dirty during unpinning
+                * (see cc5095747edf ("ext4: don't BUG if someone dirty pages
+                * without asking ext4 first")).
+                *
+                * Ideally we'd just restrict ourselves to anonymous pages, but
+                * we also want to allow memfd (i.e. shmem) pages, so check for
+                * pages backed by swap in the knowledge that the GUP pin will
+                * prevent try_to_unmap() from succeeding.
+                */
+               ret = -EIO;
+               goto unpin;
+       }
+
+       write_lock(&kvm->mmu_lock);
+       ret = pkvm_pgtable_stage2_map(pgt, fault_ipa, PAGE_SIZE,
+                                     page_to_phys(page), KVM_PGTABLE_PROT_RWX,
+                                     hyp_memcache, 0);
+       write_unlock(&kvm->mmu_lock);
+       if (ret) {
+               /*
+                * -EAGAIN is reported as success so the guest re-faults and
+                * retries; either way the page was not mapped, so unpin it
+                * and drop the locked-vm charge.
+                */
+               if (ret == -EAGAIN)
+                       ret = 0;
+               goto unpin;
+       }
+
+       /*
+        * Success: the pin and the locked-vm charge are retained for the
+        * lifetime of the mapping (presumably released at teardown — the
+        * unwind path is not visible in this hunk).
+        */
+       return 0;
+unpin:
+       unpin_user_pages(&page, 1);
+dec_account:
+       account_locked_vm(mm, 1, false);
+       return ret;
+}
+
 static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
                          struct kvm_s2_trans *nested,
                          struct kvm_memory_slot *memslot, unsigned long hva,
@@ -2205,15 +2273,20 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu)
                goto out_unlock;
        }
 
-       VM_WARN_ON_ONCE(kvm_vcpu_trap_is_permission_fault(vcpu) &&
-                       !write_fault && !kvm_vcpu_trap_is_exec_fault(vcpu));
+       if (kvm_vm_is_protected(vcpu->kvm)) {
+               ret = pkvm_mem_abort(vcpu, fault_ipa, memslot, hva);
+       } else {
+               VM_WARN_ON_ONCE(kvm_vcpu_trap_is_permission_fault(vcpu) &&
+                               !write_fault &&
+                               !kvm_vcpu_trap_is_exec_fault(vcpu));
 
-       if (kvm_slot_has_gmem(memslot))
-               ret = gmem_abort(vcpu, fault_ipa, nested, memslot,
-                                esr_fsc_is_permission_fault(esr));
-       else
-               ret = user_mem_abort(vcpu, fault_ipa, nested, memslot, hva,
-                                    esr_fsc_is_permission_fault(esr));
+               if (kvm_slot_has_gmem(memslot))
+                       ret = gmem_abort(vcpu, fault_ipa, nested, memslot,
+                                        esr_fsc_is_permission_fault(esr));
+               else
+                       ret = user_mem_abort(vcpu, fault_ipa, nested, memslot, hva,
+                                            esr_fsc_is_permission_fault(esr));
+       }
        if (ret == 0)
                ret = 1;
 out: