git.ipfire.org Git - thirdparty/kernel/linux.git/commitdiff
KVM: arm64: Introduce hypercall to force reclaim of a protected page
author Will Deacon <will@kernel.org>
Mon, 30 Mar 2026 14:48:26 +0000 (15:48 +0100)
committer Marc Zyngier <maz@kernel.org>
Mon, 30 Mar 2026 15:58:09 +0000 (16:58 +0100)
Introduce a new hypercall, __pkvm_force_reclaim_guest_page(), to allow
the host to forcefully reclaim a physical page that was previously donated
to a protected guest. This results in the page being zeroed and the
previous guest mapping being poisoned so that new pages cannot be
subsequently donated at the same IPA.

Tested-by: Fuad Tabba <tabba@google.com>
Tested-by: Mostafa Saleh <smostafa@google.com>
Signed-off-by: Will Deacon <will@kernel.org>
Link: https://patch.msgid.link/20260330144841.26181-26-will@kernel.org
Signed-off-by: Marc Zyngier <maz@kernel.org>
arch/arm64/include/asm/kvm_asm.h
arch/arm64/include/asm/kvm_pgtable.h
arch/arm64/kvm/hyp/include/nvhe/mem_protect.h
arch/arm64/kvm/hyp/include/nvhe/memory.h
arch/arm64/kvm/hyp/include/nvhe/pkvm.h
arch/arm64/kvm/hyp/nvhe/hyp-main.c
arch/arm64/kvm/hyp/nvhe/mem_protect.c
arch/arm64/kvm/hyp/nvhe/pkvm.c

index b6df8f64d573132e943489903d2ce0662bded192..04a230e906a76c4bedb3bc6440d10c4035721953 100644 (file)
@@ -90,6 +90,7 @@ enum __kvm_host_smccc_func {
        __KVM_HOST_SMCCC_FUNC___pkvm_unreserve_vm,
        __KVM_HOST_SMCCC_FUNC___pkvm_init_vm,
        __KVM_HOST_SMCCC_FUNC___pkvm_init_vcpu,
+       __KVM_HOST_SMCCC_FUNC___pkvm_force_reclaim_guest_page,
        __KVM_HOST_SMCCC_FUNC___pkvm_reclaim_dying_guest_page,
        __KVM_HOST_SMCCC_FUNC___pkvm_start_teardown_vm,
        __KVM_HOST_SMCCC_FUNC___pkvm_finalize_teardown_vm,
index 2df22640833c64337b629d1bf1884f5f71de7f51..41a8687938eb6be1faaaab85cc7f59a6a8fe10e8 100644 (file)
@@ -116,6 +116,12 @@ enum kvm_invalid_pte_type {
         * ownership.
         */
        KVM_HOST_INVALID_PTE_TYPE_DONATION,
+
+       /*
+        * The page has been forcefully reclaimed from the guest by the
+        * host.
+        */
+       KVM_GUEST_INVALID_PTE_TYPE_POISONED,
 };
 
 static inline bool kvm_pte_valid(kvm_pte_t pte)
index 29f81a1d9e1fe610a841b27c44651a4033177a6f..acc0311036004934f54e29d309427848ce804fee 100644 (file)
@@ -40,6 +40,7 @@ int __pkvm_hyp_donate_host(u64 pfn, u64 nr_pages);
 int __pkvm_host_share_ffa(u64 pfn, u64 nr_pages);
 int __pkvm_host_unshare_ffa(u64 pfn, u64 nr_pages);
 int __pkvm_host_donate_guest(u64 pfn, u64 gfn, struct pkvm_hyp_vcpu *vcpu);
+int __pkvm_host_force_reclaim_page_guest(phys_addr_t phys);
 int __pkvm_host_reclaim_page_guest(u64 gfn, struct pkvm_hyp_vm *vm);
 int __pkvm_host_share_guest(u64 pfn, u64 gfn, u64 nr_pages, struct pkvm_hyp_vcpu *vcpu,
                            enum kvm_pgtable_prot prot);
index dee1a406b0c28c6eb959bdf7c858d48a784442f5..4cedb720c75d221a53fcc123efd76199f3141cd5 100644 (file)
@@ -30,6 +30,12 @@ enum pkvm_page_state {
         * struct hyp_page.
         */
        PKVM_NOPAGE                     = BIT(0) | BIT(1),
+
+       /*
+        * 'Meta-states' which aren't encoded directly in the PTE's SW bits (or
+        * the hyp_vmemmap entry for the host)
+        */
+       PKVM_POISON                     = BIT(2),
 };
 #define PKVM_PAGE_STATE_MASK           (BIT(0) | BIT(1))
 
index 506831804f6476b59e65dedd8b386e1bb309a0d3..a5a7bb453f3e4e01e2340e519ae4dfdf1de53904 100644 (file)
@@ -78,6 +78,7 @@ int __pkvm_reclaim_dying_guest_page(pkvm_handle_t handle, u64 gfn);
 int __pkvm_start_teardown_vm(pkvm_handle_t handle);
 int __pkvm_finalize_teardown_vm(pkvm_handle_t handle);
 
+struct pkvm_hyp_vm *get_vm_by_handle(pkvm_handle_t handle);
 struct pkvm_hyp_vcpu *pkvm_load_hyp_vcpu(pkvm_handle_t handle,
                                         unsigned int vcpu_idx);
 void pkvm_put_hyp_vcpu(struct pkvm_hyp_vcpu *hyp_vcpu);
index 6db5aebd92dcdf64dda8a0504d3fdaba461c88eb..456c832077173e88f1a51e0027c1b2bd119c1d58 100644 (file)
@@ -573,6 +573,13 @@ static void handle___pkvm_init_vcpu(struct kvm_cpu_context *host_ctxt)
        cpu_reg(host_ctxt, 1) = __pkvm_init_vcpu(handle, host_vcpu, vcpu_hva);
 }
 
+static void handle___pkvm_force_reclaim_guest_page(struct kvm_cpu_context *host_ctxt)
+{
+       DECLARE_REG(phys_addr_t, phys, host_ctxt, 1);
+
+       cpu_reg(host_ctxt, 1) = __pkvm_host_force_reclaim_page_guest(phys);
+}
+
 static void handle___pkvm_reclaim_dying_guest_page(struct kvm_cpu_context *host_ctxt)
 {
        DECLARE_REG(pkvm_handle_t, handle, host_ctxt, 1);
@@ -634,6 +641,7 @@ static const hcall_t host_hcall[] = {
        HANDLE_FUNC(__pkvm_unreserve_vm),
        HANDLE_FUNC(__pkvm_init_vm),
        HANDLE_FUNC(__pkvm_init_vcpu),
+       HANDLE_FUNC(__pkvm_force_reclaim_guest_page),
        HANDLE_FUNC(__pkvm_reclaim_dying_guest_page),
        HANDLE_FUNC(__pkvm_start_teardown_vm),
        HANDLE_FUNC(__pkvm_finalize_teardown_vm),
index 51cb5c89fd207fbdae3bbc17b03039bff54e8293..73bdbd4a508ee81cac3a7d73cb47e643efd9941d 100644 (file)
@@ -616,6 +616,35 @@ static u64 host_stage2_encode_gfn_meta(struct pkvm_hyp_vm *vm, u64 gfn)
               FIELD_PREP(KVM_HOST_PTE_OWNER_GUEST_GFN_MASK, gfn);
 }
 
+static int host_stage2_decode_gfn_meta(kvm_pte_t pte, struct pkvm_hyp_vm **vm,
+                                      u64 *gfn)
+{
+       pkvm_handle_t handle;
+       u64 meta;
+
+       if (WARN_ON(kvm_pte_valid(pte)))
+               return -EINVAL;
+
+       if (FIELD_GET(KVM_INVALID_PTE_TYPE_MASK, pte) !=
+           KVM_HOST_INVALID_PTE_TYPE_DONATION) {
+               return -EINVAL;
+       }
+
+       if (FIELD_GET(KVM_HOST_DONATION_PTE_OWNER_MASK, pte) != PKVM_ID_GUEST)
+               return -EPERM;
+
+       meta = FIELD_GET(KVM_HOST_DONATION_PTE_EXTRA_MASK, pte);
+       handle = FIELD_GET(KVM_HOST_PTE_OWNER_GUEST_HANDLE_MASK, meta);
+       *vm = get_vm_by_handle(handle);
+       if (!*vm) {
+               /* We probably raced with teardown; try again */
+               return -EAGAIN;
+       }
+
+       *gfn = FIELD_GET(KVM_HOST_PTE_OWNER_GUEST_GFN_MASK, meta);
+       return 0;
+}
+
 static bool host_stage2_force_pte_cb(u64 addr, u64 end, enum kvm_pgtable_prot prot)
 {
        /*
@@ -801,8 +830,20 @@ static int __hyp_check_page_state_range(phys_addr_t phys, u64 size, enum pkvm_pa
        return 0;
 }
 
+static bool guest_pte_is_poisoned(kvm_pte_t pte)
+{
+       if (kvm_pte_valid(pte))
+               return false;
+
+       return FIELD_GET(KVM_INVALID_PTE_TYPE_MASK, pte) ==
+              KVM_GUEST_INVALID_PTE_TYPE_POISONED;
+}
+
 static enum pkvm_page_state guest_get_page_state(kvm_pte_t pte, u64 addr)
 {
+       if (guest_pte_is_poisoned(pte))
+               return PKVM_POISON;
+
        if (!kvm_pte_valid(pte))
                return PKVM_NOPAGE;
 
@@ -831,6 +872,8 @@ static int get_valid_guest_pte(struct pkvm_hyp_vm *vm, u64 ipa, kvm_pte_t *ptep,
        ret = kvm_pgtable_get_leaf(&vm->pgt, ipa, &pte, &level);
        if (ret)
                return ret;
+       if (guest_pte_is_poisoned(pte))
+               return -EHWPOISON;
        if (!kvm_pte_valid(pte))
                return -ENOENT;
        if (level != KVM_PGTABLE_LAST_LEVEL)
@@ -1096,6 +1139,86 @@ static void hyp_poison_page(phys_addr_t phys)
        hyp_fixmap_unmap();
 }
 
+static int host_stage2_get_guest_info(phys_addr_t phys, struct pkvm_hyp_vm **vm,
+                                     u64 *gfn)
+{
+       enum pkvm_page_state state;
+       kvm_pte_t pte;
+       s8 level;
+       int ret;
+
+       if (!addr_is_memory(phys))
+               return -EFAULT;
+
+       state = get_host_state(hyp_phys_to_page(phys));
+       switch (state) {
+       case PKVM_PAGE_OWNED:
+       case PKVM_PAGE_SHARED_OWNED:
+       case PKVM_PAGE_SHARED_BORROWED:
+               /* The access should no longer fault; try again. */
+               return -EAGAIN;
+       case PKVM_NOPAGE:
+               break;
+       default:
+               return -EPERM;
+       }
+
+       ret = kvm_pgtable_get_leaf(&host_mmu.pgt, phys, &pte, &level);
+       if (ret)
+               return ret;
+
+       if (WARN_ON(level != KVM_PGTABLE_LAST_LEVEL))
+               return -EINVAL;
+
+       return host_stage2_decode_gfn_meta(pte, vm, gfn);
+}
+
+int __pkvm_host_force_reclaim_page_guest(phys_addr_t phys)
+{
+       struct pkvm_hyp_vm *vm;
+       u64 gfn, ipa, pa;
+       kvm_pte_t pte;
+       int ret;
+
+       phys &= PAGE_MASK;
+
+       hyp_spin_lock(&vm_table_lock);
+       host_lock_component();
+
+       ret = host_stage2_get_guest_info(phys, &vm, &gfn);
+       if (ret)
+               goto unlock_host;
+
+       ipa = hyp_pfn_to_phys(gfn);
+       guest_lock_component(vm);
+       ret = get_valid_guest_pte(vm, ipa, &pte, &pa);
+       if (ret)
+               goto unlock_guest;
+
+       WARN_ON(pa != phys);
+       if (guest_get_page_state(pte, ipa) != PKVM_PAGE_OWNED) {
+               ret = -EPERM;
+               goto unlock_guest;
+       }
+
+       /* We really shouldn't be allocating, so don't pass a memcache */
+       ret = kvm_pgtable_stage2_annotate(&vm->pgt, ipa, PAGE_SIZE, NULL,
+                                         KVM_GUEST_INVALID_PTE_TYPE_POISONED,
+                                         0);
+       if (ret)
+               goto unlock_guest;
+
+       hyp_poison_page(phys);
+       WARN_ON(host_stage2_set_owner_locked(phys, PAGE_SIZE, PKVM_ID_HOST));
+unlock_guest:
+       guest_unlock_component(vm);
+unlock_host:
+       host_unlock_component();
+       hyp_spin_unlock(&vm_table_lock);
+
+       return ret;
+}
+
 int __pkvm_host_reclaim_page_guest(u64 gfn, struct pkvm_hyp_vm *vm)
 {
        u64 ipa = hyp_pfn_to_phys(gfn);
@@ -1130,7 +1253,11 @@ unlock:
        guest_unlock_component(vm);
        host_unlock_component();
 
-       return ret;
+       /*
+        * -EHWPOISON implies that the page was forcefully reclaimed already
+        * so return success for the GUP pin to be dropped.
+        */
+       return ret && ret != -EHWPOISON ? ret : 0;
 }
 
 int __pkvm_host_donate_guest(u64 pfn, u64 gfn, struct pkvm_hyp_vcpu *vcpu)
index 0ba6423cd0d5649603bd86750615b1b9212d6d3e..cdeefe3d74ff7af26424a36501a08c069df6e1a2 100644 (file)
@@ -230,10 +230,12 @@ void pkvm_hyp_vm_table_init(void *tbl)
 /*
  * Return the hyp vm structure corresponding to the handle.
  */
-static struct pkvm_hyp_vm *get_vm_by_handle(pkvm_handle_t handle)
+struct pkvm_hyp_vm *get_vm_by_handle(pkvm_handle_t handle)
 {
        unsigned int idx = vm_handle_to_idx(handle);
 
+       hyp_assert_lock_held(&vm_table_lock);
+
        if (unlikely(idx >= KVM_MAX_PVMS))
                return NULL;