git.ipfire.org Git - thirdparty/kernel/stable.git/commitdiff
KVM: arm64: Introduce __pkvm_reclaim_dying_guest_page()
author Will Deacon <will@kernel.org>
Mon, 30 Mar 2026 14:48:16 +0000 (15:48 +0100)
committer Marc Zyngier <maz@kernel.org>
Mon, 30 Mar 2026 15:58:08 +0000 (16:58 +0100)
To enable reclaim of pages from a protected VM during teardown,
introduce a new hypercall to reclaim a single page from a protected
guest that is in the dying state.

Since the EL2 code is non-preemptible, the new hypercall deliberately
acts on a single page at a time so as to allow EL1 to reschedule
frequently during the teardown operation.

Reviewed-by: Vincent Donnefort <vdonnefort@google.com>
Tested-by: Fuad Tabba <tabba@google.com>
Tested-by: Mostafa Saleh <smostafa@google.com>
Co-developed-by: Quentin Perret <qperret@google.com>
Signed-off-by: Quentin Perret <qperret@google.com>
Signed-off-by: Will Deacon <will@kernel.org>
Link: https://patch.msgid.link/20260330144841.26181-16-will@kernel.org
Signed-off-by: Marc Zyngier <maz@kernel.org>
arch/arm64/include/asm/kvm_asm.h
arch/arm64/kvm/hyp/include/nvhe/mem_protect.h
arch/arm64/kvm/hyp/include/nvhe/pkvm.h
arch/arm64/kvm/hyp/nvhe/hyp-main.c
arch/arm64/kvm/hyp/nvhe/mem_protect.c
arch/arm64/kvm/hyp/nvhe/pkvm.c

index dfc6625c82697718023610eaade7fe6865645771..b6df8f64d573132e943489903d2ce0662bded192 100644 (file)
@@ -90,6 +90,7 @@ enum __kvm_host_smccc_func {
        __KVM_HOST_SMCCC_FUNC___pkvm_unreserve_vm,
        __KVM_HOST_SMCCC_FUNC___pkvm_init_vm,
        __KVM_HOST_SMCCC_FUNC___pkvm_init_vcpu,
+       __KVM_HOST_SMCCC_FUNC___pkvm_reclaim_dying_guest_page,
        __KVM_HOST_SMCCC_FUNC___pkvm_start_teardown_vm,
        __KVM_HOST_SMCCC_FUNC___pkvm_finalize_teardown_vm,
        __KVM_HOST_SMCCC_FUNC___pkvm_vcpu_load,
index 7061b0be340a9cfc0a36b77175eb05a74d9f9325..29f81a1d9e1fe610a841b27c44651a4033177a6f 100644 (file)
@@ -40,6 +40,7 @@ int __pkvm_hyp_donate_host(u64 pfn, u64 nr_pages);
 int __pkvm_host_share_ffa(u64 pfn, u64 nr_pages);
 int __pkvm_host_unshare_ffa(u64 pfn, u64 nr_pages);
 int __pkvm_host_donate_guest(u64 pfn, u64 gfn, struct pkvm_hyp_vcpu *vcpu);
+int __pkvm_host_reclaim_page_guest(u64 gfn, struct pkvm_hyp_vm *vm);
 int __pkvm_host_share_guest(u64 pfn, u64 gfn, u64 nr_pages, struct pkvm_hyp_vcpu *vcpu,
                            enum kvm_pgtable_prot prot);
 int __pkvm_host_unshare_guest(u64 gfn, u64 nr_pages, struct pkvm_hyp_vm *hyp_vm);
index 04c7ca7030144d38325520d81ed25d9a8b750103..506831804f6476b59e65dedd8b386e1bb309a0d3 100644 (file)
@@ -74,6 +74,7 @@ int __pkvm_init_vm(struct kvm *host_kvm, unsigned long vm_hva,
 int __pkvm_init_vcpu(pkvm_handle_t handle, struct kvm_vcpu *host_vcpu,
                     unsigned long vcpu_hva);
 
+int __pkvm_reclaim_dying_guest_page(pkvm_handle_t handle, u64 gfn);
 int __pkvm_start_teardown_vm(pkvm_handle_t handle);
 int __pkvm_finalize_teardown_vm(pkvm_handle_t handle);
 
index 970656318cf2511e0112fe0110ee4d8ac0564248..7294c94f929678772994ebf06e31313c811e5aae 100644 (file)
@@ -573,6 +573,14 @@ static void handle___pkvm_init_vcpu(struct kvm_cpu_context *host_ctxt)
        cpu_reg(host_ctxt, 1) = __pkvm_init_vcpu(handle, host_vcpu, vcpu_hva);
 }
 
+static void handle___pkvm_reclaim_dying_guest_page(struct kvm_cpu_context *host_ctxt)
+{      /* Host hypercall handler: reclaim a single page from a dying protected guest. */
+       DECLARE_REG(pkvm_handle_t, handle, host_ctxt, 1); /* VM handle, taken from host_ctxt reg 1 */
+       DECLARE_REG(u64, gfn, host_ctxt, 2); /* guest frame number, taken from host_ctxt reg 2 */
+       /* The callee's return code is stored back through cpu_reg(host_ctxt, 1). */
+       cpu_reg(host_ctxt, 1) = __pkvm_reclaim_dying_guest_page(handle, gfn);
+}
+
 static void handle___pkvm_start_teardown_vm(struct kvm_cpu_context *host_ctxt)
 {
        DECLARE_REG(pkvm_handle_t, handle, host_ctxt, 1);
@@ -626,6 +634,7 @@ static const hcall_t host_hcall[] = {
        HANDLE_FUNC(__pkvm_unreserve_vm),
        HANDLE_FUNC(__pkvm_init_vm),
        HANDLE_FUNC(__pkvm_init_vcpu),
+       HANDLE_FUNC(__pkvm_reclaim_dying_guest_page),
        HANDLE_FUNC(__pkvm_start_teardown_vm),
        HANDLE_FUNC(__pkvm_finalize_teardown_vm),
        HANDLE_FUNC(__pkvm_vcpu_load),
index 03e6fa124253a057fb32a2e9ad594306fd14ea63..ca266a4d9d50ad08623d74d7d0441f74e86eee3c 100644 (file)
@@ -738,6 +738,32 @@ static int __guest_check_page_state_range(struct pkvm_hyp_vm *vm, u64 addr,
        return check_page_state_range(&vm->pgt, addr, size, &d);
 }
 
+static int get_valid_guest_pte(struct pkvm_hyp_vm *vm, u64 ipa, kvm_pte_t *ptep, u64 *physp)
+{      /* Look up the valid last-level PTE for @ipa in @vm; results go to @ptep and @physp. */
+       kvm_pte_t pte;
+       u64 phys;
+       s8 level;
+       int ret;
+       /* Fetch the leaf entry for @ipa, and the level it sits at, from vm->pgt. */
+       ret = kvm_pgtable_get_leaf(&vm->pgt, ipa, &pte, &level);
+       if (ret)
+               return ret;
+       if (!kvm_pte_valid(pte))
+               return -ENOENT; /* the leaf entry is not valid */
+       if (level != KVM_PGTABLE_LAST_LEVEL)
+               return -E2BIG; /* the leaf is not at the last level */
+       /* The page the PTE points at must pass check_range_allowed_memory(); a failure also WARNs. */
+       phys = kvm_pte_to_phys(pte);
+       ret = check_range_allowed_memory(phys, phys + PAGE_SIZE);
+       if (WARN_ON(ret))
+               return ret;
+       /* The outputs are written only once every check above has passed. */
+       *ptep = pte;
+       *physp = phys;
+
+       return 0; /* 0 on success; every failure path above returns a non-zero error */
+}
+
 int __pkvm_host_share_hyp(u64 pfn)
 {
        u64 phys = hyp_pfn_to_phys(pfn);
@@ -971,6 +997,59 @@ static int __guest_check_transition_size(u64 phys, u64 ipa, u64 nr_pages, u64 *s
        return 0;
 }
 
+static void hyp_poison_page(phys_addr_t phys)
+{      /* Zero the page at @phys, then perform cache maintenance on it to the PoC. */
+       void *addr = hyp_fixmap_map(phys); /* mapping is released by hyp_fixmap_unmap() below */
+       /* Overwrite all PAGE_SIZE bytes with zeroes. */
+       memset(addr, 0, PAGE_SIZE);
+       /*
+        * Prefer kvm_flush_dcache_to_poc() over __clean_dcache_guest_page()
+        * here as the latter may elide the CMO under the assumption that FWB
+        * will be enabled on CPUs that support it. This is incorrect for the
+        * host stage-2 and would otherwise lead to a malicious host potentially
+        * being able to read the contents of newly reclaimed guest pages.
+        */
+       kvm_flush_dcache_to_poc(addr, PAGE_SIZE);
+       hyp_fixmap_unmap();
+}
+
+int __pkvm_host_reclaim_page_guest(u64 gfn, struct pkvm_hyp_vm *vm)
+{      /* Unmap the page at @gfn from @vm and make the host its owner; 0 or a negative errno. */
+       u64 ipa = hyp_pfn_to_phys(gfn); /* guest frame number converted to an address (the IPA) */
+       kvm_pte_t pte;
+       u64 phys;
+       int ret;
+       /* Host lock first, then the guest lock; both are dropped in reverse order at 'unlock'. */
+       host_lock_component();
+       guest_lock_component(vm);
+       /* Bail out unless @ipa has a valid last-level mapping backed by allowed memory. */
+       ret = get_valid_guest_pte(vm, ipa, &pte, &phys);
+       if (ret)
+               goto unlock;
+       /* What happens next depends on the guest-side state recorded for the page. */
+       switch (guest_get_page_state(pte, ipa)) {
+       case PKVM_PAGE_OWNED: /* WARN unless the host side reports PKVM_NOPAGE for the page */
+               WARN_ON(__host_check_page_state_range(phys, PAGE_SIZE, PKVM_NOPAGE));
+               hyp_poison_page(phys); /* zero the contents before the host can get the page */
+               break;
+       case PKVM_PAGE_SHARED_OWNED: /* WARN unless the host side is SHARED_BORROWED; no poisoning */
+               WARN_ON(__host_check_page_state_range(phys, PAGE_SIZE, PKVM_PAGE_SHARED_BORROWED));
+               break;
+       default: /* any other guest page state is refused */
+               ret = -EPERM;
+               goto unlock;
+       }
+       /* Remove the guest stage-2 mapping, then record the host as owner of the page. */
+       WARN_ON(kvm_pgtable_stage2_unmap(&vm->pgt, ipa, PAGE_SIZE));
+       WARN_ON(host_stage2_set_owner_locked(phys, PAGE_SIZE, PKVM_ID_HOST));
+       /* Failures of the two calls above only WARN; ret is still 0 when falling through here. */
+unlock:
+       guest_unlock_component(vm);
+       host_unlock_component();
+
+       return ret;
+}
+
 int __pkvm_host_donate_guest(u64 pfn, u64 gfn, struct pkvm_hyp_vcpu *vcpu)
 {
        struct pkvm_hyp_vm *vm = pkvm_hyp_vcpu_to_hyp_vm(vcpu);
index 61e69e24656a5a71ec25c95135b0387001cdb8d4..092e9d0e55ac14678a606826381e25f77c4ce633 100644 (file)
@@ -868,6 +868,20 @@ teardown_donated_memory(struct kvm_hyp_memcache *mc, void *addr, size_t size)
        unmap_donated_memory_noclear(addr, size);
 }
 
+int __pkvm_reclaim_dying_guest_page(pkvm_handle_t handle, u64 gfn)
+{      /* Reclaim the page at @gfn from the VM named by @handle, but only if that VM is dying. */
+       struct pkvm_hyp_vm *hyp_vm;
+       int ret = -EINVAL; /* returned when the handle lookup fails or the VM is not dying */
+       /* vm_table_lock is held across both the handle lookup and the reclaim itself. */
+       hyp_spin_lock(&vm_table_lock);
+       hyp_vm = get_vm_by_handle(handle);
+       if (hyp_vm && hyp_vm->kvm.arch.pkvm.is_dying)
+               ret = __pkvm_host_reclaim_page_guest(gfn, hyp_vm); /* result is passed through as-is */
+       hyp_spin_unlock(&vm_table_lock);
+
+       return ret;
+}
+
 int __pkvm_start_teardown_vm(pkvm_handle_t handle)
 {
        struct pkvm_hyp_vm *hyp_vm;