KVM: arm64: Refactor user_mem_abort()
author    Fuad Tabba <tabba@google.com>
          Tue, 29 Jul 2025 22:54:48 +0000 (15:54 -0700)
committer Paolo Bonzini <pbonzini@redhat.com>
          Wed, 27 Aug 2025 08:36:08 +0000 (04:36 -0400)
Refactor user_mem_abort() to improve code clarity and simplify
assumptions within the function.

Key changes include:

* Immediately set force_pte to true at the beginning of the function if
  logging_active is true. This simplifies the flow and makes the
  condition for forcing a PTE more explicit.

* Remove the misleading comment stating that logging_active is
  guaranteed to never be true for VM_PFNMAP memslots, as this assertion
  is not entirely correct.

* Extract reusable code blocks into new helper functions:
  * prepare_mmu_memcache(): Encapsulates the logic for preparing and
    topping up the MMU page cache.
  * adjust_nested_fault_perms(): Isolates the adjustments to shadow S2
    permissions and the encoding of nested translation levels.

* Update min(a, (long)b) to min_t(long, a, b) for better type safety and
  consistency (a short stand-alone illustration follows this list).

* Perform other minor tidying up of the code.
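
As a stand-alone illustration of the min_t() point above (this snippet is
not part of the patch; the macro below is a simplified userspace stand-in
for the kernel's min_t() from include/linux/minmax.h, and the variable
names merely echo the ones in the diff):

  #include <stdio.h>

  /*
   * Simplified stand-in for the kernel's min_t(type, x, y): cast both
   * operands to the named type before comparing, instead of casting only
   * one of them as min(a, (long)b) does. (The real kernel macro also
   * guards against double evaluation.)
   */
  #define min_t(type, x, y) ((type)(x) < (type)(y) ? (type)(x) : (type)(y))

  int main(void)
  {
          long vma_pagesize = 2 * 1024 * 1024;   /* e.g. a 2MiB block */
          unsigned long max_map_size = 4096;     /* e.g. capped to PAGE_SIZE */

          /* Both operands are compared as long, so the intended type of
           * the comparison is explicit at the call site. */
          long capped = min_t(long, vma_pagesize, max_map_size);

          printf("capped mapping size: %ld\n", capped);  /* prints 4096 */
          return 0;
  }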

These changes primarily aim to simplify user_mem_abort() and make its
logic easier to understand and maintain, setting the stage for future
modifications.

Reviewed-by: Gavin Shan <gshan@redhat.com>
Reviewed-by: Marc Zyngier <maz@kernel.org>
Reviewed-by: Tao Chan <chentao@kylinos.cn>
Signed-off-by: Fuad Tabba <tabba@google.com>
Signed-off-by: Sean Christopherson <seanjc@google.com>
Message-ID: <20250729225455.670324-18-seanjc@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
arch/arm64/kvm/mmu.c

index 1c78864767c5ce73445421447d3690326cd26833..6ed69d309f4543caf3853b1720d2e348f1389f3c 100644
--- a/arch/arm64/kvm/mmu.c
+++ b/arch/arm64/kvm/mmu.c
@@ -1477,13 +1477,56 @@ static bool kvm_vma_is_cacheable(struct vm_area_struct *vma)
        }
 }
 
+static int prepare_mmu_memcache(struct kvm_vcpu *vcpu, bool topup_memcache,
+                               void **memcache)
+{
+       int min_pages;
+
+       if (!is_protected_kvm_enabled())
+               *memcache = &vcpu->arch.mmu_page_cache;
+       else
+               *memcache = &vcpu->arch.pkvm_memcache;
+
+       if (!topup_memcache)
+               return 0;
+
+       min_pages = kvm_mmu_cache_min_pages(vcpu->arch.hw_mmu);
+
+       if (!is_protected_kvm_enabled())
+               return kvm_mmu_topup_memory_cache(*memcache, min_pages);
+
+       return topup_hyp_memcache(*memcache, min_pages);
+}
+
+/*
+ * Potentially reduce shadow S2 permissions to match the guest's own S2. For
+ * exec faults, we'd only reach this point if the guest actually allowed it (see
+ * kvm_s2_handle_perm_fault).
+ *
+ * Also encode the level of the original translation in the SW bits of the leaf
+ * entry as a proxy for the span of that translation. This will be retrieved on
+ * TLB invalidation from the guest and used to limit the invalidation scope if a
+ * TTL hint or a range isn't provided.
+ */
+static void adjust_nested_fault_perms(struct kvm_s2_trans *nested,
+                                     enum kvm_pgtable_prot *prot,
+                                     bool *writable)
+{
+       *writable &= kvm_s2_trans_writable(nested);
+       if (!kvm_s2_trans_readable(nested))
+               *prot &= ~KVM_PGTABLE_PROT_R;
+
+       *prot |= kvm_encode_nested_level(nested);
+}
+
 static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
                          struct kvm_s2_trans *nested,
                          struct kvm_memory_slot *memslot, unsigned long hva,
                          bool fault_is_perm)
 {
        int ret = 0;
-       bool write_fault, writable, force_pte = false;
+       bool topup_memcache;
+       bool write_fault, writable;
        bool exec_fault, mte_allowed, is_vma_cacheable;
        bool s2_force_noncacheable = false, vfio_allow_any_uc = false;
        unsigned long mmu_seq;
@@ -1495,6 +1538,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
        gfn_t gfn;
        kvm_pfn_t pfn;
        bool logging_active = memslot_is_logging(memslot);
+       bool force_pte = logging_active;
        long vma_pagesize, fault_granule;
        enum kvm_pgtable_prot prot = KVM_PGTABLE_PROT_R;
        struct kvm_pgtable *pgt;
@@ -1506,17 +1550,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
                fault_granule = kvm_vcpu_trap_get_perm_fault_granule(vcpu);
        write_fault = kvm_is_write_fault(vcpu);
        exec_fault = kvm_vcpu_trap_is_exec_fault(vcpu);
-       VM_BUG_ON(write_fault && exec_fault);
-
-       if (fault_is_perm && !write_fault && !exec_fault) {
-               kvm_err("Unexpected L2 read permission error\n");
-               return -EFAULT;
-       }
-
-       if (!is_protected_kvm_enabled())
-               memcache = &vcpu->arch.mmu_page_cache;
-       else
-               memcache = &vcpu->arch.pkvm_memcache;
+       VM_WARN_ON_ONCE(write_fault && exec_fault);
 
        /*
         * Permission faults just need to update the existing leaf entry,
@@ -1524,17 +1558,10 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
         * only exception to this is when dirty logging is enabled at runtime
         * and a write fault needs to collapse a block entry into a table.
         */
-       if (!fault_is_perm || (logging_active && write_fault)) {
-               int min_pages = kvm_mmu_cache_min_pages(vcpu->arch.hw_mmu);
-
-               if (!is_protected_kvm_enabled())
-                       ret = kvm_mmu_topup_memory_cache(memcache, min_pages);
-               else
-                       ret = topup_hyp_memcache(memcache, min_pages);
-
-               if (ret)
-                       return ret;
-       }
+       topup_memcache = !fault_is_perm || (logging_active && write_fault);
+       ret = prepare_mmu_memcache(vcpu, topup_memcache, &memcache);
+       if (ret)
+               return ret;
 
        /*
         * Let's check if we will get back a huge page backed by hugetlbfs, or
@@ -1548,16 +1575,10 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
                return -EFAULT;
        }
 
-       /*
-        * logging_active is guaranteed to never be true for VM_PFNMAP
-        * memslots.
-        */
-       if (logging_active) {
-               force_pte = true;
+       if (force_pte)
                vma_shift = PAGE_SHIFT;
-       } else {
+       else
                vma_shift = get_vma_page_shift(vma, hva);
-       }
 
        switch (vma_shift) {
 #ifndef __PAGETABLE_PMD_FOLDED
@@ -1609,7 +1630,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
                        max_map_size = PAGE_SIZE;
 
                force_pte = (max_map_size == PAGE_SIZE);
-               vma_pagesize = min(vma_pagesize, (long)max_map_size);
+               vma_pagesize = min_t(long, vma_pagesize, max_map_size);
        }
 
        /*
@@ -1642,7 +1663,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
         * Rely on mmap_read_unlock() for an implicit smp_rmb(), which pairs
         * with the smp_wmb() in kvm_mmu_invalidate_end().
         */
-       mmu_seq = vcpu->kvm->mmu_invalidate_seq;
+       mmu_seq = kvm->mmu_invalidate_seq;
        mmap_read_unlock(current->mm);
 
        pfn = __kvm_faultin_pfn(memslot, gfn, write_fault ? FOLL_WRITE : 0,
@@ -1698,24 +1719,8 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
        if (exec_fault && s2_force_noncacheable)
                return -ENOEXEC;
 
-       /*
-        * Potentially reduce shadow S2 permissions to match the guest's own
-        * S2. For exec faults, we'd only reach this point if the guest
-        * actually allowed it (see kvm_s2_handle_perm_fault).
-        *
-        * Also encode the level of the original translation in the SW bits
-        * of the leaf entry as a proxy for the span of that translation.
-        * This will be retrieved on TLB invalidation from the guest and
-        * used to limit the invalidation scope if a TTL hint or a range
-        * isn't provided.
-        */
-       if (nested) {
-               writable &= kvm_s2_trans_writable(nested);
-               if (!kvm_s2_trans_readable(nested))
-                       prot &= ~KVM_PGTABLE_PROT_R;
-
-               prot |= kvm_encode_nested_level(nested);
-       }
+       if (nested)
+               adjust_nested_fault_perms(nested, &prot, &writable);
 
        kvm_fault_lock(kvm);
        pgt = vcpu->arch.hw_mmu->pgt;
@@ -1981,6 +1986,9 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu)
                goto out_unlock;
        }
 
+       VM_WARN_ON_ONCE(kvm_vcpu_trap_is_permission_fault(vcpu) &&
+                       !write_fault && !kvm_vcpu_trap_is_exec_fault(vcpu));
+
        ret = user_mem_abort(vcpu, fault_ipa, nested, memslot, hva,
                             esr_fsc_is_permission_fault(esr));
        if (ret == 0)