KVM: arm64: nv: Handle VNCR_EL2-triggered faults
author		Marc Zyngier <maz@kernel.org>
		Wed, 14 May 2025 10:34:52 +0000 (11:34 +0100)
committer	Marc Zyngier <maz@kernel.org>
		Mon, 19 May 2025 07:01:19 +0000 (08:01 +0100)
As VNCR_EL2.BADDR contains a VA, it is bound to trigger faults.

These faults can have multiple sources:

- We haven't mapped anything on the host: we need to compute the
  resulting translation, populate a TLB, and eventually map
  the corresponding page

- The permissions are out of whack: we need to tell the guest about
  this state of affairs

Note that the kernel doesn't support S1POE for itself yet, so the
particular case of a VNCR page mapped with no permissions or with
write-only permissions is not correctly handled.
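
The resulting flow, simplified from the kvm_handle_vncr_abort() handler
added below (purely an illustrative sketch: locking, the pseudo-TLB
lookup and the detailed per-error-code handling are elided):

	if (esr_fsc_is_permission_fault(esr)) {
		/* The guest's S1 denies the access: reflect the fault to vEL2 */
		inject_vncr_perm(vcpu);
	} else if (esr_fsc_is_translation_fault(esr)) {
		/*
		 * No valid VNCR TLB entry: walk the guest's EL2 S1 tables,
		 * fault the backing page in, and request the actual mapping
		 * via KVM_REQ_MAP_L1_VNCR_EL2; if the walk fails, forward
		 * the resulting exception to the guest's EL2 instead.
		 */
		if (kvm_translate_vncr(vcpu))
			kvm_inject_nested_sync(vcpu, esr);
	}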

Reviewed-by: Oliver Upton <oliver.upton@linux.dev>
Link: https://lore.kernel.org/r/20250514103501.2225951-10-maz@kernel.org
Signed-off-by: Marc Zyngier <maz@kernel.org>
arch/arm64/include/asm/esr.h
arch/arm64/include/asm/kvm_host.h
arch/arm64/include/asm/kvm_nested.h
arch/arm64/kvm/handle_exit.c
arch/arm64/kvm/nested.c

index e4f77757937e65d6605ab61d49376c0d9e69808d..fb4e119e1aafaefa9360c8b5f6cd78a8214fd6e2 100644 (file)
@@ -99,6 +99,8 @@
 #define ESR_ELx_AET_CE         (UL(6) << ESR_ELx_AET_SHIFT)
 
 /* Shared ISS field definitions for Data/Instruction aborts */
+#define ESR_ELx_VNCR_SHIFT     (13)
+#define ESR_ELx_VNCR           (UL(1) << ESR_ELx_VNCR_SHIFT)
 #define ESR_ELx_SET_SHIFT      (11)
 #define ESR_ELx_SET_MASK       (UL(3) << ESR_ELx_SET_SHIFT)
 #define ESR_ELx_FnV_SHIFT      (10)
index f5ac454dcf66a32a192faf7918e61d420c720bbe..8fb1c8d5fd14290a5243557b0b79a6dabdafa916 100644 (file)
@@ -53,6 +53,7 @@
 #define KVM_REQ_RESYNC_PMU_EL0         KVM_ARCH_REQ(7)
 #define KVM_REQ_NESTED_S2_UNMAP                KVM_ARCH_REQ(8)
 #define KVM_REQ_GUEST_HYP_IRQ_PENDING  KVM_ARCH_REQ(9)
+#define KVM_REQ_MAP_L1_VNCR_EL2                KVM_ARCH_REQ(10)
 
 #define KVM_DIRTY_LOG_MANUAL_CAPS   (KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE | \
                                     KVM_DIRTY_LOG_INITIALLY_SET)
index 98b3d6b5896689bc1c921fbb2cd3104e2fd257b4..be4be8ec49d9ec4d7ef49626b5cc041ec88f1410 100644 (file)
@@ -335,5 +335,6 @@ int __kvm_translate_va(struct kvm_vcpu *vcpu, struct s1_walk_info *wi,
 
 /* VNCR management */
 int kvm_vcpu_allocate_vncr_tlb(struct kvm_vcpu *vcpu);
+int kvm_handle_vncr_abort(struct kvm_vcpu *vcpu);
 
 #endif /* __ARM64_KVM_NESTED_H */
index b73dc26bc44b410903e2423551d61d4e65fd7e1e..9700627dd85f3a18cbcc71794b74a2c6791db5e6 100644 (file)
@@ -317,6 +317,7 @@ static exit_handle_fn arm_exit_handlers[] = {
        [ESR_ELx_EC_ERET]       = kvm_handle_eret,
        [ESR_ELx_EC_IABT_LOW]   = kvm_handle_guest_abort,
        [ESR_ELx_EC_DABT_LOW]   = kvm_handle_guest_abort,
+       [ESR_ELx_EC_DABT_CUR]   = kvm_handle_vncr_abort,
        [ESR_ELx_EC_SOFTSTP_LOW]= kvm_handle_guest_debug,
        [ESR_ELx_EC_WATCHPT_LOW]= kvm_handle_guest_debug,
        [ESR_ELx_EC_BREAKPT_LOW]= kvm_handle_guest_debug,
index 32ea6e362bab116f147cfefa66c2b1505a9cba58..d53c22f51009f2f0ea4d3a9a850026f57ccebbf1 100644 (file)
@@ -883,6 +883,165 @@ int kvm_vcpu_allocate_vncr_tlb(struct kvm_vcpu *vcpu)
        return 0;
 }
 
+static u64 read_vncr_el2(struct kvm_vcpu *vcpu)
+{
+       return (u64)sign_extend64(__vcpu_sys_reg(vcpu, VNCR_EL2), 48);
+}
+
+static int kvm_translate_vncr(struct kvm_vcpu *vcpu)
+{
+       bool write_fault, writable;
+       unsigned long mmu_seq;
+       struct vncr_tlb *vt;
+       struct page *page;
+       u64 va, pfn, gfn;
+       int ret;
+
+       vt = vcpu->arch.vncr_tlb;
+
+       vt->wi = (struct s1_walk_info) {
+               .regime = TR_EL20,
+               .as_el0 = false,
+               .pan    = false,
+       };
+       vt->wr = (struct s1_walk_result){};
+       vt->valid = false;
+
+       guard(srcu)(&vcpu->kvm->srcu);
+
+       va = read_vncr_el2(vcpu);
+
+       ret = __kvm_translate_va(vcpu, &vt->wi, &vt->wr, va);
+       if (ret)
+               return ret;
+
+       write_fault = kvm_is_write_fault(vcpu);
+
+       mmu_seq = vcpu->kvm->mmu_invalidate_seq;
+       smp_rmb();
+
+       gfn = vt->wr.pa >> PAGE_SHIFT;
+       pfn = kvm_faultin_pfn(vcpu, gfn, write_fault, &writable, &page);
+       if (is_error_noslot_pfn(pfn) || (write_fault && !writable))
+               return -EFAULT;
+
+       scoped_guard(write_lock, &vcpu->kvm->mmu_lock) {
+               if (mmu_invalidate_retry(vcpu->kvm, mmu_seq))
+                       return -EAGAIN;
+
+               vt->gva = va;
+               vt->hpa = pfn << PAGE_SHIFT;
+               vt->valid = true;
+               vt->cpu = -1;
+
+               kvm_make_request(KVM_REQ_MAP_L1_VNCR_EL2, vcpu);
+       }
+
+       kvm_release_faultin_page(vcpu->kvm, page, false, vt->wr.pw);
+       if (vt->wr.pw)
+               mark_page_dirty(vcpu->kvm, gfn);
+
+       return 0;
+}
+
+static void inject_vncr_perm(struct kvm_vcpu *vcpu)
+{
+       struct vncr_tlb *vt = vcpu->arch.vncr_tlb;
+       u64 esr = kvm_vcpu_get_esr(vcpu);
+
+       /* Adjust the fault level to reflect that of the guest's */
+       esr &= ~ESR_ELx_FSC;
+       esr |= FIELD_PREP(ESR_ELx_FSC,
+                         ESR_ELx_FSC_PERM_L(vt->wr.level));
+
+       kvm_inject_nested_sync(vcpu, esr);
+}
+
+static bool kvm_vncr_tlb_lookup(struct kvm_vcpu *vcpu)
+{
+       struct vncr_tlb *vt = vcpu->arch.vncr_tlb;
+
+       lockdep_assert_held_read(&vcpu->kvm->mmu_lock);
+
+       if (!vt->valid)
+               return false;
+
+       if (read_vncr_el2(vcpu) != vt->gva)
+               return false;
+
+       if (vt->wr.nG) {
+               u64 tcr = vcpu_read_sys_reg(vcpu, TCR_EL2);
+               u64 ttbr = ((tcr & TCR_A1) ?
+                           vcpu_read_sys_reg(vcpu, TTBR1_EL2) :
+                           vcpu_read_sys_reg(vcpu, TTBR0_EL2));
+               u16 asid;
+
+               asid = FIELD_GET(TTBR_ASID_MASK, ttbr);
+               if (!kvm_has_feat_enum(vcpu->kvm, ID_AA64MMFR0_EL1, ASIDBITS, 16) ||
+                   !(tcr & TCR_ASID16))
+                       asid &= GENMASK(7, 0);
+
+               return asid != vt->wr.asid;
+       }
+
+       return true;
+}
+
+int kvm_handle_vncr_abort(struct kvm_vcpu *vcpu)
+{
+       struct vncr_tlb *vt = vcpu->arch.vncr_tlb;
+       u64 esr = kvm_vcpu_get_esr(vcpu);
+
+       BUG_ON(!(esr & ESR_ELx_VNCR));
+
+       if (esr_fsc_is_permission_fault(esr)) {
+               inject_vncr_perm(vcpu);
+       } else if (esr_fsc_is_translation_fault(esr)) {
+               bool valid;
+               int ret;
+
+               scoped_guard(read_lock, &vcpu->kvm->mmu_lock)
+                       valid = kvm_vncr_tlb_lookup(vcpu);
+
+               if (!valid)
+                       ret = kvm_translate_vncr(vcpu);
+               else
+                       ret = -EPERM;
+
+               switch (ret) {
+               case -EAGAIN:
+               case -ENOMEM:
+                       /* Let's try again... */
+                       break;
+               case -EFAULT:
+               case -EINVAL:
+               case -ENOENT:
+               case -EACCES:
+                       /*
+                        * Translation failed, inject the corresponding
+                        * exception back to EL2.
+                        */
+                       BUG_ON(!vt->wr.failed);
+
+                       esr &= ~ESR_ELx_FSC;
+                       esr |= FIELD_PREP(ESR_ELx_FSC, vt->wr.fst);
+
+                       kvm_inject_nested_sync(vcpu, esr);
+                       break;
+               case -EPERM:
+                       /* Hack to deal with POE until we get kernel support */
+                       inject_vncr_perm(vcpu);
+                       break;
+               case 0:
+                       break;
+               }
+       } else {
+               WARN_ONCE(1, "Unhandled VNCR abort, ESR=%llx\n", esr);
+       }
+
+       return 1;
+}
+
 /*
  * Our emulated CPU doesn't support all the possible features. For the
  * sake of simplicity (and probably mental sanity), wipe out a number