KVM: arm64: nv: Handle mapping of VNCR_EL2 at EL2
Author:     Marc Zyngier <maz@kernel.org>
AuthorDate: Wed, 14 May 2025 10:34:53 +0000 (11:34 +0100)
Commit:     Marc Zyngier <maz@kernel.org>
CommitDate: Mon, 19 May 2025 07:01:19 +0000 (08:01 +0100)
Now that we can handle faults triggered through VNCR_EL2, we need
to map the corresponding page at EL2. But where, you'll ask?

Since each CPU in the system can run a vcpu, we need a per-CPU
mapping. For that, we carve out an NR_CPUS-sized range in the fixmap,
giving us a per-CPU VA at which to map the guest's VNCR page.
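
As an illustration (not part of this patch), each CPU's slot resolves
to a distinct fixed VA through the generic fixmap helpers, roughly:

	/*
	 * Sketch only: vncr_el2_va_for_cpu() is a made-up name; the real
	 * index arithmetic and range check live in the vncr_fixmap() macro
	 * added below, and fix_to_virt() is the generic fixmap accessor.
	 */
	static inline void *vncr_el2_va_for_cpu(unsigned int cpu)
	{
		/* CPU 0 uses slot FIX_VNCR, CPU n uses slot FIX_VNCR - n */
		return (void *)fix_to_virt(FIX_VNCR - cpu);
	}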

The mapping occurs both on vcpu load and on the back of a fault,
each generating a request that will take care of the mapping. That
mapping also gets dropped on vcpu put.
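
For reference (a sketch, not the exact handler code), the fault-path
side is symmetric: once the VNCR pseudo-TLB entry has been
(re)populated, it only needs to queue the same request:

	/*
	 * Sketch: after the abort handler has made the pseudo-TLB entry
	 * valid again, queuing the request lets check_nested_vcpu_requests()
	 * call kvm_map_l1_vncr() on the way back into the guest.
	 */
	kvm_make_request(KVM_REQ_MAP_L1_VNCR_EL2, vcpu);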

Yes, this is a bit heavy-handed, but it is simple. Eventually,
we may want to have a per-VM, per-CPU mapping, which would avoid
all the TLBI overhead.

Reviewed-by: Oliver Upton <oliver.upton@linux.dev>
Link: https://lore.kernel.org/r/20250514103501.2225951-11-maz@kernel.org
Signed-off-by: Marc Zyngier <maz@kernel.org>
arch/arm64/include/asm/fixmap.h
arch/arm64/include/asm/kvm_host.h
arch/arm64/include/asm/kvm_nested.h
arch/arm64/kvm/nested.c

diff --git a/arch/arm64/include/asm/fixmap.h b/arch/arm64/include/asm/fixmap.h
index 87e307804b99c39148f0c58e03ef1fdd62a5a538..635a43c4ec85b87f7333eee90dfff24c0bec3596 100644
--- a/arch/arm64/include/asm/fixmap.h
+++ b/arch/arm64/include/asm/fixmap.h
@@ -48,6 +48,12 @@ enum fixed_addresses {
        FIX_EARLYCON_MEM_BASE,
        FIX_TEXT_POKE0,
 
+#ifdef CONFIG_KVM
+       /* One slot per CPU, mapping the guest's VNCR page at EL2. */
+       FIX_VNCR_END,
+       FIX_VNCR = FIX_VNCR_END + NR_CPUS,
+#endif
+
 #ifdef CONFIG_ACPI_APEI_GHES
        /* Used for GHES mapping from assorted contexts */
        FIX_APEI_GHES_IRQ,
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index 8fb1c8d5fd14290a5243557b0b79a6dabdafa916..d87fed0b483318170cbaba3448dd34ac681c6d4f 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -658,6 +658,7 @@ struct kvm_host_data {
 #define KVM_HOST_DATA_FLAG_TRBE_ENABLED                        4
 #define KVM_HOST_DATA_FLAG_EL1_TRACING_CONFIGURED      5
 #define KVM_HOST_DATA_FLAG_VCPU_IN_HYP_CONTEXT         6
+#define KVM_HOST_DATA_FLAG_L1_VNCR_MAPPED              7
        unsigned long flags;
 
        struct kvm_cpu_context host_ctxt;
diff --git a/arch/arm64/include/asm/kvm_nested.h b/arch/arm64/include/asm/kvm_nested.h
index be4be8ec49d9ec4d7ef49626b5cc041ec88f1410..ea50cad1a6a29abaf2af155dc298b4641f3b4861 100644
--- a/arch/arm64/include/asm/kvm_nested.h
+++ b/arch/arm64/include/asm/kvm_nested.h
@@ -337,4 +337,11 @@ int __kvm_translate_va(struct kvm_vcpu *vcpu, struct s1_walk_info *wi,
 int kvm_vcpu_allocate_vncr_tlb(struct kvm_vcpu *vcpu);
 int kvm_handle_vncr_abort(struct kvm_vcpu *vcpu);
 
+#define vncr_fixmap(c)                                         \
+       ({                                                      \
+               u32 __c = (c);                                  \
+               BUG_ON(__c >= NR_CPUS);                         \
+               (FIX_VNCR - __c);                               \
+       })
+
 #endif /* __ARM64_KVM_NESTED_H */
diff --git a/arch/arm64/kvm/nested.c b/arch/arm64/kvm/nested.c
index d53c22f51009f2f0ea4d3a9a850026f57ccebbf1..e81a0fcfb1f3fa9db774e5439ef9bd030b73b378 100644
--- a/arch/arm64/kvm/nested.c
+++ b/arch/arm64/kvm/nested.c
@@ -8,6 +8,7 @@
 #include <linux/kvm.h>
 #include <linux/kvm_host.h>
 
+#include <asm/fixmap.h>
 #include <asm/kvm_arm.h>
 #include <asm/kvm_emulate.h>
 #include <asm/kvm_mmu.h>
@@ -704,23 +705,35 @@ void kvm_init_nested_s2_mmu(struct kvm_s2_mmu *mmu)
 void kvm_vcpu_load_hw_mmu(struct kvm_vcpu *vcpu)
 {
        /*
-        * The vCPU kept its reference on the MMU after the last put, keep
-        * rolling with it.
+        * If the vCPU kept its reference on the MMU after the last put,
+        * keep rolling with it.
         */
-       if (vcpu->arch.hw_mmu)
-               return;
-
        if (is_hyp_ctxt(vcpu)) {
-               vcpu->arch.hw_mmu = &vcpu->kvm->arch.mmu;
+               if (!vcpu->arch.hw_mmu)
+                       vcpu->arch.hw_mmu = &vcpu->kvm->arch.mmu;
        } else {
-               write_lock(&vcpu->kvm->mmu_lock);
-               vcpu->arch.hw_mmu = get_s2_mmu_nested(vcpu);
-               write_unlock(&vcpu->kvm->mmu_lock);
+               if (!vcpu->arch.hw_mmu) {
+                       scoped_guard(write_lock, &vcpu->kvm->mmu_lock)
+                               vcpu->arch.hw_mmu = get_s2_mmu_nested(vcpu);
+               }
+
+               if (__vcpu_sys_reg(vcpu, HCR_EL2) & HCR_NV)
+                       kvm_make_request(KVM_REQ_MAP_L1_VNCR_EL2, vcpu);
        }
 }
 
 void kvm_vcpu_put_hw_mmu(struct kvm_vcpu *vcpu)
 {
+       /* Unconditionally drop the VNCR mapping if we have one */
+       if (host_data_test_flag(L1_VNCR_MAPPED)) {
+               BUG_ON(vcpu->arch.vncr_tlb->cpu != smp_processor_id());
+               BUG_ON(is_hyp_ctxt(vcpu));
+
+               clear_fixmap(vncr_fixmap(vcpu->arch.vncr_tlb->cpu));
+               vcpu->arch.vncr_tlb->cpu = -1;
+               host_data_clear_flag(L1_VNCR_MAPPED);
+       }
+
        /*
         * Keep a reference on the associated stage-2 MMU if the vCPU is
         * scheduling out and not in WFI emulation, suggesting it is likely to
@@ -1042,6 +1055,70 @@ int kvm_handle_vncr_abort(struct kvm_vcpu *vcpu)
        return 1;
 }
 
+static void kvm_map_l1_vncr(struct kvm_vcpu *vcpu)
+{
+       struct vncr_tlb *vt = vcpu->arch.vncr_tlb;
+       pgprot_t prot;
+
+       guard(preempt)();
+       guard(read_lock)(&vcpu->kvm->mmu_lock);
+
+       /*
+        * The request to map VNCR may have raced against some other
+        * event, such as an interrupt, and may not be valid anymore.
+        */
+       if (is_hyp_ctxt(vcpu))
+               return;
+
+       /*
+        * Check that the pseudo-TLB is valid and that VNCR_EL2 still
+        * contains the expected value. If it doesn't, we simply bail out
+        * without a mapping -- a transformed MSR/MRS will generate the
+        * fault and allow us to populate the pseudo-TLB.
+        */
+       if (!vt->valid)
+               return;
+
+       if (read_vncr_el2(vcpu) != vt->gva)
+               return;
+
+       if (vt->wr.nG) {
+               u64 tcr = vcpu_read_sys_reg(vcpu, TCR_EL2);
+               u64 ttbr = ((tcr & TCR_A1) ?
+                           vcpu_read_sys_reg(vcpu, TTBR1_EL2) :
+                           vcpu_read_sys_reg(vcpu, TTBR0_EL2));
+               u16 asid;
+
+               asid = FIELD_GET(TTBR_ASID_MASK, ttbr);
+               if (!kvm_has_feat_enum(vcpu->kvm, ID_AA64MMFR0_EL1, ASIDBITS, 16) ||
+                   !(tcr & TCR_ASID16))
+                       asid &= GENMASK(7, 0);
+
+               if (asid != vt->wr.asid)
+                       return;
+       }
+
+       vt->cpu = smp_processor_id();
+
+       if (vt->wr.pw && vt->wr.pr)
+               prot = PAGE_KERNEL;
+       else if (vt->wr.pr)
+               prot = PAGE_KERNEL_RO;
+       else
+               prot = PAGE_NONE;
+
+       /*
+        * We can't map write-only (or no permission at all) in the kernel,
+        * but the guest can do it if using POE, so we'll have to turn a
+        * translation fault into a permission fault at runtime.
+        * FIXME: WO doesn't work at all, need POE support in the kernel.
+        */
+       if (pgprot_val(prot) != pgprot_val(PAGE_NONE)) {
+               __set_fixmap(vncr_fixmap(vt->cpu), vt->hpa, prot);
+               host_data_set_flag(L1_VNCR_MAPPED);
+       }
+}
+
 /*
  * Our emulated CPU doesn't support all the possible features. For the
  * sake of simplicity (and probably mental sanity), wipe out a number
@@ -1582,6 +1659,9 @@ void check_nested_vcpu_requests(struct kvm_vcpu *vcpu)
                write_unlock(&vcpu->kvm->mmu_lock);
        }
 
+       if (kvm_check_request(KVM_REQ_MAP_L1_VNCR_EL2, vcpu))
+               kvm_map_l1_vncr(vcpu);
+
        /* Must be last, as may switch context! */
        if (kvm_check_request(KVM_REQ_GUEST_HYP_IRQ_PENDING, vcpu))
                kvm_inject_nested_irq(vcpu);