KVM: arm64: nv: Honor SError exception routing / masking
author    Oliver Upton <oliver.upton@linux.dev>
Tue, 8 Jul 2025 17:25:11 +0000 (10:25 -0700)
committer Oliver Upton <oliver.upton@linux.dev>
Tue, 8 Jul 2025 18:36:31 +0000 (11:36 -0700)
To date KVM has used HCR_EL2.VSE to track the state of a pending SError
for the guest. With this bit set, hardware respects the EL1 exception
routing / masking rules and injects the vSError when appropriate.

This isn't correct for NV guests as hardware is oblivious to vEL2's
intentions for SErrors. Better yet, with FEAT_NV2 the guest can change
the routing behind our back as HCR_EL2 is redirected to memory. Cope
with this mess by:

 - Using a flag (instead of HCR_EL2.VSE) to track the pending SError
   state when SErrors are unconditionally masked for the current context

 - Resampling the routing / masking of a pending SError on every guest
   entry/exit

 - Emulating exception entry when SError routing implies a translation
   regime change

Reviewed-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20250708172532.1699409-7-oliver.upton@linux.dev
Signed-off-by: Oliver Upton <oliver.upton@linux.dev>
arch/arm64/include/asm/kvm_emulate.h
arch/arm64/include/asm/kvm_host.h
arch/arm64/include/asm/kvm_nested.h
arch/arm64/kvm/arm.c
arch/arm64/kvm/emulate-nested.c
arch/arm64/kvm/guest.c
arch/arm64/kvm/handle_exit.c
arch/arm64/kvm/hyp/exception.c
arch/arm64/kvm/inject_fault.c
arch/arm64/kvm/mmu.c
arch/arm64/kvm/nested.c

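As a reading aid for the diffs that follow, the delivery rules added in arch/arm64/kvm/inject_fault.c condense to the sketch below (a commented restatement of the new code, not an additional change):

	int kvm_inject_serror_esr(struct kvm_vcpu *vcpu, u64 esr)
	{
		/*
		 * vEL1 context while vEL2 has HCR_EL2.AMO set (or any hyp
		 * context): the SError targets vEL2, so emulate the
		 * exception entry in software.
		 */
		if (is_nested_ctxt(vcpu) && kvm_serror_target_is_el2(vcpu))
			return kvm_inject_nested_serror(vcpu, esr);

		/*
		 * vEL2 context with HCR_EL2.{TGE,AMO} both clear: SErrors
		 * are unconditionally masked, so latch the syndrome and
		 * defer delivery via the new NESTED_SERROR_PENDING flag.
		 */
		if (vcpu_is_el2(vcpu) && kvm_serror_undeliverable_at_el2(vcpu)) {
			vcpu_set_vsesr(vcpu, esr);
			vcpu_set_flag(vcpu, NESTED_SERROR_PENDING);
			return 1;
		}

		/* Otherwise hardware can deliver it: fall back to HCR_EL2.VSE. */
		vcpu_set_vsesr(vcpu, esr & ESR_ELx_ISS_MASK);
		*vcpu_hcr(vcpu) |= HCR_VSE;
		return 1;
	}
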
diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h
index 3a27ed4de9ac183d44078656e5823db45d673d2b..daa0410aaebf27b378a6e4529b77f5d5528716a6 100644
@@ -45,7 +45,7 @@ bool kvm_condition_valid32(const struct kvm_vcpu *vcpu);
 void kvm_skip_instr32(struct kvm_vcpu *vcpu);
 
 void kvm_inject_undefined(struct kvm_vcpu *vcpu);
-void kvm_inject_vabt(struct kvm_vcpu *vcpu);
+int kvm_inject_serror_esr(struct kvm_vcpu *vcpu, u64 esr);
 int kvm_inject_sea(struct kvm_vcpu *vcpu, bool iabt, u64 addr);
 void kvm_inject_size_fault(struct kvm_vcpu *vcpu);
 
@@ -59,12 +59,25 @@ static inline int kvm_inject_sea_iabt(struct kvm_vcpu *vcpu, u64 addr)
        return kvm_inject_sea(vcpu, true, addr);
 }
 
+static inline int kvm_inject_serror(struct kvm_vcpu *vcpu)
+{
+       /*
+        * ESR_ELx.ISV (later renamed to IDS) indicates whether or not
+        * ESR_ELx.ISS contains IMPLEMENTATION DEFINED syndrome information.
+        *
+        * Set the bit when injecting an SError w/o an ESR to indicate ISS
+        * does not follow the architected format.
+        */
+       return kvm_inject_serror_esr(vcpu, ESR_ELx_ISV);
+}
+
 void kvm_vcpu_wfi(struct kvm_vcpu *vcpu);
 
 void kvm_emulate_nested_eret(struct kvm_vcpu *vcpu);
 int kvm_inject_nested_sync(struct kvm_vcpu *vcpu, u64 esr_el2);
 int kvm_inject_nested_irq(struct kvm_vcpu *vcpu);
 int kvm_inject_nested_sea(struct kvm_vcpu *vcpu, bool iabt, u64 addr);
+int kvm_inject_nested_serror(struct kvm_vcpu *vcpu, u64 esr);
 
 static inline void kvm_inject_nested_sve_trap(struct kvm_vcpu *vcpu)
 {
@@ -205,6 +218,11 @@ static inline bool vcpu_el2_tge_is_set(const struct kvm_vcpu *vcpu)
        return ctxt_sys_reg(&vcpu->arch.ctxt, HCR_EL2) & HCR_TGE;
 }
 
+static inline bool vcpu_el2_amo_is_set(const struct kvm_vcpu *vcpu)
+{
+       return ctxt_sys_reg(&vcpu->arch.ctxt, HCR_EL2) & HCR_AMO;
+}
+
 static inline bool is_hyp_ctxt(const struct kvm_vcpu *vcpu)
 {
        bool e2h, tge;
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index d27079968341c00ef98ffc47fb35d4737feef3af..8af4a5d400779c8ef686596f769418c8f263cb1e 100644
@@ -817,7 +817,7 @@ struct kvm_vcpu_arch {
        u8 iflags;
 
        /* State flags for kernel bookkeeping, unused by the hypervisor code */
-       u8 sflags;
+       u16 sflags;
 
        /*
         * Don't run the guest (internal implementation need).
@@ -953,9 +953,21 @@ struct kvm_vcpu_arch {
                __vcpu_flags_preempt_enable();                  \
        } while (0)
 
+#define __vcpu_test_and_clear_flag(v, flagset, f, m)           \
+       ({                                                      \
+               typeof(v->arch.flagset) set;                    \
+                                                               \
+               set = __vcpu_get_flag(v, flagset, f, m);        \
+               __vcpu_clear_flag(v, flagset, f, m);            \
+                                                               \
+               set;                                            \
+       })
+
 #define vcpu_get_flag(v, ...)  __vcpu_get_flag((v), __VA_ARGS__)
 #define vcpu_set_flag(v, ...)  __vcpu_set_flag((v), __VA_ARGS__)
 #define vcpu_clear_flag(v, ...)        __vcpu_clear_flag((v), __VA_ARGS__)
+#define vcpu_test_and_clear_flag(v, ...)                       \
+       __vcpu_test_and_clear_flag((v), __VA_ARGS__)
 
 /* KVM_ARM_VCPU_INIT completed */
 #define VCPU_INITIALIZED       __vcpu_single_flag(cflags, BIT(0))
@@ -1015,6 +1027,8 @@ struct kvm_vcpu_arch {
 #define IN_WFI                 __vcpu_single_flag(sflags, BIT(6))
 /* KVM is currently emulating a nested ERET */
 #define IN_NESTED_ERET         __vcpu_single_flag(sflags, BIT(7))
+/* SError pending for nested guest */
+#define NESTED_SERROR_PENDING  __vcpu_single_flag(sflags, BIT(8))
 
 
 /* Pointer to the vcpu's SVE FFR for sve_{save,load}_state() */
@@ -1387,8 +1401,6 @@ static inline bool kvm_arm_is_pvtime_enabled(struct kvm_vcpu_arch *vcpu_arch)
        return (vcpu_arch->steal.base != INVALID_GPA);
 }
 
-void kvm_set_sei_esr(struct kvm_vcpu *vcpu, u64 syndrome);
-
 struct kvm_vcpu *kvm_mpidr_to_vcpu(struct kvm *kvm, unsigned long mpidr);
 
 DECLARE_KVM_HYP_PER_CPU(struct kvm_host_data, kvm_host_data);
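The new vcpu_test_and_clear_flag() helper gives the vCPU flag words test_and_clear_bit()-like semantics; its consumer in the vSError resampling path (arch/arm64/kvm/nested.c, further down) uses it as:

	/* Re-attempt injection iff a vSError was previously deferred. */
	if (unlikely(vcpu_test_and_clear_flag(vcpu, NESTED_SERROR_PENDING)))
		kvm_inject_serror_esr(vcpu, vcpu_get_vsesr(vcpu));
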
diff --git a/arch/arm64/include/asm/kvm_nested.h b/arch/arm64/include/asm/kvm_nested.h
index 0bd07ea068a1f628f7345a9b1239055c264ac99b..7fd76f41c296af8a785ac0bed3b2561fcacdbebb 100644
@@ -80,6 +80,8 @@ extern void kvm_vcpu_load_hw_mmu(struct kvm_vcpu *vcpu);
 extern void kvm_vcpu_put_hw_mmu(struct kvm_vcpu *vcpu);
 
 extern void check_nested_vcpu_requests(struct kvm_vcpu *vcpu);
+extern void kvm_nested_flush_hwstate(struct kvm_vcpu *vcpu);
+extern void kvm_nested_sync_hwstate(struct kvm_vcpu *vcpu);
 
 struct kvm_s2_trans {
        phys_addr_t output;
diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c
index bbe7b98e1ce3bf96a0e2673756ff52d9f5961892..c664f3a7883aa231ef01c283bd6d5e1250c98369 100644
@@ -1188,6 +1188,8 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
                 */
                preempt_disable();
 
+               kvm_nested_flush_hwstate(vcpu);
+
                if (kvm_vcpu_has_pmu(vcpu))
                        kvm_pmu_flush_hwstate(vcpu);
 
@@ -1287,6 +1289,8 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
                /* Exit types that need handling before we can be preempted */
                handle_exit_early(vcpu, ret);
 
+               kvm_nested_sync_hwstate(vcpu);
+
                preempt_enable();
 
                /*
diff --git a/arch/arm64/kvm/emulate-nested.c b/arch/arm64/kvm/emulate-nested.c
index 65a2471c5638f995a935306b73f46de7e0c4ce44..b01a482b41bed1061c0b41ee2f45c64e6771527e 100644
@@ -2714,6 +2714,9 @@ static void kvm_inject_el2_exception(struct kvm_vcpu *vcpu, u64 esr_el2,
        case except_type_irq:
                kvm_pend_exception(vcpu, EXCEPT_AA64_EL2_IRQ);
                break;
+       case except_type_serror:
+               kvm_pend_exception(vcpu, EXCEPT_AA64_EL2_SERR);
+               break;
        default:
                WARN_ONCE(1, "Unsupported EL2 exception injection %d\n", type);
        }
@@ -2821,3 +2824,14 @@ int kvm_inject_nested_sea(struct kvm_vcpu *vcpu, bool iabt, u64 addr)
        vcpu_write_sys_reg(vcpu, FAR_EL2, addr);
        return kvm_inject_nested_sync(vcpu, esr);
 }
+
+int kvm_inject_nested_serror(struct kvm_vcpu *vcpu, u64 esr)
+{
+       /*
+        * Hardware sets up the EC field when propagating ESR as a result of
+        * vSError injection. Manually populate EC for an emulated SError
+        * exception.
+        */
+       esr |= FIELD_PREP(ESR_ELx_EC_MASK, ESR_ELx_EC_SERROR);
+       return kvm_inject_nested(vcpu, esr, except_type_serror);
+}
diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c
index 8983a43fb45e9a475fcf8fb3cc8de40b3f86a189..e2702718d56d2033daa6d79d2a575463737ce776 100644
@@ -818,8 +818,9 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
 int __kvm_arm_vcpu_get_events(struct kvm_vcpu *vcpu,
                              struct kvm_vcpu_events *events)
 {
-       events->exception.serror_pending = !!(vcpu->arch.hcr_el2 & HCR_VSE);
        events->exception.serror_has_esr = cpus_have_final_cap(ARM64_HAS_RAS_EXTN);
+       events->exception.serror_pending = (vcpu->arch.hcr_el2 & HCR_VSE) ||
+                                          vcpu_get_flag(vcpu, NESTED_SERROR_PENDING);
 
        if (events->exception.serror_pending && events->exception.serror_has_esr)
                events->exception.serror_esr = vcpu_get_vsesr(vcpu);
@@ -839,23 +840,29 @@ int __kvm_arm_vcpu_set_events(struct kvm_vcpu *vcpu,
        bool serror_pending = events->exception.serror_pending;
        bool has_esr = events->exception.serror_has_esr;
        bool ext_dabt_pending = events->exception.ext_dabt_pending;
+       u64 esr = events->exception.serror_esr;
        int ret = 0;
 
-       if (serror_pending && has_esr) {
-               if (!cpus_have_final_cap(ARM64_HAS_RAS_EXTN))
-                       return -EINVAL;
-
-               if (!((events->exception.serror_esr) & ~ESR_ELx_ISS_MASK))
-                       kvm_set_sei_esr(vcpu, events->exception.serror_esr);
-               else
-                       return -EINVAL;
-       } else if (serror_pending) {
-               kvm_inject_vabt(vcpu);
-       }
-
        if (ext_dabt_pending)
                ret = kvm_inject_sea_dabt(vcpu, kvm_vcpu_get_hfar(vcpu));
 
+       if (ret < 0)
+               return ret;
+
+       if (!serror_pending)
+               return 0;
+
+       if (!cpus_have_final_cap(ARM64_HAS_RAS_EXTN) && has_esr)
+               return -EINVAL;
+
+       if (has_esr && (esr & ~ESR_ELx_ISS_MASK))
+               return -EINVAL;
+
+       if (has_esr)
+               ret = kvm_inject_serror_esr(vcpu, esr);
+       else
+               ret = kvm_inject_serror(vcpu);
+
        return (ret < 0) ? ret : 0;
 }
 
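For context, this function services the existing KVM_SET_VCPU_EVENTS ioctl. A minimal userspace sketch that pends an SError with an architected syndrome (assuming vcpu_fd is an initialized vCPU file descriptor on a host with the RAS extensions) might look like:

	#include <linux/kvm.h>
	#include <sys/ioctl.h>

	static int pend_serror(int vcpu_fd, __u64 iss)
	{
		struct kvm_vcpu_events events = {};

		events.exception.serror_pending = 1;
		events.exception.serror_has_esr = 1;
		/* The kernel rejects anything outside ESR_ELx_ISS_MASK. */
		events.exception.serror_esr = iss;

		return ioctl(vcpu_fd, KVM_SET_VCPU_EVENTS, &events);
	}
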
diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c
index c37c58d9d25d6c9d1a25781cab3eb56eda55ebd1..a598072f36d2ca5049e9a3f29f5beb58a86b41c8 100644
@@ -32,7 +32,7 @@ typedef int (*exit_handle_fn)(struct kvm_vcpu *);
 static void kvm_handle_guest_serror(struct kvm_vcpu *vcpu, u64 esr)
 {
        if (!arm64_is_ras_serror(esr) || arm64_is_fatal_ras_serror(NULL, esr))
-               kvm_inject_vabt(vcpu);
+               kvm_inject_serror(vcpu);
 }
 
 static int handle_hvc(struct kvm_vcpu *vcpu)
@@ -490,7 +490,7 @@ void handle_exit_early(struct kvm_vcpu *vcpu, int exception_index)
 
                        kvm_handle_guest_serror(vcpu, disr_to_esr(disr));
                } else {
-                       kvm_inject_vabt(vcpu);
+                       kvm_inject_serror(vcpu);
                }
 
                return;
diff --git a/arch/arm64/kvm/hyp/exception.c b/arch/arm64/kvm/hyp/exception.c
index 6a2a899a344e6437609edc138b4ede9425ec3263..592adc78b1496881aa0378e408797239013f7f78 100644
@@ -347,9 +347,13 @@ static void kvm_inject_exception(struct kvm_vcpu *vcpu)
                        enter_exception64(vcpu, PSR_MODE_EL2h, except_type_irq);
                        break;
 
+               case unpack_vcpu_flag(EXCEPT_AA64_EL2_SERR):
+                       enter_exception64(vcpu, PSR_MODE_EL2h, except_type_serror);
+                       break;
+
                default:
                        /*
-                        * Only EL1_SYNC and EL2_{SYNC,IRQ} makes
+                        * Only EL1_SYNC and EL2_{SYNC,IRQ,SERR} make
                         * sense so far. Everything else gets silently
                         * ignored.
                         */
diff --git a/arch/arm64/kvm/inject_fault.c b/arch/arm64/kvm/inject_fault.c
index d9fa4046b602170b37e1d9b59d19c63adac64974..10773a8ef4cbb3d3296d21c5e81e79ebcee4a56e 100644
@@ -219,25 +219,30 @@ void kvm_inject_undefined(struct kvm_vcpu *vcpu)
                inject_undef64(vcpu);
 }
 
-void kvm_set_sei_esr(struct kvm_vcpu *vcpu, u64 esr)
+static bool kvm_serror_target_is_el2(struct kvm_vcpu *vcpu)
 {
-       vcpu_set_vsesr(vcpu, esr & ESR_ELx_ISS_MASK);
-       *vcpu_hcr(vcpu) |= HCR_VSE;
+       return is_hyp_ctxt(vcpu) || vcpu_el2_amo_is_set(vcpu);
 }
 
-/**
- * kvm_inject_vabt - inject an async abort / SError into the guest
- * @vcpu: The VCPU to receive the exception
- *
- * It is assumed that this code is called from the VCPU thread and that the
- * VCPU therefore is not currently executing guest code.
- *
- * Systems with the RAS Extensions specify an imp-def ESR (ISV/IDS = 1) with
- * the remaining ISS all-zeros so that this error is not interpreted as an
- * uncategorized RAS error. Without the RAS Extensions we can't specify an ESR
- * value, so the CPU generates an imp-def value.
- */
-void kvm_inject_vabt(struct kvm_vcpu *vcpu)
+static bool kvm_serror_undeliverable_at_el2(struct kvm_vcpu *vcpu)
 {
-       kvm_set_sei_esr(vcpu, ESR_ELx_ISV);
+       return !(vcpu_el2_tge_is_set(vcpu) || vcpu_el2_amo_is_set(vcpu));
+}
+
+int kvm_inject_serror_esr(struct kvm_vcpu *vcpu, u64 esr)
+{
+       lockdep_assert_held(&vcpu->mutex);
+
+       if (is_nested_ctxt(vcpu) && kvm_serror_target_is_el2(vcpu))
+               return kvm_inject_nested_serror(vcpu, esr);
+
+       if (vcpu_is_el2(vcpu) && kvm_serror_undeliverable_at_el2(vcpu)) {
+               vcpu_set_vsesr(vcpu, esr);
+               vcpu_set_flag(vcpu, NESTED_SERROR_PENDING);
+               return 1;
+       }
+
+       vcpu_set_vsesr(vcpu, esr & ESR_ELx_ISS_MASK);
+       *vcpu_hcr(vcpu) |= HCR_VSE;
+       return 1;
 }
diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c
index f05d70dd6d51dd9dd3f3ad36f11b3178e2eb0432..2c3094181f9cd129b52756d734e398c09695d6cb 100644
@@ -1808,7 +1808,7 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu)
                 * There is no need to pass the error into the guest.
                 */
                if (kvm_handle_guest_sea())
-                       kvm_inject_vabt(vcpu);
+                       return kvm_inject_serror(vcpu);
 
                return 1;
        }
diff --git a/arch/arm64/kvm/nested.c b/arch/arm64/kvm/nested.c
index 5b191f4dc5668c89682082d27409327044ff512a..4218172ed1a4c433b42ba7c0b2856c3ef5f4935e 100644
@@ -1782,3 +1782,43 @@ void check_nested_vcpu_requests(struct kvm_vcpu *vcpu)
        if (kvm_check_request(KVM_REQ_GUEST_HYP_IRQ_PENDING, vcpu))
                kvm_inject_nested_irq(vcpu);
 }
+
+/*
+ * One of the many architectural bugs in FEAT_NV2 is that the guest hypervisor
+ * can write to HCR_EL2 behind our back, potentially changing the exception
+ * routing / masking for even the host context.
+ *
+ * What follows is some slop to (1) react to exception routing / masking and (2)
+ * preserve the pending SError state across translation regimes.
+ */
+void kvm_nested_flush_hwstate(struct kvm_vcpu *vcpu)
+{
+       if (!vcpu_has_nv(vcpu))
+               return;
+
+       if (unlikely(vcpu_test_and_clear_flag(vcpu, NESTED_SERROR_PENDING)))
+               kvm_inject_serror_esr(vcpu, vcpu_get_vsesr(vcpu));
+}
+
+void kvm_nested_sync_hwstate(struct kvm_vcpu *vcpu)
+{
+       unsigned long *hcr = vcpu_hcr(vcpu);
+
+       if (!vcpu_has_nv(vcpu))
+               return;
+
+       /*
+        * We previously decided that an SError was deliverable to the guest.
+        * Reap the pending state from HCR_EL2 and...
+        */
+       if (unlikely(__test_and_clear_bit(__ffs(HCR_VSE), hcr)))
+               vcpu_set_flag(vcpu, NESTED_SERROR_PENDING);
+
+       /*
+        * Re-attempt SError injection in case the deliverability has changed,
+        * which is necessary to faithfully emulate WFI in the case of a pending
+        * SError being a wakeup condition.
+        */
+       if (unlikely(vcpu_test_and_clear_flag(vcpu, NESTED_SERROR_PENDING)))
+               kvm_inject_serror_esr(vcpu, vcpu_get_vsesr(vcpu));
+}
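
Schematically, with only the hooks added in arch/arm64/kvm/arm.c above, each guest run now brackets the vSError state like so (a sketch of the ordering, not new code):

	kvm_nested_flush_hwstate(vcpu);	/* entry: re-inject a deferred vSError
					 * now that routing may have changed */

	/* ... guest runs; an NV2 guest may rewrite HCR_EL2 in memory ... */

	kvm_nested_sync_hwstate(vcpu);	/* exit: reap HCR_EL2.VSE into the
					 * flag, then re-attempt delivery */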