git.ipfire.org Git - thirdparty/kernel/stable.git/commitdiff
KVM: arm64: Disable TRBE Trace Buffer Unit when running in guest context
author Will Deacon <will@kernel.org>
Fri, 27 Mar 2026 13:00:44 +0000 (13:00 +0000)
committer Marc Zyngier <maz@kernel.org>
Sat, 28 Mar 2026 17:07:49 +0000 (17:07 +0000)
The nVHE world-switch code relies on zeroing TRFCR_EL1 to disable trace
generation in guest context when self-hosted TRBE is in use by the host.

Per D3.2.1 ("Controls to prohibit trace at Exception levels"), clearing
TRFCR_EL1 means that trace generation is prohibited at EL1 and EL0 but
per R_YCHKJ the Trace Buffer Unit will still be enabled if
TRBLIMITR_EL1.E is set. R_SJFRQ goes on to state that, when enabled, the
Trace Buffer Unit can perform address translation for the "owning
exception level" even when it is out of context.

Consequently, we can end up in a state where TRBE performs speculative
page-table walks for a host VA/IPA in guest/hypervisor context depending
on the value of MDCR_EL2.E2TB, which changes over world-switch. The
potential result appears to be a heady mixture of SErrors, data
corruption and hardware lockups.

Extend the TRBE world-switch code to clear TRBLIMITR_EL1.E after
draining the buffer, restoring the register on return to the host. This
unfortunately means we need to tackle CPU errata #2064142 and #2038923
which add additional synchronisation requirements around manipulations
of the limit register. Hopefully this doesn't need to be fast.

Cc: Marc Zyngier <maz@kernel.org>
Cc: Oliver Upton <oupton@kernel.org>
Cc: James Clark <james.clark@linaro.org>
Cc: Leo Yan <leo.yan@arm.com>
Cc: Suzuki K Poulose <suzuki.poulose@arm.com>
Cc: Fuad Tabba <tabba@google.com>
Cc: Alexandru Elisei <alexandru.elisei@arm.com>
Tested-by: Leo Yan <leo.yan@arm.com>
Tested-by: Fuad Tabba <tabba@google.com>
Reviewed-by: Suzuki K Poulose <suzuki.poulose@arm.com>
Reviewed-by: Fuad Tabba <tabba@google.com>
Fixes: a1319260bf62 ("arm64: KVM: Enable access to TRBE support for host")
Signed-off-by: Will Deacon <will@kernel.org>
Link: https://patch.msgid.link/20260327130047.21065-2-will@kernel.org
Signed-off-by: Marc Zyngier <maz@kernel.org>
arch/arm64/include/asm/kvm_host.h
arch/arm64/kvm/hyp/nvhe/debug-sr.c
arch/arm64/kvm/hyp/nvhe/switch.c

index 70cb9cfd760a3689426a7aab829de9136f8c22c7..b1335c55dbef50194ad3bef672a5208a58464c75 100644 (file)
@@ -770,6 +770,7 @@ struct kvm_host_data {
                u64 pmscr_el1;
                /* Self-hosted trace */
                u64 trfcr_el1;
+               u64 trblimitr_el1;
                /* Values of trap registers for the host before guest entry. */
                u64 mdcr_el2;
                u64 brbcr_el1;
index 2a1c0f49792bf344c405190c68fdce0c9b9a35bf..0955af771ad1d8ec96e9f20f2d5ad89f36a7a39f 100644 (file)
@@ -57,12 +57,54 @@ static void __trace_do_switch(u64 *saved_trfcr, u64 new_trfcr)
        write_sysreg_el1(new_trfcr, SYS_TRFCR);
 }
 
-static bool __trace_needs_drain(void)
+static void __trace_drain_and_disable(void)
 {
-       if (is_protected_kvm_enabled() && host_data_test_flag(HAS_TRBE))
-               return read_sysreg_s(SYS_TRBLIMITR_EL1) & TRBLIMITR_EL1_E;
+       u64 *trblimitr_el1 = host_data_ptr(host_debug_state.trblimitr_el1);
+       bool needs_drain = is_protected_kvm_enabled() ?
+                          host_data_test_flag(HAS_TRBE) :
+                          host_data_test_flag(TRBE_ENABLED);
 
-       return host_data_test_flag(TRBE_ENABLED);
+       if (!needs_drain) {
+               *trblimitr_el1 = 0;
+               return;
+       }
+
+       *trblimitr_el1 = read_sysreg_s(SYS_TRBLIMITR_EL1);
+       if (*trblimitr_el1 & TRBLIMITR_EL1_E) {
+               /*
+                * The host has enabled the Trace Buffer Unit so we have
+                * to beat the CPU with a stick until it stops accessing
+                * memory.
+                */
+
+               /* First, ensure that our prior write to TRFCR has stuck. */
+               isb();
+
+               /* Now synchronise with the trace and drain the buffer. */
+               tsb_csync();
+               dsb(nsh);
+
+               /*
+                * With no more trace being generated, we can disable the
+                * Trace Buffer Unit.
+                */
+               write_sysreg_s(0, SYS_TRBLIMITR_EL1);
+               if (cpus_have_final_cap(ARM64_WORKAROUND_2064142)) {
+                       /*
+                        * Some CPUs are so good, we have to drain 'em
+                        * twice.
+                        */
+                       tsb_csync();
+                       dsb(nsh);
+               }
+
+               /*
+                * Ensure that the Trace Buffer Unit is disabled before
+                * we start mucking with the stage-2 and trap
+                * configuration.
+                */
+               isb();
+       }
 }
 
 static bool __trace_needs_switch(void)
@@ -79,15 +121,26 @@ static void __trace_switch_to_guest(void)
 
        __trace_do_switch(host_data_ptr(host_debug_state.trfcr_el1),
                          *host_data_ptr(trfcr_while_in_guest));
-
-       if (__trace_needs_drain()) {
-               isb();
-               tsb_csync();
-       }
+       __trace_drain_and_disable();
 }
 
 static void __trace_switch_to_host(void)
 {
+       u64 trblimitr_el1 = *host_data_ptr(host_debug_state.trblimitr_el1);
+
+       if (trblimitr_el1 & TRBLIMITR_EL1_E) {
+               /* Re-enable the Trace Buffer Unit for the host. */
+               write_sysreg_s(trblimitr_el1, SYS_TRBLIMITR_EL1);
+               isb();
+               if (cpus_have_final_cap(ARM64_WORKAROUND_2038923)) {
+                       /*
+                        * Make sure the unit is re-enabled before we
+                        * poke TRFCR.
+                        */
+                       isb();
+               }
+       }
+
        __trace_do_switch(host_data_ptr(trfcr_while_in_guest),
                          *host_data_ptr(host_debug_state.trfcr_el1));
 }
index 779089e42681e8c4add0e404e55a2f9b912ba38a..f00688e69d88cd65597ea11dfe4c644d78ef6213 100644 (file)
@@ -278,7 +278,7 @@ int __kvm_vcpu_run(struct kvm_vcpu *vcpu)
         * We're about to restore some new MMU state. Make sure
         * ongoing page-table walks that have started before we
         * trapped to EL2 have completed. This also synchronises the
-        * above disabling of BRBE, SPE and TRBE.
+        * above disabling of BRBE and SPE.
         *
         * See DDI0487I.a D8.1.5 "Out-of-context translation regimes",
         * rule R_LFHQG and subsequent information statements.