git.ipfire.org Git - thirdparty/kernel/linux.git/commitdiff
arm64: fpsimd: Move SVE save/restore inline
author: Mark Rutland <mark.rutland@arm.com>
Wed, 3 Jun 2026 11:06:27 +0000 (12:06 +0100)
committer: Will Deacon <will@kernel.org>
Wed, 3 Jun 2026 15:50:49 +0000 (16:50 +0100)
Currently the SVE register save/restore sequences are written in
out-of-line assembly routines. While this works, it's somewhat painful:

* As KVM needs to be able to use the sequences in hyp code, separate
  assembly files are used for the regular kernel and KVM code. While the
  common logic is shared in assembly macros, this still requires some
  duplication, and has led to some trivial divergence.

* As the SVE LDR/STR instructions have limited addressing modes, the
  assembly macros use an awkward pattern requiring negative offsets.
  This could be written more clearly with addresses being generated in C
  code.

* As the FFR does not always exist in streaming mode, some awkward
  conditional branching has been written in assembly which could be
  clearer in C (and would permit the compiler to optimize out
  unnecessary branches in some cases).

* For historical reasons, the assembly macros take some register
  arguments as numerical indices (e.g. "sve_save 0, x1" uses x0 and x1),
  which is simply confusing.

* For historical reasons, the SVE save/restore code and FPSIMD
  save/restore code have distinct sequences for FPSR and FPCR. Ideally
  this logic would be shared.

* The assembly sequences can't be instrumented, and so it's harder than
  necessary to catch memory safety issues.

To handle the above, move the SVE register save/restore sequences
to inline assembly.

Neither GCC nor LLVM instrument memory arguments to inline assembly, so
explicit instrumentation is added in the same manner as other assembly
routines. This instrumentation is implicitly disabled by Kbuild for nVHE
hyp code.

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Reviewed-by: Mark Brown <broonie@kernel.org>
Reviewed-by: Vladimir Murzin <vladimir.murzin@arm.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Fuad Tabba <tabba@google.com>
Cc: James Morse <james.morse@arm.com>
Cc: Marc Zyngier <maz@kernel.org>
Cc: Oliver Upton <oupton@kernel.org>
Cc: Will Deacon <will@kernel.org>
Signed-off-by: Will Deacon <will@kernel.org>
arch/arm64/include/asm/fpsimd.h
arch/arm64/include/asm/fpsimdmacros.h
arch/arm64/include/asm/kvm_hyp.h
arch/arm64/kernel/entry-fpsimd.S
arch/arm64/kvm/hyp/fpsimd.S [deleted file]
arch/arm64/kvm/hyp/include/hyp/switch.h
arch/arm64/kvm/hyp/nvhe/Makefile
arch/arm64/kvm/hyp/nvhe/hyp-main.c
arch/arm64/kvm/hyp/vhe/Makefile

index fff6d54afd9fe1edb56f2c7eb0120fd328850ae8..8f1b844f000fa87c50a710b1266008858499b941 100644 (file)
@@ -215,8 +215,123 @@ static inline unsigned int sve_get_vl(void)
        return vl;
 }
 
-extern void sve_save_state(struct arm64_sve_state *state, int save_ffr);
-extern void sve_load_state(const struct arm64_sve_state *state, int restore_ffr);
+#define FOR_EACH_Z_REG(idx_str, asm_str)                                                                                       \
+       "       .irp " idx_str ",0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31\n"       \
+       asm_str "\n"                                                                                                            \
+       "       .endr\n"
+
+#define FOR_EACH_P_REG(idx_str, asm_str)                                                                                       \
+       "       .irp " idx_str ",0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15\n"       \
+       asm_str "\n"                                                            \
+       "       .endr\n"
+
+static inline void __sve_save_z(struct arm64_sve_state *state, unsigned long vl)
+{
+       instrument_write(state, SVE_NUM_ZREGS * vl);
+       asm volatile(
+       __SVE_PREAMBLE
+       FOR_EACH_Z_REG("n", "str        z\\n, [%[zregs], #\\n, MUL VL]")
+       :
+       : [zregs] "r" (state)
+       : "memory"
+       );
+}
+
+static inline void __sve_load_z(const struct arm64_sve_state *state, unsigned long vl)
+{
+       instrument_read(state, SVE_NUM_ZREGS * vl);
+       asm volatile(
+       __SVE_PREAMBLE
+       FOR_EACH_Z_REG("n", "ldr        z\\n, [%[zregs], #\\n, MUL VL]")
+       :
+       : [zregs] "r" (state)
+       : "memory"
+       );
+}
+
+static inline void __sve_save_p(struct arm64_sve_state *state, unsigned long vl, bool ffr)
+{
+       void *pregs = (void *)state + SVE_NUM_ZREGS * vl;
+       unsigned long pl = vl / 8;
+       void *pffr = pregs + SVE_NUM_PREGS * pl;
+
+       instrument_write(pregs, SVE_NUM_PREGS * pl);
+       asm volatile(
+       __SVE_PREAMBLE
+       FOR_EACH_P_REG("n", "str        p\\n, [%[pregs], #\\n, MUL VL]\n")
+       :
+       : [pregs] "r" (pregs)
+       : "memory"
+       );
+
+       instrument_write(pffr, pl);
+       if (ffr) {
+               asm volatile(
+               __SVE_PREAMBLE
+               "       rdffr   p0.b\n"
+               "       str     p0, [%[pffr]]\n"
+               "       ldr     p0, [%[pregs]]\n"
+               :
+               : [pregs] "r" (pregs),
+                 [pffr] "r" (pffr)
+               : "memory"
+               );
+       } else {
+               asm volatile(
+               __SVE_PREAMBLE
+               "       pfalse  p0.b\n"
+               "       str     p0, [%[pffr]]\n"
+               "       ldr     p0, [%[pregs]]\n"
+               :
+               : [pregs] "r" (pregs),
+                 [pffr] "r" (pffr)
+               : "memory"
+               );
+       }
+}
+
+static inline void __sve_load_p(const struct arm64_sve_state *state, unsigned long vl, bool ffr)
+{
+       const void *pregs = (const void *)state + SVE_NUM_ZREGS * vl;
+       unsigned long pl = vl / 8;
+       const void *pffr = pregs + SVE_NUM_PREGS * pl;
+
+       if (ffr) {
+               instrument_read(pffr, pl);
+               asm volatile(
+               __SVE_PREAMBLE
+               "       ldr     p0, [%[pffr]]\n"
+               "       wrffr   p0.b\n"
+               :
+               : [pffr] "r" (pffr)
+               : "memory"
+               );
+       }
+
+       instrument_read(pregs, SVE_NUM_PREGS * pl);
+       asm volatile(
+       __SVE_PREAMBLE
+       FOR_EACH_P_REG("n", "ldr        p\\n, [%[pregs], #\\n, MUL VL]\n")
+       :
+       : [pregs] "r" (pregs)
+       : "memory"
+       );
+}
+
+static inline void sve_save_state(struct arm64_sve_state *state, bool ffr)
+{
+       unsigned long vl = sve_get_vl();
+       __sve_save_z(state, vl);
+       __sve_save_p(state, vl, ffr);
+}
+
+static inline void sve_load_state(const struct arm64_sve_state *state, bool ffr)
+{
+       unsigned long vl = sve_get_vl();
+       __sve_load_z(state, vl);
+       __sve_load_p(state, vl, ffr);
+}
+
 extern void sve_flush_live(bool flush_ffr, unsigned long vq_minus_1);
 extern void sme_save_state(struct arm64_sme_state *state, int zt);
 extern void sme_load_state(const struct arm64_sme_state *state, int zt);
index e613dc94dc3576ad2ab3997a55159d6f972c2a12..5f03fe51d0bff85e1e91422149e750f5ee730901 100644 (file)
 
 /* Deprecated macros for SVE instructions */
 
-/* STR (vector): STR Z\nz, [X\nxbase, #\offset, MUL VL] */
-.macro _sve_str_v nz, nxbase, offset=0
-       .arch_extension sve
-       str     z\nz, [X\nxbase, #\offset, MUL VL]
-.endm
-
-/* LDR (vector): LDR Z\nz, [X\nxbase, #\offset, MUL VL] */
-.macro _sve_ldr_v nz, nxbase, offset=0
-       .arch_extension sve
-       ldr     z\nz, [X\nxbase, #\offset, MUL VL]
-.endm
-
-/* STR (predicate): STR P\np, [X\nxbase, #\offset, MUL VL] */
-.macro _sve_str_p np, nxbase, offset=0
-       .arch_extension sve
-       str     p\np, [X\nxbase, #\offset, MUL VL]
-.endm
-
-/* LDR (predicate): LDR P\np, [X\nxbase, #\offset, MUL VL] */
-.macro _sve_ldr_p np, nxbase, offset=0
-       .arch_extension sve
-       ldr p\np, [x\nxbase, #\offset, MUL VL]
-.endm
-
-/* RDFFR (unpredicated): RDFFR P\np.B */
-.macro _sve_rdffr np
-       .arch_extension sve
-       rdffr p\np\().b
-.endm
-
 /* WRFFR P\np.B */
 .macro _sve_wrffr np
        .arch_extension sve
                _sve_wrffr      0
 .endm
 
-.macro _sve_pffr ptr
-       .arch_extension sve
-       addvl   \ptr, \ptr, #16
-       addvl   \ptr, \ptr, #16
-       addpl   \ptr, \ptr, #16
-.endm
-
-.macro sve_save nxbase, save_ffr
-               _sve_pffr       x\nxbase
- _for n, 0, 31,        _sve_str_v      \n, \nxbase, \n - 34
- _for n, 0, 15,        _sve_str_p      \n, \nxbase, \n - 16
-               cbz             \save_ffr, 921f
-               _sve_rdffr      0
-               b               922f
-921:
-               _sve_pfalse     0                       // Zero out FFR
-922:
-               _sve_str_p      0, \nxbase
-               _sve_ldr_p      0, \nxbase, -16
-.endm
-
-.macro sve_load nxbase, restore_ffr
-               _sve_pffr       x\nxbase
- _for n, 0, 31,        _sve_ldr_v      \n, \nxbase, \n - 34
-               cbz             \restore_ffr, 921f
-               _sve_ldr_p      0, \nxbase
-               _sve_wrffr      0
-921:
- _for n, 0, 15,        _sve_ldr_p      \n, \nxbase, \n - 16
-.endm
-
 .macro sme_save_za nxbase, xvl, nw
        mov     w\nw, #0
 
index 190c256e34c039501ebbe1bec8076511bc267101..ad19de1d0654fab0129c1b9a7c2c099ce5a1edff 100644 (file)
@@ -121,9 +121,6 @@ void __debug_save_host_buffers_nvhe(struct kvm_vcpu *vcpu);
 void __debug_restore_host_buffers_nvhe(struct kvm_vcpu *vcpu);
 #endif
 
-void __sve_save_state(struct arm64_sve_state *sve, int save_ffr);
-void __sve_restore_state(struct arm64_sve_state *sve, int restore_ffr);
-
 u64 __guest_enter(struct kvm_vcpu *vcpu);
 
 bool kvm_host_psci_handler(struct kvm_cpu_context *host_ctxt, u32 func_id);
index 66668bfca5ae85dc1d9d0377a20175a5c7480085..f957536356255e158702cd96d66c68b1b422652b 100644 (file)
 
 #ifdef CONFIG_ARM64_SVE
 
-/*
- * Save the SVE state
- *
- * x0 - pointer to buffer for state
- * w1 - Save FFR if non-zero
- */
-SYM_FUNC_START(sve_save_state)
-       sve_save 0, w1
-       ret
-SYM_FUNC_END(sve_save_state)
-
-/*
- * Load the SVE state
- *
- * x0 - pointer to buffer for state
- * w1 - Restore FFR if non-zero
- */
-SYM_FUNC_START(sve_load_state)
-       sve_load 0, w1
-       ret
-SYM_FUNC_END(sve_load_state)
-
 /*
  * Zero all SVE registers but the first 128-bits of each vector
  *
diff --git a/arch/arm64/kvm/hyp/fpsimd.S b/arch/arm64/kvm/hyp/fpsimd.S
deleted file mode 100644 (file)
index 00c56e3..0000000
+++ /dev/null
@@ -1,21 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * Copyright (C) 2015 - ARM Ltd
- * Author: Marc Zyngier <marc.zyngier@arm.com>
- */
-
-#include <linux/linkage.h>
-
-#include <asm/fpsimdmacros.h>
-
-       .text
-
-SYM_FUNC_START(__sve_restore_state)
-       sve_load 0, w1
-       ret
-SYM_FUNC_END(__sve_restore_state)
-
-SYM_FUNC_START(__sve_save_state)
-       sve_save 0, w1
-       ret
-SYM_FUNC_END(__sve_save_state)
index ee366a536c77b004ae3f9893878e08040f711bd1..1f12c4ba295a40d37db45f7e54ce01b8615a61dc 100644 (file)
@@ -467,7 +467,7 @@ static inline void __hyp_sve_restore_guest(struct kvm_vcpu *vcpu)
         * vCPU. Start off with the max VL so we can load the SVE state.
         */
        sve_cond_update_zcr_vq(vcpu_sve_max_vq(vcpu) - 1, SYS_ZCR_EL2);
-       __sve_restore_state(kern_hyp_va(vcpu->arch.sve_state), true);
+       sve_load_state(kern_hyp_va(vcpu->arch.sve_state), true);
        fpsimd_load_common(&vcpu->arch.ctxt.fp_regs);
 
        /*
@@ -488,7 +488,7 @@ static inline void __hyp_sve_save_host(void)
 
        ctxt_sys_reg(hctxt, ZCR_EL1) = read_sysreg_el1(SYS_ZCR);
        write_sysreg_s(sve_vq_from_vl(kvm_host_sve_max_vl) - 1, SYS_ZCR_EL2);
-       __sve_save_state(sve_regs, true);
+       sve_save_state(sve_regs, true);
        fpsimd_save_common(&hctxt->fp_regs);
 }
 
index 62cdfbff75625c578a21c016798bbce569228f9e..f57450ebcb4989c0e94799257719cb5125fb812c 100644 (file)
@@ -26,7 +26,7 @@ hyp-obj-y := timer-sr.o sysreg-sr.o debug-sr.o switch.o tlb.o hyp-init.o host.o
         hyp-main.o hyp-smp.o psci-relay.o early_alloc.o page_alloc.o \
         cache.o setup.o mm.o mem_protect.o sys_regs.o pkvm.o stacktrace.o ffa.o
 hyp-obj-y += ../vgic-v3-sr.o ../aarch32.o ../vgic-v2-cpuif-proxy.o ../entry.o \
-        ../fpsimd.o ../hyp-entry.o ../exception.o ../pgtable.o ../vgic-v5-sr.o
+        ../hyp-entry.o ../exception.o ../pgtable.o ../vgic-v5-sr.o
 hyp-obj-y += ../../../kernel/smccc-call.o
 hyp-obj-$(CONFIG_LIST_HARDENED) += list_debug.o
 hyp-obj-$(CONFIG_NVHE_EL2_TRACING) += clock.o trace.o events.o
index dee9fedd9592a0cde564b5bf7cefdc590cd5b5ba..676f756e084df4d7aad766cae43e72e6b975de38 100644 (file)
@@ -35,7 +35,7 @@ static void __hyp_sve_save_guest(struct kvm_vcpu *vcpu)
         * on the VL, so use a consistent (i.e., the maximum) guest VL.
         */
        sve_cond_update_zcr_vq(vcpu_sve_max_vq(vcpu) - 1, SYS_ZCR_EL2);
-       __sve_save_state(kern_hyp_va(vcpu->arch.sve_state), true);
+       sve_save_state(kern_hyp_va(vcpu->arch.sve_state), true);
        fpsimd_save_common(&vcpu->arch.ctxt.fp_regs);
        write_sysreg_s(sve_vq_from_vl(kvm_host_sve_max_vl) - 1, SYS_ZCR_EL2);
 }
@@ -55,7 +55,7 @@ static void __hyp_sve_restore_host(void)
         * need to be revisited.
         */
        write_sysreg_s(sve_vq_from_vl(kvm_host_sve_max_vl) - 1, SYS_ZCR_EL2);
-       __sve_restore_state(sve_regs, true);
+       sve_load_state(sve_regs, true);
        fpsimd_load_common(&hctxt->fp_regs);
        write_sysreg_el1(ctxt_sys_reg(hctxt, ZCR_EL1), SYS_ZCR);
 }
index 9695328bbd96ec0cd989ee2b903fd37cbc19041c..d6b3475145c0ef569a7acb0a86ad4dea24e082f5 100644 (file)
@@ -10,4 +10,4 @@ CFLAGS_switch.o += -Wno-override-init
 
 obj-y := timer-sr.o sysreg-sr.o debug-sr.o switch.o tlb.o
 obj-y += ../vgic-v3-sr.o ../aarch32.o ../vgic-v2-cpuif-proxy.o ../entry.o \
-        ../fpsimd.o ../hyp-entry.o ../exception.o ../vgic-v5-sr.o
+        ../hyp-entry.o ../exception.o ../vgic-v5-sr.o