From: Mark Rutland Date: Wed, 3 Jun 2026 11:06:29 +0000 (+0100) Subject: arm64: fpsimd: Move SME save/restore inline X-Git-Url: http://git.ipfire.org/gitweb/?a=commitdiff_plain;h=bfdfafd907204dd1ca7c59e175c2d636ff2361c4;p=thirdparty%2Flinux.git arm64: fpsimd: Move SME save/restore inline Currently the SME register save/restore sequences are written in out-of-line assembly routines. While this works, it's somewhat painful: * For KVM to use the sequences, portions of the logic will need to be duplicated in KVM hyp code. While the common logic can be shared in assembly macros, this is very likely to lead to unnecessary divergence and be a maintenance burden. * For historical reasons, the assembly macros take some register arguments as numerical indices (e.g. "sme_save_za 0, x2, 12" uses x0, x2, and x12), which is simply confusing. * Address generation and control flow are far clearer in C than in assembly. * The assembly sequences can't be instrumented, and so it's harder than necessary to catch memory safety issues. To handle the above, move the SME register save/restore sequences to inline assembly. Neither GCC nor LLVM instrument memory arguments to inline assembly, so explicit instrumentation is added in the same manner as other assembly routines. This instrumentation is implicitly disabled by Kbuild for nVHE hyp code. 
Signed-off-by: Mark Rutland Reviewed-by: Mark Brown Reviewed-by: Vladimir Murzin Cc: Catalin Marinas Cc: Fuad Tabba Cc: James Morse Cc: Marc Zyngier Cc: Oliver Upton Cc: Will Deacon Signed-off-by: Will Deacon --- diff --git a/arch/arm64/include/asm/fpsimd.h b/arch/arm64/include/asm/fpsimd.h index 9dfe53204ebf..a67d5774e672 100644 --- a/arch/arm64/include/asm/fpsimd.h +++ b/arch/arm64/include/asm/fpsimd.h @@ -356,9 +356,6 @@ static inline void sve_flush_live(void) ); } -extern void sme_save_state(struct arm64_sme_state *state, int zt); -extern void sme_load_state(const struct arm64_sme_state *state, int zt); - struct arm64_cpu_capabilities; extern void cpu_enable_fpsimd(const struct arm64_cpu_capabilities *__unused); extern void cpu_enable_sve(const struct arm64_cpu_capabilities *__unused); @@ -638,6 +635,106 @@ static inline size_t __sme_state_size(unsigned int sme_vl) return size; } +static inline void __sme_save_za(struct arm64_sme_state *state, unsigned long svl) +{ + /* + * The argument to LDR/STR (array vector) can only encode W12-W15. + * The "Ucj" constraint exists for this, but is only supported by GCC + * 14.1.0+ and LLVM 18.1.0+. 
+ */ + register unsigned int v asm ("w12"); + + instrument_write(state, svl * svl); + for (v = 0; v < svl; v++) { + void *pav = (void *)state + v * svl; + + asm volatile( + __SME_PREAMBLE + " str za[%w[v], #0], [%[pav]]\n" + : + : [v] "r" (v), + [pav] "r" (pav) + : "memory" + ); + } +} + +static inline void __sme_load_za(const struct arm64_sme_state *state, unsigned long svl) +{ + /* See comment in __sme_save_za */ + register unsigned int v asm ("w12"); + + instrument_read(state, svl * svl); + for (v = 0; v < svl; v++) { + void *pav = (void *)state + v * svl; + + asm volatile( + __SME_PREAMBLE + " ldr za[%w[v], #0], [%[pav]]\n" + : + : [v] "r" (v), + [pav] "r" (pav) + : "memory" + ); + } +} + +static inline void __sme_save_zt(struct arm64_sme_state *state, unsigned long svl) +{ + void *pzt = (void *)state + svl * svl; + + instrument_write(pzt, 64); + asm volatile( + __DEFINE_ASM_GPR_NUMS + /* + * STR ZT0, [] + * Supported by binutils 2.41+. + * Supported by LLVM 16+ + */ + " .inst 0xe13f8000 | ((.L__gpr_num_%[pzt]) << 5)\n" + : + : [pzt] "r" (pzt) + : "memory" + ); +} + +static inline void __sme_load_zt(const struct arm64_sme_state *state, unsigned long svl) +{ + void *pzt = (void *)state + svl * svl; + + instrument_read(pzt, 64); + asm volatile( + __DEFINE_ASM_GPR_NUMS + /* + * LDR ZT0, [] + * Supported by binutils 2.41+. 
+ * Supported by LLVM 16+ + */ + " .inst 0xe11f8000 | ((.L__gpr_num_%[pzt]) << 5)\n" + : + : [pzt] "r" (pzt) + : "memory" + ); +} + +static inline void sme_save_state(struct arm64_sme_state *state, bool zt) +{ + unsigned long svl = sme_get_vl(); + + __sme_save_za(state, svl); + if (zt) + __sme_save_zt(state, svl); +} + +static inline void sme_load_state(const struct arm64_sme_state *state, bool zt) +{ + unsigned long svl = sme_get_vl(); + + __sme_load_za(state, svl); + if (zt) + __sme_load_zt(state, svl); +} + /* * Return how many bytes of memory are required to store the full SME * specific state for task, given task's currently configured vector @@ -694,6 +791,9 @@ static inline size_t sme_state_size(struct task_struct const *task) return 0; } +static inline void sme_save_state(struct arm64_sme_state *state, bool zt) { BUILD_BUG(); } +static inline void sme_load_state(const struct arm64_sme_state *state, bool zt) { BUILD_BUG(); } + static inline void sme_enter_from_user_mode(void) { } static inline void sme_exit_to_user_mode(void) { } diff --git a/arch/arm64/include/asm/fpsimdmacros.h b/arch/arm64/include/asm/fpsimdmacros.h index 9e352b5c6b76..a763fd03ffef 100644 --- a/arch/arm64/include/asm/fpsimdmacros.h +++ b/arch/arm64/include/asm/fpsimdmacros.h @@ -40,60 +40,6 @@ .endif .endm -/* Deprecated macros for SME instructions */ - -/* RDSVL X\nx, #\imm */ -.macro _sme_rdsvl nx, imm - .arch_extension sme - rdsvl x\nx, #\imm -.endm - -/* - * STR (vector from ZA array): - * STR ZA[W\nw, #\offset], [X\nxbase, #\offset, MUL VL] - */ -.macro _sme_str_zav nw, nxbase, offset=0 - .arch_extension sme - str za[w\nw, #\offset], [x\nxbase, #\offset, MUL VL] -.endm - -/* - * LDR (vector to ZA array): - * LDR ZA[w\nw, #\offset], [X\nxbase, #\offset, MUL VL] - */ -.macro _sme_ldr_zav nw, nxbase, offset=0 - .arch_extension sme - ldr za[w\nw, #\offset], [x\nxbase, #\offset, MUL VL] -.endm - -/* - * SME2 instruction encodings for older assemblers. - * Supported by binutils 2.41+. 
- * Supported by LLVM 16+ - */ - -/* - * LDR (ZT0) - * - * LDR ZT0, nx - */ -.macro _ldr_zt nx - _check_general_reg \nx - .inst 0xe11f8000 \ - | (\nx << 5) -.endm - -/* - * STR (ZT0) - * - * STR ZT0, nx - */ -.macro _str_zt nx - _check_general_reg \nx - .inst 0xe13f8000 \ - | (\nx << 5) -.endm - .macro __for from:req, to:req .if (\from) == (\to) _for__body %\from @@ -116,25 +62,3 @@ .purgem _for__body .endm - -.macro sme_save_za nxbase, xvl, nw - mov w\nw, #0 - -423: - _sme_str_zav \nw, \nxbase - add x\nxbase, x\nxbase, \xvl - add x\nw, x\nw, #1 - cmp \xvl, x\nw - bne 423b -.endm - -.macro sme_load_za nxbase, xvl, nw - mov w\nw, #0 - -423: - _sme_ldr_zav \nw, \nxbase - add x\nxbase, x\nxbase, \xvl - add x\nw, x\nw, #1 - cmp \xvl, x\nw - bne 423b -.endm diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile index 74b76bb70452..d2690c3ec528 100644 --- a/arch/arm64/kernel/Makefile +++ b/arch/arm64/kernel/Makefile @@ -27,7 +27,7 @@ KCOV_INSTRUMENT_idle.o := n # Object file lists. obj-y := debug-monitors.o entry.o irq.o fpsimd.o \ - entry-common.o entry-fpsimd.o process.o ptrace.o \ + entry-common.o process.o ptrace.o \ setup.o signal.o sys.o stacktrace.o time.o traps.o \ io.o vdso.o hyp-stub.o psci.o cpu_ops.o \ return_address.o cpuinfo.o cpu_errata.o \ diff --git a/arch/arm64/kernel/entry-fpsimd.S b/arch/arm64/kernel/entry-fpsimd.S deleted file mode 100644 index 2a4755113b99..000000000000 --- a/arch/arm64/kernel/entry-fpsimd.S +++ /dev/null @@ -1,48 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * FP/SIMD state saving and restoring - * - * Copyright (C) 2012 ARM Ltd. 
- * Author: Catalin Marinas - */ - -#include - -#include -#include - -#ifdef CONFIG_ARM64_SME - -/* - * Save the ZA and ZT state - * - * x0 - pointer to buffer for state - * w1 - number of ZT registers to save - */ -SYM_FUNC_START(sme_save_state) - _sme_rdsvl 2, 1 // x2 = VL/8 - sme_save_za 0, x2, 12 // Leaves x0 pointing to the end of ZA - - cbz w1, 1f - _str_zt 0 -1: - ret -SYM_FUNC_END(sme_save_state) - -/* - * Load the ZA and ZT state - * - * x0 - pointer to buffer for state - * w1 - number of ZT registers to save - */ -SYM_FUNC_START(sme_load_state) - _sme_rdsvl 2, 1 // x2 = VL/8 - sme_load_za 0, x2, 12 // Leaves x0 pointing to the end of ZA - - cbz w1, 1f - _ldr_zt 0 -1: - ret -SYM_FUNC_END(sme_load_state) - -#endif /* CONFIG_ARM64_SME */