From: Mark Rutland Date: Wed, 3 Jun 2026 11:06:28 +0000 (+0100) Subject: arm64: fpsimd: Move sve_flush_live() inline X-Git-Url: http://git.ipfire.org/gitweb/?a=commitdiff_plain;h=18618d9ea1fbc251970fe8a072f6fe10266320e0;p=thirdparty%2Flinux.git arm64: fpsimd: Move sve_flush_live() inline Currently sve_flush_live() is written in out-of-line assembly. It would be nice if we could move it inline such that control flow can be written more clearly in C, and to permit the removal of otherwise unused assembly macros. The 'flush_ffr' argument is redundant as sve_flush_live() is always called from non-streaming mode, and all callers pass 'true'. Remove the argument and make it a requirement that the function is called from non-streaming mode. The 'vq_minus_1' argument is unnecessary, as sve_flush_live() can read the live VL directly using the RDVL instruction (wrapped by the sve_get_vl() helper function). Move the function to C, with the simplifications above. Signed-off-by: Mark Rutland Reviewed-by: Mark Brown Reviewed-by: Vladimir Murzin Cc: Catalin Marinas Cc: Fuad Tabba Cc: James Morse Cc: Marc Zyngier Cc: Oliver Upton Cc: Will Deacon Signed-off-by: Will Deacon --- diff --git a/arch/arm64/include/asm/fpsimd.h b/arch/arm64/include/asm/fpsimd.h index 8f1b844f000f..9dfe53204ebf 100644 --- a/arch/arm64/include/asm/fpsimd.h +++ b/arch/arm64/include/asm/fpsimd.h @@ -332,7 +332,30 @@ static inline void sve_load_state(const struct arm64_sve_state *state, bool ffr) __sve_load_p(state, vl, ffr); } -extern void sve_flush_live(bool flush_ffr, unsigned long vq_minus_1); +/* + * Zero all SVE registers except for the first 128 bits of each vector. + * + * The caller must ensure that the VL has been configured and the CPU must be + * in non-streaming mode. + */ +static inline void sve_flush_live(void) +{ + unsigned long vl = sve_get_vl(); + + if (vl > sizeof(__uint128_t)) { + asm volatile( + __FPSIMD_PREAMBLE + FOR_EACH_Z_REG("n", "mov v\\n\\().16b, v\\n\\().16b") + ); + } + + asm volatile( + __SVE_PREAMBLE + FOR_EACH_P_REG("n", "pfalse p\\n\\().b") + " wrffr p0.b\n" + ); +} + extern void sme_save_state(struct arm64_sme_state *state, int zt); extern void sme_load_state(const struct arm64_sme_state *state, int zt); diff --git a/arch/arm64/include/asm/fpsimdmacros.h b/arch/arm64/include/asm/fpsimdmacros.h index 5f03fe51d0bf..9e352b5c6b76 100644 --- a/arch/arm64/include/asm/fpsimdmacros.h +++ b/arch/arm64/include/asm/fpsimdmacros.h @@ -40,20 +40,6 @@ .endif .endm -/* Deprecated macros for SVE instructions */ - -/* WRFFR P\np.B */ -.macro _sve_wrffr np - .arch_extension sve - wrffr p\np\().b -.endm - -/* PFALSE P\np.B */ -.macro _sve_pfalse np - .arch_extension sve - pfalse p\np\().b -.endm - /* Deprecated macros for SME instructions */ /* RDSVL X\nx, #\imm */ @@ -131,22 +117,6 @@ .purgem _for__body .endm -/* Preserve the first 128-bits of Znz and zero the rest. */ -.macro _sve_flush_z nz - _sve_check_zreg \nz - mov v\nz\().16b, v\nz\().16b -.endm - -.macro sve_flush_z - _for n, 0, 31, _sve_flush_z \n -.endm -.macro sve_flush_p - _for n, 0, 15, _sve_pfalse \n -.endm -.macro sve_flush_ffr - _sve_wrffr 0 -.endm - .macro sme_save_za nxbase, xvl, nw mov w\nw, #0 diff --git a/arch/arm64/kernel/entry-common.c b/arch/arm64/kernel/entry-common.c index cb54335465f6..2352297330e1 100644 --- a/arch/arm64/kernel/entry-common.c +++ b/arch/arm64/kernel/entry-common.c @@ -237,12 +237,8 @@ static inline void fpsimd_syscall_enter(void) if (!system_supports_sve()) return; - if (test_thread_flag(TIF_SVE)) { - unsigned int sve_vq_minus_one; - - sve_vq_minus_one = sve_vq_from_vl(task_get_sve_vl(current)) - 1; - sve_flush_live(true, sve_vq_minus_one); - } + if (test_thread_flag(TIF_SVE)) + sve_flush_live(); /* * Any live non-FPSIMD SVE state has been zeroed. Allow diff --git a/arch/arm64/kernel/entry-fpsimd.S b/arch/arm64/kernel/entry-fpsimd.S index f95753635625..2a4755113b99 100644 --- a/arch/arm64/kernel/entry-fpsimd.S +++ b/arch/arm64/kernel/entry-fpsimd.S @@ -11,28 +11,6 @@ #include #include -#ifdef CONFIG_ARM64_SVE - -/* - * Zero all SVE registers but the first 128-bits of each vector - * - * VQ must already be configured by caller, any further updates of VQ - * will need to ensure that the register state remains valid. - * - * x0 = include FFR? - * x1 = VQ - 1 - */ -SYM_FUNC_START(sve_flush_live) - cbz x1, 1f // A VQ-1 of 0 is 128 bits so no extra Z state - sve_flush_z -1: sve_flush_p - tbz x0, #0, 2f - sve_flush_ffr -2: ret -SYM_FUNC_END(sve_flush_live) - -#endif /* CONFIG_ARM64_SVE */ - #ifdef CONFIG_ARM64_SME /* diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c index b9506422d29c..25dc5afe9ba0 100644 --- a/arch/arm64/kernel/fpsimd.c +++ b/arch/arm64/kernel/fpsimd.c @@ -1338,7 +1338,7 @@ void do_sve_acc(unsigned long esr, struct pt_regs *regs) if (!test_thread_flag(TIF_FOREIGN_FPSTATE)) { unsigned long vq = sve_vq_from_vl(task_get_sve_vl(current)); sysreg_clear_set_s(SYS_ZCR_EL1, ZCR_ELx_LEN, vq - 1); - sve_flush_live(true, vq - 1); + sve_flush_live(); fpsimd_bind_task_to_cpu(); } else { fpsimd_to_sve(current);