From: Ard Biesheuvel
Date: Wed, 1 Oct 2025 11:59:42 +0000 (+0200)
Subject: arm64/fpsimd: Allocate kernel mode FP/SIMD buffers on the stack
X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=4fa617cc6851488759f79dc8f66822ae332d98f0;p=thirdparty%2Flinux.git

arm64/fpsimd: Allocate kernel mode FP/SIMD buffers on the stack

Commit aefbab8e77eb16b5 ("arm64: fpsimd: Preserve/restore kernel mode
NEON at context switch") added a 'kernel_fpsimd_state' field to struct
thread_struct, which is the arch-specific portion of struct task_struct,
and is allocated for each task in the system. The size of this field is
528 bytes, resulting in non-negligible bloat of task_struct, and the
resulting memory overhead may impact performance on systems with many
processes.

This allocation is only used if the task is scheduled out or interrupted
by a softirq while using the FP/SIMD unit in kernel mode, and so it is
possible to transparently allocate this buffer on the caller's stack
instead.

So tweak the 'ksimd' scoped guard implementation so that a stack buffer
is allocated and passed to both kernel_neon_begin() and
kernel_neon_end(), and either record it in the task struct, or use it
directly to preserve the task's kernel mode FP/SIMD state when running
in softirq context.

Passing the address to both functions, and checking the addresses for
consistency, ensures that callers of the updated bare begin/end API use
it in a manner that is consistent with the new context switch semantics.

Acked-by: Catalin Marinas
Signed-off-by: Ard Biesheuvel
---
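For illustration, a minimal sketch of the updated calling convention for
bare users of kernel_neon_begin()/kernel_neon_end(): the save buffer now
lives on the caller's stack, and the same address must be passed to both
calls. The crc32_neon() wrapper and the do_crc32_neon()/do_crc32_scalar()
helpers below are hypothetical, not part of this patch:

	#include <linux/types.h>
	#include <asm/neon.h>
	#include <asm/simd.h>

	static u32 crc32_neon(u32 crc, const u8 *data, size_t len)
	{
		/*
		 * Caller-provided stack buffer: only written if this task
		 * is scheduled out, or interrupted by softirq mode FPSIMD,
		 * while inside the begin/end section.
		 */
		struct user_fpsimd_state buf;

		if (!may_use_simd())
			return do_crc32_scalar(crc, data, len);

		kernel_neon_begin(&buf);
		crc = do_crc32_neon(crc, data, len);
		kernel_neon_end(&buf);		/* same address as begin */

		return crc;
	}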
diff --git a/arch/arm64/include/asm/fpu.h b/arch/arm64/include/asm/fpu.h
index bdc4c6304c6a7..751e88a96734e 100644
--- a/arch/arm64/include/asm/fpu.h
+++ b/arch/arm64/include/asm/fpu.h
@@ -15,12 +15,12 @@ static inline void kernel_fpu_begin(void)
 {
 	BUG_ON(!in_task());
 	preempt_disable();
-	kernel_neon_begin();
+	kernel_neon_begin(NULL);
 }
 
 static inline void kernel_fpu_end(void)
 {
-	kernel_neon_end();
+	kernel_neon_end(NULL);
 	preempt_enable();
 }
 
diff --git a/arch/arm64/include/asm/neon.h b/arch/arm64/include/asm/neon.h
index d4b1d172a79b6..acebee4605b5c 100644
--- a/arch/arm64/include/asm/neon.h
+++ b/arch/arm64/include/asm/neon.h
@@ -13,7 +13,7 @@
 
 #define cpu_has_neon()		system_supports_fpsimd()
 
-void kernel_neon_begin(void);
-void kernel_neon_end(void);
+void kernel_neon_begin(struct user_fpsimd_state *);
+void kernel_neon_end(struct user_fpsimd_state *);
 
 #endif /* ! __ASM_NEON_H */
diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h
index 61d62bfd5a7bf..de3c3b65461d0 100644
--- a/arch/arm64/include/asm/processor.h
+++ b/arch/arm64/include/asm/processor.h
@@ -172,7 +172,12 @@ struct thread_struct {
 	unsigned long		fault_code;	/* ESR_EL1 value */
 	struct debug_info	debug;		/* debugging */
 
-	struct user_fpsimd_state	kernel_fpsimd_state;
+	/*
+	 * Set [cleared] by kernel_neon_begin() [kernel_neon_end()] to the
+	 * address of a caller provided buffer that will be used to preserve a
+	 * task's kernel mode FPSIMD state while it is scheduled out.
+	 */
+	struct user_fpsimd_state	*kernel_fpsimd_state;
 	unsigned int		kernel_fpsimd_cpu;
 #ifdef CONFIG_ARM64_PTR_AUTH
 	struct ptrauth_keys_user	keys_user;
diff --git a/arch/arm64/include/asm/simd.h b/arch/arm64/include/asm/simd.h
index d9f83c4787367..7ddb25df5c982 100644
--- a/arch/arm64/include/asm/simd.h
+++ b/arch/arm64/include/asm/simd.h
@@ -43,8 +43,11 @@ static __must_check inline bool may_use_simd(void) {
 
 #endif /* ! CONFIG_KERNEL_MODE_NEON */
 
-DEFINE_LOCK_GUARD_0(ksimd, kernel_neon_begin(), kernel_neon_end())
+DEFINE_LOCK_GUARD_1(ksimd,
+		    struct user_fpsimd_state,
+		    kernel_neon_begin(_T->lock),
+		    kernel_neon_end(_T->lock))
 
-#define scoped_ksimd()	scoped_guard(ksimd)
+#define scoped_ksimd()	scoped_guard(ksimd, &(struct user_fpsimd_state){})
 
 #endif
diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c
index e3f8f51748bc9..32e68c7eb3a25 100644
--- a/arch/arm64/kernel/fpsimd.c
+++ b/arch/arm64/kernel/fpsimd.c
@@ -1489,21 +1489,23 @@ static void fpsimd_load_kernel_state(struct task_struct *task)
 	 * Elide the load if this CPU holds the most recent kernel mode
 	 * FPSIMD context of the current task.
 	 */
-	if (last->st == &task->thread.kernel_fpsimd_state &&
+	if (last->st == task->thread.kernel_fpsimd_state &&
 	    task->thread.kernel_fpsimd_cpu == smp_processor_id())
 		return;
 
-	fpsimd_load_state(&task->thread.kernel_fpsimd_state);
+	fpsimd_load_state(task->thread.kernel_fpsimd_state);
 }
 
 static void fpsimd_save_kernel_state(struct task_struct *task)
 {
 	struct cpu_fp_state cpu_fp_state = {
-		.st		= &task->thread.kernel_fpsimd_state,
+		.st		= task->thread.kernel_fpsimd_state,
 		.to_save	= FP_STATE_FPSIMD,
 	};
 
-	fpsimd_save_state(&task->thread.kernel_fpsimd_state);
+	BUG_ON(!cpu_fp_state.st);
+
+	fpsimd_save_state(task->thread.kernel_fpsimd_state);
 	fpsimd_bind_state_to_cpu(&cpu_fp_state);
 
 	task->thread.kernel_fpsimd_cpu = smp_processor_id();
@@ -1774,6 +1776,7 @@ void fpsimd_update_current_state(struct user_fpsimd_state const *state)
 void fpsimd_flush_task_state(struct task_struct *t)
 {
 	t->thread.fpsimd_cpu = NR_CPUS;
+	t->thread.kernel_fpsimd_state = NULL;
 	/*
 	 * If we don't support fpsimd, bail out after we have
 	 * reset the fpsimd_cpu for this task and clear the
@@ -1833,12 +1836,19 @@ void fpsimd_save_and_flush_cpu_state(void)
  *
  * The caller may freely use the FPSIMD registers until kernel_neon_end() is
  * called.
+ *
+ * Unless called from non-preemptible task context, @state must point to a
+ * caller provided buffer that will be used to preserve the task's kernel mode
+ * FPSIMD context when it is scheduled out, or if it is interrupted by kernel
+ * mode FPSIMD occurring in softirq context. May be %NULL otherwise.
  */
-void kernel_neon_begin(void)
+void kernel_neon_begin(struct user_fpsimd_state *state)
 {
 	if (WARN_ON(!system_supports_fpsimd()))
 		return;
 
+	WARN_ON((preemptible() || in_serving_softirq()) && !state);
+
 	BUG_ON(!may_use_simd());
 
 	get_cpu_fpsimd_context();
@@ -1846,7 +1856,7 @@
 
 	/* Save unsaved fpsimd state, if any: */
 	if (test_thread_flag(TIF_KERNEL_FPSTATE)) {
 		BUG_ON(IS_ENABLED(CONFIG_PREEMPT_RT) || !in_serving_softirq());
-		fpsimd_save_kernel_state(current);
+		fpsimd_save_state(state);
 	} else {
 		fpsimd_save_user_state();
@@ -1867,8 +1877,16 @@
 		 * mode in task context. So in this case, setting the flag here
 		 * is always appropriate.
 		 */
-		if (IS_ENABLED(CONFIG_PREEMPT_RT) || !in_serving_softirq())
+		if (IS_ENABLED(CONFIG_PREEMPT_RT) || !in_serving_softirq()) {
+			/*
+			 * Record the caller provided buffer as the kernel mode
+			 * FP/SIMD buffer for this task, so that the state can
+			 * be preserved and restored on a context switch.
+			 */
+			WARN_ON(current->thread.kernel_fpsimd_state != NULL);
+			current->thread.kernel_fpsimd_state = state;
 			set_thread_flag(TIF_KERNEL_FPSTATE);
+		}
 	}
 
 	/* Invalidate any task state remaining in the fpsimd regs: */
@@ -1886,22 +1904,30 @@ EXPORT_SYMBOL_GPL(kernel_neon_begin);
  *
  * The caller must not use the FPSIMD registers after this function is called,
  * unless kernel_neon_begin() is called again in the meantime.
+ *
+ * The value of @state must match the value passed to the preceding call to
+ * kernel_neon_begin().
  */
-void kernel_neon_end(void)
+void kernel_neon_end(struct user_fpsimd_state *state)
 {
 	if (!system_supports_fpsimd())
 		return;
 
+	if (!test_thread_flag(TIF_KERNEL_FPSTATE))
+		return;
+
 	/*
 	 * If we are returning from a nested use of kernel mode FPSIMD, restore
 	 * the task context kernel mode FPSIMD state. This can only happen when
 	 * running in softirq context on non-PREEMPT_RT.
 	 */
-	if (!IS_ENABLED(CONFIG_PREEMPT_RT) && in_serving_softirq() &&
-	    test_thread_flag(TIF_KERNEL_FPSTATE))
-		fpsimd_load_kernel_state(current);
-	else
+	if (!IS_ENABLED(CONFIG_PREEMPT_RT) && in_serving_softirq()) {
+		fpsimd_load_state(state);
+	} else {
 		clear_thread_flag(TIF_KERNEL_FPSTATE);
+		WARN_ON(current->thread.kernel_fpsimd_state != state);
+		current->thread.kernel_fpsimd_state = NULL;
+	}
 }
 EXPORT_SYMBOL_GPL(kernel_neon_end);
 
@@ -1937,7 +1963,7 @@ void __efi_fpsimd_begin(void)
 	WARN_ON(preemptible());
 
 	if (may_use_simd()) {
-		kernel_neon_begin();
+		kernel_neon_begin(&efi_fpsimd_state);
 	} else {
 		/*
 		 * If !efi_sve_state, SVE can't be in use yet and doesn't need
@@ -1986,7 +2012,7 @@ void __efi_fpsimd_end(void)
 		return;
 
 	if (!efi_fpsimd_state_used) {
-		kernel_neon_end();
+		kernel_neon_end(&efi_fpsimd_state);
 	} else {
 		if (system_supports_sve() && efi_sve_state_used) {
 			bool ffr = true;
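Also for illustration: users of the scoped guard need no changes, because
scoped_ksimd() now allocates the buffer transparently. The compound literal
&(struct user_fpsimd_state){} in the hunk above creates an anonymous buffer
in the caller's stack frame, and DEFINE_LOCK_GUARD_1() records its address
as _T->lock so that the same pointer reaches both kernel_neon_begin() and
kernel_neon_end(). A sketch of a hypothetical user (xor_neon() and its
helpers are made up for the example):

	static void xor_neon(u8 *dst, const u8 *src, size_t len)
	{
		if (!may_use_simd()) {
			xor_scalar(dst, src, len);	/* hypothetical fallback */
			return;
		}

		/* stack buffer and begin/end calls are implicit in the guard */
		scoped_ksimd() {
			do_xor_neon(dst, src, len);	/* hypothetical helper */
		}
	}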