From 55bc30f2e34dcc17a370d1f6c1c992be107c4502 Mon Sep 17 00:00:00 2001
From: Ingo Molnar
Date: Wed, 9 Apr 2025 23:11:23 +0200
Subject: [PATCH] x86/fpu: Remove the thread::fpu pointer

As suggested by Oleg, remove the thread::fpu pointer, as we can
calculate it via x86_task_fpu() at compile-time.

This improves code generation a bit:

   kepler:~/tip> size vmlinux.before vmlinux.after
   text        data        bss        dec         hex        filename
   26475405    10435342    1740804    38651551    24dc69f    vmlinux.before
   26475339    10959630    1216516    38651485    24dc65d    vmlinux.after

Suggested-by: Oleg Nesterov
Signed-off-by: Ingo Molnar
Cc: Andy Lutomirski
Cc: Brian Gerst
Cc: Chang S. Bae
Cc: H. Peter Anvin
Cc: Linus Torvalds
Cc: Peter Zijlstra
Cc: Uros Bizjak
Link: https://lore.kernel.org/r/20250409211127.3544993-5-mingo@kernel.org
---
 arch/x86/include/asm/processor.h | 5 +----
 arch/x86/kernel/fpu/core.c       | 4 +---
 arch/x86/kernel/fpu/init.c       | 1 -
 arch/x86/kernel/process.c        | 2 --
 arch/x86/kernel/vmlinux.lds.S    | 4 ++++
 5 files changed, 6 insertions(+), 10 deletions(-)

diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 5ea7e5d2c4de0..b7f7c9c834098 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -514,12 +514,9 @@ struct thread_struct {
 
 	struct thread_shstk	shstk;
 #endif
-
-	/* Floating point and extended processor state */
-	struct fpu		*fpu;
 };
 
-#define x86_task_fpu(task)	((task)->thread.fpu)
+#define x86_task_fpu(task)	((struct fpu *)((void *)(task) + sizeof(*(task))))
 
 /*
  * X86 doesn't need any embedded-FPU-struct quirks:
diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c
index 853a738fdf2d3..974b276ff0da2 100644
--- a/arch/x86/kernel/fpu/core.c
+++ b/arch/x86/kernel/fpu/core.c
@@ -600,13 +600,11 @@ int fpu_clone(struct task_struct *dst, unsigned long clone_flags, bool minimal,
 	 * This is safe because task_struct size is a multiple of cacheline size.
 	 */
 	struct fpu *src_fpu = x86_task_fpu(current);
-	struct fpu *dst_fpu = (void *)dst + sizeof(*dst);
+	struct fpu *dst_fpu = x86_task_fpu(dst);
 
 	BUILD_BUG_ON(sizeof(*dst) % SMP_CACHE_BYTES != 0);
 	BUG_ON(!src_fpu);
 
-	dst->thread.fpu = dst_fpu;
-
 	/* The new task's FPU state cannot be valid in the hardware. */
 	dst_fpu->last_cpu = -1;
 
diff --git a/arch/x86/kernel/fpu/init.c b/arch/x86/kernel/fpu/init.c
index 848ea79886bac..da41a1d2c40f4 100644
--- a/arch/x86/kernel/fpu/init.c
+++ b/arch/x86/kernel/fpu/init.c
@@ -76,7 +76,6 @@ static struct fpu x86_init_fpu __attribute__ ((aligned (64))) __read_mostly;
 static void __init fpu__init_system_early_generic(void)
 {
 	fpstate_reset(&x86_init_fpu);
-	current->thread.fpu = &x86_init_fpu;
 	set_thread_flag(TIF_NEED_FPU_LOAD);
 	x86_init_fpu.last_cpu = -1;
 
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 3ce4cce46f3f5..88868a90459e9 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -102,8 +102,6 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
 #ifdef CONFIG_VM86
 	dst->thread.vm86 = NULL;
 #endif
-	/* Drop the copied pointer to current's fpstate */
-	dst->thread.fpu = NULL;
 
 	return 0;
 }
diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S
index ccdc45e5b7596..d9ca2d1754daf 100644
--- a/arch/x86/kernel/vmlinux.lds.S
+++ b/arch/x86/kernel/vmlinux.lds.S
@@ -181,6 +181,10 @@ SECTIONS
 	/* equivalent to task_pt_regs(&init_task) */
 	__top_init_kernel_stack = __end_init_stack - TOP_OF_KERNEL_STACK_PADDING - PTREGS_SIZE;
 
+	__x86_init_fpu_begin = .;
+	. = __x86_init_fpu_begin + 128*PAGE_SIZE;
+	__x86_init_fpu_end = .;
+
 #ifdef CONFIG_X86_32
 	/* 32 bit has nosave before _edata */
 	NOSAVE_DATA
-- 
2.47.2