From: Lisa Robinson Date: Wed, 22 Apr 2026 07:45:11 +0000 (+0800) Subject: LoongArch: Align FPU register state to 32 bytes X-Git-Tag: v7.1-rc1~28^2~17 X-Git-Url: http://git.ipfire.org/gitweb/?a=commitdiff_plain;h=e3f4591f7920ce169f2f78fa5a89639ada7d7058;p=thirdparty%2Flinux.git LoongArch: Align FPU register state to 32 bytes Move fpr to the beginning of struct loongarch_fpu so it is naturally aligned to FPU_ALIGN (32 bytes), improving 256-bit SIMD (LASX) context switch performance. Also adjust process.c and fpu.S to work well with the new loongarch_fpu layout. Signed-off-by: Lisa Robinson Signed-off-by: Huacai Chen --- diff --git a/arch/loongarch/include/asm/processor.h b/arch/loongarch/include/asm/processor.h index c3bc44b5f5b3..ce8b953f8c79 100644 --- a/arch/loongarch/include/asm/processor.h +++ b/arch/loongarch/include/asm/processor.h @@ -80,10 +80,10 @@ BUILD_FPR_ACCESS(32) BUILD_FPR_ACCESS(64) struct loongarch_fpu { + union fpureg fpr[NUM_FPU_REGS]; uint64_t fcc; /* 8x8 */ uint32_t fcsr; uint32_t ftop; - union fpureg fpr[NUM_FPU_REGS]; }; struct loongarch_lbt { diff --git a/arch/loongarch/kernel/fpu.S b/arch/loongarch/kernel/fpu.S index f225dcc5b530..bf7d6b8bf600 100644 --- a/arch/loongarch/kernel/fpu.S +++ b/arch/loongarch/kernel/fpu.S @@ -97,7 +97,7 @@ .endm #ifdef CONFIG_32BIT - .macro sc_save_fcc thread tmp0 tmp1 + .macro sc_save_fcc base tmp0 tmp1 movcf2gr \tmp0, $fcc0 move \tmp1, \tmp0 movcf2gr \tmp0, $fcc1 @@ -106,7 +106,7 @@ bstrins.w \tmp1, \tmp0, 23, 16 movcf2gr \tmp0, $fcc3 bstrins.w \tmp1, \tmp0, 31, 24 - EX st.w \tmp1, \thread, THREAD_FCC + EX st.w \tmp1, \base, 0 movcf2gr \tmp0, $fcc4 move \tmp1, \tmp0 movcf2gr \tmp0, $fcc5 @@ -115,11 +115,11 @@ bstrins.w \tmp1, \tmp0, 23, 16 movcf2gr \tmp0, $fcc7 bstrins.w \tmp1, \tmp0, 31, 24 - EX st.w \tmp1, \thread, (THREAD_FCC + 4) + EX st.w \tmp1, \base, 4 .endm - .macro sc_restore_fcc thread tmp0 tmp1 - EX ld.w \tmp0, \thread, THREAD_FCC + .macro sc_restore_fcc base tmp0 tmp1 + EX ld.w \tmp0, \base, 0 bstrpick.w \tmp1, \tmp0, 7, 0 movgr2cf $fcc0, \tmp1 bstrpick.w \tmp1, \tmp0, 15, 8 @@ -128,7 +128,7 @@ movgr2cf $fcc2, \tmp1 bstrpick.w \tmp1, \tmp0, 31, 24 movgr2cf $fcc3, \tmp1 - EX ld.w \tmp0, \thread, (THREAD_FCC + 4) + EX ld.w \tmp0, \base, 4 bstrpick.w \tmp1, \tmp0, 7, 0 movgr2cf $fcc4, \tmp1 bstrpick.w \tmp1, \tmp0, 15, 8 diff --git a/arch/loongarch/kernel/process.c b/arch/loongarch/kernel/process.c index 4ac1c3086152..17e88eedb154 100644 --- a/arch/loongarch/kernel/process.c +++ b/arch/loongarch/kernel/process.c @@ -135,6 +135,8 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src) return 0; } + dst->thread.fpu.fcsr = src->thread.fpu.fcsr; + if (!used_math()) memcpy(dst, src, offsetof(struct task_struct, thread.fpu.fpr)); else