git.ipfire.org Git - thirdparty/kernel/linux.git/commitdiff
LoongArch: Align FPU register state to 32 bytes
author Lisa Robinson <lisa@bytefly.space>
Wed, 22 Apr 2026 07:45:11 +0000 (15:45 +0800)
committer Huacai Chen <chenhuacai@loongson.cn>
Wed, 22 Apr 2026 07:45:11 +0000 (15:45 +0800)
Move fpr to the beginning of struct loongarch_fpu so it is naturally
aligned to FPU_ALIGN (32 bytes), improving 256-bit SIMD (LASX) context
switch performance.

Also adjust process.c and fpu.S to work well with the new loongarch_fpu
layout.

Signed-off-by: Lisa Robinson <lisa@bytefly.space>
Signed-off-by: Huacai Chen <chenhuacai@loongson.cn>
arch/loongarch/include/asm/processor.h
arch/loongarch/kernel/fpu.S
arch/loongarch/kernel/process.c

index c3bc44b5f5b30b2d43b901da6c339c26594e08fc..ce8b953f8c793f0ba875640bd18f9f4000e02ad8 100644 (file)
@@ -80,10 +80,10 @@ BUILD_FPR_ACCESS(32)
 BUILD_FPR_ACCESS(64)
 
 struct loongarch_fpu {
+       union fpureg    fpr[NUM_FPU_REGS];
        uint64_t        fcc;    /* 8x8 */
        uint32_t        fcsr;
        uint32_t        ftop;
-       union fpureg    fpr[NUM_FPU_REGS];
 };
 
 struct loongarch_lbt {
index f225dcc5b530c7691f282da3450ff3d8efc539fc..bf7d6b8bf600e9e5eb050a0d1621fa0d6ff0b56a 100644 (file)
@@ -97,7 +97,7 @@
        .endm
 
 #ifdef CONFIG_32BIT
-       .macro sc_save_fcc thread tmp0 tmp1
+       .macro sc_save_fcc base tmp0 tmp1
        movcf2gr        \tmp0, $fcc0
        move            \tmp1, \tmp0
        movcf2gr        \tmp0, $fcc1
        bstrins.w       \tmp1, \tmp0, 23, 16
        movcf2gr        \tmp0, $fcc3
        bstrins.w       \tmp1, \tmp0, 31, 24
-       EX      st.w    \tmp1, \thread, THREAD_FCC
+       EX      st.w    \tmp1, \base, 0
        movcf2gr        \tmp0, $fcc4
        move            \tmp1, \tmp0
        movcf2gr        \tmp0, $fcc5
        bstrins.w       \tmp1, \tmp0, 23, 16
        movcf2gr        \tmp0, $fcc7
        bstrins.w       \tmp1, \tmp0, 31, 24
-       EX      st.w    \tmp1, \thread, (THREAD_FCC + 4)
+       EX      st.w    \tmp1, \base, 4
        .endm
 
-       .macro sc_restore_fcc thread tmp0 tmp1
-       EX      ld.w    \tmp0, \thread, THREAD_FCC
+       .macro sc_restore_fcc base tmp0 tmp1
+       EX      ld.w    \tmp0, \base, 0
        bstrpick.w      \tmp1, \tmp0, 7, 0
        movgr2cf        $fcc0, \tmp1
        bstrpick.w      \tmp1, \tmp0, 15, 8
        movgr2cf        $fcc2, \tmp1
        bstrpick.w      \tmp1, \tmp0, 31, 24
        movgr2cf        $fcc3, \tmp1
-       EX      ld.w    \tmp0, \thread, (THREAD_FCC + 4)
+       EX      ld.w    \tmp0, \base, 4
        bstrpick.w      \tmp1, \tmp0, 7, 0
        movgr2cf        $fcc4, \tmp1
        bstrpick.w      \tmp1, \tmp0, 15, 8
index 4ac1c30861526dff8d8e2c05e973130ba552153b..17e88eedb154313dd320b651aa34fe8afe1fe7a4 100644 (file)
@@ -135,6 +135,8 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
                return 0;
        }
 
+       dst->thread.fpu.fcsr =  src->thread.fpu.fcsr;
+
        if (!used_math())
                memcpy(dst, src, offsetof(struct task_struct, thread.fpu.fpr));
        else