From: Michael Brown Date: Tue, 16 Jun 2026 15:07:09 +0000 (+0100) Subject: [crypto] Use inline assembly for bigint_grow() and bigint_shrink() X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=cd873a2b5d2f3ff81d16908d932e8517dc6b7daa;p=thirdparty%2Fipxe.git [crypto] Use inline assembly for bigint_grow() and bigint_shrink() The bigint_grow() and bigint_shrink() functions are used on the fast path for big integer calculations (e.g. within the X25519 Montgomery ladder step). Use inline assembly implementations of these functions on all architectures. Signed-off-by: Michael Brown --- diff --git a/src/arch/arm32/include/bits/bigint.h b/src/arch/arm32/include/bits/bigint.h index da6cfbf30..b76db7af3 100644 --- a/src/arch/arm32/include/bits/bigint.h +++ b/src/arch/arm32/include/bits/bigint.h @@ -170,10 +170,33 @@ bigint_shr_raw ( uint32_t *value0, unsigned int size ) { static inline __attribute__ (( always_inline )) void bigint_grow_raw ( const uint32_t *source0, unsigned int source_size, uint32_t *dest0, unsigned int dest_size ) { - unsigned int pad_size = ( dest_size - source_size ); + bigint_t ( dest_size ) __attribute__ (( may_alias )) *dest = + ( ( void * ) dest0 ); + const uint32_t *source_end = ( source0 + source_size ); + uint32_t *dest_end = ( dest0 + dest_size ); + uint32_t *discard_source; + uint32_t *discard_dest; + uint32_t discard_source_i; - memcpy ( dest0, source0, sizeof ( bigint_t ( source_size ) ) ); - memset ( ( dest0 + source_size ), 0, sizeof ( bigint_t ( pad_size ) ) ); + __asm__ __volatile__ ( "\n1:\n\t" + "ldmia %0!, {%2}\n\t" + "stmia %1!, {%2}\n\t" + "teq %0, %4\n\t" + "bne 1b\n\t" + "eor %2, %2\n\t" + "b 3f\n\t" + "\n2:\n\t" + "stmia %1!, {%2}\n\t" + "\n3:\n\t" + "teq %1, %5\n\t" + "bne 2b\n\t" + : "=&l" ( discard_source ), + "=&l" ( discard_dest ), + "=&l" ( discard_source_i ), + "=m" ( *dest ) + : "l" ( source_end ), + "l" ( dest_end ), + "0" ( source0 ), "1" ( dest0 ) ); } /** @@ -187,8 +210,24 @@ bigint_grow_raw ( const uint32_t *source0, unsigned int source_size, static inline __attribute__ (( always_inline )) void bigint_shrink_raw ( const uint32_t *source0, unsigned int source_size __unused, uint32_t *dest0, unsigned int dest_size ) { + bigint_t ( dest_size ) __attribute__ (( may_alias )) *dest = + ( ( void * ) dest0 ); + uint32_t *dest_end = ( dest0 + dest_size ); + uint32_t *discard_source; + uint32_t *discard_dest; + uint32_t discard_source_i; - memcpy ( dest0, source0, sizeof ( bigint_t ( dest_size ) ) ); + __asm__ __volatile__ ( "\n1:\n\t" + "ldmia %0!, {%2}\n\t" + "stmia %1!, {%2}\n\t" + "teq %1, %4\n\t" + "bne 1b\n\t" + : "=&l" ( discard_source ), + "=&l" ( discard_dest ), + "=&l" ( discard_source_i ), + "=m" ( *dest ) + : "l" ( dest_end ), + "0" ( source0 ), "1" ( dest0 ) ); } /** diff --git a/src/arch/arm64/include/bits/bigint.h b/src/arch/arm64/include/bits/bigint.h index 8d7a79531..f1163ef2c 100644 --- a/src/arch/arm64/include/bits/bigint.h +++ b/src/arch/arm64/include/bits/bigint.h @@ -171,10 +171,34 @@ bigint_shr_raw ( uint64_t *value0, unsigned int size ) { static inline __attribute__ (( always_inline )) void bigint_grow_raw ( const uint64_t *source0, unsigned int source_size, uint64_t *dest0, unsigned int dest_size ) { + bigint_t ( dest_size ) __attribute__ (( may_alias )) *dest = + ( ( void * ) dest0 ); unsigned int pad_size = ( dest_size - source_size ); + uint64_t *discard_source; + uint64_t *discard_dest; + uint64_t discard_source_i; + unsigned int discard_source_size; + unsigned int discard_pad_size; - memcpy ( dest0, source0, sizeof ( bigint_t ( source_size ) ) ); - memset ( ( dest0 + source_size ), 0, sizeof ( bigint_t ( pad_size ) ) ); + __asm__ __volatile__ ( "\n1:\n\t" + "ldr %2, [%0], #8\n\t" + "str %2, [%1], #8\n\t" + "sub %w3, %w3, #1\n\t" + "cbnz %w3, 1b\n\t" + "b 3f\n\t" + "\n2:\n\t" + "str xzr, [%1], #8\n\t" + "sub %w4, %w4, #1\n\t" + "\n3:\n\t" + "cbnz %w4, 2b\n\t" + : "=&r" ( discard_source ), + "=&r" ( discard_dest ), + "=&r" ( discard_source_i ), + "=&r" ( discard_source_size ), + "=&r" ( discard_pad_size ), + "=m" ( *dest ) + : "0" ( source0 ), "1" ( dest0 ), + "3" ( source_size ), "4" ( pad_size ) ); } /** @@ -188,8 +212,25 @@ bigint_grow_raw ( const uint64_t *source0, unsigned int source_size, static inline __attribute__ (( always_inline )) void bigint_shrink_raw ( const uint64_t *source0, unsigned int source_size __unused, uint64_t *dest0, unsigned int dest_size ) { + bigint_t ( dest_size ) __attribute__ (( may_alias )) *dest = + ( ( void * ) dest0 ); + uint64_t *discard_source; + uint64_t *discard_dest; + uint64_t discard_source_i; + unsigned int discard_dest_size; - memcpy ( dest0, source0, sizeof ( bigint_t ( dest_size ) ) ); + __asm__ __volatile__ ( "\n1:\n\t" + "ldr %2, [%0], #8\n\t" + "str %2, [%1], #8\n\t" + "sub %w3, %w3, #1\n\t" + "cbnz %w3, 1b\n\t" + : "=&r" ( discard_source ), + "=&r" ( discard_dest ), + "=&r" ( discard_source_i ), + "=&r" ( discard_dest_size ), + "=m" ( *dest ) + : "0" ( source0 ), "1" ( dest0 ), + "3" ( dest_size ) ); } /** diff --git a/src/arch/loong64/include/bits/bigint.h b/src/arch/loong64/include/bits/bigint.h index b2938f849..31dd0a04e 100644 --- a/src/arch/loong64/include/bits/bigint.h +++ b/src/arch/loong64/include/bits/bigint.h @@ -214,10 +214,36 @@ bigint_shr_raw ( uint64_t *value0, unsigned int size ) { static inline __attribute__ (( always_inline )) void bigint_grow_raw ( const uint64_t *source0, unsigned int source_size, uint64_t *dest0, unsigned int dest_size ) { - unsigned int pad_size = ( dest_size - source_size ); + const bigint_t ( source_size ) __attribute__ (( may_alias )) *source = + ( ( const void * ) source0 ); + bigint_t ( dest_size ) __attribute__ (( may_alias )) *dest = + ( ( void * ) dest0 ); + uint64_t discard_source_i; + uint64_t discard_offset; - memcpy ( dest0, source0, sizeof ( bigint_t ( source_size ) ) ); - memset ( ( dest0 + source_size ), 0, sizeof ( bigint_t ( pad_size ) ) ); + __asm__ __volatile__ ( "\n1:\n\t" + /* Copy dest[i] */ + "ldx.d %0, %3, %1\n\t" + "stx.d %0, %4, %1\n\t" + /* Loop */ + "addi.w %1, %1, 8\n\t" + "bne %1, %5, 1b\n\t" + "b 3f\n\t" + "\n2:\n\t" + /* Zero dest[i] */ + "stx.d $zero, %4, %1\n\t" + /* Loop */ + "addi.w %1, %1, 8\n\t" + "\n3:\n\t" + "bne %1, %6, 2b\n\t" + : "=&r" ( discard_source_i ), + "=&r" ( discard_offset ), + "=m" ( *dest ) + : "r" ( source0 ), + "r" ( dest0 ), + "r" ( sizeof ( *source ) ), + "r" ( sizeof ( *dest ) ), + "1" ( 0 ) ); } /** @@ -231,8 +257,25 @@ bigint_grow_raw ( const uint64_t *source0, unsigned int source_size, static inline __attribute__ (( always_inline )) void bigint_shrink_raw ( const uint64_t *source0, unsigned int source_size __unused, uint64_t *dest0, unsigned int dest_size ) { + bigint_t ( dest_size ) __attribute__ (( may_alias )) *dest = + ( ( void * ) dest0 ); + uint64_t discard_source_i; + uint64_t discard_offset; - memcpy ( dest0, source0, sizeof ( bigint_t ( dest_size ) ) ); + __asm__ __volatile__ ( "\n1:\n\t" + /* Copy dest[i] */ + "ldx.d %0, %3, %1\n\t" + "stx.d %0, %4, %1\n\t" + /* Loop */ + "addi.w %1, %1, 8\n\t" + "bne %1, %5, 1b\n\t" + : "=&r" ( discard_source_i ), + "=&r" ( discard_offset ), + "=m" ( *dest ) + : "r" ( source0 ), + "r" ( dest0 ), + "r" ( sizeof ( *dest ) ), + "1" ( 0 ) ); } /** diff --git a/src/arch/riscv/include/bits/bigint.h b/src/arch/riscv/include/bits/bigint.h index 70a267bbe..07bf2fba5 100644 --- a/src/arch/riscv/include/bits/bigint.h +++ b/src/arch/riscv/include/bits/bigint.h @@ -9,9 +9,6 @@ FILE_LICENCE ( GPL2_OR_LATER_OR_UBDL ); FILE_SECBOOT ( PERMITTED ); -#include -#include - /** Element of a big integer */ typedef unsigned long bigint_element_t; @@ -209,10 +206,38 @@ bigint_shr_raw ( unsigned long *value0, unsigned int size ) { static inline __attribute__ (( always_inline )) void bigint_grow_raw ( const unsigned long *source0, unsigned int source_size, unsigned long *dest0, unsigned int dest_size ) { - unsigned int pad_size = ( dest_size - source_size ); + bigint_t ( dest_size ) __attribute__ (( may_alias )) *dest = + ( ( void * ) dest0 ); + const unsigned long *sourceN = ( source0 + source_size ); + unsigned long *destN = ( dest0 + dest_size ); + unsigned long *discard_source; + unsigned long *discard_dest; + unsigned long discard_source_i; - memcpy ( dest0, source0, sizeof ( bigint_t ( source_size ) ) ); - memset ( ( dest0 + source_size ), 0, sizeof ( bigint_t ( pad_size ) ) ); + __asm__ __volatile__ ( "\n1:\n\t" + /* Copy dest[i] */ + LOADN " %2, (%0)\n\t" + STOREN " %2, (%1)\n\t" + /* Loop */ + "addi %0, %0, %6\n\t" + "addi %1, %1, %6\n\t" + "bne %0, %4, 1b\n\t" + "j 3f\n\t" + "\n2:\n\t" + /* Zero dest[i] */ + STOREN " zero, (%1)\n\t" + /* Loop */ + "addi %1, %1, %6\n\t" + "\n3:\n\t" + "bne %1, %5, 2b\n\t" + : "=&r" ( discard_source ), + "=&r" ( discard_dest ), + "=&r" ( discard_source_i ), + "=m" ( *dest ) + : "r" ( sourceN ), + "r" ( destN ), + "i" ( sizeof ( unsigned long ) ), + "0" ( source0 ), "1" ( dest0 ) ); } /** @@ -227,8 +252,28 @@ static inline __attribute__ (( always_inline )) void bigint_shrink_raw ( const unsigned long *source0, unsigned int source_size __unused, unsigned long *dest0, unsigned int dest_size ) { + bigint_t ( dest_size ) __attribute__ (( may_alias )) *dest = + ( ( void * ) dest0 ); + unsigned long *destN = ( dest0 + dest_size ); + unsigned long *discard_source; + unsigned long *discard_dest; + unsigned long discard_source_i; - memcpy ( dest0, source0, sizeof ( bigint_t ( dest_size ) ) ); + __asm__ __volatile__ ( "\n1:\n\t" + /* Copy dest[i] */ + LOADN " %2, (%0)\n\t" + STOREN " %2, (%1)\n\t" + /* Loop */ + "addi %0, %0, %5\n\t" + "addi %1, %1, %5\n\t" + "bne %1, %4, 1b\n\t" + : "=&r" ( discard_source ), + "=&r" ( discard_dest ), + "=&r" ( discard_source_i ), + "=m" ( *dest ) + : "r" ( destN ), + "i" ( sizeof ( unsigned long ) ), + "0" ( source0 ), "1" ( dest0 ) ); } /** diff --git a/src/arch/s390x/include/bits/bigint.h b/src/arch/s390x/include/bits/bigint.h index 83a0923cf..57d64832f 100644 --- a/src/arch/s390x/include/bits/bigint.h +++ b/src/arch/s390x/include/bits/bigint.h @@ -8,8 +8,6 @@ FILE_LICENCE ( GPL2_OR_LATER_OR_UBDL ); -#include - /** Element of a big integer */ typedef unsigned long bigint_element_t; diff --git a/src/include/ipxe/bigint.h b/src/include/ipxe/bigint.h index 790be4c1c..ababde0c7 100644 --- a/src/include/ipxe/bigint.h +++ b/src/include/ipxe/bigint.h @@ -10,6 +10,7 @@ FILE_LICENCE ( GPL2_OR_LATER_OR_UBDL ); FILE_SECBOOT ( PERMITTED ); #include +#include /** * Define a big-integer type