git.ipfire.org Git - thirdparty/ipxe.git/commitdiff
[crypto] Use inline assembly for bigint_grow() and bigint_shrink()
author Michael Brown <mcb30@ipxe.org>
Tue, 16 Jun 2026 15:07:09 +0000 (16:07 +0100)
committer Michael Brown <mcb30@ipxe.org>
Tue, 16 Jun 2026 15:07:09 +0000 (16:07 +0100)
The bigint_grow() and bigint_shrink() functions are used on the fast
path for big integer calculations (e.g. within the X25519 Montgomery
ladder step).  Use inline assembly implementations of these functions
on all architectures.

Signed-off-by: Michael Brown <mcb30@ipxe.org>
src/arch/arm32/include/bits/bigint.h
src/arch/arm64/include/bits/bigint.h
src/arch/loong64/include/bits/bigint.h
src/arch/riscv/include/bits/bigint.h
src/arch/s390x/include/bits/bigint.h
src/include/ipxe/bigint.h

index da6cfbf30e265441e78ee4f33eedb4f0fe18d899..b76db7af3f97ad51dbcb00bfb5608b16e379ecaf 100644 (file)
@@ -170,10 +170,33 @@ bigint_shr_raw ( uint32_t *value0, unsigned int size ) {
 static inline __attribute__ (( always_inline )) void
 bigint_grow_raw ( const uint32_t *source0, unsigned int source_size,
                   uint32_t *dest0, unsigned int dest_size ) { /* Copy source_size words to dest0, then zero-fill up to dest_size words */
-       unsigned int pad_size = ( dest_size - source_size );
+       bigint_t ( dest_size ) __attribute__ (( may_alias )) *dest =
+               ( ( void * ) dest0 ); /* View of the whole destination, used only for the "=m" operand */
+       const uint32_t *source_end = ( source0 + source_size ); /* Copy loop stops when the source pointer reaches this */
+       uint32_t *dest_end = ( dest0 + dest_size ); /* Zero-fill loop stops when the destination pointer reaches this */
+       uint32_t *discard_source; /* Receives the advanced source pointer; not used afterwards */
+       uint32_t *discard_dest; /* Receives the advanced destination pointer; not used afterwards */
+       uint32_t discard_source_i; /* Scratch register holding the word being moved */
 
-       memcpy ( dest0, source0, sizeof ( bigint_t ( source_size ) ) );
-       memset ( ( dest0 + source_size ), 0, sizeof ( bigint_t ( pad_size ) ) );
+       __asm__ __volatile__ ( "\n1:\n\t" /* Copy loop */
+                              "ldmia %0!, {%2}\n\t" /* Load one word and advance the source pointer */
+                              "stmia %1!, {%2}\n\t" /* Store it and advance the destination pointer */
+                              "teq %0, %4\n\t" /* Reached source_end? (only tested after the first copy) */
+                              "bne 1b\n\t"
+                              "eor %2, %2\n\t" /* Zero the scratch register for use as padding */
+                              "b 3f\n\t" /* Enter the zero-fill loop at its test, so it may run zero times */
+                              "\n2:\n\t"
+                              "stmia %1!, {%2}\n\t" /* Store a zero word and advance the destination pointer */
+                              "\n3:\n\t"
+                              "teq %1, %5\n\t" /* Reached dest_end? */
+                              "bne 2b\n\t"
+                              : "=&l" ( discard_source ), /* %0: running source pointer */
+                                "=&l" ( discard_dest ), /* %1: running destination pointer */
+                                "=&l" ( discard_source_i ), /* %2: scratch word */
+                                "=m" ( *dest ) /* %3: tells the compiler the destination is written */
+                              : "l" ( source_end ), /* %4 */
+                                "l" ( dest_end ), /* %5 */
+                                "0" ( source0 ), "1" ( dest0 ) ); /* NOTE(review): source memory is read but is not listed as an input operand - confirm this is intended */
 }
 
 /**
@@ -187,8 +210,24 @@ bigint_grow_raw ( const uint32_t *source0, unsigned int source_size,
 static inline __attribute__ (( always_inline )) void
 bigint_shrink_raw ( const uint32_t *source0, unsigned int source_size __unused,
                     uint32_t *dest0, unsigned int dest_size ) { /* Copy the first dest_size words of source0 to dest0 */
+       bigint_t ( dest_size ) __attribute__ (( may_alias )) *dest =
+               ( ( void * ) dest0 ); /* View of the whole destination, used only for the "=m" operand */
+       uint32_t *dest_end = ( dest0 + dest_size ); /* Loop stops when the destination pointer reaches this */
+       uint32_t *discard_source; /* Receives the advanced source pointer; not used afterwards */
+       uint32_t *discard_dest; /* Receives the advanced destination pointer; not used afterwards */
+       uint32_t discard_source_i; /* Scratch register holding the word being moved */
 
-       memcpy ( dest0, source0, sizeof ( bigint_t ( dest_size ) ) );
+       __asm__ __volatile__ ( "\n1:\n\t" /* Copy loop */
+                              "ldmia %0!, {%2}\n\t" /* Load one word and advance the source pointer */
+                              "stmia %1!, {%2}\n\t" /* Store it and advance the destination pointer */
+                              "teq %1, %4\n\t" /* Reached dest_end? (only tested after the first copy) */
+                              "bne 1b\n\t"
+                              : "=&l" ( discard_source ), /* %0: running source pointer */
+                                "=&l" ( discard_dest ), /* %1: running destination pointer */
+                                "=&l" ( discard_source_i ), /* %2: scratch word */
+                                "=m" ( *dest ) /* %3: tells the compiler the destination is written */
+                              : "l" ( dest_end ), /* %4 */
+                                "0" ( source0 ), "1" ( dest0 ) ); /* NOTE(review): source memory is read but is not listed as an input operand - confirm this is intended */
 }
 
 /**
index 8d7a795310b6b5156a4e381d73d5c7b357b773c7..f1163ef2cc0abac45acf78a83f1026e56d4a32a0 100644 (file)
@@ -171,10 +171,34 @@ bigint_shr_raw ( uint64_t *value0, unsigned int size ) {
 static inline __attribute__ (( always_inline )) void
 bigint_grow_raw ( const uint64_t *source0, unsigned int source_size,
                   uint64_t *dest0, unsigned int dest_size ) { /* Copy source_size elements to dest0, then zero-fill up to dest_size elements */
+       bigint_t ( dest_size ) __attribute__ (( may_alias )) *dest =
+               ( ( void * ) dest0 ); /* View of the whole destination, used only for the "=m" operand */
        unsigned int pad_size = ( dest_size - source_size ); /* Number of elements to zero after the copy */
+       uint64_t *discard_source; /* Receives the advanced source pointer; not used afterwards */
+       uint64_t *discard_dest; /* Receives the advanced destination pointer; not used afterwards */
+       uint64_t discard_source_i; /* Scratch register holding the element being moved */
+       unsigned int discard_source_size; /* Countdown of elements left to copy */
+       unsigned int discard_pad_size; /* Countdown of elements left to zero */
 
-       memcpy ( dest0, source0, sizeof ( bigint_t ( source_size ) ) );
-       memset ( ( dest0 + source_size ), 0, sizeof ( bigint_t ( pad_size ) ) );
+       __asm__ __volatile__ ( "\n1:\n\t" /* Copy loop */
+                              "ldr %2, [%0], #8\n\t" /* Load one element, post-increment the source pointer by 8 */
+                              "str %2, [%1], #8\n\t" /* Store it, post-increment the destination pointer by 8 */
+                              "sub %w3, %w3, #1\n\t" /* One fewer element to copy */
+                              "cbnz %w3, 1b\n\t" /* Repeat while the copy count is non-zero (tested after the first copy) */
+                              "b 3f\n\t" /* Enter the zero-fill loop at its test, so it may run zero times */
+                              "\n2:\n\t"
+                              "str xzr, [%1], #8\n\t" /* Store a zero element, post-increment the destination pointer */
+                              "sub %w4, %w4, #1\n\t" /* One fewer element to zero */
+                              "\n3:\n\t"
+                              "cbnz %w4, 2b\n\t" /* Repeat while the pad count is non-zero */
+                              : "=&r" ( discard_source ), /* %0: running source pointer */
+                                "=&r" ( discard_dest ), /* %1: running destination pointer */
+                                "=&r" ( discard_source_i ), /* %2: scratch element */
+                                "=&r" ( discard_source_size ), /* %3: copy countdown */
+                                "=&r" ( discard_pad_size ), /* %4: pad countdown */
+                                "=m" ( *dest ) /* %5: tells the compiler the destination is written */
+                              : "0" ( source0 ), "1" ( dest0 ),
+                                "3" ( source_size ), "4" ( pad_size ) ); /* NOTE(review): source memory is read but is not listed as an input operand - confirm this is intended */
 }
 
 /**
@@ -188,8 +212,25 @@ bigint_grow_raw ( const uint64_t *source0, unsigned int source_size,
 static inline __attribute__ (( always_inline )) void
 bigint_shrink_raw ( const uint64_t *source0, unsigned int source_size __unused,
                     uint64_t *dest0, unsigned int dest_size ) { /* Copy the first dest_size elements of source0 to dest0 */
+       bigint_t ( dest_size ) __attribute__ (( may_alias )) *dest =
+               ( ( void * ) dest0 ); /* View of the whole destination, used only for the "=m" operand */
+       uint64_t *discard_source; /* Receives the advanced source pointer; not used afterwards */
+       uint64_t *discard_dest; /* Receives the advanced destination pointer; not used afterwards */
+       uint64_t discard_source_i; /* Scratch register holding the element being moved */
+       unsigned int discard_dest_size; /* Countdown of elements left to copy */
 
-       memcpy ( dest0, source0, sizeof ( bigint_t ( dest_size ) ) );
+       __asm__ __volatile__ ( "\n1:\n\t" /* Copy loop */
+                              "ldr %2, [%0], #8\n\t" /* Load one element, post-increment the source pointer by 8 */
+                              "str %2, [%1], #8\n\t" /* Store it, post-increment the destination pointer by 8 */
+                              "sub %w3, %w3, #1\n\t" /* One fewer element to copy */
+                              "cbnz %w3, 1b\n\t" /* Repeat while the count is non-zero (tested after the first copy) */
+                              : "=&r" ( discard_source ), /* %0: running source pointer */
+                                "=&r" ( discard_dest ), /* %1: running destination pointer */
+                                "=&r" ( discard_source_i ), /* %2: scratch element */
+                                "=&r" ( discard_dest_size ), /* %3: copy countdown */
+                                "=m" ( *dest ) /* %4: tells the compiler the destination is written */
+                              : "0" ( source0 ), "1" ( dest0 ),
+                                "3" ( dest_size ) ); /* NOTE(review): source memory is read but is not listed as an input operand - confirm this is intended */
 }
 
 /**
index b2938f849827dbb40534d659cfdaa51bbd500d53..31dd0a04e92c3225639027aa4e52cd1f8a77e9c9 100644 (file)
@@ -214,10 +214,36 @@ bigint_shr_raw ( uint64_t *value0, unsigned int size ) {
 static inline __attribute__ (( always_inline )) void
 bigint_grow_raw ( const uint64_t *source0, unsigned int source_size,
                   uint64_t *dest0, unsigned int dest_size ) { /* Copy source_size elements to dest0, then zero-fill up to dest_size elements */
-       unsigned int pad_size = ( dest_size - source_size );
+       const bigint_t ( source_size ) __attribute__ (( may_alias )) *source =
+               ( ( const void * ) source0 ); /* Only used below via sizeof ( *source ) */
+       bigint_t ( dest_size ) __attribute__ (( may_alias )) *dest =
+               ( ( void * ) dest0 ); /* Used for sizeof ( *dest ) and the "=m" operand */
+       uint64_t discard_source_i; /* Scratch register holding the element being moved */
+       uint64_t discard_offset; /* Running byte offset shared by source and destination */
 
-       memcpy ( dest0, source0, sizeof ( bigint_t ( source_size ) ) );
-       memset ( ( dest0 + source_size ), 0, sizeof ( bigint_t ( pad_size ) ) );
+       __asm__ __volatile__ ( "\n1:\n\t"
+                              /* Copy dest[i] */
+                              "ldx.d %0, %3, %1\n\t" /* Load from source0 plus byte offset */
+                              "stx.d %0, %4, %1\n\t" /* Store to dest0 plus byte offset */
+                              /* Loop */
+                              "addi.w %1, %1, 8\n\t" /* Advance the offset by one 8-byte element */
+                              "bne %1, %5, 1b\n\t" /* Repeat until the offset equals the source size in bytes (tested after the first copy) */
+                              "b 3f\n\t" /* Enter the zero-fill loop at its test, so it may run zero times */
+                              "\n2:\n\t"
+                              /* Zero dest[i] */
+                              "stx.d $zero, %4, %1\n\t" /* Store zero to dest0 plus byte offset */
+                              /* Loop */
+                              "addi.w %1, %1, 8\n\t" /* Advance the offset by one 8-byte element */
+                              "\n3:\n\t"
+                              "bne %1, %6, 2b\n\t" /* Repeat until the offset equals the destination size in bytes */
+                              : "=&r" ( discard_source_i ), /* %0: scratch element */
+                                "=&r" ( discard_offset ), /* %1: byte offset */
+                                "=m" ( *dest ) /* %2: tells the compiler the destination is written */
+                              : "r" ( source0 ), /* %3 */
+                                "r" ( dest0 ), /* %4 */
+                                "r" ( sizeof ( *source ) ), /* %5: byte length of the copy phase */
+                                "r" ( sizeof ( *dest ) ), /* %6: byte length of the whole destination */
+                                "1" ( 0 ) ); /* Offset starts at zero. NOTE(review): source memory is read but is not listed as an input operand - confirm this is intended */
 }
 
 /**
@@ -231,8 +257,25 @@ bigint_grow_raw ( const uint64_t *source0, unsigned int source_size,
 static inline __attribute__ (( always_inline )) void
 bigint_shrink_raw ( const uint64_t *source0, unsigned int source_size __unused,
                     uint64_t *dest0, unsigned int dest_size ) { /* Copy the first dest_size elements of source0 to dest0 */
+       bigint_t ( dest_size ) __attribute__ (( may_alias )) *dest =
+               ( ( void * ) dest0 ); /* Used for sizeof ( *dest ) and the "=m" operand */
+       uint64_t discard_source_i; /* Scratch register holding the element being moved */
+       uint64_t discard_offset; /* Running byte offset shared by source and destination */
 
-       memcpy ( dest0, source0, sizeof ( bigint_t ( dest_size ) ) );
+       __asm__ __volatile__ ( "\n1:\n\t"
+                              /* Copy dest[i] */
+                              "ldx.d %0, %3, %1\n\t" /* Load from source0 plus byte offset */
+                              "stx.d %0, %4, %1\n\t" /* Store to dest0 plus byte offset */
+                              /* Loop */
+                              "addi.w %1, %1, 8\n\t" /* Advance the offset by one 8-byte element */
+                              "bne %1, %5, 1b\n\t" /* Repeat until the offset equals the destination size in bytes (tested after the first copy) */
+                              : "=&r" ( discard_source_i ), /* %0: scratch element */
+                                "=&r" ( discard_offset ), /* %1: byte offset */
+                                "=m" ( *dest ) /* %2: tells the compiler the destination is written */
+                              : "r" ( source0 ), /* %3 */
+                                "r" ( dest0 ), /* %4 */
+                                "r" ( sizeof ( *dest ) ), /* %5: byte length to copy */
+                                "1" ( 0 ) ); /* Offset starts at zero. NOTE(review): source memory is read but is not listed as an input operand - confirm this is intended */
 }
 
 /**
index 70a267bbef59892c33b0cfc25d1962609e5a67e0..07bf2fba512bbdcbca2ce3460bf23ff4d4720c75 100644 (file)
@@ -9,9 +9,6 @@
 FILE_LICENCE ( GPL2_OR_LATER_OR_UBDL );
 FILE_SECBOOT ( PERMITTED );
 
-#include <stdint.h>
-#include <string.h>
-
 /** Element of a big integer */
 typedef unsigned long bigint_element_t;
 
@@ -209,10 +206,38 @@ bigint_shr_raw ( unsigned long *value0, unsigned int size ) {
 static inline __attribute__ (( always_inline )) void
 bigint_grow_raw ( const unsigned long *source0, unsigned int source_size,
                   unsigned long *dest0, unsigned int dest_size ) { /* Copy source_size elements to dest0, then zero-fill up to dest_size elements */
-       unsigned int pad_size = ( dest_size - source_size );
+       bigint_t ( dest_size ) __attribute__ (( may_alias )) *dest =
+               ( ( void * ) dest0 ); /* View of the whole destination, used only for the "=m" operand */
+       const unsigned long *sourceN = ( source0 + source_size ); /* Copy loop stops when the source pointer reaches this */
+       unsigned long *destN = ( dest0 + dest_size ); /* Zero-fill loop stops when the destination pointer reaches this */
+       unsigned long *discard_source; /* Receives the advanced source pointer; not used afterwards */
+       unsigned long *discard_dest; /* Receives the advanced destination pointer; not used afterwards */
+       unsigned long discard_source_i; /* Scratch register holding the element being moved */
 
-       memcpy ( dest0, source0, sizeof ( bigint_t ( source_size ) ) );
-       memset ( ( dest0 + source_size ), 0, sizeof ( bigint_t ( pad_size ) ) );
+       __asm__ __volatile__ ( "\n1:\n\t"
+                              /* Copy dest[i] */
+                              LOADN " %2, (%0)\n\t" /* LOADN is defined outside this hunk; presumably a native-width load - confirm */
+                              STOREN " %2, (%1)\n\t" /* STOREN is defined outside this hunk; presumably a native-width store - confirm */
+                              /* Loop */
+                              "addi %0, %0, %6\n\t" /* Advance the source pointer by one element */
+                              "addi %1, %1, %6\n\t" /* Advance the destination pointer by one element */
+                              "bne %0, %4, 1b\n\t" /* Repeat until the source pointer reaches sourceN (tested after the first copy) */
+                              "j 3f\n\t" /* Enter the zero-fill loop at its test, so it may run zero times */
+                              "\n2:\n\t"
+                              /* Zero dest[i] */
+                              STOREN " zero, (%1)\n\t" /* Store the zero register to the destination */
+                              /* Loop */
+                              "addi %1, %1, %6\n\t" /* Advance the destination pointer by one element */
+                              "\n3:\n\t"
+                              "bne %1, %5, 2b\n\t" /* Repeat until the destination pointer reaches destN */
+                              : "=&r" ( discard_source ), /* %0: running source pointer */
+                                "=&r" ( discard_dest ), /* %1: running destination pointer */
+                                "=&r" ( discard_source_i ), /* %2: scratch element */
+                                "=m" ( *dest ) /* %3: tells the compiler the destination is written */
+                              : "r" ( sourceN ), /* %4 */
+                                "r" ( destN ), /* %5 */
+                                "i" ( sizeof ( unsigned long ) ), /* %6: element size as an immediate */
+                                "0" ( source0 ), "1" ( dest0 ) ); /* NOTE(review): source memory is read but is not listed as an input operand - confirm this is intended */
 }
 
 /**
@@ -227,8 +252,28 @@ static inline __attribute__ (( always_inline )) void
 bigint_shrink_raw ( const unsigned long *source0,
                     unsigned int source_size __unused,
                     unsigned long *dest0, unsigned int dest_size ) { /* Copy the first dest_size elements of source0 to dest0 */
+       bigint_t ( dest_size ) __attribute__ (( may_alias )) *dest =
+               ( ( void * ) dest0 ); /* View of the whole destination, used only for the "=m" operand */
+       unsigned long *destN = ( dest0 + dest_size ); /* Loop stops when the destination pointer reaches this */
+       unsigned long *discard_source; /* Receives the advanced source pointer; not used afterwards */
+       unsigned long *discard_dest; /* Receives the advanced destination pointer; not used afterwards */
+       unsigned long discard_source_i; /* Scratch register holding the element being moved */
 
-       memcpy ( dest0, source0, sizeof ( bigint_t ( dest_size ) ) );
+       __asm__ __volatile__ ( "\n1:\n\t"
+                              /* Copy dest[i] */
+                              LOADN " %2, (%0)\n\t" /* LOADN is defined outside this hunk; presumably a native-width load - confirm */
+                              STOREN " %2, (%1)\n\t" /* STOREN is defined outside this hunk; presumably a native-width store - confirm */
+                              /* Loop */
+                              "addi %0, %0, %5\n\t" /* Advance the source pointer by one element */
+                              "addi %1, %1, %5\n\t" /* Advance the destination pointer by one element */
+                              "bne %1, %4, 1b\n\t" /* Repeat until the destination pointer reaches destN (tested after the first copy) */
+                              : "=&r" ( discard_source ), /* %0: running source pointer */
+                                "=&r" ( discard_dest ), /* %1: running destination pointer */
+                                "=&r" ( discard_source_i ), /* %2: scratch element */
+                                "=m" ( *dest ) /* %3: tells the compiler the destination is written */
+                              : "r" ( destN ), /* %4 */
+                                "i" ( sizeof ( unsigned long ) ), /* %5: element size as an immediate */
+                                "0" ( source0 ), "1" ( dest0 ) ); /* NOTE(review): source memory is read but is not listed as an input operand - confirm this is intended */
 }
 
 /**
index 83a0923cfbc13d74eb68ea1dcb073e4c6ad75c97..57d64832f0daae6cf1ae09494e528550c574b891 100644 (file)
@@ -8,8 +8,6 @@
 
 FILE_LICENCE ( GPL2_OR_LATER_OR_UBDL );
 
-#include <stdint.h>
-
 /** Element of a big integer */
 typedef unsigned long bigint_element_t;
 
index 790be4c1cca6214ca027cd8d3c42ef0728ef27ea..ababde0c7b689af036ac4f25ddc36c65a24817d6 100644 (file)
@@ -10,6 +10,7 @@ FILE_LICENCE ( GPL2_OR_LATER_OR_UBDL );
 FILE_SECBOOT ( PERMITTED );
 
 #include <assert.h>
+#include <stdint.h>
 
 /**
  * Define a big-integer type