From: Niels Möller
Date: Wed, 28 Jan 2026 13:20:37 +0000 (+0100)
Subject: More consistent entry point alignment in asm files.
X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=refs%2Fheads%2Ffix-asm-align;p=thirdparty%2Fnettle.git

More consistent entry point alignment in asm files.
---

diff --git a/ChangeLog b/ChangeLog
index 077ece28..7a27d756 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,10 @@
+2026-01-28  Niels Möller
+
+	* arm64: Add alignment at each function prologue.
+	* powerpc64/p8/sha256-compress-n.asm: Add consistent alignment.
+	* x86/sha1-compress.asm: Likewise.
+	* x86_64: Likewise, several files.
+
 2026-01-24  Niels Möller
 
 	Copy files from https://git.savannah.gnu.org/cgit/config.git/plain/
diff --git a/arm64/chacha-2core.asm b/arm64/chacha-2core.asm
index e68c5364..f9454dc2 100644
--- a/arm64/chacha-2core.asm
+++ b/arm64/chacha-2core.asm
@@ -64,6 +64,8 @@ define(`TMP0', `v30')
 define(`TMP1', `v31')
 
 C _chacha_2core(uint32_t *dst, const uint32_t *src, unsigned rounds)
 
+	.text
+	ALIGN(16)
 PROLOGUE(_nettle_chacha_2core)
 	eor	X1.16b, X1.16b, X1.16b
@@ -218,6 +220,8 @@ C Y3 A15 B15 A13 B13 X3 A12 B12 A14 B14 (Y3 swapped)
 	ret
 EPILOGUE(_nettle_chacha_2core)
 
+	.text
+	ALIGN(16)
 PROLOGUE(_nettle_chacha_2core32)
 	eor	Y3.16b, Y3.16b, Y3.16b	C {0,0,...,0}
 	mov	w3, #1
diff --git a/arm64/chacha-4core.asm b/arm64/chacha-4core.asm
index 12213126..0ec81e44 100644
--- a/arm64/chacha-4core.asm
+++ b/arm64/chacha-4core.asm
@@ -136,6 +136,8 @@ define(`TRANSPOSE',`
 ')
 
 C _chacha_4core(uint32_t *dst, const uint32_t *src, unsigned rounds)
 
+	.text
+	ALIGN(16)
 PROLOGUE(_nettle_chacha_4core)
 	mov	w3, #1
@@ -225,6 +227,8 @@ C Load state and splat
 	ret
 EPILOGUE(_nettle_chacha_4core)
 
+	.text
+	ALIGN(16)
 PROLOGUE(_nettle_chacha_4core32)
 	eor	TMP2.16b, TMP2.16b, TMP2.16b	C Ignore counter carries
 	b	.Lshared_entry
diff --git a/arm64/chacha-core-internal.asm b/arm64/chacha-core-internal.asm
index 9b70e0dc..555f9021 100644
--- a/arm64/chacha-core-internal.asm
+++ b/arm64/chacha-core-internal.asm
@@ -81,6 +81,7 @@ define(`QROUND', `
 
 	.text
 C _chacha_core(uint32_t *dst, const uint32_t *src, unsigned rounds)
+	ALIGN(16)
 PROLOGUE(_nettle_chacha_core)
 	adr	x3, .Lrot24
 	ld1	{ROT24.4s},[x3]
diff --git a/arm64/crypto/aes128-decrypt.asm b/arm64/crypto/aes128-decrypt.asm
index 70f2a64e..6b5e7ae8 100644
--- a/arm64/crypto/aes128-decrypt.asm
+++ b/arm64/crypto/aes128-decrypt.asm
@@ -62,6 +62,8 @@ C aes128_decrypt(const struct aes128_ctx *ctx,
 C		 size_t length, uint8_t *dst,
 C		 const uint8_t *src)
 
+	.text
+	ALIGN(16)
 PROLOGUE(nettle_aes128_decrypt)
 	ld1	{K0.4s,K1.4s,K2.4s,K3.4s},[KEYS],#64
 	ld1	{K4.4s,K5.4s,K6.4s,K7.4s},[KEYS],#64
diff --git a/arm64/crypto/aes128-encrypt.asm b/arm64/crypto/aes128-encrypt.asm
index 3d9b9d90..66c40b7e 100644
--- a/arm64/crypto/aes128-encrypt.asm
+++ b/arm64/crypto/aes128-encrypt.asm
@@ -62,6 +62,8 @@ C aes128_encrypt(const struct aes128_ctx *ctx,
 C		 size_t length, uint8_t *dst,
 C		 const uint8_t *src)
 
+	.text
+	ALIGN(16)
 PROLOGUE(nettle_aes128_encrypt)
 	ld1	{K0.4s,K1.4s,K2.4s,K3.4s},[KEYS],#64
 	ld1	{K4.4s,K5.4s,K6.4s,K7.4s},[KEYS],#64
diff --git a/arm64/crypto/aes192-decrypt.asm b/arm64/crypto/aes192-decrypt.asm
index b8eb30fd..01f1e2d5 100644
--- a/arm64/crypto/aes192-decrypt.asm
+++ b/arm64/crypto/aes192-decrypt.asm
@@ -64,6 +64,8 @@ C aes192_decrypt(const struct aes192_ctx *ctx,
 C		 size_t length, uint8_t *dst,
 C		 const uint8_t *src)
 
+	.text
+	ALIGN(16)
 PROLOGUE(nettle_aes192_decrypt)
 	ld1	{K0.4s,K1.4s,K2.4s,K3.4s},[KEYS],#64
 	ld1	{K4.4s,K5.4s,K6.4s,K7.4s},[KEYS],#64
diff --git a/arm64/crypto/aes192-encrypt.asm b/arm64/crypto/aes192-encrypt.asm
index 5e57d4a4..8eca6b11 100644
--- a/arm64/crypto/aes192-encrypt.asm
+++ b/arm64/crypto/aes192-encrypt.asm
@@ -64,6 +64,8 @@ C aes192_encrypt(const struct aes192_ctx *ctx,
 C		 size_t length, uint8_t *dst,
 C		 const uint8_t *src)
 
+	.text
+	ALIGN(16)
 PROLOGUE(nettle_aes192_encrypt)
 	ld1	{K0.4s,K1.4s,K2.4s,K3.4s},[KEYS],#64
 	ld1	{K4.4s,K5.4s,K6.4s,K7.4s},[KEYS],#64
diff --git a/arm64/crypto/aes256-decrypt.asm b/arm64/crypto/aes256-decrypt.asm
index dc163315..e855a0aa 100644
--- a/arm64/crypto/aes256-decrypt.asm
+++ b/arm64/crypto/aes256-decrypt.asm
@@ -66,6 +66,8 @@ C aes256_decrypt(const struct aes256_ctx *ctx,
 C		 size_t length, uint8_t *dst,
 C		 const uint8_t *src)
 
+	.text
+	ALIGN(16)
 PROLOGUE(nettle_aes256_decrypt)
 	ld1	{K0.4s,K1.4s,K2.4s,K3.4s},[KEYS],#64
 	ld1	{K4.4s,K5.4s,K6.4s,K7.4s},[KEYS],#64
diff --git a/arm64/crypto/aes256-encrypt.asm b/arm64/crypto/aes256-encrypt.asm
index a6321b82..b55d2aaf 100644
--- a/arm64/crypto/aes256-encrypt.asm
+++ b/arm64/crypto/aes256-encrypt.asm
@@ -66,6 +66,8 @@ C aes256_encrypt(const struct aes256_ctx *ctx,
 C		 size_t length, uint8_t *dst,
 C		 const uint8_t *src)
 
+	.text
+	ALIGN(16)
 PROLOGUE(nettle_aes256_encrypt)
 	ld1	{K0.4s,K1.4s,K2.4s,K3.4s},[KEYS],#64
 	ld1	{K4.4s,K5.4s,K6.4s,K7.4s},[KEYS],#64
diff --git a/arm64/crypto/ghash-set-key.asm b/arm64/crypto/ghash-set-key.asm
index 7e09bb43..1c3855e6 100644
--- a/arm64/crypto/ghash-set-key.asm
+++ b/arm64/crypto/ghash-set-key.asm
@@ -120,6 +120,8 @@ define(`PMUL_PARAM', m4_assert_numargs(3)`
 	ext	$2.16b,$2.16b,$2.16b,#8
 ')
 
+	.text
+	ALIGN(16)
 PROLOGUE(_nettle_ghash_set_key)
 
 	ld1	{H.2d},[KEY]
diff --git a/arm64/crypto/ghash-update.asm b/arm64/crypto/ghash-update.asm
index b0e7ea99..f197dc5e 100644
--- a/arm64/crypto/ghash-update.asm
+++ b/arm64/crypto/ghash-update.asm
@@ -112,6 +112,8 @@ define(`PMUL_SUM', m4_assert_numargs(3)`
 C union nettle_block16 *x,
 C size_t blocks, const uint8_t *data)
 
+	.text
+	ALIGN(16)
 PROLOGUE(_nettle_ghash_update)
 	mov	x4,#0xC200000000000000
 	mov	POLY.d[0],x4
diff --git a/arm64/crypto/sha1-compress.asm b/arm64/crypto/sha1-compress.asm
index de3d7b7e..050a8447 100644
--- a/arm64/crypto/sha1-compress.asm
+++ b/arm64/crypto/sha1-compress.asm
@@ -66,6 +66,8 @@ define(`TMP', `v21')
 
 C void nettle_sha1_compress(uint32_t *state, const uint8_t *input)
 
+	.text
+	ALIGN(16)
 PROLOGUE(nettle_sha1_compress)
 	C Initialize constants
 	mov	w2,#0x7999
diff --git a/arm64/crypto/sha256-compress-n.asm b/arm64/crypto/sha256-compress-n.asm
index 447dc590..e6101e45 100644
--- a/arm64/crypto/sha256-compress-n.asm
+++ b/arm64/crypto/sha256-compress-n.asm
@@ -64,6 +64,8 @@ C const uint8_t *
 C _nettle_sha256_compress_n(uint32_t *state, const uint32_t *k,
 C			    size_t blocks, const uint8_t *input)
 
+	.text
+	ALIGN(16)
 PROLOGUE(_nettle_sha256_compress_n)
 	cbz	BLOCKS, .Lend
 
diff --git a/powerpc64/p8/sha256-compress-n.asm b/powerpc64/p8/sha256-compress-n.asm
index 60cb707a..3e788133 100644
--- a/powerpc64/p8/sha256-compress-n.asm
+++ b/powerpc64/p8/sha256-compress-n.asm
@@ -206,6 +206,7 @@ define(`DOLOADS', `
 ')
 
 .text
+define(`FUNC_ALIGN', `5')
 PROLOGUE(_nettle_sha256_compress_n)
 	cmpldi	NUMBLOCKS, 0
 	ble	.done
diff --git a/x86/sha1-compress.asm b/x86/sha1-compress.asm
index 0f728798..67d6447f 100644
--- a/x86/sha1-compress.asm
+++ b/x86/sha1-compress.asm
@@ -158,7 +158,7 @@ define(`ROUND_F3', `
 C nettle_sha1_compress(uint32_t *state, uint8_t *data)
 
 	.text
-
+	ALIGN(16)
 PROLOGUE(nettle_sha1_compress)
 	C save all registers that need to be saved
 	C 88(%esp)  data
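A note on the powerpc64 hunk above: the other ports emit an explicit ALIGN(16) before each PROLOGUE, but on powerpc64 the PROLOGUE macro applies alignment itself when FUNC_ALIGN is defined, and the value is a power-of-two exponent rather than a byte count, so 5 requests a 32-byte boundary. A minimal m4 sketch of that mechanism, assuming GAS's exponent form of .align on PowerPC; PROLOGUE_SKETCH and its body are illustrative, not the actual macro text in powerpc64/machine.m4:

	C Sketch only: how a PROLOGUE-style macro could consume FUNC_ALIGN.
	C .align N on PowerPC ELF pads to a 2^N-byte boundary, so N=5 gives 32.
	define(`PROLOGUE_SKETCH',
	`ifdef(`FUNC_ALIGN', `.align FUNC_ALIGN')
	.globl $1
$1:')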
diff --git a/x86_64/ecc-curve25519-modp.asm b/x86_64/ecc-curve25519-modp.asm
index 9c4a1b9e..19e493d4 100644
--- a/x86_64/ecc-curve25519-modp.asm
+++ b/x86_64/ecc-curve25519-modp.asm
@@ -42,6 +42,8 @@ define(`T0', `%r10')
 define(`T1', `%r11')
 define(`M', `%rbx')
 
+	.text
+	ALIGN(16)
 PROLOGUE(_nettle_ecc_curve25519_modp)
 	W64_ENTRY(3, 0)
 	push	%rbx
diff --git a/x86_64/ecc-curve448-modp.asm b/x86_64/ecc-curve448-modp.asm
index 44c3bf3e..72aed62f 100644
--- a/x86_64/ecc-curve448-modp.asm
+++ b/x86_64/ecc-curve448-modp.asm
@@ -46,6 +46,8 @@ define(`T0', `%r11')
 define(`T1', `%r12')
 define(`T2', `%r13')
 
+	.text
+	ALIGN(16)
 PROLOGUE(_nettle_ecc_curve448_modp)
 	W64_ENTRY(3, 0)
 
diff --git a/x86_64/ecc-secp224r1-modp.asm b/x86_64/ecc-secp224r1-modp.asm
index 5cbc1a5d..35b13940 100644
--- a/x86_64/ecc-secp224r1-modp.asm
+++ b/x86_64/ecc-secp224r1-modp.asm
@@ -46,6 +46,8 @@ define(`F1', `%r10')
 define(`F2', `%r11')
 
 C ecc_secp224r1_modp (const struct ecc_modulo *m, mp_limb_t *rp, mp_limb_t *xp)
+	.text
+	ALIGN(16)
 PROLOGUE(_nettle_ecc_secp224r1_modp)
 	W64_ENTRY(3, 0)
 	push	RP
diff --git a/x86_64/ecc-secp256r1-redc.asm b/x86_64/ecc-secp256r1-redc.asm
index bffa7726..73d35fec 100644
--- a/x86_64/ecc-secp256r1-redc.asm
+++ b/x86_64/ecc-secp256r1-redc.asm
@@ -65,6 +65,8 @@ define(`FOLDC', `
 	sub	F0, F2
 	sbb	F1, $1
 ')
+	.text
+	ALIGN(16)
 PROLOGUE(_nettle_ecc_secp256r1_redc)
 	W64_ENTRY(3, 0)
 
diff --git a/x86_64/ecc-secp384r1-modp.asm b/x86_64/ecc-secp384r1-modp.asm
index fbc3a2fd..03398c47 100644
--- a/x86_64/ecc-secp384r1-modp.asm
+++ b/x86_64/ecc-secp384r1-modp.asm
@@ -56,6 +56,8 @@ define(`TMP', XP)	C Overlap
 
 C void ecc_secp384r1_modp (const struct ecc_modulo *m, mp_limb_t *rp, mp_limb_t *xp)
 
+	.text
+	ALIGN(16)
 PROLOGUE(_nettle_ecc_secp384r1_modp)
 	W64_ENTRY(3, 0)
 
diff --git a/x86_64/ecc-secp521r1-modp.asm b/x86_64/ecc-secp521r1-modp.asm
index 00955fb5..5b6098ac 100644
--- a/x86_64/ecc-secp521r1-modp.asm
+++ b/x86_64/ecc-secp521r1-modp.asm
@@ -50,6 +50,8 @@ define(`U9', `%r12')
 define(`T0', `%r13')
 define(`T1', `%r14')
 
+	.text
+	ALIGN(16)
 PROLOGUE(_nettle_ecc_secp521r1_modp)
 	W64_ENTRY(3, 0)
 	push	%rbx
diff --git a/x86_64/pclmul/ghash-set-key.asm b/x86_64/pclmul/ghash-set-key.asm
index 2b680ce9..2ce04d1d 100644
--- a/x86_64/pclmul/ghash-set-key.asm
+++ b/x86_64/pclmul/ghash-set-key.asm
@@ -47,6 +47,8 @@ define(`MASK', `%xmm7')
 
 C void _ghash_set_key (struct gcm_key *ctx, const union nettle_block16 *key)
 
+	.text
+	ALIGN(16)
 PROLOGUE(_nettle_ghash_set_key)
 	W64_ENTRY(2, 8)
 	movdqa	.Lpolynomial(%rip), P
diff --git a/x86_64/pclmul/ghash-update.asm b/x86_64/pclmul/ghash-update.asm
index 917a1427..f2c72fe0 100644
--- a/x86_64/pclmul/ghash-update.asm
+++ b/x86_64/pclmul/ghash-update.asm
@@ -85,6 +85,8 @@ C registers left for temporaries.
 C union nettle_block16 *x,
 C size_t blocks, const uint8_t *data)
 
+	.text
+	ALIGN(16)
 PROLOGUE(_nettle_ghash_update)
 	W64_ENTRY(4, 14)
 	movdqa	.Lpolynomial(%rip), P
diff --git a/x86_64/poly1305-blocks.asm b/x86_64/poly1305-blocks.asm
index 63bfed3e..a67f98a7 100644
--- a/x86_64/poly1305-blocks.asm
+++ b/x86_64/poly1305-blocks.asm
@@ -48,6 +48,8 @@ define(`F1', `%r12')
 
 C const uint8_t *
 C _nettle_poly1305_blocks (struct poly1305_ctx *ctx, size_t blocks, const uint8_t *m)
+	.text
+	ALIGN(16)
 PROLOGUE(_nettle_poly1305_blocks)
 	W64_ENTRY(3, 0)
 	mov	MP_PARAM, MP
diff --git a/x86_64/poly1305-internal.asm b/x86_64/poly1305-internal.asm
index 7ce415a4..eb1ac102 100644
--- a/x86_64/poly1305-internal.asm
+++ b/x86_64/poly1305-internal.asm
@@ -94,6 +94,8 @@ C +--+--+--+
 
 C _poly1305_block (struct poly1305_ctx *ctx, const uint8_t m[16], unsigned hi)
 
+	.text
+	ALIGN(16)
 PROLOGUE(_nettle_poly1305_block)
 	W64_ENTRY(3, 0)
 	push	%r12
@@ -170,6 +172,8 @@ define(`H1', `%r10')
 define(`F0', `%r11')
 define(`F1', `%rdi')	C Overlaps CTX
 
+	.text
+	ALIGN(16)
 PROLOGUE(_nettle_poly1305_digest)
 	W64_ENTRY(2, 0)
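For readers comparing the ports: .text selects the code section and ALIGN(16) pads so the next symbol starts on a 16-byte boundary, keeping each entry point on its own instruction-fetch block. A minimal sketch of what the added pair plus PROLOGUE could expand to for one of the functions above, assuming a typical ELF/x86_64 configuration; the actual expansion comes from nettle's m4 machinery and varies by platform and object format:

	# Hypothetical expansion, for illustration only:
	.text					# select the code section
	.balign	16				# ALIGN(16): 16-byte boundary
	.globl	nettle_sha1_compress		# PROLOGUE(name) exports the symbol,
	.type	nettle_sha1_compress, @function
nettle_sha1_compress:				# ... and emits the entry label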