Update arm files.
C arm/aes-decrypt-internal.asm
-ifelse(<
+ifelse(`
Copyright (C) 2013 Niels Möller
This file is part of GNU Nettle.
You should have received copies of the GNU General Public License and
the GNU Lesser General Public License along with this program. If
not, see http://www.gnu.org/licenses/.
->)
+')
-include_src(<arm/aes.m4>)
+include_src(`arm/aes.m4')
-define(<PARAM_ROUNDS>, <r0>)
-define(<PARAM_KEYS>, <r1>)
-define(<TABLE>, <r2>)
-define(<PARAM_LENGTH>, <r3>)
+define(`PARAM_ROUNDS', `r0')
+define(`PARAM_KEYS', `r1')
+define(`TABLE', `r2')
+define(`PARAM_LENGTH', `r3')
C On stack: DST, SRC
-define(<W0>, <r4>)
-define(<W1>, <r5>)
-define(<W2>, <r6>)
-define(<W3>, <r7>)
-define(<T0>, <r8>)
-define(<COUNT>, <r10>)
-define(<KEY>, <r11>)
-
-define(<MASK>, <r0>) C Overlaps inputs, except TABLE
-define(<X0>, <r1>)
-define(<X1>, <r3>)
-define(<X2>, <r12>)
-define(<X3>, <r14>) C lr
-
-define(<FRAME_ROUNDS>, <[sp]>)
-define(<FRAME_KEYS>, <[sp, #+4]>)
-define(<FRAME_LENGTH>, <[sp, #+8]>)
+define(`W0', `r4')
+define(`W1', `r5')
+define(`W2', `r6')
+define(`W3', `r7')
+define(`T0', `r8')
+define(`COUNT', `r10')
+define(`KEY', `r11')
+
+define(`MASK', `r0') C Overlaps inputs, except TABLE
+define(`X0', `r1')
+define(`X1', `r3')
+define(`X2', `r12')
+define(`X3', `r14') C lr
+
+define(`FRAME_ROUNDS', `[sp]')
+define(`FRAME_KEYS', `[sp, #+4]')
+define(`FRAME_LENGTH', `[sp, #+8]')
C 8 saved registers
-define(<FRAME_DST>, <[sp, #+44]>)
-define(<FRAME_SRC>, <[sp, #+48]>)
+define(`FRAME_DST', `[sp, #+44]')
+define(`FRAME_SRC', `[sp, #+48]')
-define(<AES_DECRYPT_ROUND>, <
+define(`AES_DECRYPT_ROUND', `
and T0, MASK, $1, lsl #2
ldr $5, [TABLE, T0]
and T0, MASK, $2, lsl #2
eor $6, $6, $2
eor $7, $7, $3
eor $8, $8, $4
->)
+')
.file "aes-decrypt-internal.asm"
C arm/aes-encrypt-internal.asm
-ifelse(<
+ifelse(`
Copyright (C) 2013 Niels Möller
This file is part of GNU Nettle.
You should have received copies of the GNU General Public License and
the GNU Lesser General Public License along with this program. If
not, see http://www.gnu.org/licenses/.
->)
+')
-include_src(<arm/aes.m4>)
+include_src(`arm/aes.m4')
C Benchmarked at 725, 815, 990 cycles/block on cortex A9,
C for 128, 192 and 256 bit key sizes.
C Possible improvements: More efficient load and store with
C aligned accesses. Better scheduling.
-define(<PARAM_ROUNDS>, <r0>)
-define(<PARAM_KEYS>, <r1>)
-define(<TABLE>, <r2>)
-define(<PARAM_LENGTH>, <r3>)
+define(`PARAM_ROUNDS', `r0')
+define(`PARAM_KEYS', `r1')
+define(`TABLE', `r2')
+define(`PARAM_LENGTH', `r3')
C On stack: DST, SRC
-define(<W0>, <r4>)
-define(<W1>, <r5>)
-define(<W2>, <r6>)
-define(<W3>, <r7>)
-define(<T0>, <r8>)
-define(<COUNT>, <r10>)
-define(<KEY>, <r11>)
-
-define(<MASK>, <r0>) C Overlaps inputs, except TABLE
-define(<X0>, <r1>)
-define(<X1>, <r3>)
-define(<X2>, <r12>)
-define(<X3>, <r14>) C lr
-
-define(<FRAME_ROUNDS>, <[sp]>)
-define(<FRAME_KEYS>, <[sp, #+4]>)
-define(<FRAME_LENGTH>, <[sp, #+8]>)
+define(`W0', `r4')
+define(`W1', `r5')
+define(`W2', `r6')
+define(`W3', `r7')
+define(`T0', `r8')
+define(`COUNT', `r10')
+define(`KEY', `r11')
+
+define(`MASK', `r0') C Overlaps inputs, except TABLE
+define(`X0', `r1')
+define(`X1', `r3')
+define(`X2', `r12')
+define(`X3', `r14') C lr
+
+define(`FRAME_ROUNDS', `[sp]')
+define(`FRAME_KEYS', `[sp, #+4]')
+define(`FRAME_LENGTH', `[sp, #+8]')
C 8 saved registers
-define(<FRAME_DST>, <[sp, #+44]>)
-define(<FRAME_SRC>, <[sp, #+48]>)
+define(`FRAME_DST', `[sp, #+44]')
+define(`FRAME_SRC', `[sp, #+48]')
C AES_ENCRYPT_ROUND(x0,x1,x2,x3,w0,w1,w2,w3,key)
C MASK should hold the constant 0x3fc.
-define(<AES_ENCRYPT_ROUND>, <
+define(`AES_ENCRYPT_ROUND', `
and T0, MASK, $1, lsl #2
ldr $5, [TABLE, T0]
eor $6, $6, $2
eor $7, $7, $3
eor $8, $8, $4
->)
+')
.file "aes-encrypt-internal.asm"
C Loads one word, and adds it to the subkey. Uses T0
C AES_LOAD(SRC, KEY, REG)
-define(<AES_LOAD>, <
+define(`AES_LOAD', `
ldrb $3, [$1], #+1
ldrb T0, [$1], #+1
orr $3, T0, lsl #8
orr $3, T0, lsl #24
ldr T0, [$2], #+4
eor $3, T0
->)
+')
C Stores one word. Destroys input.
C AES_STORE(DST, X)
-define(<AES_STORE>, <
+define(`AES_STORE', `
strb $2, [$1], #+1
ror $2, $2, #8
strb $2, [$1], #+1
strb $2, [$1], #+1
ror $2, $2, #8
strb $2, [$1], #+1
->)
+')
C AES_FINAL_ROUND_V6(a,b,c,d,key,res)
-define(<AES_FINAL_ROUND_V6>, <
+define(`AES_FINAL_ROUND_V6', `
uxtb T0, $1
ldrb $6, [TABLE, T0]
uxtb T0, $2, ror #8
eor $6, $6, T0, lsl #24
ldr T0, [$5], #+4
eor $6, $6, T0
->)
+')
C AES_FINAL_ROUND_V5(a,b,c,d,key,res,mask)
C Avoids the uxtb instruction, introduced in ARMv6.
C The mask argument should hold the constant 0xff
-define(<AES_FINAL_ROUND_V5>, <
+define(`AES_FINAL_ROUND_V5', `
and T0, $7, $1
ldrb $6, [TABLE, T0]
and T0, $7, $2, ror #8
eor $6, $6, T0, lsl #24
ldr T0, [$5], #+4
eor $6, T0
->)
+')
C arm/ecc-secp192r1-modp.asm
-ifelse(<
+ifelse(`
Copyright (C) 2013 Niels Möller
This file is part of GNU Nettle.
You should have received copies of the GNU General Public License and
the GNU Lesser General Public License along with this program. If
not, see http://www.gnu.org/licenses/.
->)
+')
.file "ecc-secp192r1-modp.asm"
.arm
-define(<HP>, <r0>) C Overlaps unused modulo argument
-define(<RP>, <r1>)
-
-define(<T0>, <r2>)
-define(<T1>, <r3>)
-define(<T2>, <r4>)
-define(<T3>, <r5>)
-define(<T4>, <r6>)
-define(<T5>, <r7>)
-define(<T6>, <r8>)
-define(<T7>, <r10>)
-define(<H0>, <T0>) C Overlaps T0 and T1
-define(<H1>, <T1>)
-define(<C2>, <HP>)
-define(<C4>, <r12>)
+define(`HP', `r0') C Overlaps unused modulo argument
+define(`RP', `r1')
+
+define(`T0', `r2')
+define(`T1', `r3')
+define(`T2', `r4')
+define(`T3', `r5')
+define(`T4', `r6')
+define(`T5', `r7')
+define(`T6', `r8')
+define(`T7', `r10')
+define(`H0', `T0') C Overlaps T0 and T1
+define(`H1', `T1')
+define(`C2', `HP')
+define(`C4', `r12')
C ecc_secp192r1_modp (const struct ecc_modulo *m, mp_limb_t *rp)
.text
C arm/ecc-secp224r1-modp.asm
-ifelse(<
+ifelse(`
Copyright (C) 2013 Niels Möller
This file is part of GNU Nettle.
You should have received copies of the GNU General Public License and
the GNU Lesser General Public License along with this program. If
not, see http://www.gnu.org/licenses/.
->)
+')
.file "ecc-secp224r1-modp.asm"
.arm
-define(<RP>, <r1>)
-define(<H>, <r0>) C Overlaps unused modulo argument
-
-define(<T0>, <r2>)
-define(<T1>, <r3>)
-define(<T2>, <r4>)
-define(<T3>, <r5>)
-define(<T4>, <r6>)
-define(<T5>, <r7>)
-define(<T6>, <r8>)
-define(<N3>, <r10>)
-define(<L0>, <r11>)
-define(<L1>, <r12>)
-define(<L2>, <lr>)
+define(`RP', `r1')
+define(`H', `r0') C Overlaps unused modulo argument
+
+define(`T0', `r2')
+define(`T1', `r3')
+define(`T2', `r4')
+define(`T3', `r5')
+define(`T4', `r6')
+define(`T5', `r7')
+define(`T6', `r8')
+define(`N3', `r10')
+define(`L0', `r11')
+define(`L1', `r12')
+define(`L2', `lr')
C ecc_secp224r1_modp (const struct ecc_modulo *m, mp_limb_t *rp)
.text
C arm/ecc-secp256r1-redc.asm
-ifelse(<
+ifelse(`
Copyright (C) 2013 Niels Möller
This file is part of GNU Nettle.
You should have received copies of the GNU General Public License and
the GNU Lesser General Public License along with this program. If
not, see http://www.gnu.org/licenses/.
->)
+')
.file "ecc-secp256r1-redc.asm"
.arm
-define(<RP>, <r1>)
-
-define(<T0>, <r0>) C Overlaps unused modulo argument
-define(<T1>, <r2>)
-define(<T2>, <r3>)
-define(<T3>, <r4>)
-define(<T4>, <r5>)
-define(<T5>, <r6>)
-define(<T6>, <r7>)
-define(<T7>, <r8>)
-define(<F0>, <r10>)
-define(<F1>, <r11>)
-define(<F2>, <r12>)
-define(<F3>, <lr>)
+define(`RP', `r1')
+
+define(`T0', `r0') C Overlaps unused modulo argument
+define(`T1', `r2')
+define(`T2', `r3')
+define(`T3', `r4')
+define(`T4', `r5')
+define(`T5', `r6')
+define(`T6', `r7')
+define(`T7', `r8')
+define(`F0', `r10')
+define(`F1', `r11')
+define(`F2', `r12')
+define(`F3', `lr')
C ecc_secp256r1_redc (const struct ecc_modulo *m, mp_limb_t *rp)
.text
C arm/ecc-secp384r1-modp.asm
-ifelse(<
+ifelse(`
Copyright (C) 2013 Niels Möller
This file is part of GNU Nettle.
You should have received copies of the GNU General Public License and
the GNU Lesser General Public License along with this program. If
not, see http://www.gnu.org/licenses/.
->)
+')
.file "ecc-secp384r1-modp.asm"
.arm
-define(<RP>, <r1>)
-define(<T0>, <r0>)
-define(<T1>, <r2>)
-define(<T2>, <r3>)
-define(<T3>, <r4>)
-define(<F0>, <r5>)
-define(<F1>, <r6>)
-define(<F2>, <r7>)
-define(<F3>, <r8>)
-define(<F4>, <r10>)
-define(<N>, <r12>)
-define(<H>, <lr>)
+define(`RP', `r1')
+define(`T0', `r0')
+define(`T1', `r2')
+define(`T2', `r3')
+define(`T3', `r4')
+define(`F0', `r5')
+define(`F1', `r6')
+define(`F2', `r7')
+define(`F3', `r8')
+define(`F4', `r10')
+define(`N', `r12')
+define(`H', `lr')
C ecc_secp384r1_modp (const struct ecc_modulo *m, mp_limb_t *rp)
.text
C arm/ecc-secp521r1-modp.asm
-ifelse(<
+ifelse(`
Copyright (C) 2013 Niels Möller
This file is part of GNU Nettle.
You should have received copies of the GNU General Public License and
the GNU Lesser General Public License along with this program. If
not, see http://www.gnu.org/licenses/.
->)
+')
.file "ecc-secp521r1-modp.asm"
.arm
-define(<HP>, <r0>)
-define(<RP>, <r1>)
-define(<T0>, <r2>)
-define(<T1>, <r3>)
-define(<T2>, <r4>)
-define(<F0>, <r5>)
-define(<F1>, <r6>)
-define(<F2>, <r7>)
-define(<F3>, <r8>)
-define(<H>, <r12>)
-define(<N>, <lr>)
+define(`HP', `r0')
+define(`RP', `r1')
+define(`T0', `r2')
+define(`T1', `r3')
+define(`T2', `r4')
+define(`F0', `r5')
+define(`F1', `r6')
+define(`F2', `r7')
+define(`F3', `r8')
+define(`H', `r12')
+define(`N', `lr')
C ecc_secp521r1_modp (const struct ecc_modulo *m, mp_limb_t *rp)
.text
C arm/fat/aes-decrypt-internal-2.asm
-ifelse(<
+ifelse(`
Copyright (C) 2015 Niels Möller
This file is part of GNU Nettle.
You should have received copies of the GNU General Public License and
the GNU Lesser General Public License along with this program. If
not, see http://www.gnu.org/licenses/.
->)
+')
-define(<fat_transform>, <$1_armv6>)
-include_src(<arm/v6/aes-decrypt-internal.asm>)
+define(`fat_transform', `$1_armv6')
+include_src(`arm/v6/aes-decrypt-internal.asm')
C arm/fat/aes-decrypt-internal.asm
-ifelse(<
+ifelse(`
Copyright (C) 2015 Niels Möller
This file is part of GNU Nettle.
You should have received copies of the GNU General Public License and
the GNU Lesser General Public License along with this program. If
not, see http://www.gnu.org/licenses/.
->)
+')
-define(<fat_transform>, <$1_arm>)
-include_src(<arm/aes-decrypt-internal.asm>)
+define(`fat_transform', `$1_arm')
+include_src(`arm/aes-decrypt-internal.asm')
C arm/fat/aes-encrypt-internal-2.asm
-ifelse(<
+ifelse(`
Copyright (C) 2015 Niels Möller
This file is part of GNU Nettle.
You should have received copies of the GNU General Public License and
the GNU Lesser General Public License along with this program. If
not, see http://www.gnu.org/licenses/.
->)
+')
-define(<fat_transform>, <$1_armv6>)
-include_src(<arm/v6/aes-encrypt-internal.asm>)
+define(`fat_transform', `$1_armv6')
+include_src(`arm/v6/aes-encrypt-internal.asm')
C arm/fat/aes-encrypt-internal.asm
-ifelse(<
+ifelse(`
Copyright (C) 2015 Niels Möller
This file is part of GNU Nettle.
You should have received copies of the GNU General Public License and
the GNU Lesser General Public License along with this program. If
not, see http://www.gnu.org/licenses/.
->)
+')
-define(<fat_transform>, <$1_arm>)
-include_src(<arm/aes-encrypt-internal.asm>)
+define(`fat_transform', `$1_arm')
+include_src(`arm/aes-encrypt-internal.asm')
C arm/fat/chacha-3core.asm
-ifelse(<
+ifelse(`
Copyright (C) 2020 Niels Möller
This file is part of GNU Nettle.
You should have received copies of the GNU General Public License and
the GNU Lesser General Public License along with this program. If
not, see http://www.gnu.org/licenses/.
->)
+')
dnl PROLOGUE(_nettle_fat_chacha_3core) picked up by configure
-include_src(<arm/neon/chacha-3core.asm>)
+include_src(`arm/neon/chacha-3core.asm')
C arm/fat/chacha-core-internal-2.asm
-ifelse(<
+ifelse(`
Copyright (C) 2015 Niels Möller
This file is part of GNU Nettle.
You should have received copies of the GNU General Public License and
the GNU Lesser General Public License along with this program. If
not, see http://www.gnu.org/licenses/.
->)
+')
dnl PROLOGUE(_nettle_chacha_core) picked up by configure
-define(<fat_transform>, <$1_neon>)
-include_src(<arm/neon/chacha-core-internal.asm>)
+define(`fat_transform', `$1_neon')
+include_src(`arm/neon/chacha-core-internal.asm')
C arm/fat/salsa20-2core.asm
-ifelse(<
+ifelse(`
Copyright (C) 2020 Niels Möller
This file is part of GNU Nettle.
You should have received copies of the GNU General Public License and
the GNU Lesser General Public License along with this program. If
not, see http://www.gnu.org/licenses/.
->)
+')
dnl PROLOGUE(_nettle_fat_salsa20_2core) picked up by configure
-include_src(<arm/neon/salsa20-2core.asm>)
+include_src(`arm/neon/salsa20-2core.asm')
C arm/fat/salsa20-core-internal-2.asm
-ifelse(<
+ifelse(`
Copyright (C) 2015 Niels Möller
This file is part of GNU Nettle.
You should have received copies of the GNU General Public License and
the GNU Lesser General Public License along with this program. If
not, see http://www.gnu.org/licenses/.
->)
+')
dnl PROLOGUE(_nettle_salsa20_core) picked up by configure
-define(<fat_transform>, <$1_neon>)
-include_src(<arm/neon/salsa20-core-internal.asm>)
+define(`fat_transform', `$1_neon')
+include_src(`arm/neon/salsa20-core-internal.asm')
C arm/fat/sha1-compress-2.asm
-ifelse(<
+ifelse(`
Copyright (C) 2015 Niels Möller
This file is part of GNU Nettle.
You should have received copies of the GNU General Public License and
the GNU Lesser General Public License along with this program. If
not, see http://www.gnu.org/licenses/.
->)
+')
dnl PROLOGUE(nettle_sha1_compress) picked up by configure
-define(<fat_transform>, <_$1_armv6>)
-include_src(<arm/v6/sha1-compress.asm>)
+define(`fat_transform', `_$1_armv6')
+include_src(`arm/v6/sha1-compress.asm')
C arm/fat/sha256-compress-2.asm
-ifelse(<
+ifelse(`
Copyright (C) 2015 Niels Möller
This file is part of GNU Nettle.
You should have received copies of the GNU General Public License and
the GNU Lesser General Public License along with this program. If
not, see http://www.gnu.org/licenses/.
->)
+')
dnl PROLOGUE(_nettle_sha256_compress) picked up by configure
-define(<fat_transform>, <$1_armv6>)
-include_src(<arm/v6/sha256-compress.asm>)
+define(`fat_transform', `$1_armv6')
+include_src(`arm/v6/sha256-compress.asm')
C arm/fat/sha3-permute-2.asm
-ifelse(<
+ifelse(`
Copyright (C) 2015 Niels Möller
This file is part of GNU Nettle.
You should have received copies of the GNU General Public License and
the GNU Lesser General Public License along with this program. If
not, see http://www.gnu.org/licenses/.
->)
+')
dnl PROLOGUE(nettle_sha3_permute) picked up by configure
-define(<fat_transform>, <_$1_neon>)
-include_src(<arm/neon/sha3-permute.asm>)
+define(`fat_transform', `_$1_neon')
+include_src(`arm/neon/sha3-permute.asm')
C arm/fat/sha512-compress-2.asm
-ifelse(<
+ifelse(`
Copyright (C) 2015 Niels Möller
This file is part of GNU Nettle.
You should have received copies of the GNU General Public License and
the GNU Lesser General Public License along with this program. If
not, see http://www.gnu.org/licenses/.
->)
+')
dnl PROLOGUE(_nettle_sha512_compress) picked up by configure
-define(<fat_transform>, <$1_neon>)
-include_src(<arm/neon/sha512-compress.asm>)
+define(`fat_transform', `$1_neon')
+include_src(`arm/neon/sha512-compress.asm')
C arm/fat/umac-nh-2.asm
-ifelse(<
+ifelse(`
Copyright (C) 2015 Niels Möller
This file is part of GNU Nettle.
You should have received copies of the GNU General Public License and
the GNU Lesser General Public License along with this program. If
not, see http://www.gnu.org/licenses/.
->)
+')
dnl PROLOGUE(_nettle_umac_nh) picked up by configure
-define(<fat_transform>, <$1_neon>)
-include_src(<arm/neon/umac-nh.asm>)
+define(`fat_transform', `$1_neon')
+include_src(`arm/neon/umac-nh.asm')
C arm/fat/umac-nh-n-2.asm
-ifelse(<
+ifelse(`
Copyright (C) 2015 Niels Möller
This file is part of GNU Nettle.
You should have received copies of the GNU General Public License and
the GNU Lesser General Public License along with this program. If
not, see http://www.gnu.org/licenses/.
->)
+')
dnl PROLOGUE(_nettle_umac_nh_n) picked up by configure
-define(<fat_transform>, <$1_neon>)
-include_src(<arm/neon/umac-nh-n.asm>)
+define(`fat_transform', `$1_neon')
+include_src(`arm/neon/umac-nh-n.asm')
-define(<QREG>, <ifelse(
+define(`QREG', `ifelse(
$1, d0, q0,
$1, d2, q1,
$1, d4, q2,
$1, d26, q13,
$1, d28, q14,
$1, d30, q15,
- <NO REGISTER>)>)dnl
+ `NO REGISTER')')dnl
-define(<D0REG>, <ifelse(
+define(`D0REG', `ifelse(
$1, q0, d0,
$1, q1, d2,
$1, q2, d4,
$1, q13, d26,
$1, q14, d28,
$1, q15, d30,
- <NO REGISTER>)>)dnl
+ `NO REGISTER')')dnl
-define(<D1REG>, <ifelse(
+define(`D1REG', `ifelse(
$1, q0, d1,
$1, q1, d3,
$1, q2, d5,
$1, q13, d27,
$1, q14, d29,
$1, q15, d31,
- <NO REGISTER>)>)dnl
+ `NO REGISTER')')dnl
C arm/memxor.asm
-ifelse(<
+ifelse(`
Copyright (C) 2013 Niels Möller
This file is part of GNU Nettle.
You should have received copies of the GNU General Public License and
the GNU Lesser General Public License along with this program. If
not, see http://www.gnu.org/licenses/.
->)
+')
C Possible speedups:
C
C Register usage:
-define(<DST>, <r0>)
-define(<SRC>, <r1>)
-define(<N>, <r2>)
-define(<CNT>, <r6>)
-define(<TNC>, <r12>)
+define(`DST', `r0')
+define(`SRC', `r1')
+define(`N', `r2')
+define(`CNT', `r6')
+define(`TNC', `r12')
C little-endian and big-endian need to shift in different directions for
C alignment correction
-define(<S0ADJ>, IF_LE(<lsr>, <lsl>))
-define(<S1ADJ>, IF_LE(<lsl>, <lsr>))
+define(`S0ADJ', IF_LE(`lsr', `lsl'))
+define(`S1ADJ', IF_LE(`lsl', `lsr'))
.syntax unified
C Store bytes, one by one.
.Lmemxor_leftover:
C bring uppermost byte down for saving while preserving lower ones
-IF_BE(< ror r3, #24>)
+IF_BE(` ror r3, #24')
strb r3, [DST], #+1
subs N, #1
beq .Lmemxor_done
subs TNC, #8
C bring down next byte, no need to preserve
-IF_LE(< lsr r3, #8>)
+IF_LE(` lsr r3, #8')
bne .Lmemxor_leftover
b .Lmemxor_bytes
.Lmemxor_odd_done:
C arm/memxor3.asm
-ifelse(<
+ifelse(`
Copyright (C) 2013, 2015 Niels Möller
This file is part of GNU Nettle.
You should have received copies of the GNU General Public License and
the GNU Lesser General Public License along with this program. If
not, see http://www.gnu.org/licenses/.
->)
+')
C Possible speedups:
C
C Register usage:
-define(<DST>, <r0>)
-define(<AP>, <r1>)
-define(<BP>, <r2>)
-define(<N>, <r3>)
+define(`DST', `r0')
+define(`AP', `r1')
+define(`BP', `r2')
+define(`N', `r3')
C Temporaries r4-r7
-define(<ACNT>, <r8>)
-define(<ATNC>, <r10>)
-define(<BCNT>, <r11>)
-define(<BTNC>, <r12>)
+define(`ACNT', `r8')
+define(`ATNC', `r10')
+define(`BCNT', `r11')
+define(`BTNC', `r12')
C little-endian and big-endian need to shift in different directions for
C alignment correction
-define(<S0ADJ>, IF_LE(<lsr>, <lsl>))
-define(<S1ADJ>, IF_LE(<lsl>, <lsr>))
+define(`S0ADJ', IF_LE(`lsr', `lsl'))
+define(`S1ADJ', IF_LE(`lsl', `lsr'))
.syntax unified
.Lmemxor3_au_leftover:
C Store a byte at a time
C bring uppermost byte down for saving while preserving lower ones
-IF_LE(< ror r4, #24>)
+IF_LE(` ror r4, #24')
strb r4, [DST, #-1]!
subs N, #1
beq .Lmemxor3_done
subs ACNT, #8
C bring down next byte, no need to preserve
-IF_BE(< lsr r4, #8>)
+IF_BE(` lsr r4, #8')
sub AP, #1
bne .Lmemxor3_au_leftover
b .Lmemxor3_bytes
C Leftover bytes in r4, low end on LE and high end on BE before
C preparatory alignment correction
-IF_LE(< ror r4, ACNT>)
-IF_BE(< ror r4, ATNC>)
+IF_LE(` ror r4, ACNT')
+IF_BE(` ror r4, ATNC')
C now byte-aligned in high end on LE and low end on BE because we're
C working downwards in saving the very first bytes of the buffer
.Lmemxor3_uu_leftover:
C bring uppermost byte down for saving while preserving lower ones
-IF_LE(< ror r4, #24>)
+IF_LE(` ror r4, #24')
strb r4, [DST, #-1]!
subs N, #1
beq .Lmemxor3_done
subs ACNT, #8
C bring down next byte, no need to preserve
-IF_BE(< lsr r4, #8>)
+IF_BE(` lsr r4, #8')
bne .Lmemxor3_uu_leftover
b .Lmemxor3_bytes
C arm/neon/chacha-3core.asm
-ifelse(<
+ifelse(`
Copyright (C) 2020 Niels Möller
This file is part of GNU Nettle.
You should have received copies of the GNU General Public License and
the GNU Lesser General Public License along with this program. If
not, see http://www.gnu.org/licenses/.
->)
+')
.file "chacha-3core.asm"
.fpu neon
-define(<DST>, <r0>)
-define(<SRC>, <r1>)
-define(<ROUNDS>, <r2>)
+define(`DST', `r0')
+define(`SRC', `r1')
+define(`ROUNDS', `r2')
C State, X, Y and Z representing consecutive blocks
-define(<X0>, <q0>)
-define(<X1>, <q1>)
-define(<X2>, <q2>)
-define(<X3>, <q3>)
-define(<Y0>, <q8>)
-define(<Y1>, <q9>)
-define(<Y2>, <q10>)
-define(<Y3>, <q11>)
-define(<Z0>, <q12>)
-define(<Z1>, <q13>)
-define(<Z2>, <q14>)
-define(<Z3>, <q15>)
-
-define(<T0>, <q4>)
-define(<T1>, <q5>)
-define(<T2>, <q6>)
-define(<T3>, <q7>)
+define(`X0', `q0')
+define(`X1', `q1')
+define(`X2', `q2')
+define(`X3', `q3')
+define(`Y0', `q8')
+define(`Y1', `q9')
+define(`Y2', `q10')
+define(`Y3', `q11')
+define(`Z0', `q12')
+define(`Z1', `q13')
+define(`Z2', `q14')
+define(`Z3', `q15')
+
+define(`T0', `q4')
+define(`T1', `q5')
+define(`T2', `q6')
+define(`T3', `q7')
.text
.align 4
C arm/neon/chacha-core-internal.asm
-ifelse(<
+ifelse(`
Copyright (C) 2013, 2015 Niels Möller
This file is part of GNU Nettle.
You should have received copies of the GNU General Public License and
the GNU Lesser General Public License along with this program. If
not, see http://www.gnu.org/licenses/.
->)
+')
.file "chacha-core-internal.asm"
.fpu neon
-define(<DST>, <r0>)
-define(<SRC>, <r1>)
-define(<ROUNDS>, <r2>)
-
-define(<X0>, <q0>)
-define(<X1>, <q1>)
-define(<X2>, <q2>)
-define(<X3>, <q3>)
-define(<T0>, <q8>)
-define(<S0>, <q12>)
-define(<S1>, <q13>)
-define(<S2>, <q14>)
-define(<S3>, <q15>)
-
-define(<QROUND>, <
+define(`DST', `r0')
+define(`SRC', `r1')
+define(`ROUNDS', `r2')
+
+define(`X0', `q0')
+define(`X1', `q1')
+define(`X2', `q2')
+define(`X3', `q3')
+define(`T0', `q8')
+define(`S0', `q12')
+define(`S1', `q13')
+define(`S2', `q14')
+define(`S3', `q15')
+
+define(`QROUND', `
C x0 += x1, x3 ^= x0, x3 lrot 16
C x2 += x3, x1 ^= x2, x1 lrot 12
C x0 += x1, x3 ^= x0, x3 lrot 8
vshl.i32 T0, $2, #7
vshr.u32 $2, $2, #25
veor $2, $2, T0
->)
+')
.text
.align 4
C 12 15 14 13 >>> 3
C different number of elements needs to be
C extracted on BE because of different column order
-IF_LE(< vext.32 X1, X1, X1, #1>)
-IF_BE(< vext.32 X1, X1, X1, #3>)
+IF_LE(` vext.32 X1, X1, X1, #1')
+IF_BE(` vext.32 X1, X1, X1, #3')
vext.32 X2, X2, X2, #2
-IF_LE(< vext.32 X3, X3, X3, #3>)
-IF_BE(< vext.32 X3, X3, X3, #1>)
+IF_LE(` vext.32 X3, X3, X3, #3')
+IF_BE(` vext.32 X3, X3, X3, #1')
QROUND(X0, X1, X2, X3)
subs ROUNDS, ROUNDS, #2
C Inverse rotation
-IF_LE(< vext.32 X1, X1, X1, #3>)
-IF_BE(< vext.32 X1, X1, X1, #1>)
+IF_LE(` vext.32 X1, X1, X1, #3')
+IF_BE(` vext.32 X1, X1, X1, #1')
vext.32 X2, X2, X2, #2
-IF_LE(< vext.32 X3, X3, X3, #1>)
-IF_BE(< vext.32 X3, X3, X3, #3>)
+IF_LE(` vext.32 X3, X3, X3, #1')
+IF_BE(` vext.32 X3, X3, X3, #3')
bhi .Loop
vadd.u32 X3, X3, S3
C caller expects result little-endian
-IF_BE(< vrev32.u8 X0, X0
+IF_BE(` vrev32.u8 X0, X0
vrev32.u8 X1, X1
vrev32.u8 X2, X2
- vrev32.u8 X3, X3>)
+ vrev32.u8 X3, X3')
vstm DST, {X0,X1,X2,X3}
bx lr
C arm/neon/salsa20-2core.asm
-ifelse(<
+ifelse(`
Copyright (C) 2020 Niels Möller
This file is part of GNU Nettle.
You should have received copies of the GNU General Public License and
the GNU Lesser General Public License along with this program. If
not, see http://www.gnu.org/licenses/.
->)
+')
.file "salsa20-2core.asm"
.fpu neon
-define(<DST>, <r0>)
-define(<SRC>, <r1>)
-define(<ROUNDS>, <r2>)
+define(`DST', `r0')
+define(`SRC', `r1')
+define(`ROUNDS', `r2')
C State, even elements in X, odd elements in Y
-define(<X0>, <q0>)
-define(<X1>, <q1>)
-define(<X2>, <q2>)
-define(<X3>, <q3>)
-define(<Y0>, <q8>)
-define(<Y1>, <q9>)
-define(<Y2>, <q10>)
-define(<Y3>, <q11>)
-define(<T0>, <q12>)
-define(<T1>, <q13>)
-define(<T2>, <q14>)
-define(<T3>, <q15>)
+define(`X0', `q0')
+define(`X1', `q1')
+define(`X2', `q2')
+define(`X3', `q3')
+define(`Y0', `q8')
+define(`Y1', `q9')
+define(`Y2', `q10')
+define(`Y3', `q11')
+define(`T0', `q12')
+define(`T1', `q13')
+define(`T2', `q14')
+define(`T3', `q15')
.text
.align 4
C arm/neon/salsa20-core-internal.asm
-ifelse(<
+ifelse(`
Copyright (C) 2013 Niels Möller
This file is part of GNU Nettle.
You should have received copies of the GNU General Public License and
the GNU Lesser General Public License along with this program. If
not, see http://www.gnu.org/licenses/.
->)
+')
.file "salsa20-core-internal.asm"
.fpu neon
-define(<DST>, <r0>)
-define(<SRC>, <r1>)
-define(<ROUNDS>, <r2>)
-
-define(<X0>, <q0>)
-define(<X1>, <q1>)
-define(<X2>, <q2>)
-define(<X3>, <q3>)
-define(<T0>, <q8>)
-define(<T1>, <q9>)
-define(<M0101>, <q10>)
-define(<M0110>, <q11>)
-define(<M0011>, <q12>)
-define(<S1>, <q13>)
-define(<S2>, <q14>)
-define(<S3>, <q15>)
+define(`DST', `r0')
+define(`SRC', `r1')
+define(`ROUNDS', `r2')
+
+define(`X0', `q0')
+define(`X1', `q1')
+define(`X2', `q2')
+define(`X3', `q3')
+define(`T0', `q8')
+define(`T1', `q9')
+define(`M0101', `q10')
+define(`M0110', `q11')
+define(`M0011', `q12')
+define(`S1', `q13')
+define(`S2', `q14')
+define(`S3', `q15')
-define(<QROUND>, <
+define(`QROUND', `
vadd.i32 T0, $1, $4
vshl.i32 T1, T0, #7
vshr.u32 T0, T0, #25
vshr.u32 T0, T0, #14
veor $1, $1, T0
veor $1, $1, T1
->)
+')
.text
.align 4
C 6 1 12 11 >>> 1
C different number of elements needs to be
C extracted on BE because of different column order
-IF_LE(< vext.32 X1, X1, X1, #3>)
-IF_BE(< vext.32 X1, X1, X1, #1>)
+IF_LE(` vext.32 X1, X1, X1, #3')
+IF_BE(` vext.32 X1, X1, X1, #1')
vext.32 X2, X2, X2, #2
-IF_LE(< vext.32 X3, X3, X3, #1>)
-IF_BE(< vext.32 X3, X3, X3, #3>)
+IF_LE(` vext.32 X3, X3, X3, #1')
+IF_BE(` vext.32 X3, X3, X3, #3')
QROUND(X0, X3, X2, X1)
subs ROUNDS, ROUNDS, #2
C Inverse rotation
-IF_LE(< vext.32 X1, X1, X1, #1>)
-IF_BE(< vext.32 X1, X1, X1, #3>)
+IF_LE(` vext.32 X1, X1, X1, #1')
+IF_BE(` vext.32 X1, X1, X1, #3')
vext.32 X2, X2, X2, #2
-IF_LE(< vext.32 X3, X3, X3, #3>)
-IF_BE(< vext.32 X3, X3, X3, #1>)
+IF_LE(` vext.32 X3, X3, X3, #3')
+IF_BE(` vext.32 X3, X3, X3, #1')
bhi .Loop
vadd.u32 X3, X3, S3
C caller expects result little-endian
-IF_BE(< vrev32.u8 X0, X0
+IF_BE(` vrev32.u8 X0, X0
vrev32.u8 X1, X1
vrev32.u8 X2, X2
- vrev32.u8 X3, X3>)
+ vrev32.u8 X3, X3')
vstm DST, {X0,X1,X2,X3}
bx lr
C arm/neon/sha3-permute.asm
-ifelse(<
+ifelse(`
Copyright (C) 2013 Niels Möller
This file is part of GNU Nettle.
You should have received copies of the GNU General Public License and
the GNU Lesser General Public License along with this program. If
not, see http://www.gnu.org/licenses/.
->)
+')
.file "sha3-permute.asm"
.fpu neon
-define(<CTX>, <r0>)
-define(<COUNT>, <r1>)
-define(<RC>, <r2>)
+define(`CTX', `r0')
+define(`COUNT', `r1')
+define(`RC', `r2')
C First column
-define(<A0>, <d0>)
-define(<A5>, <d2>)
-define(<A10>, <d3>)
-define(<A15>, <d4>)
-define(<A20>, <d5>)
-
-define(<A1>, <d6>)
-define(<A2>, <d7>)
-define(<A3>, <d8>)
-define(<A4>, <d9>)
-
-define(<A6>, <d16>)
-define(<A7>, <d17>)
-define(<A8>, <d18>)
-define(<A9>, <d19>)
-
-define(<A11>, <d20>)
-define(<A12>, <d21>)
-define(<A13>, <d22>)
-define(<A14>, <d23>)
-
-define(<A16>, <d24>)
-define(<A17>, <d25>)
-define(<A18>, <d26>)
-define(<A19>, <d27>)
-
-define(<A21>, <d28>)
-define(<A22>, <d29>)
-define(<A23>, <d30>)
-define(<A24>, <d31>)
-
-define(<T0>, <d10>)
-define(<T1>, <d11>)
-
-define(<C0>, <d1>)
-define(<C1>, <d12>)
-define(<C2>, <d13>)
-define(<C3>, <d14>)
-define(<C4>, <d15>)
+define(`A0', `d0')
+define(`A5', `d2')
+define(`A10', `d3')
+define(`A15', `d4')
+define(`A20', `d5')
+
+define(`A1', `d6')
+define(`A2', `d7')
+define(`A3', `d8')
+define(`A4', `d9')
+
+define(`A6', `d16')
+define(`A7', `d17')
+define(`A8', `d18')
+define(`A9', `d19')
+
+define(`A11', `d20')
+define(`A12', `d21')
+define(`A13', `d22')
+define(`A14', `d23')
+
+define(`A16', `d24')
+define(`A17', `d25')
+define(`A18', `d26')
+define(`A19', `d27')
+
+define(`A21', `d28')
+define(`A22', `d29')
+define(`A23', `d30')
+define(`A24', `d31')
+
+define(`T0', `d10')
+define(`T1', `d11')
+
+define(`C0', `d1')
+define(`C1', `d12')
+define(`C2', `d13')
+define(`C3', `d14')
+define(`C4', `d15')
C ROL(DST, SRC, COUNT)
C Must have SRC != DST
-define(<ROL>, <
+define(`ROL', `
vshr.u64 $1, $2, #eval(64-$3)
vsli.i64 $1, $2, #$3
- >)
+ ')
C sha3_permute(struct sha3_ctx *ctx)
.text
C arm/neon/sha512-compress.asm
-ifelse(<
+ifelse(`
Copyright (C) 2013 Niels Möller
This file is part of GNU Nettle.
You should have received copies of the GNU General Public License and
the GNU Lesser General Public License along with this program. If
not, see http://www.gnu.org/licenses/.
->)
+')
.file "sha512-compress.asm"
.fpu neon
-define(<STATE>, <r0>)
-define(<INPUT>, <r1>)
-define(<K>, <r2>)
-define(<COUNT>, <r3>)
-define(<SHIFT>, <r12>)
-
-define(<SA>, <d0>)
-define(<SB>, <d1>)
-define(<SC>, <d2>)
-define(<SD>, <d3>)
-define(<SE>, <d4>)
-define(<SF>, <d5>)
-define(<SG>, <d6>)
-define(<SH>, <d7>)
-define(<QSAB>, <q0>)
-define(<QSCD>, <q1>)
-define(<QSEF>, <q2>)
-define(<QSGH>, <q3>)
+define(`STATE', `r0')
+define(`INPUT', `r1')
+define(`K', `r2')
+define(`COUNT', `r3')
+define(`SHIFT', `r12')
+
+define(`SA', `d0')
+define(`SB', `d1')
+define(`SC', `d2')
+define(`SD', `d3')
+define(`SE', `d4')
+define(`SF', `d5')
+define(`SG', `d6')
+define(`SH', `d7')
+define(`QSAB', `q0')
+define(`QSCD', `q1')
+define(`QSEF', `q2')
+define(`QSGH', `q3')
C d8-d15 are callee-save
-define(<DT0>, <d8>)
-define(<DT1>, <d9>)
-define(<QT01>, <q4>)
-define(<DT2>, <d10>)
-define(<DT3>, <d11>)
-define(<QT23>, <q5>)
-define(<DT4>, <d12>)
-define(<DT5>, <d13>)
-define(<QT45>, <q6>)
+define(`DT0', `d8')
+define(`DT1', `d9')
+define(`QT01', `q4')
+define(`DT2', `d10')
+define(`DT3', `d11')
+define(`QT23', `q5')
+define(`DT4', `d12')
+define(`DT5', `d13')
+define(`QT45', `q6')
C Used only when reading the input, can overlap with state
-define(<DT6>, <d0>)
-define(<DT7>, <d1>)
-define(<QT67>, <q0>)
-
-define(<DW0>, <d16>)
-define(<DW1>, <d17>)
-define(<DW2>, <d18>)
-define(<DW3>, <d19>)
-define(<DW4>, <d20>)
-define(<DW5>, <d21>)
-define(<DW6>, <d22>)
-define(<DW7>, <d23>)
-define(<DW8>, <d24>)
-define(<DW9>, <d25>)
-define(<DW10>, <d26>)
-define(<DW11>, <d27>)
-define(<DW12>, <d28>)
-define(<DW13>, <d29>)
-define(<DW14>, <d30>)
-define(<DW15>, <d31>)
-define(<QW0001>, <q8>)
-define(<QW0203>, <q9>)
-define(<QW0405>, <q10>)
-define(<QW0607>, <q11>)
-define(<QW0809>, <q12>)
-define(<QW1011>, <q13>)
-define(<QW1213>, <q14>)
-define(<QW1415>, <q15>)
-
-define(<EXPAND_ME>, <$1>)
-define(<W>, <EXPAND_ME(<DW>eval(($1) % 16))>)
+define(`DT6', `d0')
+define(`DT7', `d1')
+define(`QT67', `q0')
+
+define(`DW0', `d16')
+define(`DW1', `d17')
+define(`DW2', `d18')
+define(`DW3', `d19')
+define(`DW4', `d20')
+define(`DW5', `d21')
+define(`DW6', `d22')
+define(`DW7', `d23')
+define(`DW8', `d24')
+define(`DW9', `d25')
+define(`DW10', `d26')
+define(`DW11', `d27')
+define(`DW12', `d28')
+define(`DW13', `d29')
+define(`DW14', `d30')
+define(`DW15', `d31')
+define(`QW0001', `q8')
+define(`QW0203', `q9')
+define(`QW0405', `q10')
+define(`QW0607', `q11')
+define(`QW0809', `q12')
+define(`QW1011', `q13')
+define(`QW1213', `q14')
+define(`QW1415', `q15')
+
+define(`EXPAND_ME', `$1')
+define(`W', `EXPAND_ME(`DW'eval(($1) % 16))')
C If x = W(i+14), y = W(i+1), we xor in parallel
C
C xor x >> 6 y >> 7
C -----------------------------
C DT0 DT1
-define(<EXPN>, <
+define(`EXPN', `
vshl.i64 DT0, W($1+14), #45
vshl.i64 DT1, W($1 + 1), #63
vshr.u64 DT2, W($1+14), #19
veor.i64 QT01, QT01, QT45
vadd.i64 W($1), W($1), DT0
vadd.i64 W($1), W($1), DT1
->)
+')
C ROUND(A,B,C,D,E,F,G,H,i)
C
C xor e >> 41 a >> 39
C ----------------------------
C DT0 DT1
-define(<ROUND>, <
+define(`ROUND', `
vshl.i64 DT0, $5, #50
vshl.i64 DT1, $1, #36
vshr.u64 DT2, $5, #14
vadd.i64 DT1, DT1, DT2
vadd.i64 $4, $4, $8
vadd.i64 $8, $8, DT1
->)
+')
C void
C _nettle_sha512_compress(uint64_t *state, const uint8_t *input, const uint64_t *k)
C arm/neon/umac-nh-n.asm
-ifelse(<
+ifelse(`
Copyright (C) 2013 Niels Möller
This file is part of GNU Nettle.
You should have received copies of the GNU General Public License and
the GNU Lesser General Public License along with this program. If
not, see http://www.gnu.org/licenses/.
->)
+')
.file "umac-nh.asm"
.fpu neon
-define(<OUT>, <r0>)
-define(<ITERS>, <r1>)
-define(<KEY>, <r2>)
-define(<LENGTH>, <r3>)
-define(<MSG>, <r12>)
-define(<SHIFT>, <r14>)
-
-define(<QA>, <q0>)
-define(<QB>, <q1>)
-define(<QY0>, <q3>) C Accumulates for the first two operations.
-define(<DM>, <d4>)
-define(<QY1>, <q4>) C Used for 3 and 4 iterations.
-define(<QC>, <q5>)
-define(<QD>, <q6>)
-define(<QLEFT>, <q8>)
-define(<QRIGHT>, <q9>)
-define(<QT0>, <q10>)
-define(<QT1>, <q11>)
-define(<QT2>, <q12>)
-define(<QK0>, <q13>)
-define(<QK1>, <q14>)
-define(<QK2>, <q15>)
+define(`OUT', `r0')
+define(`ITERS', `r1')
+define(`KEY', `r2')
+define(`LENGTH', `r3')
+define(`MSG', `r12') C ip
+define(`SHIFT', `r14') C lr
+
+define(`QA', `q0')
+define(`QB', `q1')
+define(`QY0', `q3') C Accumulates for the first two operations.
+define(`DM', `d4') C d4 is the low half of q2
+define(`QY1', `q4') C Used for 3 and 4 iterations.
+define(`QC', `q5')
+define(`QD', `q6')
+define(`QLEFT', `q8')
+define(`QRIGHT', `q9')
+define(`QT0', `q10')
+define(`QT1', `q11')
+define(`QT2', `q12')
+define(`QK0', `q13')
+define(`QK1', `q14')
+define(`QK2', `q15')
C FIXME: Try permuting subkeys using vld4, vzip or similar.
C arm/neon/umac-nh.asm
-ifelse(<
+ifelse(`
Copyright (C) 2013 Niels Möller
This file is part of GNU Nettle.
You should have received copies of the GNU General Public License and
the GNU Lesser General Public License along with this program. If
not, see http://www.gnu.org/licenses/.
->)
+')
.file "umac-nh.asm"
.fpu neon
-define(<KEY>, <r0>)
-define(<LENGTH>, <r1>)
-define(<MSG>, <r2>)
-define(<SHIFT>, <r3>)
-
-define(<QA>, <q0>)
-define(<QB>, <q1>)
-define(<DM>, <d16>)
-define(<QLEFT>, <q9>)
-define(<QRIGHT>, <q10>)
-define(<QY>, <q11>)
-define(<QT0>, <q12>)
-define(<QT1>, <q13>)
-define(<QK0>, <q14>)
-define(<QK1>, <q15>)
+define(`KEY', `r0')
+define(`LENGTH', `r1')
+define(`MSG', `r2')
+define(`SHIFT', `r3')
+
+define(`QA', `q0')
+define(`QB', `q1')
+define(`DM', `d16') C d16 is the low half of q8
+define(`QLEFT', `q9')
+define(`QRIGHT', `q10')
+define(`QY', `q11') C Result register: D0REG(QY) + D1REG(QY) is moved to r0, r1 on return
+define(`QT0', `q12')
+define(`QT1', `q13')
+define(`QK0', `q14')
+define(`QK1', `q15')
.text
.align 3
vadd.i64 D0REG(QY), D0REG(QY), D1REG(QY)
C return value needs to respect word order mandated by AAPCS
-IF_LE(< vmov r0, r1, D0REG(QY)>)
-IF_BE(< vmov r1, r0, D0REG(QY)>)
+IF_LE(` vmov r0, r1, D0REG(QY)')
+IF_BE(` vmov r1, r0, D0REG(QY)')
bx lr
EPILOGUE(_nettle_umac_nh)
C arm/v6/aes-decrypt-internal.asm
-ifelse(<
+ifelse(`
Copyright (C) 2013 Niels Möller
This file is part of GNU Nettle.
You should have received copies of the GNU General Public License and
the GNU Lesser General Public License along with this program. If
not, see http://www.gnu.org/licenses/.
->)
+')
.arch armv6
-include_src(<arm/aes.m4>)
+include_src(`arm/aes.m4')
-define(<PARAM_ROUNDS>, <r0>)
-define(<PARAM_KEYS>, <r1>)
-define(<TABLE>, <r2>)
-define(<LENGTH>, <r3>)
+define(`PARAM_ROUNDS', `r0')
+define(`PARAM_KEYS', `r1')
+define(`TABLE', `r2')
+define(`LENGTH', `r3')
C On stack: DST, SRC
-define(<W0>, <r4>)
-define(<W1>, <r5>)
-define(<W2>, <r6>)
-define(<W3>, <r7>)
-define(<T0>, <r8>)
-define(<COUNT>, <r10>)
-define(<KEY>, <r11>)
-
-define(<X0>, <r0>) C Overlaps PARAM_ROUNDS and PARAM_KEYS
-define(<X1>, <r1>)
-define(<X2>, <r12>)
-define(<X3>, <r14>) C lr
-
-define(<FRAME_ROUNDS>>, <[sp]>)
-define(<FRAME_KEYS>, <[sp, #+4]>)
+define(`W0', `r4')
+define(`W1', `r5')
+define(`W2', `r6')
+define(`W3', `r7')
+define(`T0', `r8')
+define(`COUNT', `r10') C DST is defined as an alias of COUNT
+define(`KEY', `r11')
+
+define(`X0', `r0') C Overlaps PARAM_ROUNDS and PARAM_KEYS
+define(`X1', `r1') C Same register as PARAM_KEYS
+define(`X2', `r12') C Same register as SRC
+define(`X3', `r14') C lr
+
+define(`FRAME_ROUNDS', `[sp]') C Stack slot at sp; presumably holds the saved PARAM_ROUNDS (confirm in prologue)
+define(`FRAME_KEYS', `[sp, #+4]') C Stack slot at sp + 4; presumably holds the saved PARAM_KEYS (confirm in prologue)
C 8 saved registers
-define(<FRAME_DST>, <[sp, #+40]>)
-define(<FRAME_SRC>, <[sp, #+44]>)
+define(`FRAME_DST', `[sp, #+40]')
+define(`FRAME_SRC', `[sp, #+44]')
-define(<SRC>, <r12>) C Overlap registers used in inner loop.
-define(<DST>, <COUNT>)
+define(`SRC', `r12') C Overlap registers used in inner loop.
+define(`DST', `COUNT')
C AES_DECRYPT_ROUND(x0,x1,x2,x3,w0,w1,w2,w3,key)
-define(<AES_DECRYPT_ROUND>, <
+define(`AES_DECRYPT_ROUND', `
uxtb T0, $1
ldr $5, [TABLE, T0, lsl #2]
uxtb T0, $2
eor $6, $6, $2
eor $7, $7, $3
eor $8, $8, $4
->)
+')
.file "aes-decrypt-internal.asm"
C arm/v6/aes-encrypt-internal.asm
-ifelse(<
+ifelse(`
Copyright (C) 2013 Niels Möller
This file is part of GNU Nettle.
You should have received copies of the GNU General Public License and
the GNU Lesser General Public License along with this program. If
not, see http://www.gnu.org/licenses/.
->)
+')
.arch armv6
-include_src(<arm/aes.m4>)
+include_src(`arm/aes.m4')
C Benchmarked at 706, 870, 963 cycles/block on cortex A9,
C for 128, 192 and 256 bit key sizes.
C Possible improvements: More efficient load and store with
C aligned accesses. Better scheduling.
-define(<PARAM_ROUNDS>, <r0>)
-define(<PARAM_KEYS>, <r1>)
-define(<TABLE>, <r2>)
-define(<LENGTH>, <r3>)
+define(`PARAM_ROUNDS', `r0')
+define(`PARAM_KEYS', `r1')
+define(`TABLE', `r2')
+define(`LENGTH', `r3')
C On stack: DST, SRC
-define(<W0>, <r4>)
-define(<W1>, <r5>)
-define(<W2>, <r6>)
-define(<W3>, <r7>)
-define(<T0>, <r8>)
-define(<COUNT>, <r10>)
-define(<KEY>, <r11>)
-
-define(<X0>, <r0>) C Overlaps PARAM_ROUNDS and PARAM_KEYS
-define(<X1>, <r1>)
-define(<X2>, <r12>)
-define(<X3>, <r14>) C lr
-
-define(<FRAME_ROUNDS>>, <[sp]>)
-define(<FRAME_KEYS>, <[sp, #+4]>)
+define(`W0', `r4')
+define(`W1', `r5')
+define(`W2', `r6')
+define(`W3', `r7')
+define(`T0', `r8')
+define(`COUNT', `r10') C DST is defined as an alias of COUNT
+define(`KEY', `r11')
+
+define(`X0', `r0') C Overlaps PARAM_ROUNDS and PARAM_KEYS
+define(`X1', `r1') C Same register as PARAM_KEYS
+define(`X2', `r12') C Same register as SRC
+define(`X3', `r14') C lr
+
+define(`FRAME_ROUNDS', `[sp]') C Stack slot at sp; presumably holds the saved PARAM_ROUNDS (confirm in prologue)
+define(`FRAME_KEYS', `[sp, #+4]') C Stack slot at sp + 4; presumably holds the saved PARAM_KEYS (confirm in prologue)
C 8 saved registers
-define(<FRAME_DST>, <[sp, #+40]>)
-define(<FRAME_SRC>, <[sp, #+44]>)
+define(`FRAME_DST', `[sp, #+40]')
+define(`FRAME_SRC', `[sp, #+44]')
-define(<SRC>, <r12>) C Overlap registers used in inner loop.
-define(<DST>, <COUNT>)
+define(`SRC', `r12') C Overlap registers used in inner loop.
+define(`DST', `COUNT')
C 53 instr.
C It's tempting to use eor with rotation, but that's slower.
C AES_ENCRYPT_ROUND(x0,x1,x2,x3,w0,w1,w2,w3,key)
-define(<AES_ENCRYPT_ROUND>, <
+define(`AES_ENCRYPT_ROUND', `
uxtb T0, $1
ldr $5, [TABLE, T0, lsl #2]
uxtb T0, $2
eor $6, $6, $2
eor $7, $7, $3
eor $8, $8, $4
->)
+')
.file "aes-encrypt-internal.asm"
C arm/v6/sha1-compress.asm
-ifelse(<
+ifelse(`
Copyright (C) 2013 Niels Möller
This file is part of GNU Nettle.
You should have received copies of the GNU General Public License and
the GNU Lesser General Public License along with this program. If
not, see http://www.gnu.org/licenses/.
->)
+')
.file "sha1-compress.asm"
.arch armv6
-define(<STATE>, <r0>)
-define(<INPUT>, <r1>)
-define(<SA>, <r2>)
-define(<SB>, <r3>)
-define(<SC>, <r4>)
-define(<SD>, <r5>)
-define(<SE>, <r6>)
-define(<T0>, <r7>)
-define(<SHIFT>, <r8>)
-define(<WPREV>, <r10>)
-define(<W>, <r12>)
-define(<K>, <lr>)
+define(`STATE', `r0') C First argument (state pointer in the prototype)
+define(`INPUT', `r1') C Second argument (input pointer in the prototype)
+define(`SA', `r2')
+define(`SB', `r3')
+define(`SC', `r4')
+define(`SD', `r5')
+define(`SE', `r6')
+define(`T0', `r7')
+define(`SHIFT', `r8') C Rotate count applied to each word in LOAD
+define(`WPREV', `r10') C Previously loaded input word, combined with the next one in LOAD
+define(`W', `r12') C Message word being loaded or expanded
+define(`K', `lr') C Round constant, loaded from .LK1 before the rounds
C FIXME: Could avoid a mov with even and odd variants.
-define(<LOAD>, <
+define(`LOAD', `
ldr T0, [INPUT], #+4
sel W, WPREV, T0
ror W, W, SHIFT
mov WPREV, T0
-IF_LE(< rev W, W>)
+IF_LE(` rev W, W')
str W, [SP,#eval(4*$1)]
->)
-define(<EXPN>, <
+')
+define(`EXPN', `
ldr W, [sp, #+eval(4*$1)]
ldr T0, [sp, #+eval(4*(($1 + 2) % 16))]
eor W, W, T0
eor W, W, T0
ror W, W, #31
str W, [sp, #+eval(4*$1)]
->)
+')
C F1(B,C,D) = D^(B&(C^D))
C ROUND1(A,B,C,D,E)
-define(<ROUND1>, <
+define(`ROUND1', `
eor T0, $3, $4
add $5, $5, K
and T0, T0, $2
add $5, $5, W
ror $2, $2, #2
add $5, $5, T0
->)
+')
C F2(B,C,D) = B^C^D
-define(<ROUND2>, <
+define(`ROUND2', `
eor T0, $2, $4
add $5, $5, K
eor T0, T0, $3
add $5, $5, W
ror $2, $2, #2
add $5, $5, T0
->)
+')
C F3(B,C,D) = (B&C) | (D & (B|C)) = (B & (C ^ D)) + (C & D)
-define(<ROUND3>, <
+define(`ROUND3', `
eor T0, $3, $4
add $5, $5, K
and T0, T0, $2
and T0, $3, $4
ror $2, $2, #2
add $5, $5, T0
->)
+')
C void nettle_sha1_compress(uint32_t *state, const uint8_t *input)
.text
lsl SHIFT, SHIFT, #3
mov T0, #0
movne T0, #-1
-IF_LE(< lsl W, T0, SHIFT>)
-IF_BE(< lsr W, T0, SHIFT>)
+IF_LE(` lsl W, T0, SHIFT')
+IF_BE(` lsr W, T0, SHIFT')
uadd8 T0, T0, W C Sets APSR.GE bits
C on BE rotate right by 32-SHIFT bits
C because there is no rotate left
-IF_BE(< rsb SHIFT, SHIFT, #32>)
+IF_BE(` rsb SHIFT, SHIFT, #32')
ldr K, .LK1
ldm STATE, {SA,SB,SC,SD,SE}
C arm/v6/sha256-compress.asm
-ifelse(<
+ifelse(`
Copyright (C) 2013 Niels Möller
This file is part of GNU Nettle.
You should have received copies of the GNU General Public License and
the GNU Lesser General Public License along with this program. If
not, see http://www.gnu.org/licenses/.
->)
+')
.file "sha256-compress.asm"
.arch armv6
-define(<STATE>, <r0>)
-define(<INPUT>, <r1>)
-define(<K>, <r2>)
-define(<SA>, <r3>)
-define(<SB>, <r4>)
-define(<SC>, <r5>)
-define(<SD>, <r6>)
-define(<SE>, <r7>)
-define(<SF>, <r8>)
-define(<SG>, <r10>)
-define(<SH>, <r11>)
-define(<T0>, <r12>)
-define(<T1>, <r1>) C Overlap INPUT
-define(<COUNT>, <r0>) C Overlap STATE
-define(<W>, <r14>)
+define(`STATE', `r0') C First argument (state pointer in the prototype)
+define(`INPUT', `r1') C Second argument (input pointer in the prototype)
+define(`K', `r2') C Third argument (k pointer in the prototype)
+define(`SA', `r3')
+define(`SB', `r4')
+define(`SC', `r5')
+define(`SD', `r6')
+define(`SE', `r7')
+define(`SF', `r8')
+define(`SG', `r10')
+define(`SH', `r11')
+define(`T0', `r12')
+define(`T1', `r1') C Overlap INPUT
+define(`COUNT', `r0') C Overlap STATE
+define(`W', `r14') C lr
C Used for data load
-define(<I0>, <r3>)
-define(<I1>, <r4>)
-define(<I2>, <r5>)
-define(<I3>, <r6>)
-define(<I4>, <r7>)
-define(<DST>, <r8>)
-define(<SHIFT>, <r10>)
-define(<ILEFT>, <r11>)
-
-define(<EXPN>, <
+define(`I0', `r3') C Same register as SA
+define(`I1', `r4') C Same register as SB
+define(`I2', `r5') C Same register as SC
+define(`I3', `r6') C Same register as SD
+define(`I4', `r7') C Same register as SE
+define(`DST', `r8') C Same register as SF
+define(`SHIFT', `r10') C Same register as SG
+define(`ILEFT', `r11') C Same register as SH
+
+define(`EXPN', `
ldr W, [sp, #+eval(4*$1)]
ldr T0, [sp, #+eval(4*(($1 + 14) % 16))]
ror T1, T0, #17
eor T1, T1, T0, lsr #3
add W, W, T1
str W, [sp, #+eval(4*$1)]
->)
+')
C ROUND(A,B,C,D,E,F,G,H)
C
C Choice (E, F, G) = G^(E&(F^G))
C Majority (A,B,C) = (A&B) + (C&(A^B))
-define(<ROUND>, <
+define(`ROUND', `
ror T0, $5, #6
eor T0, T0, $5, ror #11
eor T0, T0, $5, ror #25
eor T0, $1, $2
and T0, T0, $3
add $8, $8, T0
->)
+')
-define(<NOEXPN>, <
+define(`NOEXPN', `
ldr W, [sp, + $1]
add $1, $1, #4
->)
+')
C void
C _nettle_sha256_compress(uint32_t *state, const uint8_t *input, const uint32_t *k)
lsl SHIFT, SHIFT, #3
mov T0, #0
movne T0, #-1
-IF_LE(< lsl I1, T0, SHIFT>)
-IF_BE(< lsr I1, T0, SHIFT>)
+IF_LE(` lsl I1, T0, SHIFT')
+IF_BE(` lsr I1, T0, SHIFT')
uadd8 T0, T0, I1 C Sets APSR.GE bits
C on BE rotate right by 32-SHIFT bits
C because there is no rotate left
-IF_BE(< rsb SHIFT, SHIFT, #32>)
+IF_BE(` rsb SHIFT, SHIFT, #32')
mov DST, sp
mov ILEFT, #4
ldm INPUT!, {I1,I2,I3,I4}
sel I0, I0, I1
ror I0, I0, SHIFT
-IF_LE(< rev I0, I0>)
+IF_LE(` rev I0, I0')
sel I1, I1, I2
ror I1, I1, SHIFT
-IF_LE(< rev I1, I1>)
+IF_LE(` rev I1, I1')
sel I2, I2, I3
ror I2, I2, SHIFT
-IF_LE(< rev I2, I2>)
+IF_LE(` rev I2, I2')
sel I3, I3, I4
ror I3, I3, SHIFT
-IF_LE(< rev I3, I3>)
+IF_LE(` rev I3, I3')
subs ILEFT, ILEFT, #1
stm DST!, {I0,I1,I2,I3}
mov I0, I4