From: Niels Möller Date: Sun, 13 Sep 2020 18:11:09 +0000 (+0200) Subject: Use default m4 quote character in asm files, part 2 X-Git-Tag: nettle_3.7rc1~71 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=80e7cec60267594563ca9a5dcb22661c01fbc802;p=thirdparty%2Fnettle.git Use default m4 quote character in asm files, part 2 Update arm files. --- diff --git a/arm/aes-decrypt-internal.asm b/arm/aes-decrypt-internal.asm index 3da333c8..1b04ed9a 100644 --- a/arm/aes-decrypt-internal.asm +++ b/arm/aes-decrypt-internal.asm @@ -1,6 +1,6 @@ C arm/aes-decrypt-internal.asm -ifelse(< +ifelse(` Copyright (C) 2013 Niels Möller This file is part of GNU Nettle. @@ -28,39 +28,39 @@ ifelse(< You should have received copies of the GNU General Public License and the GNU Lesser General Public License along with this program. If not, see http://www.gnu.org/licenses/. ->) +') -include_src() +include_src(`arm/aes.m4') -define(, ) -define(, ) -define(, ) -define(, ) +define(`PARAM_ROUNDS', `r0') +define(`PARAM_KEYS', `r1') +define(`TABLE', `r2') +define(`PARAM_LENGTH', `r3') C On stack: DST, SRC -define(, ) -define(, ) -define(, ) -define(, ) -define(, ) -define(, ) -define(, ) - -define(, ) C Overlaps inputs, except TABLE -define(, ) -define(, ) -define(, ) -define(, ) C lr - -define(, <[sp]>) -define(, <[sp, #+4]>) -define(, <[sp, #+8]>) +define(`W0', `r4') +define(`W1', `r5') +define(`W2', `r6') +define(`W3', `r7') +define(`T0', `r8') +define(`COUNT', `r10') +define(`KEY', `r11') + +define(`MASK', `r0') C Overlaps inputs, except TABLE +define(`X0', `r1') +define(`X1', `r3') +define(`X2', `r12') +define(`X3', `r14') C lr + +define(`FRAME_ROUNDS', `[sp]') +define(`FRAME_KEYS', `[sp, #+4]') +define(`FRAME_LENGTH', `[sp, #+8]') C 8 saved registers -define(, <[sp, #+44]>) -define(, <[sp, #+48]>) +define(`FRAME_DST', `[sp, #+44]') +define(`FRAME_SRC', `[sp, #+48]') -define(, < +define(`AES_DECRYPT_ROUND', ` and T0, MASK, $1, lsl #2 ldr $5, [TABLE, T0] and T0, MASK, $2, lsl #2 @@ -118,7 +118,7 @@ define(, < eor $6, $6, $2 eor $7, $7, $3 eor $8, $8, $4 ->) +') .file "aes-decrypt-internal.asm" diff --git a/arm/aes-encrypt-internal.asm b/arm/aes-encrypt-internal.asm index e8b3df6b..a16a9d57 100644 --- a/arm/aes-encrypt-internal.asm +++ b/arm/aes-encrypt-internal.asm @@ -1,6 +1,6 @@ C arm/aes-encrypt-internal.asm -ifelse(< +ifelse(` Copyright (C) 2013 Niels Möller This file is part of GNU Nettle. @@ -28,9 +28,9 @@ ifelse(< You should have received copies of the GNU General Public License and the GNU Lesser General Public License along with this program. If not, see http://www.gnu.org/licenses/. ->) +') -include_src() +include_src(`arm/aes.m4') C Benchmarked at at 725, 815, 990 cycles/block on cortex A9, C for 128, 192 and 256 bit key sizes. @@ -38,37 +38,37 @@ C for 128, 192 and 256 bit key sizes. C Possible improvements: More efficient load and store with C aligned accesses. Better scheduling. -define(, ) -define(, ) -define(
, ) -define(, ) +define(`PARAM_ROUNDS', `r0') +define(`PARAM_KEYS', `r1') +define(`TABLE', `r2') +define(`PARAM_LENGTH', `r3') C On stack: DST, SRC -define(, ) -define(, ) -define(, ) -define(, ) -define(, ) -define(, ) -define(, ) - -define(, ) C Overlaps inputs, except TABLE -define(, ) -define(, ) -define(, ) -define(, ) C lr - -define(, <[sp]>) -define(, <[sp, #+4]>) -define(, <[sp, #+8]>) +define(`W0', `r4') +define(`W1', `r5') +define(`W2', `r6') +define(`W3', `r7') +define(`T0', `r8') +define(`COUNT', `r10') +define(`KEY', `r11') + +define(`MASK', `r0') C Overlaps inputs, except TABLE +define(`X0', `r1') +define(`X1', `r3') +define(`X2', `r12') +define(`X3', `r14') C lr + +define(`FRAME_ROUNDS', `[sp]') +define(`FRAME_KEYS', `[sp, #+4]') +define(`FRAME_LENGTH', `[sp, #+8]') C 8 saved registers -define(, <[sp, #+44]>) -define(, <[sp, #+48]>) +define(`FRAME_DST', `[sp, #+44]') +define(`FRAME_SRC', `[sp, #+48]') C AES_ENCRYPT_ROUND(x0,x1,x2,x3,w0,w1,w2,w3,key) C MASK should hold the constant 0x3fc. -define(, < +define(`AES_ENCRYPT_ROUND', ` and T0, MASK, $1, lsl #2 ldr $5, [TABLE, T0] @@ -127,7 +127,7 @@ define(, < eor $6, $6, $2 eor $7, $7, $3 eor $8, $8, $4 ->) +') .file "aes-encrypt-internal.asm" diff --git a/arm/aes.m4 b/arm/aes.m4 index 91f340a1..95382de2 100644 --- a/arm/aes.m4 +++ b/arm/aes.m4 @@ -1,6 +1,6 @@ C Loads one word, and adds it to the subkey. Uses T0 C AES_LOAD(SRC, KEY, REG) -define(, < +define(`AES_LOAD', ` ldrb $3, [$1], #+1 ldrb T0, [$1], #+1 orr $3, T0, lsl #8 @@ -10,10 +10,10 @@ define(, < orr $3, T0, lsl #24 ldr T0, [$2], #+4 eor $3, T0 ->) +') C Stores one word. Destroys input. C AES_STORE(DST, X) -define(, < +define(`AES_STORE', ` strb $2, [$1], #+1 ror $2, $2, #8 strb $2, [$1], #+1 @@ -21,10 +21,10 @@ define(, < strb $2, [$1], #+1 ror $2, $2, #8 strb $2, [$1], #+1 ->) +') C AES_FINAL_ROUND_V6(a,b,c,d,key,res) -define(, < +define(`AES_FINAL_ROUND_V6', ` uxtb T0, $1 ldrb $6, [TABLE, T0] uxtb T0, $2, ror #8 @@ -37,12 +37,12 @@ define(, < eor $6, $6, T0, lsl #24 ldr T0, [$5], #+4 eor $6, $6, T0 ->) +') C AES_FINAL_ROUND_V5(a,b,c,d,key,res,mask) C Avoids the uxtb instruction, introduced in ARMv6. C The mask argument should hold the constant 0xff -define(, < +define(`AES_FINAL_ROUND_V5', ` and T0, $7, $1 ldrb $6, [TABLE, T0] and T0, $7, $2, ror #8 @@ -55,4 +55,4 @@ define(, < eor $6, $6, T0, lsl #24 ldr T0, [$5], #+4 eor $6, T0 ->) +') diff --git a/arm/ecc-secp192r1-modp.asm b/arm/ecc-secp192r1-modp.asm index 4c596a16..72a81a54 100644 --- a/arm/ecc-secp192r1-modp.asm +++ b/arm/ecc-secp192r1-modp.asm @@ -1,6 +1,6 @@ C arm/ecc-secp192r1-modp.asm -ifelse(< +ifelse(` Copyright (C) 2013 Niels Möller This file is part of GNU Nettle. @@ -28,26 +28,26 @@ ifelse(< You should have received copies of the GNU General Public License and the GNU Lesser General Public License along with this program. If not, see http://www.gnu.org/licenses/. ->) +') .file "ecc-secp192r1-modp.asm" .arm -define(, ) C Overlaps unused modulo argument -define(, ) - -define(, ) -define(, ) -define(, ) -define(, ) -define(, ) -define(, ) -define(, ) -define(, ) -define(, ) C Overlaps T0 and T1 -define(

, ) -define(, ) -define(, ) +define(`HP', `r0') C Overlaps unused modulo argument +define(`RP', `r1') + +define(`T0', `r2') +define(`T1', `r3') +define(`T2', `r4') +define(`T3', `r5') +define(`T4', `r6') +define(`T5', `r7') +define(`T6', `r8') +define(`T7', `r10') +define(`H0', `T0') C Overlaps T0 and T1 +define(`H1', `T1') +define(`C2', `HP') +define(`C4', `r12') C ecc_secp192r1_modp (const struct ecc_modulo *m, mp_limb_t *rp) .text diff --git a/arm/ecc-secp224r1-modp.asm b/arm/ecc-secp224r1-modp.asm index 67089a0c..3256601c 100644 --- a/arm/ecc-secp224r1-modp.asm +++ b/arm/ecc-secp224r1-modp.asm @@ -1,6 +1,6 @@ C arm/ecc-secp224r1-modp.asm -ifelse(< +ifelse(` Copyright (C) 2013 Niels Möller This file is part of GNU Nettle. @@ -28,25 +28,25 @@ ifelse(< You should have received copies of the GNU General Public License and the GNU Lesser General Public License along with this program. If not, see http://www.gnu.org/licenses/. ->) +') .file "ecc-secp224r1-modp.asm" .arm -define(, ) -define(, ) C Overlaps unused modulo argument - -define(, ) -define(, ) -define(, ) -define(, ) -define(, ) -define(, ) -define(, ) -define(, ) -define(, ) -define(, ) -define(, ) +define(`RP', `r1') +define(`H', `r0') C Overlaps unused modulo argument + +define(`T0', `r2') +define(`T1', `r3') +define(`T2', `r4') +define(`T3', `r5') +define(`T4', `r6') +define(`T5', `r7') +define(`T6', `r8') +define(`N3', `r10') +define(`L0', `r11') +define(`L1', `r12') +define(`L2', `lr') C ecc_secp224r1_modp (const struct ecc_modulo *m, mp_limb_t *rp) .text diff --git a/arm/ecc-secp256r1-redc.asm b/arm/ecc-secp256r1-redc.asm index f8386c39..e127a2f2 100644 --- a/arm/ecc-secp256r1-redc.asm +++ b/arm/ecc-secp256r1-redc.asm @@ -1,6 +1,6 @@ C arm/ecc-secp256r1-redc.asm -ifelse(< +ifelse(` Copyright (C) 2013 Niels Möller This file is part of GNU Nettle. @@ -28,25 +28,25 @@ ifelse(< You should have received copies of the GNU General Public License and the GNU Lesser General Public License along with this program. If not, see http://www.gnu.org/licenses/. ->) +') .file "ecc-secp256r1-redc.asm" .arm -define(, ) - -define(, ) C Overlaps unused modulo argument -define(, ) -define(, ) -define(, ) -define(, ) -define(, ) -define(, ) -define(, ) -define(, ) -define(, ) -define(, ) -define(, ) +define(`RP', `r1') + +define(`T0', `r0') C Overlaps unused modulo argument +define(`T1', `r2') +define(`T2', `r3') +define(`T3', `r4') +define(`T4', `r5') +define(`T5', `r6') +define(`T6', `r7') +define(`T7', `r8') +define(`F0', `r10') +define(`F1', `r11') +define(`F2', `r12') +define(`F3', `lr') C ecc_secp256r1_redc (const struct ecc_modulo *m, mp_limb_t *rp) .text diff --git a/arm/ecc-secp384r1-modp.asm b/arm/ecc-secp384r1-modp.asm index 1983ee68..96744ee9 100644 --- a/arm/ecc-secp384r1-modp.asm +++ b/arm/ecc-secp384r1-modp.asm @@ -1,6 +1,6 @@ C arm/ecc-secp384r1-modp.asm -ifelse(< +ifelse(` Copyright (C) 2013 Niels Möller This file is part of GNU Nettle. @@ -28,23 +28,23 @@ ifelse(< You should have received copies of the GNU General Public License and the GNU Lesser General Public License along with this program. If not, see http://www.gnu.org/licenses/. ->) +') .file "ecc-secp384r1-modp.asm" .arm -define(, ) -define(, ) -define(, ) -define(, ) -define(, ) -define(, ) -define(, ) -define(, ) -define(, ) -define(, ) -define(, ) -define(, ) +define(`RP', `r1') +define(`T0', `r0') +define(`T1', `r2') +define(`T2', `r3') +define(`T3', `r4') +define(`F0', `r5') +define(`F1', `r6') +define(`F2', `r7') +define(`F3', `r8') +define(`F4', `r10') +define(`N', `r12') +define(`H', `lr') C ecc_secp384r1_modp (const struct ecc_modulo *m, mp_limb_t *rp) .text diff --git a/arm/ecc-secp521r1-modp.asm b/arm/ecc-secp521r1-modp.asm index 6d1759ec..22e8dd4e 100644 --- a/arm/ecc-secp521r1-modp.asm +++ b/arm/ecc-secp521r1-modp.asm @@ -1,6 +1,6 @@ C arm/ecc-secp521r1-modp.asm -ifelse(< +ifelse(` Copyright (C) 2013 Niels Möller This file is part of GNU Nettle. @@ -28,22 +28,22 @@ ifelse(< You should have received copies of the GNU General Public License and the GNU Lesser General Public License along with this program. If not, see http://www.gnu.org/licenses/. ->) +') .file "ecc-secp521r1-modp.asm" .arm -define(, ) -define(, ) -define(, ) -define(, ) -define(, ) -define(, ) -define(, ) -define(, ) -define(, ) -define(, ) -define(, ) +define(`HP', `r0') +define(`RP', `r1') +define(`T0', `r2') +define(`T1', `r3') +define(`T2', `r4') +define(`F0', `r5') +define(`F1', `r6') +define(`F2', `r7') +define(`F3', `r8') +define(`H', `r12') +define(`N', `lr') C ecc_secp521r1_modp (const struct ecc_modulo *m, mp_limb_t *rp) .text diff --git a/arm/fat/aes-decrypt-internal-2.asm b/arm/fat/aes-decrypt-internal-2.asm index 2110f310..d51ae411 100644 --- a/arm/fat/aes-decrypt-internal-2.asm +++ b/arm/fat/aes-decrypt-internal-2.asm @@ -1,7 +1,7 @@ C arm/fat/aes-decrypt-internal-2.asm -ifelse(< +ifelse(` Copyright (C) 2015 Niels Möller This file is part of GNU Nettle. @@ -29,7 +29,7 @@ ifelse(< You should have received copies of the GNU General Public License and the GNU Lesser General Public License along with this program. If not, see http://www.gnu.org/licenses/. ->) +') -define(, <$1_armv6>) -include_src() +define(`fat_transform', `$1_armv6') +include_src(`arm/v6/aes-decrypt-internal.asm') diff --git a/arm/fat/aes-decrypt-internal.asm b/arm/fat/aes-decrypt-internal.asm index 8d763889..9994fc07 100644 --- a/arm/fat/aes-decrypt-internal.asm +++ b/arm/fat/aes-decrypt-internal.asm @@ -1,7 +1,7 @@ C arm/fat/aes-decrypt-internal.asm -ifelse(< +ifelse(` Copyright (C) 2015 Niels Möller This file is part of GNU Nettle. @@ -29,7 +29,7 @@ ifelse(< You should have received copies of the GNU General Public License and the GNU Lesser General Public License along with this program. If not, see http://www.gnu.org/licenses/. ->) +') -define(, <$1_arm>) -include_src() +define(`fat_transform', `$1_arm') +include_src(`arm/aes-decrypt-internal.asm') diff --git a/arm/fat/aes-encrypt-internal-2.asm b/arm/fat/aes-encrypt-internal-2.asm index 490a52be..aeeab39e 100644 --- a/arm/fat/aes-encrypt-internal-2.asm +++ b/arm/fat/aes-encrypt-internal-2.asm @@ -1,7 +1,7 @@ C arm/fat/aes-encrypt-internal-2.asm -ifelse(< +ifelse(` Copyright (C) 2015 Niels Möller This file is part of GNU Nettle. @@ -29,7 +29,7 @@ ifelse(< You should have received copies of the GNU General Public License and the GNU Lesser General Public License along with this program. If not, see http://www.gnu.org/licenses/. ->) +') -define(, <$1_armv6>) -include_src() +define(`fat_transform', `$1_armv6') +include_src(`arm/v6/aes-encrypt-internal.asm') diff --git a/arm/fat/aes-encrypt-internal.asm b/arm/fat/aes-encrypt-internal.asm index e695a289..efd14400 100644 --- a/arm/fat/aes-encrypt-internal.asm +++ b/arm/fat/aes-encrypt-internal.asm @@ -1,7 +1,7 @@ C arm/fat/aes-encrypt-internal.asm -ifelse(< +ifelse(` Copyright (C) 2015 Niels Möller This file is part of GNU Nettle. @@ -29,7 +29,7 @@ ifelse(< You should have received copies of the GNU General Public License and the GNU Lesser General Public License along with this program. If not, see http://www.gnu.org/licenses/. ->) +') -define(, <$1_arm>) -include_src() +define(`fat_transform', `$1_arm') +include_src(`arm/aes-encrypt-internal.asm') diff --git a/arm/fat/chacha-3core.asm b/arm/fat/chacha-3core.asm index 7938ee89..af6189b7 100644 --- a/arm/fat/chacha-3core.asm +++ b/arm/fat/chacha-3core.asm @@ -1,7 +1,7 @@ C arm/fat/chacha-3core.asm -ifelse(< +ifelse(` Copyright (C) 2020 Niels Möller This file is part of GNU Nettle. @@ -29,8 +29,8 @@ ifelse(< You should have received copies of the GNU General Public License and the GNU Lesser General Public License along with this program. If not, see http://www.gnu.org/licenses/. ->) +') dnl PROLOGUE(_nettle_fat_chacha_3core) picked up by configure -include_src() +include_src(`arm/neon/chacha-3core.asm') diff --git a/arm/fat/chacha-core-internal-2.asm b/arm/fat/chacha-core-internal-2.asm index 66a5c145..3715471c 100644 --- a/arm/fat/chacha-core-internal-2.asm +++ b/arm/fat/chacha-core-internal-2.asm @@ -1,7 +1,7 @@ C arm/fat/chacha-core-internal-2.asm -ifelse(< +ifelse(` Copyright (C) 2015 Niels Möller This file is part of GNU Nettle. @@ -29,9 +29,9 @@ ifelse(< You should have received copies of the GNU General Public License and the GNU Lesser General Public License along with this program. If not, see http://www.gnu.org/licenses/. ->) +') dnl PROLOGUE(_nettle_chacha_core) picked up by configure -define(, <$1_neon>) -include_src() +define(`fat_transform', `$1_neon') +include_src(`arm/neon/chacha-core-internal.asm') diff --git a/arm/fat/salsa20-2core.asm b/arm/fat/salsa20-2core.asm index 43d9a1d0..2d5c6e24 100644 --- a/arm/fat/salsa20-2core.asm +++ b/arm/fat/salsa20-2core.asm @@ -1,7 +1,7 @@ C arm/fat/salsa20-2core.asm -ifelse(< +ifelse(` Copyright (C) 2020 Niels Möller This file is part of GNU Nettle. @@ -29,8 +29,8 @@ ifelse(< You should have received copies of the GNU General Public License and the GNU Lesser General Public License along with this program. If not, see http://www.gnu.org/licenses/. ->) +') dnl PROLOGUE(_nettle_fat_salsa20_2core) picked up by configure -include_src() +include_src(`arm/neon/salsa20-2core.asm') diff --git a/arm/fat/salsa20-core-internal-2.asm b/arm/fat/salsa20-core-internal-2.asm index 64d90302..f88afd86 100644 --- a/arm/fat/salsa20-core-internal-2.asm +++ b/arm/fat/salsa20-core-internal-2.asm @@ -1,7 +1,7 @@ C arm/fat/salsa20-core-internal-2.asm -ifelse(< +ifelse(` Copyright (C) 2015 Niels Möller This file is part of GNU Nettle. @@ -29,9 +29,9 @@ ifelse(< You should have received copies of the GNU General Public License and the GNU Lesser General Public License along with this program. If not, see http://www.gnu.org/licenses/. ->) +') dnl PROLOGUE(_nettle_salsa20_core) picked up by configure -define(, <$1_neon>) -include_src() +define(`fat_transform', `$1_neon') +include_src(`arm/neon/salsa20-core-internal.asm') diff --git a/arm/fat/sha1-compress-2.asm b/arm/fat/sha1-compress-2.asm index 4c26c3c6..8586499d 100644 --- a/arm/fat/sha1-compress-2.asm +++ b/arm/fat/sha1-compress-2.asm @@ -1,7 +1,7 @@ C arm/fat/sha1-compress-2.asm -ifelse(< +ifelse(` Copyright (C) 2015 Niels Möller This file is part of GNU Nettle. @@ -29,9 +29,9 @@ ifelse(< You should have received copies of the GNU General Public License and the GNU Lesser General Public License along with this program. If not, see http://www.gnu.org/licenses/. ->) +') dnl PROLOGUE(nettle_sha1_compress) picked up by configure -define(, <_$1_armv6>) -include_src() +define(`fat_transform', `_$1_armv6') +include_src(`arm/v6/sha1-compress.asm') diff --git a/arm/fat/sha256-compress-2.asm b/arm/fat/sha256-compress-2.asm index e1babb37..36d55e4b 100644 --- a/arm/fat/sha256-compress-2.asm +++ b/arm/fat/sha256-compress-2.asm @@ -1,7 +1,7 @@ C arm/fat/sha256-compress-2.asm -ifelse(< +ifelse(` Copyright (C) 2015 Niels Möller This file is part of GNU Nettle. @@ -29,9 +29,9 @@ ifelse(< You should have received copies of the GNU General Public License and the GNU Lesser General Public License along with this program. If not, see http://www.gnu.org/licenses/. ->) +') dnl PROLOGUE(_nettle_sha256_compress) picked up by configure -define(, <$1_armv6>) -include_src() +define(`fat_transform', `$1_armv6') +include_src(`arm/v6/sha256-compress.asm') diff --git a/arm/fat/sha3-permute-2.asm b/arm/fat/sha3-permute-2.asm index b423a762..07e1a6e9 100644 --- a/arm/fat/sha3-permute-2.asm +++ b/arm/fat/sha3-permute-2.asm @@ -1,7 +1,7 @@ C arm/fat/sha3-permute-2.asm -ifelse(< +ifelse(` Copyright (C) 2015 Niels Möller This file is part of GNU Nettle. @@ -29,9 +29,9 @@ ifelse(< You should have received copies of the GNU General Public License and the GNU Lesser General Public License along with this program. If not, see http://www.gnu.org/licenses/. ->) +') dnl PROLOGUE(_nettle_sha3_permute) picked up by configure -define(, <_$1_neon>) -include_src() +define(`fat_transform', `_$1_neon') +include_src(`arm/neon/sha3-permute.asm') diff --git a/arm/fat/sha512-compress-2.asm b/arm/fat/sha512-compress-2.asm index 428604e0..a753ce8c 100644 --- a/arm/fat/sha512-compress-2.asm +++ b/arm/fat/sha512-compress-2.asm @@ -1,7 +1,7 @@ C arm/fat/sha3-compress-2.asm -ifelse(< +ifelse(` Copyright (C) 2015 Niels Möller This file is part of GNU Nettle. @@ -29,9 +29,9 @@ ifelse(< You should have received copies of the GNU General Public License and the GNU Lesser General Public License along with this program. If not, see http://www.gnu.org/licenses/. ->) +') dnl PROLOGUE(_nettle_sha512_compress) picked up by configure -define(, <$1_neon>) -include_src() +define(`fat_transform', `$1_neon') +include_src(`arm/neon/sha512-compress.asm') diff --git a/arm/fat/umac-nh-2.asm b/arm/fat/umac-nh-2.asm index fc97cc6b..cb3a191a 100644 --- a/arm/fat/umac-nh-2.asm +++ b/arm/fat/umac-nh-2.asm @@ -1,7 +1,7 @@ C arm/fat/umac-nh-2.asm -ifelse(< +ifelse(` Copyright (C) 2015 Niels Möller This file is part of GNU Nettle. @@ -29,9 +29,9 @@ ifelse(< You should have received copies of the GNU General Public License and the GNU Lesser General Public License along with this program. If not, see http://www.gnu.org/licenses/. ->) +') dnl PROLOGUE(_nettle_umac_nh) picked up by configure -define(, <$1_neon>) -include_src() +define(`fat_transform', `$1_neon') +include_src(`arm/neon/umac-nh.asm') diff --git a/arm/fat/umac-nh-n-2.asm b/arm/fat/umac-nh-n-2.asm index 32b7a830..9b005acf 100644 --- a/arm/fat/umac-nh-n-2.asm +++ b/arm/fat/umac-nh-n-2.asm @@ -1,7 +1,7 @@ C arm/fat/umac-nh-n-2.asm -ifelse(< +ifelse(` Copyright (C) 2015 Niels Möller This file is part of GNU Nettle. @@ -29,9 +29,9 @@ ifelse(< You should have received copies of the GNU General Public License and the GNU Lesser General Public License along with this program. If not, see http://www.gnu.org/licenses/. ->) +') dnl PROLOGUE(_nettle_umac_nh_n) picked up by configure -define(, <$1_neon>) -include_src() +define(`fat_transform', `$1_neon') +include_src(`arm/neon/umac-nh-n.asm') diff --git a/arm/machine.m4 b/arm/machine.m4 index f982a66a..ccaa79d2 100644 --- a/arm/machine.m4 +++ b/arm/machine.m4 @@ -1,4 +1,4 @@ -define(, , )>)dnl + `NO REGISTER')')dnl -define(, , )>)dnl + `NO REGISTER')')dnl -define(, , )>)dnl + `NO REGISTER')')dnl diff --git a/arm/memxor.asm b/arm/memxor.asm index e4619629..1431a9e7 100644 --- a/arm/memxor.asm +++ b/arm/memxor.asm @@ -1,6 +1,6 @@ C arm/memxor.asm -ifelse(< +ifelse(` Copyright (C) 2013 Niels Möller This file is part of GNU Nettle. @@ -28,7 +28,7 @@ ifelse(< You should have received copies of the GNU General Public License and the GNU Lesser General Public License along with this program. If not, see http://www.gnu.org/licenses/. ->) +') C Possible speedups: C @@ -38,16 +38,16 @@ C cycles, regardless of alignment. C Register usage: -define(, ) -define(, ) -define(, ) -define(, ) -define(, ) +define(`DST', `r0') +define(`SRC', `r1') +define(`N', `r2') +define(`CNT', `r6') +define(`TNC', `r12') C little-endian and big-endian need to shift in different directions for C alignment correction -define(, IF_LE(, )) -define(, IF_LE(, )) +define(`S0ADJ', IF_LE(`lsr', `lsl')) +define(`S1ADJ', IF_LE(`lsl', `lsr')) .syntax unified @@ -150,13 +150,13 @@ PROLOGUE(nettle_memxor) C Store bytes, one by one. .Lmemxor_leftover: C bring uppermost byte down for saving while preserving lower ones -IF_BE(< ror r3, #24>) +IF_BE(` ror r3, #24') strb r3, [DST], #+1 subs N, #1 beq .Lmemxor_done subs TNC, #8 C bring down next byte, no need to preserve -IF_LE(< lsr r3, #8>) +IF_LE(` lsr r3, #8') bne .Lmemxor_leftover b .Lmemxor_bytes .Lmemxor_odd_done: diff --git a/arm/memxor3.asm b/arm/memxor3.asm index b6c6da49..c2b43c13 100644 --- a/arm/memxor3.asm +++ b/arm/memxor3.asm @@ -1,6 +1,6 @@ C arm/memxor3.asm -ifelse(< +ifelse(` Copyright (C) 2013, 2015 Niels Möller This file is part of GNU Nettle. @@ -28,7 +28,7 @@ ifelse(< You should have received copies of the GNU General Public License and the GNU Lesser General Public License along with this program. If not, see http://www.gnu.org/licenses/. ->) +') C Possible speedups: C @@ -38,21 +38,21 @@ C cycles, regardless of alignment. C Register usage: -define(, ) -define(, ) -define(, ) -define(, ) +define(`DST', `r0') +define(`AP', `r1') +define(`BP', `r2') +define(`N', `r3') C Temporaries r4-r7 -define(, ) -define(, ) -define(, ) -define(, ) +define(`ACNT', `r8') +define(`ATNC', `r10') +define(`BCNT', `r11') +define(`BTNC', `r12') C little-endian and big-endian need to shift in different directions for C alignment correction -define(, IF_LE(, )) -define(, IF_LE(, )) +define(`S0ADJ', IF_LE(`lsr', `lsl')) +define(`S1ADJ', IF_LE(`lsl', `lsr')) .syntax unified @@ -169,13 +169,13 @@ PROLOGUE(nettle_memxor3) .Lmemxor3_au_leftover: C Store a byte at a time C bring uppermost byte down for saving while preserving lower ones -IF_LE(< ror r4, #24>) +IF_LE(` ror r4, #24') strb r4, [DST, #-1]! subs N, #1 beq .Lmemxor3_done subs ACNT, #8 C bring down next byte, no need to preserve -IF_BE(< lsr r4, #8>) +IF_BE(` lsr r4, #8') sub AP, #1 bne .Lmemxor3_au_leftover b .Lmemxor3_bytes @@ -277,19 +277,19 @@ IF_BE(< lsr r4, #8>) C Leftover bytes in r4, low end on LE and high end on BE before C preparatory alignment correction -IF_LE(< ror r4, ACNT>) -IF_BE(< ror r4, ATNC>) +IF_LE(` ror r4, ACNT') +IF_BE(` ror r4, ATNC') C now byte-aligned in high end on LE and low end on BE because we're C working downwards in saving the very first bytes of the buffer .Lmemxor3_uu_leftover: C bring uppermost byte down for saving while preserving lower ones -IF_LE(< ror r4, #24>) +IF_LE(` ror r4, #24') strb r4, [DST, #-1]! subs N, #1 beq .Lmemxor3_done subs ACNT, #8 C bring down next byte, no need to preserve -IF_BE(< lsr r4, #8>) +IF_BE(` lsr r4, #8') bne .Lmemxor3_uu_leftover b .Lmemxor3_bytes diff --git a/arm/neon/chacha-3core.asm b/arm/neon/chacha-3core.asm index 708494b2..bd1cf63c 100644 --- a/arm/neon/chacha-3core.asm +++ b/arm/neon/chacha-3core.asm @@ -1,6 +1,6 @@ C arm/neon/chacha-3core.asm -ifelse(< +ifelse(` Copyright (C) 2020 Niels Möller This file is part of GNU Nettle. @@ -28,33 +28,33 @@ ifelse(< You should have received copies of the GNU General Public License and the GNU Lesser General Public License along with this program. If not, see http://www.gnu.org/licenses/. ->) +') .file "chacha-3core.asm" .fpu neon -define(, ) -define(, ) -define(, ) +define(`DST', `r0') +define(`SRC', `r1') +define(`ROUNDS', `r2') C State, X, Y and Z representing consecutive blocks -define(, ) -define(, ) -define(, ) -define(, ) -define(, ) -define(, ) -define(, ) -define(, ) -define(, ) -define(, ) -define(, ) -define(, ) - -define(, ) -define(, ) -define(, ) -define(, ) +define(`X0', `q0') +define(`X1', `q1') +define(`X2', `q2') +define(`X3', `q3') +define(`Y0', `q8') +define(`Y1', `q9') +define(`Y2', `q10') +define(`Y3', `q11') +define(`Z0', `q12') +define(`Z1', `q13') +define(`Z2', `q14') +define(`Z3', `q15') + +define(`T0', `q4') +define(`T1', `q5') +define(`T2', `q6') +define(`T3', `q7') .text .align 4 diff --git a/arm/neon/chacha-core-internal.asm b/arm/neon/chacha-core-internal.asm index 22f843e8..b0a775bd 100644 --- a/arm/neon/chacha-core-internal.asm +++ b/arm/neon/chacha-core-internal.asm @@ -1,6 +1,6 @@ C arm/neon/chacha-core-internal.asm -ifelse(< +ifelse(` Copyright (C) 2013, 2015 Niels Möller This file is part of GNU Nettle. @@ -28,26 +28,26 @@ ifelse(< You should have received copies of the GNU General Public License and the GNU Lesser General Public License along with this program. If not, see http://www.gnu.org/licenses/. ->) +') .file "chacha-core-internal.asm" .fpu neon -define(, ) -define(, ) -define(, ) - -define(, ) -define(, ) -define(, ) -define(, ) -define(, ) -define(, ) -define(, ) -define(, ) -define(, ) - -define(, < +define(`DST', `r0') +define(`SRC', `r1') +define(`ROUNDS', `r2') + +define(`X0', `q0') +define(`X1', `q1') +define(`X2', `q2') +define(`X3', `q3') +define(`T0', `q8') +define(`S0', `q12') +define(`S1', `q13') +define(`S2', `q14') +define(`S3', `q15') + +define(`QROUND', ` C x0 += x1, x3 ^= x0, x3 lrot 16 C x2 += x3, x1 ^= x2, x1 lrot 12 C x0 += x1, x3 ^= x0, x3 lrot 8 @@ -76,7 +76,7 @@ define(, < vshl.i32 T0, $2, #7 vshr.u32 $2, $2, #25 veor $2, $2, T0 ->) +') .text .align 4 @@ -121,21 +121,21 @@ PROLOGUE(_nettle_chacha_core) C 12 15 14 13 >>> 3 C different number of elements needs to be C extracted on BE because of different column order -IF_LE(< vext.32 X1, X1, X1, #1>) -IF_BE(< vext.32 X1, X1, X1, #3>) +IF_LE(` vext.32 X1, X1, X1, #1') +IF_BE(` vext.32 X1, X1, X1, #3') vext.32 X2, X2, X2, #2 -IF_LE(< vext.32 X3, X3, X3, #3>) -IF_BE(< vext.32 X3, X3, X3, #1>) +IF_LE(` vext.32 X3, X3, X3, #3') +IF_BE(` vext.32 X3, X3, X3, #1') QROUND(X0, X1, X2, X3) subs ROUNDS, ROUNDS, #2 C Inverse rotation -IF_LE(< vext.32 X1, X1, X1, #3>) -IF_BE(< vext.32 X1, X1, X1, #1>) +IF_LE(` vext.32 X1, X1, X1, #3') +IF_BE(` vext.32 X1, X1, X1, #1') vext.32 X2, X2, X2, #2 -IF_LE(< vext.32 X3, X3, X3, #1>) -IF_BE(< vext.32 X3, X3, X3, #3>) +IF_LE(` vext.32 X3, X3, X3, #1') +IF_BE(` vext.32 X3, X3, X3, #3') bhi .Loop @@ -145,10 +145,10 @@ IF_BE(< vext.32 X3, X3, X3, #3>) vadd.u32 X3, X3, S3 C caller expects result little-endian -IF_BE(< vrev32.u8 X0, X0 +IF_BE(` vrev32.u8 X0, X0 vrev32.u8 X1, X1 vrev32.u8 X2, X2 - vrev32.u8 X3, X3>) + vrev32.u8 X3, X3') vstm DST, {X0,X1,X2,X3} bx lr diff --git a/arm/neon/salsa20-2core.asm b/arm/neon/salsa20-2core.asm index cdb6133a..d622edd6 100644 --- a/arm/neon/salsa20-2core.asm +++ b/arm/neon/salsa20-2core.asm @@ -1,6 +1,6 @@ C arm/neon/salsa20-2core.asm -ifelse(< +ifelse(` Copyright (C) 2020 Niels Möller This file is part of GNU Nettle. @@ -28,28 +28,28 @@ ifelse(< You should have received copies of the GNU General Public License and the GNU Lesser General Public License along with this program. If not, see http://www.gnu.org/licenses/. ->) +') .file "salsa20-2core.asm" .fpu neon -define(, ) -define(, ) -define(, ) +define(`DST', `r0') +define(`SRC', `r1') +define(`ROUNDS', `r2') C State, even elements in X, odd elements in Y -define(, ) -define(, ) -define(, ) -define(, ) -define(, ) -define(, ) -define(, ) -define(, ) -define(, ) -define(, ) -define(, ) -define(, ) +define(`X0', `q0') +define(`X1', `q1') +define(`X2', `q2') +define(`X3', `q3') +define(`Y0', `q8') +define(`Y1', `q9') +define(`Y2', `q10') +define(`Y3', `q11') +define(`T0', `q12') +define(`T1', `q13') +define(`T2', `q14') +define(`T3', `q15') .text .align 4 diff --git a/arm/neon/salsa20-core-internal.asm b/arm/neon/salsa20-core-internal.asm index 20710499..d59d7b80 100644 --- a/arm/neon/salsa20-core-internal.asm +++ b/arm/neon/salsa20-core-internal.asm @@ -1,6 +1,6 @@ C arm/neon/salsa20-core-internal.asm -ifelse(< +ifelse(` Copyright (C) 2013 Niels Möller This file is part of GNU Nettle. @@ -28,29 +28,29 @@ ifelse(< You should have received copies of the GNU General Public License and the GNU Lesser General Public License along with this program. If not, see http://www.gnu.org/licenses/. ->) +') .file "salsa20-core-internal.asm" .fpu neon -define(, ) -define(, ) -define(, ) - -define(, ) -define(, ) -define(, ) -define(, ) -define(, ) -define(, ) -define(, ) -define(, ) -define(, ) -define(, ) -define(, ) -define(, ) +define(`DST', `r0') +define(`SRC', `r1') +define(`ROUNDS', `r2') + +define(`X0', `q0') +define(`X1', `q1') +define(`X2', `q2') +define(`X3', `q3') +define(`T0', `q8') +define(`T1', `q9') +define(`M0101', `q10') +define(`M0110', `q11') +define(`M0011', `q12') +define(`S1', `q13') +define(`S2', `q14') +define(`S3', `q15') -define(, < +define(`QROUND', ` vadd.i32 T0, $1, $4 vshl.i32 T1, T0, #7 vshr.u32 T0, T0, #25 @@ -74,7 +74,7 @@ define(, < vshr.u32 T0, T0, #14 veor $1, $1, T0 veor $1, $1, T1 ->) +') .text .align 4 @@ -168,21 +168,21 @@ PROLOGUE(_nettle_salsa20_core) C 6 1 12 11 >>> 1 C different number of elements needs to be C extracted on BE because of different column order -IF_LE(< vext.32 X1, X1, X1, #3>) -IF_BE(< vext.32 X1, X1, X1, #1>) +IF_LE(` vext.32 X1, X1, X1, #3') +IF_BE(` vext.32 X1, X1, X1, #1') vext.32 X2, X2, X2, #2 -IF_LE(< vext.32 X3, X3, X3, #1>) -IF_BE(< vext.32 X3, X3, X3, #3>) +IF_LE(` vext.32 X3, X3, X3, #1') +IF_BE(` vext.32 X3, X3, X3, #3') QROUND(X0, X3, X2, X1) subs ROUNDS, ROUNDS, #2 C Inverse rotation -IF_LE(< vext.32 X1, X1, X1, #1>) -IF_BE(< vext.32 X1, X1, X1, #3>) +IF_LE(` vext.32 X1, X1, X1, #1') +IF_BE(` vext.32 X1, X1, X1, #3') vext.32 X2, X2, X2, #2 -IF_LE(< vext.32 X3, X3, X3, #3>) -IF_BE(< vext.32 X3, X3, X3, #1>) +IF_LE(` vext.32 X3, X3, X3, #3') +IF_BE(` vext.32 X3, X3, X3, #1') bhi .Loop @@ -209,10 +209,10 @@ IF_BE(< vext.32 X3, X3, X3, #1>) vadd.u32 X3, X3, S3 C caller expects result little-endian -IF_BE(< vrev32.u8 X0, X0 +IF_BE(` vrev32.u8 X0, X0 vrev32.u8 X1, X1 vrev32.u8 X2, X2 - vrev32.u8 X3, X3>) + vrev32.u8 X3, X3') vstm DST, {X0,X1,X2,X3} bx lr diff --git a/arm/neon/sha3-permute.asm b/arm/neon/sha3-permute.asm index 43a523f8..46be4bc0 100644 --- a/arm/neon/sha3-permute.asm +++ b/arm/neon/sha3-permute.asm @@ -1,6 +1,6 @@ C arm/neon/sha3-permute.asm -ifelse(< +ifelse(` Copyright (C) 2013 Niels Möller This file is part of GNU Nettle. @@ -28,62 +28,62 @@ ifelse(< You should have received copies of the GNU General Public License and the GNU Lesser General Public License along with this program. If not, see http://www.gnu.org/licenses/. ->) +') .file "sha3-permute.asm" .fpu neon -define(, ) -define(, ) -define(, ) +define(`CTX', `r0') +define(`COUNT', `r1') +define(`RC', `r2') C First column -define(, ) -define(, ) -define(, ) -define(, ) -define(, ) - -define(, ) -define(, ) -define(, ) -define(, ) - -define(, ) -define(, ) -define(, ) -define(, ) - -define(, ) -define(, ) -define(, ) -define(, ) - -define(, ) -define(, ) -define(, ) -define(, ) - -define(, ) -define(, ) -define(, ) -define(, ) - -define(, ) -define(, ) - -define(, ) -define(, ) -define(, ) -define(, ) -define(, ) +define(`A0', `d0') +define(`A5', `d2') +define(`A10', `d3') +define(`A15', `d4') +define(`A20', `d5') + +define(`A1', `d6') +define(`A2', `d7') +define(`A3', `d8') +define(`A4', `d9') + +define(`A6', `d16') +define(`A7', `d17') +define(`A8', `d18') +define(`A9', `d19') + +define(`A11', `d20') +define(`A12', `d21') +define(`A13', `d22') +define(`A14', `d23') + +define(`A16', `d24') +define(`A17', `d25') +define(`A18', `d26') +define(`A19', `d27') + +define(`A21', `d28') +define(`A22', `d29') +define(`A23', `d30') +define(`A24', `d31') + +define(`T0', `d10') +define(`T1', `d11') + +define(`C0', `d1') +define(`C1', `d12') +define(`C2', `d13') +define(`C3', `d14') +define(`C4', `d15') C ROL(DST, SRC, COUNT) C Must have SRC != DST -define(, < +define(`ROL', ` vshr.u64 $1, $2, #eval(64-$3) vsli.i64 $1, $2, #$3 - >) + ') C sha3_permute(struct sha3_ctx *ctx) .text diff --git a/arm/neon/sha512-compress.asm b/arm/neon/sha512-compress.asm index 828d9ce2..00633c16 100644 --- a/arm/neon/sha512-compress.asm +++ b/arm/neon/sha512-compress.asm @@ -1,6 +1,6 @@ C arm/neon/sha512-compress.asm -ifelse(< +ifelse(` Copyright (C) 2013 Niels Möller This file is part of GNU Nettle. @@ -28,73 +28,73 @@ ifelse(< You should have received copies of the GNU General Public License and the GNU Lesser General Public License along with this program. If not, see http://www.gnu.org/licenses/. ->) +') .file "sha512-compress.asm" .fpu neon -define(, ) -define(, ) -define(, ) -define(, ) -define(, ) - -define(, ) -define(, ) -define(, ) -define(, ) -define(, ) -define(, ) -define(, ) -define(, ) -define(, ) -define(, ) -define(, ) -define(, ) +define(`STATE', `r0') +define(`INPUT', `r1') +define(`K', `r2') +define(`COUNT', `r3') +define(`SHIFT', `r12') + +define(`SA', `d0') +define(`SB', `d1') +define(`SC', `d2') +define(`SD', `d3') +define(`SE', `d4') +define(`SF', `d5') +define(`SG', `d6') +define(`SH', `d7') +define(`QSAB', `q0') +define(`QSCD', `q1') +define(`QSEF', `q2') +define(`QSGH', `q3') C d8-d15 are callee-save -define(, ) -define(, ) -define(, ) -define(, ) -define(, ) -define(, ) -define(, ) -define(, ) -define(, ) +define(`DT0', `d8') +define(`DT1', `d9') +define(`QT01', `q4') +define(`DT2', `d10') +define(`DT3', `d11') +define(`QT23', `q5') +define(`DT4', `d12') +define(`DT5', `d13') +define(`QT45', `q6') C Used only when reading the input, can overlap with state -define(, ) -define(, ) -define(, ) - -define(, ) -define(, ) -define(, ) -define(, ) -define(, ) -define(, ) -define(, ) -define(, ) -define(, ) -define(, ) -define(, ) -define(, ) -define(, ) -define(, ) -define(, ) -define(, ) -define(, ) -define(, ) -define(, ) -define(, ) -define(, ) -define(, ) -define(, ) -define(, ) - -define(, <$1>) -define(, eval(($1) % 16))>) +define(`DT6', `d0') +define(`DT7', `d1') +define(`QT67', `q0') + +define(`DW0', `d16') +define(`DW1', `d17') +define(`DW2', `d18') +define(`DW3', `d19') +define(`DW4', `d20') +define(`DW5', `d21') +define(`DW6', `d22') +define(`DW7', `d23') +define(`DW8', `d24') +define(`DW9', `d25') +define(`DW10', `d26') +define(`DW11', `d27') +define(`DW12', `d28') +define(`DW13', `d29') +define(`DW14', `d30') +define(`DW15', `d31') +define(`QW0001', `q8') +define(`QW0203', `q9') +define(`QW0405', `q10') +define(`QW0607', `q11') +define(`QW0809', `q12') +define(`QW1011', `q13') +define(`QW1213', `q14') +define(`QW1415', `q15') + +define(`EXPAND_ME', `$1') +define(`W', `EXPAND_ME(`DW'eval(($1) % 16))') C If x = W(i+14), y = w(i+1), we xor in parallel C @@ -105,7 +105,7 @@ C x >> 61 y >> 8 C xor x >> 6 y >> 7 C ----------------------------- C DT0 DT1 -define(, < +define(`EXPN', ` vshl.i64 DT0, W($1+14), #45 vshl.i64 DT1, W($1 + 1), #63 vshr.u64 DT2, W($1+14), #19 @@ -123,7 +123,7 @@ define(, < veor.i64 QT01, QT01, QT45 vadd.i64 W($1), W($1), DT0 vadd.i64 W($1), W($1), DT1 ->) +') C ROUND(A,B,C,D,E,F,G,H,i) C @@ -148,7 +148,7 @@ C e << 23 a << 25 C xor e >> 41 a >> 39 C ---------------------------- C DT0 DT1 -define(, < +define(`ROUND', ` vshl.i64 DT0, $5, #50 vshl.i64 DT1, $1, #36 vshr.u64 DT2, $5, #14 @@ -180,7 +180,7 @@ define(, < vadd.i64 DT1, DT1, DT2 vadd.i64 $4, $4, $8 vadd.i64 $8, $8, DT1 ->) +') C void C _nettle_sha512_compress(uint64_t *state, const uint8_t *input, const uint64_t *k) diff --git a/arm/neon/umac-nh-n.asm b/arm/neon/umac-nh-n.asm index 42686e02..7e36afe2 100644 --- a/arm/neon/umac-nh-n.asm +++ b/arm/neon/umac-nh-n.asm @@ -1,6 +1,6 @@ C arm/neon/umac-nh-n.asm -ifelse(< +ifelse(` Copyright (C) 2013 Niels Möller This file is part of GNU Nettle. @@ -28,33 +28,33 @@ ifelse(< You should have received copies of the GNU General Public License and the GNU Lesser General Public License along with this program. If not, see http://www.gnu.org/licenses/. ->) +') .file "umac-nh.asm" .fpu neon -define(, ) -define(, ) -define(, ) -define(, ) -define(, ) -define(, ) - -define(, ) -define(, ) -define(, ) C Accumulates for the first two operations. -define(, ) -define(, ) C Used for 3 and 4 iterations. -define(, ) -define(, ) -define(, ) -define(, ) -define(, ) -define(, ) -define(, ) -define(, ) -define(, ) -define(, ) +define(`OUT', `r0') +define(`ITERS', `r1') +define(`KEY', `r2') +define(`LENGTH', `r3') +define(`MSG', `r12') +define(`SHIFT', `r14') + +define(`QA', `q0') +define(`QB', `q1') +define(`QY0', `q3') C Accumulates for the first two operations. +define(`DM', `d4') +define(`QY1', `q4') C Used for 3 and 4 iterations. +define(`QC', `q5') +define(`QD', `q6') +define(`QLEFT', `q8') +define(`QRIGHT', `q9') +define(`QT0', `q10') +define(`QT1', `q11') +define(`QT2', `q12') +define(`QK0', `q13') +define(`QK1', `q14') +define(`QK2', `q15') C FIXME: Try permuting subkeys using vld4, vzip or similar. diff --git a/arm/neon/umac-nh.asm b/arm/neon/umac-nh.asm index 38be654c..56ea6454 100644 --- a/arm/neon/umac-nh.asm +++ b/arm/neon/umac-nh.asm @@ -1,6 +1,6 @@ C arm/neon/umac-nh.asm -ifelse(< +ifelse(` Copyright (C) 2013 Niels Möller This file is part of GNU Nettle. @@ -28,26 +28,26 @@ ifelse(< You should have received copies of the GNU General Public License and the GNU Lesser General Public License along with this program. If not, see http://www.gnu.org/licenses/. ->) +') .file "umac-nh.asm" .fpu neon -define(, ) -define(, ) -define(, ) -define(, ) - -define(, ) -define(, ) -define(, ) -define(, ) -define(, ) -define(, ) -define(, ) -define(, ) -define(, ) -define(, ) +define(`KEY', `r0') +define(`LENGTH', `r1') +define(`MSG', `r2') +define(`SHIFT', `r3') + +define(`QA', `q0') +define(`QB', `q1') +define(`DM', `d16') +define(`QLEFT', `q9') +define(`QRIGHT', `q10') +define(`QY', `q11') +define(`QT0', `q12') +define(`QT1', `q13') +define(`QK0', `q14') +define(`QK1', `q15') .text .align 3 @@ -98,7 +98,7 @@ PROLOGUE(_nettle_umac_nh) vadd.i64 D0REG(QY), D0REG(QY), D1REG(QY) C return value needs to respect word order mandated by AAPCS -IF_LE(< vmov r0, r1, D0REG(QY)>) -IF_BE(< vmov r1, r0, D0REG(QY)>) +IF_LE(` vmov r0, r1, D0REG(QY)') +IF_BE(` vmov r1, r0, D0REG(QY)') bx lr EPILOGUE(_nettle_umac_nh) diff --git a/arm/v6/aes-decrypt-internal.asm b/arm/v6/aes-decrypt-internal.asm index 45801050..e8c6e91a 100644 --- a/arm/v6/aes-decrypt-internal.asm +++ b/arm/v6/aes-decrypt-internal.asm @@ -1,6 +1,6 @@ C arm/v6/aes-decrypt-internal.asm -ifelse(< +ifelse(` Copyright (C) 2013 Niels Möller This file is part of GNU Nettle. @@ -28,42 +28,42 @@ ifelse(< You should have received copies of the GNU General Public License and the GNU Lesser General Public License along with this program. If not, see http://www.gnu.org/licenses/. ->) +') .arch armv6 -include_src() +include_src(`arm/aes.m4') -define(, ) -define(, ) -define(

, ) -define(, ) +define(`PARAM_ROUNDS', `r0') +define(`PARAM_KEYS', `r1') +define(`TABLE', `r2') +define(`LENGTH', `r3') C On stack: DST, SRC -define(, ) -define(, ) -define(, ) -define(, ) -define(, ) -define(, ) -define(, ) - -define(, ) C Overlaps PARAM_ROUNDS and PARAM_KEYS -define(, ) -define(, ) -define(, ) C lr - -define(>, <[sp]>) -define(, <[sp, #+4]>) +define(`W0', `r4') +define(`W1', `r5') +define(`W2', `r6') +define(`W3', `r7') +define(`T0', `r8') +define(`COUNT', `r10') +define(`KEY', `r11') + +define(`X0', `r0') C Overlaps PARAM_ROUNDS and PARAM_KEYS +define(`X1', `r1') +define(`X2', `r12') +define(`X3', `r14') C lr + +define(`FRAME_ROUNDS', `[sp]') +define(`FRAME_KEYS', `[sp, #+4]') C 8 saved registers -define(, <[sp, #+40]>) -define(, <[sp, #+44]>) +define(`FRAME_DST', `[sp, #+40]') +define(`FRAME_SRC', `[sp, #+44]') -define(, ) C Overlap registers used in inner loop. -define(, ) +define(`SRC', `r12') C Overlap registers used in inner loop. +define(`DST', `COUNT') C AES_DECRYPT_ROUND(x0,x1,x2,x3,w0,w1,w2,w3,key) -define(, < +define(`AES_DECRYPT_ROUND', ` uxtb T0, $1 ldr $5, [TABLE, T0, lsl #2] uxtb T0, $2 @@ -121,7 +121,7 @@ define(, < eor $6, $6, $2 eor $7, $7, $3 eor $8, $8, $4 ->) +') .file "aes-decrypt-internal.asm" diff --git a/arm/v6/aes-encrypt-internal.asm b/arm/v6/aes-encrypt-internal.asm index 576cf8e0..6cbd66d6 100644 --- a/arm/v6/aes-encrypt-internal.asm +++ b/arm/v6/aes-encrypt-internal.asm @@ -1,6 +1,6 @@ C arm/v6/aes-encrypt-internal.asm -ifelse(< +ifelse(` Copyright (C) 2013 Niels Möller This file is part of GNU Nettle. @@ -28,11 +28,11 @@ ifelse(< You should have received copies of the GNU General Public License and the GNU Lesser General Public License along with this program. If not, see http://www.gnu.org/licenses/. ->) +') .arch armv6 -include_src() +include_src(`arm/aes.m4') C Benchmarked at at 706, 870, 963 cycles/block on cortex A9, C for 128, 192 and 256 bit key sizes. @@ -40,38 +40,38 @@ C for 128, 192 and 256 bit key sizes. C Possible improvements: More efficient load and store with C aligned accesses. Better scheduling. -define(, ) -define(, ) -define(
, ) -define(, ) +define(`PARAM_ROUNDS', `r0') +define(`PARAM_KEYS', `r1') +define(`TABLE', `r2') +define(`LENGTH', `r3') C On stack: DST, SRC -define(, ) -define(, ) -define(, ) -define(, ) -define(, ) -define(, ) -define(, ) - -define(, ) C Overlaps PARAM_ROUNDS and PARAM_KEYS -define(, ) -define(, ) -define(, ) C lr - -define(>, <[sp]>) -define(, <[sp, #+4]>) +define(`W0', `r4') +define(`W1', `r5') +define(`W2', `r6') +define(`W3', `r7') +define(`T0', `r8') +define(`COUNT', `r10') +define(`KEY', `r11') + +define(`X0', `r0') C Overlaps PARAM_ROUNDS and PARAM_KEYS +define(`X1', `r1') +define(`X2', `r12') +define(`X3', `r14') C lr + +define(`FRAME_ROUNDS', `[sp]') +define(`FRAME_KEYS', `[sp, #+4]') C 8 saved registers -define(, <[sp, #+40]>) -define(, <[sp, #+44]>) +define(`FRAME_DST', `[sp, #+40]') +define(`FRAME_SRC', `[sp, #+44]') -define(, ) C Overlap registers used in inner loop. -define(, ) +define(`SRC', `r12') C Overlap registers used in inner loop. +define(`DST', `COUNT') C 53 instr. C It's tempting to use eor with rotation, but that's slower. C AES_ENCRYPT_ROUND(x0,x1,x2,x3,w0,w1,w2,w3,key) -define(, < +define(`AES_ENCRYPT_ROUND', ` uxtb T0, $1 ldr $5, [TABLE, T0, lsl #2] uxtb T0, $2 @@ -129,7 +129,7 @@ define(, < eor $6, $6, $2 eor $7, $7, $3 eor $8, $8, $4 ->) +') .file "aes-encrypt-internal.asm" diff --git a/arm/v6/sha1-compress.asm b/arm/v6/sha1-compress.asm index f60b4230..be6170b3 100644 --- a/arm/v6/sha1-compress.asm +++ b/arm/v6/sha1-compress.asm @@ -1,6 +1,6 @@ C arm/v6/sha1-compress.asm -ifelse(< +ifelse(` Copyright (C) 2013 Niels Möller This file is part of GNU Nettle. @@ -28,34 +28,34 @@ ifelse(< You should have received copies of the GNU General Public License and the GNU Lesser General Public License along with this program. If not, see http://www.gnu.org/licenses/. ->) +') .file "sha1-compress.asm" .arch armv6 -define(, ) -define(, ) -define(, ) -define(, ) -define(, ) -define(, ) -define(, ) -define(, ) -define(, ) -define(, ) -define(, ) -define(, ) +define(`STATE', `r0') +define(`INPUT', `r1') +define(`SA', `r2') +define(`SB', `r3') +define(`SC', `r4') +define(`SD', `r5') +define(`SE', `r6') +define(`T0', `r7') +define(`SHIFT', `r8') +define(`WPREV', `r10') +define(`W', `r12') +define(`K', `lr') C FIXME: Could avoid a mov with even and odd variants. -define(, < +define(`LOAD', ` ldr T0, [INPUT], #+4 sel W, WPREV, T0 ror W, W, SHIFT mov WPREV, T0 -IF_LE(< rev W, W>) +IF_LE(` rev W, W') str W, [SP,#eval(4*$1)] ->) -define(, < +') +define(`EXPN', ` ldr W, [sp, #+eval(4*$1)] ldr T0, [sp, #+eval(4*(($1 + 2) % 16))] eor W, W, T0 @@ -65,11 +65,11 @@ define(, < eor W, W, T0 ror W, W, #31 str W, [sp, #+eval(4*$1)] ->) +') C F1(B,C,D) = D^(B&(C^D)) C ROUND1(A,B,C,D,E) -define(, < +define(`ROUND1', ` eor T0, $3, $4 add $5, $5, K and T0, T0, $2 @@ -78,9 +78,9 @@ define(, < add $5, $5, W ror $2, $2, #2 add $5, $5, T0 ->) +') C F2(B,C,D) = B^C^D -define(, < +define(`ROUND2', ` eor T0, $2, $4 add $5, $5, K eor T0, T0, $3 @@ -88,9 +88,9 @@ define(, < add $5, $5, W ror $2, $2, #2 add $5, $5, T0 ->) +') C F3(B,C,D) = (B&C) | (D & (B|C)) = (B & (C ^ D)) + (C & D) -define(, < +define(`ROUND3', ` eor T0, $3, $4 add $5, $5, K and T0, T0, $2 @@ -100,7 +100,7 @@ define(, < and T0, $3, $4 ror $2, $2, #2 add $5, $5, T0 ->) +') C void nettle_sha1_compress(uint32_t *state, const uint8_t *input) .text @@ -127,12 +127,12 @@ PROLOGUE(nettle_sha1_compress) lsl SHIFT, SHIFT, #3 mov T0, #0 movne T0, #-1 -IF_LE(< lsl W, T0, SHIFT>) -IF_BE(< lsr W, T0, SHIFT>) +IF_LE(` lsl W, T0, SHIFT') +IF_BE(` lsr W, T0, SHIFT') uadd8 T0, T0, W C Sets APSR.GE bits C on BE rotate right by 32-SHIFT bits C because there is no rotate left -IF_BE(< rsb SHIFT, SHIFT, #32>) +IF_BE(` rsb SHIFT, SHIFT, #32') ldr K, .LK1 ldm STATE, {SA,SB,SC,SD,SE} diff --git a/arm/v6/sha256-compress.asm b/arm/v6/sha256-compress.asm index 324730c7..3c021284 100644 --- a/arm/v6/sha256-compress.asm +++ b/arm/v6/sha256-compress.asm @@ -1,6 +1,6 @@ C arm/v6/sha256-compress.asm -ifelse(< +ifelse(` Copyright (C) 2013 Niels Möller This file is part of GNU Nettle. @@ -28,38 +28,38 @@ ifelse(< You should have received copies of the GNU General Public License and the GNU Lesser General Public License along with this program. If not, see http://www.gnu.org/licenses/. ->) +') .file "sha256-compress.asm" .arch armv6 -define(, ) -define(, ) -define(, ) -define(, ) -define(, ) -define(, ) -define(, ) -define(, ) -define(, ) -define(, ) -define(, ) -define(, ) -define(, ) C Overlap INPUT -define(, ) C Overlap STATE -define(, ) +define(`STATE', `r0') +define(`INPUT', `r1') +define(`K', `r2') +define(`SA', `r3') +define(`SB', `r4') +define(`SC', `r5') +define(`SD', `r6') +define(`SE', `r7') +define(`SF', `r8') +define(`SG', `r10') +define(`SH', `r11') +define(`T0', `r12') +define(`T1', `r1') C Overlap INPUT +define(`COUNT', `r0') C Overlap STATE +define(`W', `r14') C Used for data load -define(, ) -define(, ) -define(, ) -define(, ) -define(, ) -define(, ) -define(, ) -define(, ) - -define(, < +define(`I0', `r3') +define(`I1', `r4') +define(`I2', `r5') +define(`I3', `r6') +define(`I4', `r7') +define(`DST', `r8') +define(`SHIFT', `r10') +define(`ILEFT', `r11') + +define(`EXPN', ` ldr W, [sp, #+eval(4*$1)] ldr T0, [sp, #+eval(4*(($1 + 14) % 16))] ror T1, T0, #17 @@ -74,7 +74,7 @@ define(, < eor T1, T1, T0, lsr #3 add W, W, T1 str W, [sp, #+eval(4*$1)] ->) +') C ROUND(A,B,C,D,E,F,G,H) C @@ -89,7 +89,7 @@ C S0(A) = A<<<30 ^ A<<<19 ^ A<<<10 C Choice (E, F, G) = G^(E&(F^G)) C Majority (A,B,C) = (A&B) + (C&(A^B)) -define(, < +define(`ROUND', ` ror T0, $5, #6 eor T0, T0, $5, ror #11 eor T0, T0, $5, ror #25 @@ -111,12 +111,12 @@ define(, < eor T0, $1, $2 and T0, T0, $3 add $8, $8, T0 ->) +') -define(, < +define(`NOEXPN', ` ldr W, [sp, + $1] add $1, $1, #4 ->) +') C void C _nettle_sha256_compress(uint32_t *state, const uint8_t *input, const uint32_t *k) @@ -137,12 +137,12 @@ PROLOGUE(_nettle_sha256_compress) lsl SHIFT, SHIFT, #3 mov T0, #0 movne T0, #-1 -IF_LE(< lsl I1, T0, SHIFT>) -IF_BE(< lsr I1, T0, SHIFT>) +IF_LE(` lsl I1, T0, SHIFT') +IF_BE(` lsr I1, T0, SHIFT') uadd8 T0, T0, I1 C Sets APSR.GE bits C on BE rotate right by 32-SHIFT bits C because there is no rotate left -IF_BE(< rsb SHIFT, SHIFT, #32>) +IF_BE(` rsb SHIFT, SHIFT, #32') mov DST, sp mov ILEFT, #4 @@ -150,16 +150,16 @@ IF_BE(< rsb SHIFT, SHIFT, #32>) ldm INPUT!, {I1,I2,I3,I4} sel I0, I0, I1 ror I0, I0, SHIFT -IF_LE(< rev I0, I0>) +IF_LE(` rev I0, I0') sel I1, I1, I2 ror I1, I1, SHIFT -IF_LE(< rev I1, I1>) +IF_LE(` rev I1, I1') sel I2, I2, I3 ror I2, I2, SHIFT -IF_LE(< rev I2, I2>) +IF_LE(` rev I2, I2') sel I3, I3, I4 ror I3, I3, SHIFT -IF_LE(< rev I3, I3>) +IF_LE(` rev I3, I3') subs ILEFT, ILEFT, #1 stm DST!, {I0,I1,I2,I3} mov I0, I4