From: Niels Möller
Date: Sat, 12 Dec 2020 15:46:51 +0000 (+0100)
Subject: ppc: More interleaving of chacha_4core.
X-Git-Tag: nettle_3.7rc1~12
X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=539efc9b997dfba757cacbbc512bc0a216ffe743;p=thirdparty%2Fnettle.git

ppc: More interleaving of chacha_4core.
---

diff --git a/ChangeLog b/ChangeLog
index 1f2e2d40..21eecdea 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,8 @@
+2020-12-12 Niels Möller
+
+	* powerpc64/p7/chacha-4core.asm: More interleaving of independent
+	instructions, gives slight speedup on Power9.
+
 2020-12-01 Niels Möller
 
 	* powerpc64/p7/chacha-4core.asm: Use protected zone below stack
diff --git a/powerpc64/p7/chacha-4core.asm b/powerpc64/p7/chacha-4core.asm
index b2330247..ed1445dd 100644
--- a/powerpc64/p7/chacha-4core.asm
+++ b/powerpc64/p7/chacha-4core.asm
@@ -57,53 +57,53 @@ C Main loop for round
 define(`QR',`
 	vadduwm $1, $1, $2
 	vadduwm $5, $5, $6
-	vxor $4, $4, $1
-	vxor $8, $8, $5
-	vrlw $4, $4, ROT16
-	vrlw $8, $8, ROT16
 	vadduwm $9, $9, $10
 	vadduwm $13, $13, $14
+	vxor $4, $4, $1
+	vxor $8, $8, $5
 	vxor $12, $12, $9
 	vxor $16, $16, $13
+	vrlw $4, $4, ROT16
+	vrlw $8, $8, ROT16
 	vrlw $12, $12, ROT16
 	vrlw $16, $16, ROT16
 
 	vadduwm $3, $3, $4
 	vadduwm $7, $7, $8
-	vxor $2, $2, $3
-	vxor $6, $6, $7
-	vrlw $2, $2, ROT12
-	vrlw $6, $6, ROT12
 	vadduwm $11, $11, $12
 	vadduwm $15, $15, $16
+	vxor $2, $2, $3
+	vxor $6, $6, $7
 	vxor $10, $10, $11
 	vxor $14, $14, $15
+	vrlw $2, $2, ROT12
+	vrlw $6, $6, ROT12
 	vrlw $10, $10, ROT12
 	vrlw $14, $14, ROT12
 
 	vadduwm $1, $1, $2
 	vadduwm $5, $5, $6
-	vxor $4, $4, $1
-	vxor $8, $8, $5
-	vrlw $4, $4, ROT8
-	vrlw $8, $8, ROT8
 	vadduwm $9, $9, $10
 	vadduwm $13, $13, $14
+	vxor $4, $4, $1
+	vxor $8, $8, $5
 	vxor $12, $12, $9
 	vxor $16, $16, $13
+	vrlw $4, $4, ROT8
+	vrlw $8, $8, ROT8
 	vrlw $12, $12, ROT8
 	vrlw $16, $16, ROT8
 
 	vadduwm $3, $3, $4
 	vadduwm $7, $7, $8
-	vxor $2, $2, $3
-	vxor $6, $6, $7
-	vrlw $2, $2, ROT7
-	vrlw $6, $6, ROT7
 	vadduwm $11, $11, $12
 	vadduwm $15, $15, $16
+	vxor $2, $2, $3
+	vxor $6, $6, $7
 	vxor $10, $10, $11
 	vxor $14, $14, $15
+	vrlw $2, $2, ROT7
+	vrlw $6, $6, ROT7
 	vrlw $10, $10, ROT7
 	vrlw $14, $14, ROT7
 ')