git.ipfire.org Git - thirdparty/nettle.git/commitdiff
ppc: More interleaving of chacha_4core.
author     Niels Möller <nisse@lysator.liu.se>
           Sat, 12 Dec 2020 15:46:51 +0000 (16:46 +0100)
committer  Niels Möller <nisse@lysator.liu.se>
           Sat, 12 Dec 2020 15:46:51 +0000 (16:46 +0100)
ChangeLog
powerpc64/p7/chacha-4core.asm

diff --git a/ChangeLog b/ChangeLog
index 1f2e2d40e7af8b683afa56b0ba675b9a616c4ee4..21eecdeafe87010e5b858b71c0d43d9b0f949084 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,8 @@
+2020-12-12  Niels Möller  <nisse@lysator.liu.se>
+
+       * powerpc64/p7/chacha-4core.asm: More interleaving of independent
+       instructions, gives slight speedup on Power9.
+
 2020-12-01  Niels Möller  <nisse@lysator.liu.se>
 
        * powerpc64/p7/chacha-4core.asm: Use protected zone below stack
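
For reference, the QR macro patched below implements the ChaCha quarter-round (the add/xor/rotate pattern with rotation counts 16, 12, 8 and 7, as in RFC 8439), applied to four independent register quadruples per invocation. A minimal scalar C sketch of a single quarter-round, assuming nothing beyond the standard algorithm (the names quarterround and ROTL32 are illustrative, not taken from nettle's sources):

    #include <stdint.h>

    #define ROTL32(x, n) (((x) << (n)) | ((x) >> (32 - (n))))

    /* One ChaCha quarter-round; the asm below applies this pattern
       with vector instructions (vadduwm/vxor/vrlw), four quadruples
       at a time, each vector register spanning four blocks. */
    static void
    quarterround(uint32_t *a, uint32_t *b, uint32_t *c, uint32_t *d)
    {
      *a += *b; *d ^= *a; *d = ROTL32(*d, 16);
      *c += *d; *b ^= *c; *b = ROTL32(*b, 12);
      *a += *b; *d ^= *a; *d = ROTL32(*d, 8);
      *c += *d; *b ^= *c; *b = ROTL32(*b, 7);
    }
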
diff --git a/powerpc64/p7/chacha-4core.asm b/powerpc64/p7/chacha-4core.asm
index b2330247a40cac4af0baf3465ab3bab3ddcb402f..ed1445dddacfa152318dbcbb09c44b9ff3b8a7cf 100644
--- a/powerpc64/p7/chacha-4core.asm
+++ b/powerpc64/p7/chacha-4core.asm
@@ -57,53 +57,53 @@ C Main loop for round
 define(`QR',`
        vadduwm $1, $1, $2
        vadduwm $5, $5, $6
-       vxor    $4, $4, $1
-       vxor    $8, $8, $5
-       vrlw    $4, $4, ROT16
-       vrlw    $8, $8, ROT16
        vadduwm $9, $9, $10
        vadduwm $13, $13, $14
+       vxor    $4, $4, $1
+       vxor    $8, $8, $5
        vxor    $12, $12, $9
        vxor    $16, $16, $13
+       vrlw    $4, $4, ROT16
+       vrlw    $8, $8, ROT16
        vrlw    $12, $12, ROT16
        vrlw    $16, $16, ROT16
 
        vadduwm $3, $3, $4
        vadduwm $7, $7, $8
-       vxor    $2, $2, $3
-       vxor    $6, $6, $7
-       vrlw    $2, $2, ROT12
-       vrlw    $6, $6, ROT12
        vadduwm $11, $11, $12
        vadduwm $15, $15, $16
+       vxor    $2, $2, $3
+       vxor    $6, $6, $7
        vxor    $10, $10, $11
        vxor    $14, $14, $15
+       vrlw    $2, $2, ROT12
+       vrlw    $6, $6, ROT12
        vrlw    $10, $10, ROT12
        vrlw    $14, $14, ROT12
 
        vadduwm $1, $1, $2
        vadduwm $5, $5, $6
-       vxor    $4, $4, $1
-       vxor    $8, $8, $5
-       vrlw    $4, $4, ROT8
-       vrlw    $8, $8, ROT8
        vadduwm $9, $9, $10
        vadduwm $13, $13, $14
+       vxor    $4, $4, $1
+       vxor    $8, $8, $5
        vxor    $12, $12, $9
        vxor    $16, $16, $13
+       vrlw    $4, $4, ROT8
+       vrlw    $8, $8, ROT8
        vrlw    $12, $12, ROT8
        vrlw    $16, $16, ROT8
 
        vadduwm $3, $3, $4
        vadduwm $7, $7, $8
-       vxor    $2, $2, $3
-       vxor    $6, $6, $7
-       vrlw    $2, $2, ROT7
-       vrlw    $6, $6, ROT7
        vadduwm $11, $11, $12
        vadduwm $15, $15, $16
+       vxor    $2, $2, $3
+       vxor    $6, $6, $7
        vxor    $10, $10, $11
        vxor    $14, $14, $15
+       vrlw    $2, $2, ROT7
+       vrlw    $6, $6, ROT7
        vrlw    $10, $10, ROT7
        vrlw    $14, $14, ROT7
 ')
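
The reordering above does not change what QR computes; it only groups the four independent dependency chains by operation, so each stage now issues all four vadduwm, then all four vxor, then all four vrlw, instead of finishing one pair's add/xor/rotate before starting the next. A scalar sketch of one ROT16 stage in the new order (array names are mine, for illustration only):

    #include <stdint.h>

    #define ROTL32(x, n) (((x) << (n)) | ((x) >> (32 - (n))))

    /* Post-patch ordering of a ROT16 stage: no instruction depends
       on the one immediately before it, so the four chains can
       overlap in the pipeline. */
    static void
    qr_rot16_stage(uint32_t a[4], uint32_t b[4], uint32_t d[4])
    {
      a[0] += b[0]; a[1] += b[1]; a[2] += b[2]; a[3] += b[3]; /* vadduwm group */
      d[0] ^= a[0]; d[1] ^= a[1]; d[2] ^= a[2]; d[3] ^= a[3]; /* vxor group */
      d[0] = ROTL32(d[0], 16); d[1] = ROTL32(d[1], 16);       /* vrlw group */
      d[2] = ROTL32(d[2], 16); d[3] = ROTL32(d[3], 16);
    }

Since every vxor waits on a vadduwm result and every vrlw on a vxor result, spacing dependent instructions further apart gives the scheduler more ready work each cycle, consistent with the slight Power9 speedup noted in the ChangeLog.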