* sparc/machine.m4 (AES_FINAL_ROUND): Better scheduling, by

author Niels Möller <nisse@lysator.liu.se>

Sun, 16 Oct 2005 12:24:13 +0000 (14:24 +0200)

committer Niels Möller <nisse@lysator.liu.se>

Sun, 16 Oct 2005 12:24:13 +0000 (14:24 +0200)
author Niels Möller <nisse@lysator.liu.se>
Sun, 16 Oct 2005 12:24:13 +0000 (14:24 +0200)
committer Niels Möller <nisse@lysator.liu.se>
Sun, 16 Oct 2005 12:24:13 +0000 (14:24 +0200)
diff --git a/sparc/aes-encrypt-internal.asm b/sparc/aes-encrypt-internal.asm

index 09964b69260d4e77cbc2f688b75af40cd981019e..58beb4df41824a477af45ef53027e3738ea85432 100644 (file)
--- a/sparc/aes-encrypt-internal.asm
+++ b/sparc/aes-encrypt-internal.asm
@@ -137,15 +137,19 @@ EPILOGUE(_nettle_aes_encrypt)
  
  C Some stats from adriana.lysator.liu.se (SS1000$, 85 MHz), for AES 128
  
-C A:   nettle-1.13 C-code
-C B:   nettle-1.13 assembler
-C C:   New C-code
-C D:   New assembler, first correct version
-C E:   New assembler, with basic scheduling of AES_ROUND.
+C 1:   nettle-1.13 C-code
+C 2:   nettle-1.13 assembler
+C 3:   New C-code
+C 4:   New assembler, first correct version
+C 5:   New assembler, with basic scheduling of AES_ROUND.
+C 6:   New assembler, with loop invariants T0-T3.
+C 7:   New assembler, with basic scheduling also of AES_FINAL_ROUND.
         
  C      MB/s    cycles/block
-C A    1.2     1107
-C B    2.3     572
-C C    2.1     627
-C D    1.8     722
-C E    2.6     496
+C 1    1.2     1107
+C 2    2.3     572
+C 3    2.1     627
+C 4    1.8     722
+C 5    2.6     496
+C 6    3.0     437
+C 7    3.1     415
diff --git a/sparc/machine.m4 b/sparc/machine.m4

index adc205e22e7e106b0176828a1428a73613eafc13..0817327aa96fc6817ea874f24b280d7261441f88 100644 (file)
--- a/sparc/machine.m4
+++ b/sparc/machine.m4
@@ -66,29 +66,25 @@ define(<AES_FINAL_ROUND>, <
         ld      [$7 + eval(4*$1)], TMP3
  
         and     $3, 0xff, TMP1          C  0
-       ldub    [T + TMP1], TMP1        C  0
-       nop
-       xor     TMP3, TMP1, TMP1        C  0
-       stb     TMP1, [$8 + eval(4*$1)] C  0
-       
         srl     $4, 8, TMP2             C  1
+       ldub    [T + TMP1], TMP1        C  0
         and     TMP2, 0xff, TMP2        C  1
+       xor     TMP3, TMP1, TMP1        C  0
         ldub    [T + TMP2], TMP2        C  1
-       srl     TMP3, 8, TMP3           C  1
-       xor     TMP3, TMP2, TMP2        C  1
-       stb     TMP2, [$8 + eval(4*$1 + 1)]     C  1
-
+       stb     TMP1, [$8 + eval(4*$1)] C  0    E0
         srl     $5, 16, TMP1            C  2
+       srl     TMP3, 8, TMP3           C  1
         and     TMP1, 0xff, TMP1        C  2
+       xor     TMP3, TMP2, TMP2        C  1
         ldub    [T + TMP1], TMP1        C  2
-       srl     TMP3, 8, TMP3           C  2
-       xor     TMP3, TMP1, TMP1        C  2
-       stb     TMP1, [$8 + eval(4*$1 + 2)]     C  2
-
+       stb     TMP2, [$8 + eval(4*$1 + 1)]     C  1    E1
         srl     $6, 24, TMP2            C  3
+       srl     TMP3, 8, TMP3           C  2
         ldub    [T + TMP2], TMP2        C  3
+       xor     TMP3, TMP1, TMP1        C  2
         srl     TMP3, 8, TMP3           C  3
+       stb     TMP1, [$8 + eval(4*$1 + 2)]     C  2    E2
         xor     TMP3, TMP2, TMP2        C  3
-       stb     TMP2, [$8 + eval(4*$1 + 3)]     C  3
+       stb     TMP2, [$8 + eval(4*$1 + 3)]     C  3    E3
  >)
author	Niels Möller <nisse@lysator.liu.se>
	Sun, 16 Oct 2005 12:24:13 +0000 (14:24 +0200)
committer	Niels Möller <nisse@lysator.liu.se>
	Sun, 16 Oct 2005 12:24:13 +0000 (14:24 +0200)
sparc/aes-encrypt-internal.asm		patch | blob | blame | history
sparc/machine.m4		patch | blob | blame | history