git.ipfire.org Git - thirdparty/nettle.git/commitdiff
* sparc/machine.m4 (AES_ROUND): Better scheduling, by
author Niels Möller <nisse@lysator.liu.se>
Sun, 16 Oct 2005 10:27:41 +0000 (12:27 +0200)
committer Niels Möller <nisse@lysator.liu.se>
Sun, 16 Oct 2005 10:27:41 +0000 (12:27 +0200)
interleaving independent operations.

Rev: src/nettle/sparc/aes-encrypt-internal.asm:1.7
Rev: src/nettle/sparc/machine.m4:1.7

sparc/aes-encrypt-internal.asm
sparc/machine.m4

index 8e06e03a42a940a1e33583a57a7c9e9404601ec8..e5951ad19857d7b18ef9669c70029beb97d3c8b3 100644 (file)
@@ -133,10 +133,11 @@ C A:      nettle-1.13 C-code
 C B:   nettle-1.13 assembler
 C C:   New C-code
 C D:   New assembler, first correct version
-
+C E:   New assembler, with basic scheduling of AES_ROUND.
+       
 C      MB/s    cycles/block
 C A    1.2     1107
 C B    2.3     572
 C C    2.1     627
 C D    1.8     722
-       
+C E    2.6     496
index 5f3731a078e1abd4dd4bb7785076c2fa8d7c51ed..eb9e334b40ef4ec927f5408f628865d89598823d 100644 (file)
@@ -33,34 +33,27 @@ C FIXME: Needs better instruction scheduling, and perhaps more temporaries
 C Alternatively, we can use a single table and some rotations
 define(<AES_ROUND>, <
        and     $3, 0xff, TMP1          C  0
-       sll     TMP1, 2, TMP1           C  0
-       add     TMP1, AES_TABLE0, TMP1  C  0
-       ld      [$2 + TMP1], $8         C  0
-
        srl     $4, 6, TMP2             C  1
+       sll     TMP1, 2, TMP1           C  0
        and     TMP2, 0x3fc, TMP2       C  1
+       add     TMP1, AES_TABLE0, TMP1  C  0
        add     TMP2, AES_TABLE1, TMP2  C  1
-       ld      [$2 + TMP2], TMP2       C  1
-       nop
-       xor     $8, TMP2, $8            C  1
-
+       ld      [$2 + TMP1], $8         C  0    E0
        srl     $5, 14, TMP1            C  2
+       ld      [$2 + TMP2], TMP2       C  1
        and     TMP1, 0x3fc, TMP1       C  2
-       add     TMP1, AES_TABLE2, TMP1  C  2
-       ld      [$2 + TMP1], TMP1       C  2
-       nop
-       xor     $8, TMP1, $8            C  2
-
+       xor     $8, TMP2, $8            C  1    E1
        srl     $6, 22, TMP2            C  3
+       add     TMP1, AES_TABLE2, TMP1  C  2
        and     TMP2, 0x3fc, TMP2       C  3
+       ld      [$2 + TMP1], TMP1       C  2
        add     TMP2, AES_TABLE3, TMP2  C  3
+       xor     $8, TMP1, $8            C  2    E2
+       ld      [$7 + eval(4*$1)], TMP1 C  4
        ld      [$2 + TMP2], TMP2       C  3
-       nop
+       xor     $8, TMP1, $8            C  4
        xor     $8, TMP2, $8            C  3
-
-       ld      [$7 + eval(4*$1)], TMP2 C  4
-       nop
-       xor     $8, TMP2, $8>)dnl       C  4
+>)dnl
 
 C AES_FINAL_ROUND(i, T, a, b, c, d, key, dst)
 C Compute one word in the final round function. Output is converted to