git.ipfire.org Git - thirdparty/nettle.git/commitdiff
(_aes_crypt): Unrolled the inner loop, preparing
author Niels Möller <nisse@lysator.liu.se>
Wed, 15 May 2002 19:30:41 +0000 (21:30 +0200)
committer Niels Möller <nisse@lysator.liu.se>
Wed, 15 May 2002 19:30:41 +0000 (21:30 +0200)
for optimizations suggested by Marcus Comstedt.

Rev: src/nettle/sparc/aes.asm:1.78

sparc/aes.asm

index 7a4f0816465285c1b74348d4ff5a3063a5871772..d1c55e84b338a71699dded5ae2d1aab71c5de73a 100644 (file)
@@ -83,6 +83,7 @@ _aes_crypt:
        add     %fp, -24, wtxt
        
        add     %fp, -40, tmp
+
        ld      [ctx + AES_NROUNDS], nrounds
        ! Compute xor, so that we can swap efficiently.
        xor     wtxt, tmp, diff
@@ -142,7 +143,46 @@ _aes_crypt:
        !
        ! The code uses the register %o[j], aka tj, as the primary 
        ! register for that sub-expression. True for j==1,3.
+
+       C i = 0
+       ld      [IDX1+i], t1            ! 1
+       
+       ! IDX2(j) = j XOR 2
+       xor     i, 8, t2
+       add     wtxt, t1, t1            ! 1
+       ldub    [t1+2], t1              ! 1
+       ld      [IDX3+i], t3            ! 3
+       
+       sll     t1, 2, t1               ! 1
+       ld      [wtxt+i], t0            ! 0
+       lduh    [wtxt+t2], t2           ! 2
+       and     t0, 255, t0             ! 0
+       
+       ldub    [wtxt+t3], t3           ! 3
+       sll     t0, 2, t0               ! 0
+       ld      [T0+t0], t0             ! 0
+       and     t2, 255, t2             ! 2
+       
+       ld      [T1+t1], t1             ! 1
+       sll     t2, 2, t2               ! 2
+       ld      [T2+t2], t2             ! 2
+       sll     t3, 2, t3               ! 3
+       
+       ld      [T3+t3], t3             ! 3
+       xor     t0, t1, t0              ! 0, 1
+       xor     t0, t2, t0              ! 0, 1, 2
+       ! Fetch roundkey
+       ld      [key+i], t1
+       
+       xor     t0, t3, t0              ! 0, 1, 2, 3
+       xor     t0, t1, t0
+       st      t0, [tmp+i]
+       C cmp   i, 8
        
+       C bleu  .Linner_loop
+       add     i, 4, i
+
+       C i = 1
        ld      [IDX1+i], t1            ! 1
        
        ! IDX2(j) = j XOR 2
@@ -175,10 +215,89 @@ _aes_crypt:
        xor     t0, t3, t0              ! 0, 1, 2, 3
        xor     t0, t1, t0
        st      t0, [tmp+i]
-       cmp     i, 8
+       C cmp   i, 8
        
-       bleu    .Linner_loop
+       C bleu  .Linner_loop
        add     i, 4, i
+
+       C i = 2
+       ld      [IDX1+i], t1            ! 1
+       
+       ! IDX2(j) = j XOR 2
+       xor     i, 8, t2
+       add     wtxt, t1, t1            ! 1
+       ldub    [t1+2], t1              ! 1
+       ld      [IDX3+i], t3            ! 3
+       
+       sll     t1, 2, t1               ! 1
+       ld      [wtxt+i], t0            ! 0
+       lduh    [wtxt+t2], t2           ! 2
+       and     t0, 255, t0             ! 0
+       
+       ldub    [wtxt+t3], t3           ! 3
+       sll     t0, 2, t0               ! 0
+       ld      [T0+t0], t0             ! 0
+       and     t2, 255, t2             ! 2
+       
+       ld      [T1+t1], t1             ! 1
+       sll     t2, 2, t2               ! 2
+       ld      [T2+t2], t2             ! 2
+       sll     t3, 2, t3               ! 3
+       
+       ld      [T3+t3], t3             ! 3
+       xor     t0, t1, t0              ! 0, 1
+       xor     t0, t2, t0              ! 0, 1, 2
+       ! Fetch roundkey
+       ld      [key+i], t1
+       
+       xor     t0, t3, t0              ! 0, 1, 2, 3
+       xor     t0, t1, t0
+       st      t0, [tmp+i]
+       C cmp   i, 8
+       
+       C bleu  .Linner_loop
+       add     i, 4, i
+
+       C i = 3
+       ld      [IDX1+i], t1            ! 1
+       
+       ! IDX2(j) = j XOR 2
+       xor     i, 8, t2
+       add     wtxt, t1, t1            ! 1
+       ldub    [t1+2], t1              ! 1
+       ld      [IDX3+i], t3            ! 3
+       
+       sll     t1, 2, t1               ! 1
+       ld      [wtxt+i], t0            ! 0
+       lduh    [wtxt+t2], t2           ! 2
+       and     t0, 255, t0             ! 0
+       
+       ldub    [wtxt+t3], t3           ! 3
+       sll     t0, 2, t0               ! 0
+       ld      [T0+t0], t0             ! 0
+       and     t2, 255, t2             ! 2
+       
+       ld      [T1+t1], t1             ! 1
+       sll     t2, 2, t2               ! 2
+       ld      [T2+t2], t2             ! 2
+       sll     t3, 2, t3               ! 3
+       
+       ld      [T3+t3], t3             ! 3
+       xor     t0, t1, t0              ! 0, 1
+       xor     t0, t2, t0              ! 0, 1, 2
+       ! Fetch roundkey
+       ld      [key+i], t1
+       
+       xor     t0, t3, t0              ! 0, 1, 2, 3
+       xor     t0, t1, t0
+       st      t0, [tmp+i]
+       C cmp   i, 8
+       
+       C bleu  .Linner_loop
+       add     i, 4, i
+                       
+       C End loop
+       
        ! switch roles for tmp and wtxt
        xor     wtxt, diff, wtxt
        xor     tmp, diff, tmp
@@ -247,7 +366,7 @@ define(i, round)
        sub     wtxt, src, %g3
 
 .Lend:
-       add     %sp, FRAME_SIZE, %fp
+       C add   %sp, FRAME_SIZE, %fp
        ret
        restore
 .LLFE1: