C MA 02111-1307, USA.
-C Arguments
+C Arguments
define(<CTX>, <%i0>) C base address for the AES_NROUNDS load
define(<T>, <%i1>) C table base; the AES_TABLE0..AES_TABLE3 offsets are added to it
define(<LENGTH>,<%i2>) C compared with 0 on entry; zero branches to .Lend
define(<DST>, <%i3>) C presumably the output pointer; unused in the visible code, confirm
define(<SRC>, <%i4>) C input pointer passed to AES_LOAD; advanced by 16 per block
-C AES state, two copies for unrolling
+C AES state, two copies for unrolling
define(<W0>, <%l0>) C W registers are the inputs of the W -> X round transform
define(<W1>, <%l1>)
define(<X2>, <%l6>) C X registers receive the results of the W -> X round transform
define(<X3>, <%l7>)
-C %o0 and %01 are TMP1 and TMP2
+C %o0-%o3 are used for loop invariants T0-T3
define(<KEY>, <%o4>) C subkey pointer passed to AES_LOAD and AES_ROUND; advanced by 16 after the initial subkey is added
define(<ROUND>, <%o5>) C round count: loaded from AES_NROUNDS + CTX, halved, then reduced by 1 before .Lround_loop
+C %g1 and %g2 are TMP1 and TMP2
+
+
C Registers %g1-%g3 and %o0 - %o5 are free to use.
C The sparc32 stack frame looks like
save %sp, -FRAME_SIZE, %sp
cmp LENGTH, 0
be .Lend
- nop
+
+ C Loop invariants
+ add T, AES_TABLE0, T0
+ add T, AES_TABLE1, T1
+ add T, AES_TABLE2, T2
+ add T, AES_TABLE3, T3
.Lblock_loop:
C Read src, and add initial subkey
AES_LOAD(2, SRC, KEY, W2)
AES_LOAD(3, SRC, KEY, W3)
+ C Must be even, and includes the final round
+ ld [AES_NROUNDS + CTX], ROUND
add SRC, 16, SRC
add KEY, 16, KEY
- C Must be even, and includes the final round
- ld [AES_NROUNDS + CTX], ROUND
- nop
srl ROUND, 1, ROUND
C Last two rounds handled specially
sub ROUND, 1, ROUND
.Lround_loop:
+ C The AES_ROUND macro uses T0,... T3
C Transform W -> X
AES_ROUND(0, T, W0, W1, W2, W3, KEY, X0)
AES_ROUND(1, T, W1, W2, W3, W0, KEY, X1)
C eval.
C Used as temporaries by the AES macros
-define(<TMP1>, <%o0>)
-define(<TMP2>, <%o1>)
+define(<TMP1>, <%g1>)
+define(<TMP2>, <%g2>)
+
+C Loop invariants used by AES_ROUND
+define(<T0>, <%o0>)
+define(<T1>, <%o1>)
+define(<T2>, <%o2>)
+define(<T3>, <%o3>)
C AES_LOAD(i, src, key, res)
define(<AES_LOAD>, <
srl $4, 6, TMP2 C 1
sll TMP1, 2, TMP1 C 0
and TMP2, 0x3fc, TMP2 C 1
- add TMP1, AES_TABLE0, TMP1 C 0
- add TMP2, AES_TABLE1, TMP2 C 1
- ld [$2 + TMP1], $8 C 0 E0
+ ld [T0 + TMP1], $8 C 0 E0
srl $5, 14, TMP1 C 2
- ld [$2 + TMP2], TMP2 C 1
+ ld [T1 + TMP2], TMP2 C 1
and TMP1, 0x3fc, TMP1 C 2
xor $8, TMP2, $8 C 1 E1
srl $6, 22, TMP2 C 3
- add TMP1, AES_TABLE2, TMP1 C 2
+ ld [T2 + TMP1], TMP1 C 2
and TMP2, 0x3fc, TMP2 C 3
- ld [$2 + TMP1], TMP1 C 2
- add TMP2, AES_TABLE3, TMP2 C 3
xor $8, TMP1, $8 C 2 E2
ld [$7 + eval(4*$1)], TMP1 C 4
- ld [$2 + TMP2], TMP2 C 3
- xor $8, TMP1, $8 C 4
- xor $8, TMP2, $8 C 3
+ ld [T3 + TMP2], TMP2 C 3
+ xor $8, TMP1, $8 C 4 E4
+ xor $8, TMP2, $8 C 3 E3
>)dnl
C AES_FINAL_ROUND(i, T, a, b, c, d, key, dst)