]> git.ipfire.org Git - thirdparty/openssl.git/blame - crypto/aes/asm/aes-sparcv9.pl
Doc nits cleanup, round 2
[thirdparty/openssl.git] / crypto / aes / asm / aes-sparcv9.pl
CommitLineData
7395d852
AP
1#!/usr/bin/env perl
2#
3# ====================================================================
4# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
5# project. Rights for redistribution and usage in source and binary
6# forms are granted according to the OpenSSL license.
7# ====================================================================
8#
985e4c41 9# Version 1.1
7395d852
AP
10#
11# The major reason for undertaking this effort was to mitigate the hazard of
12# cache-timing attacks. This is [currently and initially!] addressed in
13# two ways. 1. S-boxes are compressed from 5KB to 2KB+256B size each.
14# 2. References to them are scheduled for L2 cache latency, meaning
15# that the tables don't have to reside in L1 cache. Once again, this
16# is an initial draft and one should expect more countermeasures to
17# be implemented...
18#
985e4c41
AP
19# Version 1.1 prefetches T[ed]4 in order to mitigate attack on last
20# round.
21#
7395d852
AP
22# Even though performance was not the primary goal [on the contrary,
23# extra shifts "induced" by compressed S-box and longer loop epilogue
24# "induced" by scheduling for L2 have negative effect on performance],
25# the code turned out to run in ~23 cycles per processed byte en-/
26# decrypted with a 128-bit key. This is a pretty good result for code
27# with the mentioned qualities and an UltraSPARC core. Compared to Sun C
28# generated code, my encrypt procedure runs just a few percent faster,
29# while the decrypt one is a whole 50% faster [yes, Sun C failed to generate
30# optimal decrypt procedure]. Compared to GNU C generated code both
31# procedures are more than 60% faster:-)
32
eb77e888
AP
# The output file name is taken from the end of the argument list.
# STDOUT is redirected to it so that the final "print $code" lands in
# that file.  Three-argument open keeps characters in the name from being
# interpreted as an open mode, and a failed open is fatal rather than
# silently discarding the generated assembly.  When no name is supplied
# the script keeps writing to the STDOUT it inherited.
$output = pop;
if (defined $output) {
    open STDOUT, '>', $output or die "can't open $output: $!";
}

# Symbolic names substituted verbatim into the generated assembly
# (presumably macros provided by sparc_arch.h, which the output
# #includes -- confirm against that header).
$frame = "STACK_FRAME";
$bias  = "STACK_BIAS";
7395d852
AP
# Bytes of private scratch space reserved on top of the standard frame;
# the round routines park their return address there.
$locals = 16;

# Sixteen accumulators receiving the table look-ups of one round,
# four per output word.
($acc0,  $acc1,  $acc2,  $acc3)  = qw(%l0 %o0 %o1 %o2);
($acc4,  $acc5,  $acc6,  $acc7)  = qw(%l1 %o3 %o4 %o5);
($acc8,  $acc9,  $acc10, $acc11) = qw(%l2 %o7 %g1 %g2);
($acc12, $acc13, $acc14, $acc15) = qw(%l3 %g3 %g4 %g5);

# Temporaries: round-key words and the intermediate state.
($t0, $t1, $t2, $t3) = qw(%l4 %l5 %l6 %l7);

# The four state words, the table base and the key schedule pointer.
($s0, $s1, $s2, $s3) = qw(%i0 %i1 %i2 %i3);
($tbl, $key)         = qw(%i4 %i5);

# %i7 normally holds the return address; it is saved to the stack so the
# register can double as the round counter.
$rounds = '%i7';
72
# _data_word(WORD, ...)
#
# Appends one assembler line per argument to the global $code buffer.
# Each 32-bit WORD is written twice on its line, so every table entry
# occupies eight bytes.  Processing stops at the first undefined
# argument.  Returns nothing useful; callers use it for its side effect
# on $code.
#
# The sub deliberately has no prototype: it takes a variable-length
# argument list, and an empty "()" prototype would reject every call
# that is not written with a leading "&".
sub _data_word
{
    while (defined(my $word = shift)) {
        $code .= sprintf "\t.long\t0x%08x,0x%08x\n", $word, $word;
    }
    return;
}
77
eb77e888
AP
# Assembly prologue.  Pulls in sparc_arch.h, declares %g2 and %g3 as
# scratch registers for 64-bit builds (they serve as $acc11 and $acc13),
# opens the .text section and places the AES_Te label on a 256-byte
# boundary.  The table contents are appended by the statements that follow.
78$code.=<<___;
79#include "sparc_arch.h"
80
81#ifdef __arch64__
7395d852
AP
82.register %g2,#scratch
83.register %g3,#scratch
eb77e888 84#endif
7395d852
AP
85.section ".text",#alloc,#execinstr
86
985e4c41 87.align 256
7395d852
AP
88AES_Te:
89___
# Body of AES_Te: 256 words, each written twice by _data_word, so the
# table is 256 eight-byte entries (2KB).  The round code masks its
# indices with 2040 and fetches whole entries with ldx.
90&_data_word(
91 0xc66363a5, 0xf87c7c84, 0xee777799, 0xf67b7b8d,
92 0xfff2f20d, 0xd66b6bbd, 0xde6f6fb1, 0x91c5c554,
93 0x60303050, 0x02010103, 0xce6767a9, 0x562b2b7d,
94 0xe7fefe19, 0xb5d7d762, 0x4dababe6, 0xec76769a,
95 0x8fcaca45, 0x1f82829d, 0x89c9c940, 0xfa7d7d87,
96 0xeffafa15, 0xb25959eb, 0x8e4747c9, 0xfbf0f00b,
97 0x41adadec, 0xb3d4d467, 0x5fa2a2fd, 0x45afafea,
98 0x239c9cbf, 0x53a4a4f7, 0xe4727296, 0x9bc0c05b,
99 0x75b7b7c2, 0xe1fdfd1c, 0x3d9393ae, 0x4c26266a,
100 0x6c36365a, 0x7e3f3f41, 0xf5f7f702, 0x83cccc4f,
101 0x6834345c, 0x51a5a5f4, 0xd1e5e534, 0xf9f1f108,
102 0xe2717193, 0xabd8d873, 0x62313153, 0x2a15153f,
103 0x0804040c, 0x95c7c752, 0x46232365, 0x9dc3c35e,
104 0x30181828, 0x379696a1, 0x0a05050f, 0x2f9a9ab5,
105 0x0e070709, 0x24121236, 0x1b80809b, 0xdfe2e23d,
106 0xcdebeb26, 0x4e272769, 0x7fb2b2cd, 0xea75759f,
107 0x1209091b, 0x1d83839e, 0x582c2c74, 0x341a1a2e,
108 0x361b1b2d, 0xdc6e6eb2, 0xb45a5aee, 0x5ba0a0fb,
109 0xa45252f6, 0x763b3b4d, 0xb7d6d661, 0x7db3b3ce,
110 0x5229297b, 0xdde3e33e, 0x5e2f2f71, 0x13848497,
111 0xa65353f5, 0xb9d1d168, 0x00000000, 0xc1eded2c,
112 0x40202060, 0xe3fcfc1f, 0x79b1b1c8, 0xb65b5bed,
113 0xd46a6abe, 0x8dcbcb46, 0x67bebed9, 0x7239394b,
114 0x944a4ade, 0x984c4cd4, 0xb05858e8, 0x85cfcf4a,
115 0xbbd0d06b, 0xc5efef2a, 0x4faaaae5, 0xedfbfb16,
116 0x864343c5, 0x9a4d4dd7, 0x66333355, 0x11858594,
117 0x8a4545cf, 0xe9f9f910, 0x04020206, 0xfe7f7f81,
118 0xa05050f0, 0x783c3c44, 0x259f9fba, 0x4ba8a8e3,
119 0xa25151f3, 0x5da3a3fe, 0x804040c0, 0x058f8f8a,
120 0x3f9292ad, 0x219d9dbc, 0x70383848, 0xf1f5f504,
121 0x63bcbcdf, 0x77b6b6c1, 0xafdada75, 0x42212163,
122 0x20101030, 0xe5ffff1a, 0xfdf3f30e, 0xbfd2d26d,
123 0x81cdcd4c, 0x180c0c14, 0x26131335, 0xc3ecec2f,
124 0xbe5f5fe1, 0x359797a2, 0x884444cc, 0x2e171739,
125 0x93c4c457, 0x55a7a7f2, 0xfc7e7e82, 0x7a3d3d47,
126 0xc86464ac, 0xba5d5de7, 0x3219192b, 0xe6737395,
127 0xc06060a0, 0x19818198, 0x9e4f4fd1, 0xa3dcdc7f,
128 0x44222266, 0x542a2a7e, 0x3b9090ab, 0x0b888883,
129 0x8c4646ca, 0xc7eeee29, 0x6bb8b8d3, 0x2814143c,
130 0xa7dede79, 0xbc5e5ee2, 0x160b0b1d, 0xaddbdb76,
131 0xdbe0e03b, 0x64323256, 0x743a3a4e, 0x140a0a1e,
132 0x924949db, 0x0c06060a, 0x4824246c, 0xb85c5ce4,
133 0x9fc2c25d, 0xbdd3d36e, 0x43acacef, 0xc46262a6,
134 0x399191a8, 0x319595a4, 0xd3e4e437, 0xf279798b,
135 0xd5e7e732, 0x8bc8c843, 0x6e373759, 0xda6d6db7,
136 0x018d8d8c, 0xb1d5d564, 0x9c4e4ed2, 0x49a9a9e0,
137 0xd86c6cb4, 0xac5656fa, 0xf3f4f407, 0xcfeaea25,
138 0xca6565af, 0xf47a7a8e, 0x47aeaee9, 0x10080818,
139 0x6fbabad5, 0xf0787888, 0x4a25256f, 0x5c2e2e72,
140 0x381c1c24, 0x57a6a6f1, 0x73b4b4c7, 0x97c6c651,
141 0xcbe8e823, 0xa1dddd7c, 0xe874749c, 0x3e1f1f21,
142 0x964b4bdd, 0x61bdbddc, 0x0d8b8b86, 0x0f8a8a85,
143 0xe0707090, 0x7c3e3e42, 0x71b5b5c4, 0xcc6666aa,
144 0x904848d8, 0x06030305, 0xf7f6f601, 0x1c0e0e12,
145 0xc26161a3, 0x6a35355f, 0xae5757f9, 0x69b9b9d0,
146 0x17868691, 0x99c1c158, 0x3a1d1d27, 0x279e9eb9,
147 0xd9e1e138, 0xebf8f813, 0x2b9898b3, 0x22111133,
148 0xd26969bb, 0xa9d9d970, 0x078e8e89, 0x339494a7,
149 0x2d9b9bb6, 0x3c1e1e22, 0x15878792, 0xc9e9e920,
150 0x87cece49, 0xaa5555ff, 0x50282878, 0xa5dfdf7a,
151 0x038c8c8f, 0x59a1a1f8, 0x09898980, 0x1a0d0d17,
152 0x65bfbfda, 0xd7e6e631, 0x844242c6, 0xd06868b8,
153 0x824141c3, 0x299999b0, 0x5a2d2d77, 0x1e0f0f11,
154 0x7bb0b0cb, 0xa85454fc, 0x6dbbbbd6, 0x2c16163a);
# Remainder of the AES_Te object plus the encryption code:
#  - 256 one-byte entries placed directly after the 2KB table; the last
#    round reads them through $tbl+2048 and the loop prefetches them
#    ("te4" in the assembly comments);
#  - _sparcv9_AES_encrypt, the internal routine working on the state in
#    $s0-$s3 with the table in $tbl and the key schedule in $key;
#  - AES_encrypt, the global entry point, which checks the low two bits
#    of both pointer arguments and uses word loads/stores when they are
#    clear, byte loads/stores otherwise.
155$code.=<<___;
156 .byte 0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5
157 .byte 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76
158 .byte 0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0
159 .byte 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0
160 .byte 0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc
161 .byte 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15
162 .byte 0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a
163 .byte 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75
164 .byte 0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0
165 .byte 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84
166 .byte 0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b
167 .byte 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf
168 .byte 0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85
169 .byte 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8
170 .byte 0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5
171 .byte 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2
172 .byte 0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17
173 .byte 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73
174 .byte 0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88
175 .byte 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb
176 .byte 0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c
177 .byte 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79
178 .byte 0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9
179 .byte 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08
180 .byte 0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6
181 .byte 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a
182 .byte 0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e
183 .byte 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e
184 .byte 0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94
185 .byte 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf
186 .byte 0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68
187 .byte 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16
188.type AES_Te,#object
189.size AES_Te,(.-AES_Te)
190
191.align 64
192.skip 16
193_sparcv9_AES_encrypt:
194 save %sp,-$frame-$locals,%sp
195 stx %i7,[%sp+$bias+$frame+0] ! off-load return address
196 ld [$key+240],$rounds
197 ld [$key+0],$t0
198 ld [$key+4],$t1 !
199 ld [$key+8],$t2
200 srl $rounds,1,$rounds
201 xor $t0,$s0,$s0
202 ld [$key+12],$t3
203 srl $s0,21,$acc0
204 xor $t1,$s1,$s1
205 ld [$key+16],$t0
206 srl $s1,13,$acc1 !
207 xor $t2,$s2,$s2
208 ld [$key+20],$t1
209 xor $t3,$s3,$s3
210 ld [$key+24],$t2
211 and $acc0,2040,$acc0
212 ld [$key+28],$t3
213 nop
214.Lenc_loop:
215 srl $s2,5,$acc2 !
216 and $acc1,2040,$acc1
217 ldx [$tbl+$acc0],$acc0
218 sll $s3,3,$acc3
219 and $acc2,2040,$acc2
220 ldx [$tbl+$acc1],$acc1
221 srl $s1,21,$acc4
222 and $acc3,2040,$acc3
223 ldx [$tbl+$acc2],$acc2 !
224 srl $s2,13,$acc5
225 and $acc4,2040,$acc4
226 ldx [$tbl+$acc3],$acc3
227 srl $s3,5,$acc6
228 and $acc5,2040,$acc5
229 ldx [$tbl+$acc4],$acc4
230 fmovs %f0,%f0
231 sll $s0,3,$acc7 !
232 and $acc6,2040,$acc6
233 ldx [$tbl+$acc5],$acc5
234 srl $s2,21,$acc8
235 and $acc7,2040,$acc7
236 ldx [$tbl+$acc6],$acc6
237 srl $s3,13,$acc9
238 and $acc8,2040,$acc8
239 ldx [$tbl+$acc7],$acc7 !
240 srl $s0,5,$acc10
241 and $acc9,2040,$acc9
242 ldx [$tbl+$acc8],$acc8
243 sll $s1,3,$acc11
244 and $acc10,2040,$acc10
245 ldx [$tbl+$acc9],$acc9
246 fmovs %f0,%f0
247 srl $s3,21,$acc12 !
248 and $acc11,2040,$acc11
249 ldx [$tbl+$acc10],$acc10
250 srl $s0,13,$acc13
251 and $acc12,2040,$acc12
252 ldx [$tbl+$acc11],$acc11
253 srl $s1,5,$acc14
254 and $acc13,2040,$acc13
255 ldx [$tbl+$acc12],$acc12 !
256 sll $s2,3,$acc15
257 and $acc14,2040,$acc14
258 ldx [$tbl+$acc13],$acc13
259 and $acc15,2040,$acc15
260 add $key,32,$key
261 ldx [$tbl+$acc14],$acc14
262 fmovs %f0,%f0
263 subcc $rounds,1,$rounds !
264 ldx [$tbl+$acc15],$acc15
265 bz,a,pn %icc,.Lenc_last
266 add $tbl,2048,$rounds
267
268 srlx $acc1,8,$acc1
269 xor $acc0,$t0,$t0
270 ld [$key+0],$s0
271 fmovs %f0,%f0
272 srlx $acc2,16,$acc2 !
273 xor $acc1,$t0,$t0
274 ld [$key+4],$s1
275 srlx $acc3,24,$acc3
276 xor $acc2,$t0,$t0
277 ld [$key+8],$s2
278 srlx $acc5,8,$acc5
279 xor $acc3,$t0,$t0
280 ld [$key+12],$s3 !
281 srlx $acc6,16,$acc6
282 xor $acc4,$t1,$t1
283 fmovs %f0,%f0
284 srlx $acc7,24,$acc7
285 xor $acc5,$t1,$t1
286 srlx $acc9,8,$acc9
287 xor $acc6,$t1,$t1
288 srlx $acc10,16,$acc10 !
289 xor $acc7,$t1,$t1
290 srlx $acc11,24,$acc11
291 xor $acc8,$t2,$t2
292 srlx $acc13,8,$acc13
293 xor $acc9,$t2,$t2
294 srlx $acc14,16,$acc14
295 xor $acc10,$t2,$t2
296 srlx $acc15,24,$acc15 !
297 xor $acc11,$t2,$t2
298 xor $acc12,$acc14,$acc14
299 xor $acc13,$t3,$t3
300 srl $t0,21,$acc0
301 xor $acc14,$t3,$t3
302 srl $t1,13,$acc1
303 xor $acc15,$t3,$t3
304
305 and $acc0,2040,$acc0 !
306 srl $t2,5,$acc2
307 and $acc1,2040,$acc1
308 ldx [$tbl+$acc0],$acc0
309 sll $t3,3,$acc3
310 and $acc2,2040,$acc2
311 ldx [$tbl+$acc1],$acc1
312 fmovs %f0,%f0
313 srl $t1,21,$acc4 !
314 and $acc3,2040,$acc3
315 ldx [$tbl+$acc2],$acc2
316 srl $t2,13,$acc5
317 and $acc4,2040,$acc4
318 ldx [$tbl+$acc3],$acc3
319 srl $t3,5,$acc6
320 and $acc5,2040,$acc5
321 ldx [$tbl+$acc4],$acc4 !
322 sll $t0,3,$acc7
323 and $acc6,2040,$acc6
324 ldx [$tbl+$acc5],$acc5
325 srl $t2,21,$acc8
326 and $acc7,2040,$acc7
327 ldx [$tbl+$acc6],$acc6
328 fmovs %f0,%f0
329 srl $t3,13,$acc9 !
330 and $acc8,2040,$acc8
331 ldx [$tbl+$acc7],$acc7
332 srl $t0,5,$acc10
333 and $acc9,2040,$acc9
334 ldx [$tbl+$acc8],$acc8
335 sll $t1,3,$acc11
336 and $acc10,2040,$acc10
337 ldx [$tbl+$acc9],$acc9 !
338 srl $t3,21,$acc12
339 and $acc11,2040,$acc11
340 ldx [$tbl+$acc10],$acc10
341 srl $t0,13,$acc13
342 and $acc12,2040,$acc12
343 ldx [$tbl+$acc11],$acc11
344 fmovs %f0,%f0
345 srl $t1,5,$acc14 !
346 and $acc13,2040,$acc13
347 ldx [$tbl+$acc12],$acc12
348 sll $t2,3,$acc15
349 and $acc14,2040,$acc14
350 ldx [$tbl+$acc13],$acc13
351 srlx $acc1,8,$acc1
352 and $acc15,2040,$acc15
353 ldx [$tbl+$acc14],$acc14 !
354
355 srlx $acc2,16,$acc2
356 xor $acc0,$s0,$s0
357 ldx [$tbl+$acc15],$acc15
358 srlx $acc3,24,$acc3
359 xor $acc1,$s0,$s0
360 ld [$key+16],$t0
361 fmovs %f0,%f0
362 srlx $acc5,8,$acc5 !
363 xor $acc2,$s0,$s0
364 ld [$key+20],$t1
365 srlx $acc6,16,$acc6
366 xor $acc3,$s0,$s0
367 ld [$key+24],$t2
368 srlx $acc7,24,$acc7
369 xor $acc4,$s1,$s1
370 ld [$key+28],$t3 !
371 srlx $acc9,8,$acc9
372 xor $acc5,$s1,$s1
985e4c41 373 ldx [$tbl+2048+0],%g0 ! prefetch te4
7395d852
AP
374 srlx $acc10,16,$acc10
375 xor $acc6,$s1,$s1
985e4c41 376 ldx [$tbl+2048+32],%g0 ! prefetch te4
7395d852
AP
377 srlx $acc11,24,$acc11
378 xor $acc7,$s1,$s1
985e4c41 379 ldx [$tbl+2048+64],%g0 ! prefetch te4
7395d852
AP
380 srlx $acc13,8,$acc13
381 xor $acc8,$s2,$s2
985e4c41 382 ldx [$tbl+2048+96],%g0 ! prefetch te4
7395d852
AP
383 srlx $acc14,16,$acc14 !
384 xor $acc9,$s2,$s2
985e4c41 385 ldx [$tbl+2048+128],%g0 ! prefetch te4
7395d852
AP
386 srlx $acc15,24,$acc15
387 xor $acc10,$s2,$s2
985e4c41 388 ldx [$tbl+2048+160],%g0 ! prefetch te4
7395d852
AP
389 srl $s0,21,$acc0
390 xor $acc11,$s2,$s2
985e4c41 391 ldx [$tbl+2048+192],%g0 ! prefetch te4
7395d852
AP
392 xor $acc12,$acc14,$acc14
393 xor $acc13,$s3,$s3
985e4c41 394 ldx [$tbl+2048+224],%g0 ! prefetch te4
7395d852
AP
395 srl $s1,13,$acc1 !
396 xor $acc14,$s3,$s3
397 xor $acc15,$s3,$s3
398 ba .Lenc_loop
399 and $acc0,2040,$acc0
400
401.align 32
402.Lenc_last:
403 srlx $acc1,8,$acc1 !
404 xor $acc0,$t0,$t0
405 ld [$key+0],$s0
406 srlx $acc2,16,$acc2
407 xor $acc1,$t0,$t0
408 ld [$key+4],$s1
409 srlx $acc3,24,$acc3
410 xor $acc2,$t0,$t0
411 ld [$key+8],$s2 !
412 srlx $acc5,8,$acc5
413 xor $acc3,$t0,$t0
414 ld [$key+12],$s3
415 srlx $acc6,16,$acc6
416 xor $acc4,$t1,$t1
417 srlx $acc7,24,$acc7
418 xor $acc5,$t1,$t1
419 srlx $acc9,8,$acc9 !
420 xor $acc6,$t1,$t1
421 srlx $acc10,16,$acc10
422 xor $acc7,$t1,$t1
423 srlx $acc11,24,$acc11
424 xor $acc8,$t2,$t2
425 srlx $acc13,8,$acc13
426 xor $acc9,$t2,$t2
427 srlx $acc14,16,$acc14 !
428 xor $acc10,$t2,$t2
429 srlx $acc15,24,$acc15
430 xor $acc11,$t2,$t2
431 xor $acc12,$acc14,$acc14
432 xor $acc13,$t3,$t3
433 srl $t0,24,$acc0
434 xor $acc14,$t3,$t3
435 srl $t1,16,$acc1 !
436 xor $acc15,$t3,$t3
437
438 srl $t2,8,$acc2
439 and $acc1,255,$acc1
440 ldub [$rounds+$acc0],$acc0
441 srl $t1,24,$acc4
442 and $acc2,255,$acc2
443 ldub [$rounds+$acc1],$acc1
444 srl $t2,16,$acc5 !
445 and $t3,255,$acc3
446 ldub [$rounds+$acc2],$acc2
447 ldub [$rounds+$acc3],$acc3
448 srl $t3,8,$acc6
449 and $acc5,255,$acc5
450 ldub [$rounds+$acc4],$acc4
451 fmovs %f0,%f0
452 srl $t2,24,$acc8 !
453 and $acc6,255,$acc6
454 ldub [$rounds+$acc5],$acc5
455 srl $t3,16,$acc9
456 and $t0,255,$acc7
457 ldub [$rounds+$acc6],$acc6
458 ldub [$rounds+$acc7],$acc7
459 fmovs %f0,%f0
460 srl $t0,8,$acc10 !
461 and $acc9,255,$acc9
462 ldub [$rounds+$acc8],$acc8
463 srl $t3,24,$acc12
464 and $acc10,255,$acc10
465 ldub [$rounds+$acc9],$acc9
466 srl $t0,16,$acc13
467 and $t1,255,$acc11
468 ldub [$rounds+$acc10],$acc10 !
469 srl $t1,8,$acc14
470 and $acc13,255,$acc13
471 ldub [$rounds+$acc11],$acc11
472 ldub [$rounds+$acc12],$acc12
473 and $acc14,255,$acc14
474 ldub [$rounds+$acc13],$acc13
475 and $t2,255,$acc15
476 ldub [$rounds+$acc14],$acc14 !
477
478 sll $acc0,24,$acc0
479 xor $acc3,$s0,$s0
480 ldub [$rounds+$acc15],$acc15
481 sll $acc1,16,$acc1
482 xor $acc0,$s0,$s0
483 ldx [%sp+$bias+$frame+0],%i7 ! restore return address
484 fmovs %f0,%f0
485 sll $acc2,8,$acc2 !
486 xor $acc1,$s0,$s0
487 sll $acc4,24,$acc4
488 xor $acc2,$s0,$s0
489 sll $acc5,16,$acc5
490 xor $acc7,$s1,$s1
491 sll $acc6,8,$acc6
492 xor $acc4,$s1,$s1
493 sll $acc8,24,$acc8 !
494 xor $acc5,$s1,$s1
495 sll $acc9,16,$acc9
496 xor $acc11,$s2,$s2
497 sll $acc10,8,$acc10
498 xor $acc6,$s1,$s1
499 sll $acc12,24,$acc12
500 xor $acc8,$s2,$s2
501 sll $acc13,16,$acc13 !
502 xor $acc9,$s2,$s2
503 sll $acc14,8,$acc14
504 xor $acc10,$s2,$s2
505 xor $acc12,$acc14,$acc14
506 xor $acc13,$s3,$s3
507 xor $acc14,$s3,$s3
508 xor $acc15,$s3,$s3
509
510 ret
511 restore
512.type _sparcv9_AES_encrypt,#function
513.size _sparcv9_AES_encrypt,(.-_sparcv9_AES_encrypt)
514
515.align 32
516.globl AES_encrypt
517AES_encrypt:
518 or %o0,%o1,%g1
519 andcc %g1,3,%g0
520 bnz,pn %xcc,.Lunaligned_enc
521 save %sp,-$frame,%sp
522
523 ld [%i0+0],%o0
524 ld [%i0+4],%o1
525 ld [%i0+8],%o2
526 ld [%i0+12],%o3
527
4c78bc05 5281: call .+8
e22b8648 529 add %o7,AES_Te-1b,%o4
4c78bc05
AP
530 call _sparcv9_AES_encrypt
531 mov %i2,%o5
7395d852
AP
532
533 st %o0,[%i1+0]
534 st %o1,[%i1+4]
535 st %o2,[%i1+8]
536 st %o3,[%i1+12]
537
538 ret
539 restore
540
541.align 32
542.Lunaligned_enc:
543 ldub [%i0+0],%l0
544 ldub [%i0+1],%l1
545 ldub [%i0+2],%l2
546
547 sll %l0,24,%l0
548 ldub [%i0+3],%l3
549 sll %l1,16,%l1
550 ldub [%i0+4],%l4
551 sll %l2,8,%l2
552 or %l1,%l0,%l0
553 ldub [%i0+5],%l5
554 sll %l4,24,%l4
555 or %l3,%l2,%l2
556 ldub [%i0+6],%l6
557 sll %l5,16,%l5
558 or %l0,%l2,%o0
559 ldub [%i0+7],%l7
560
561 sll %l6,8,%l6
562 or %l5,%l4,%l4
563 ldub [%i0+8],%l0
564 or %l7,%l6,%l6
565 ldub [%i0+9],%l1
566 or %l4,%l6,%o1
567 ldub [%i0+10],%l2
568
569 sll %l0,24,%l0
570 ldub [%i0+11],%l3
571 sll %l1,16,%l1
572 ldub [%i0+12],%l4
573 sll %l2,8,%l2
574 or %l1,%l0,%l0
575 ldub [%i0+13],%l5
576 sll %l4,24,%l4
577 or %l3,%l2,%l2
578 ldub [%i0+14],%l6
579 sll %l5,16,%l5
580 or %l0,%l2,%o2
581 ldub [%i0+15],%l7
582
583 sll %l6,8,%l6
584 or %l5,%l4,%l4
585 or %l7,%l6,%l6
586 or %l4,%l6,%o3
587
4c78bc05 5881: call .+8
e22b8648 589 add %o7,AES_Te-1b,%o4
4c78bc05
AP
590 call _sparcv9_AES_encrypt
591 mov %i2,%o5
7395d852
AP
592
593 srl %o0,24,%l0
594 srl %o0,16,%l1
595 stb %l0,[%i1+0]
596 srl %o0,8,%l2
597 stb %l1,[%i1+1]
598 stb %l2,[%i1+2]
599 srl %o1,24,%l4
600 stb %o0,[%i1+3]
601
602 srl %o1,16,%l5
603 stb %l4,[%i1+4]
604 srl %o1,8,%l6
605 stb %l5,[%i1+5]
606 stb %l6,[%i1+6]
607 srl %o2,24,%l0
608 stb %o1,[%i1+7]
609
610 srl %o2,16,%l1
611 stb %l0,[%i1+8]
612 srl %o2,8,%l2
613 stb %l1,[%i1+9]
614 stb %l2,[%i1+10]
615 srl %o3,24,%l4
616 stb %o2,[%i1+11]
617
618 srl %o3,16,%l5
619 stb %l4,[%i1+12]
620 srl %o3,8,%l6
621 stb %l5,[%i1+13]
622 stb %l6,[%i1+14]
623 stb %o3,[%i1+15]
624
625 ret
626 restore
627.type AES_encrypt,#function
628.size AES_encrypt,(.-AES_encrypt)
629
630___
631
# Places the AES_Td label on a 256-byte boundary; the table contents are
# appended by the statements that follow.
632$code.=<<___;
985e4c41 633.align 256
7395d852
AP
634AES_Td:
635___
# Body of AES_Td: 256 words, each written twice by _data_word, so the
# table is 256 eight-byte entries (2KB), laid out like AES_Te.
636&_data_word(
637 0x51f4a750, 0x7e416553, 0x1a17a4c3, 0x3a275e96,
638 0x3bab6bcb, 0x1f9d45f1, 0xacfa58ab, 0x4be30393,
639 0x2030fa55, 0xad766df6, 0x88cc7691, 0xf5024c25,
640 0x4fe5d7fc, 0xc52acbd7, 0x26354480, 0xb562a38f,
641 0xdeb15a49, 0x25ba1b67, 0x45ea0e98, 0x5dfec0e1,
642 0xc32f7502, 0x814cf012, 0x8d4697a3, 0x6bd3f9c6,
643 0x038f5fe7, 0x15929c95, 0xbf6d7aeb, 0x955259da,
644 0xd4be832d, 0x587421d3, 0x49e06929, 0x8ec9c844,
645 0x75c2896a, 0xf48e7978, 0x99583e6b, 0x27b971dd,
646 0xbee14fb6, 0xf088ad17, 0xc920ac66, 0x7dce3ab4,
647 0x63df4a18, 0xe51a3182, 0x97513360, 0x62537f45,
648 0xb16477e0, 0xbb6bae84, 0xfe81a01c, 0xf9082b94,
649 0x70486858, 0x8f45fd19, 0x94de6c87, 0x527bf8b7,
650 0xab73d323, 0x724b02e2, 0xe31f8f57, 0x6655ab2a,
651 0xb2eb2807, 0x2fb5c203, 0x86c57b9a, 0xd33708a5,
652 0x302887f2, 0x23bfa5b2, 0x02036aba, 0xed16825c,
653 0x8acf1c2b, 0xa779b492, 0xf307f2f0, 0x4e69e2a1,
654 0x65daf4cd, 0x0605bed5, 0xd134621f, 0xc4a6fe8a,
655 0x342e539d, 0xa2f355a0, 0x058ae132, 0xa4f6eb75,
656 0x0b83ec39, 0x4060efaa, 0x5e719f06, 0xbd6e1051,
657 0x3e218af9, 0x96dd063d, 0xdd3e05ae, 0x4de6bd46,
658 0x91548db5, 0x71c45d05, 0x0406d46f, 0x605015ff,
659 0x1998fb24, 0xd6bde997, 0x894043cc, 0x67d99e77,
660 0xb0e842bd, 0x07898b88, 0xe7195b38, 0x79c8eedb,
661 0xa17c0a47, 0x7c420fe9, 0xf8841ec9, 0x00000000,
662 0x09808683, 0x322bed48, 0x1e1170ac, 0x6c5a724e,
663 0xfd0efffb, 0x0f853856, 0x3daed51e, 0x362d3927,
664 0x0a0fd964, 0x685ca621, 0x9b5b54d1, 0x24362e3a,
665 0x0c0a67b1, 0x9357e70f, 0xb4ee96d2, 0x1b9b919e,
666 0x80c0c54f, 0x61dc20a2, 0x5a774b69, 0x1c121a16,
667 0xe293ba0a, 0xc0a02ae5, 0x3c22e043, 0x121b171d,
668 0x0e090d0b, 0xf28bc7ad, 0x2db6a8b9, 0x141ea9c8,
669 0x57f11985, 0xaf75074c, 0xee99ddbb, 0xa37f60fd,
670 0xf701269f, 0x5c72f5bc, 0x44663bc5, 0x5bfb7e34,
671 0x8b432976, 0xcb23c6dc, 0xb6edfc68, 0xb8e4f163,
672 0xd731dcca, 0x42638510, 0x13972240, 0x84c61120,
673 0x854a247d, 0xd2bb3df8, 0xaef93211, 0xc729a16d,
674 0x1d9e2f4b, 0xdcb230f3, 0x0d8652ec, 0x77c1e3d0,
675 0x2bb3166c, 0xa970b999, 0x119448fa, 0x47e96422,
676 0xa8fc8cc4, 0xa0f03f1a, 0x567d2cd8, 0x223390ef,
677 0x87494ec7, 0xd938d1c1, 0x8ccaa2fe, 0x98d40b36,
678 0xa6f581cf, 0xa57ade28, 0xdab78e26, 0x3fadbfa4,
679 0x2c3a9de4, 0x5078920d, 0x6a5fcc9b, 0x547e4662,
680 0xf68d13c2, 0x90d8b8e8, 0x2e39f75e, 0x82c3aff5,
681 0x9f5d80be, 0x69d0937c, 0x6fd52da9, 0xcf2512b3,
682 0xc8ac993b, 0x10187da7, 0xe89c636e, 0xdb3bbb7b,
683 0xcd267809, 0x6e5918f4, 0xec9ab701, 0x834f9aa8,
684 0xe6956e65, 0xaaffe67e, 0x21bccf08, 0xef15e8e6,
685 0xbae79bd9, 0x4a6f36ce, 0xea9f09d4, 0x29b07cd6,
686 0x31a4b2af, 0x2a3f2331, 0xc6a59430, 0x35a266c0,
687 0x744ebc37, 0xfc82caa6, 0xe090d0b0, 0x33a7d815,
688 0xf104984a, 0x41ecdaf7, 0x7fcd500e, 0x1791f62f,
689 0x764dd68d, 0x43efb04d, 0xccaa4d54, 0xe49604df,
690 0x9ed1b5e3, 0x4c6a881b, 0xc12c1fb8, 0x4665517f,
691 0x9d5eea04, 0x018c355d, 0xfa877473, 0xfb0b412e,
692 0xb3671d5a, 0x92dbd252, 0xe9105633, 0x6dd64713,
693 0x9ad7618c, 0x37a10c7a, 0x59f8148e, 0xeb133c89,
694 0xcea927ee, 0xb761c935, 0xe11ce5ed, 0x7a47b13c,
695 0x9cd2df59, 0x55f2733f, 0x1814ce79, 0x73c737bf,
696 0x53f7cdea, 0x5ffdaa5b, 0xdf3d6f14, 0x7844db86,
697 0xcaaff381, 0xb968c43e, 0x3824342c, 0xc2a3405f,
698 0x161dc372, 0xbce2250c, 0x283c498b, 0xff0d9541,
699 0x39a80171, 0x080cb3de, 0xd8b4e49c, 0x6456c190,
700 0x7bcb8461, 0xd532b670, 0x486c5c74, 0xd0b85742);
# Remainder of the AES_Td object plus the decryption code:
#  - 256 one-byte entries placed directly after the 2KB table; the last
#    round reads them through $tbl+2048 and the loop prefetches them
#    ("td4" in the assembly comments);
#  - _sparcv9_AES_decrypt, the internal routine working on the state in
#    $s0-$s3 with the table in $tbl and the key schedule in $key;
#  - AES_decrypt, the global entry point, which checks the low two bits
#    of both pointer arguments and uses word loads/stores when they are
#    clear, byte loads/stores otherwise.
701$code.=<<___;
702 .byte 0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38
703 .byte 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb
704 .byte 0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87
705 .byte 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb
706 .byte 0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d
707 .byte 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e
708 .byte 0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2
709 .byte 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25
710 .byte 0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16
711 .byte 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92
712 .byte 0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda
713 .byte 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84
714 .byte 0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a
715 .byte 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06
716 .byte 0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02
717 .byte 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b
718 .byte 0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea
719 .byte 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73
720 .byte 0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85
721 .byte 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e
722 .byte 0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89
723 .byte 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b
724 .byte 0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20
725 .byte 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4
726 .byte 0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31
727 .byte 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f
728 .byte 0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d
729 .byte 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef
730 .byte 0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0
731 .byte 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61
732 .byte 0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26
733 .byte 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d
734.type AES_Td,#object
735.size AES_Td,(.-AES_Td)
736
737.align 64
738.skip 16
739_sparcv9_AES_decrypt:
740 save %sp,-$frame-$locals,%sp
741 stx %i7,[%sp+$bias+$frame+0] ! off-load return address
742 ld [$key+240],$rounds
743 ld [$key+0],$t0
744 ld [$key+4],$t1 !
745 ld [$key+8],$t2
746 ld [$key+12],$t3
747 srl $rounds,1,$rounds
748 xor $t0,$s0,$s0
749 ld [$key+16],$t0
750 xor $t1,$s1,$s1
751 ld [$key+20],$t1
752 srl $s0,21,$acc0 !
753 xor $t2,$s2,$s2
754 ld [$key+24],$t2
755 xor $t3,$s3,$s3
756 and $acc0,2040,$acc0
757 ld [$key+28],$t3
758 srl $s3,13,$acc1
759 nop
760.Ldec_loop:
761 srl $s2,5,$acc2 !
762 and $acc1,2040,$acc1
763 ldx [$tbl+$acc0],$acc0
764 sll $s1,3,$acc3
765 and $acc2,2040,$acc2
766 ldx [$tbl+$acc1],$acc1
767 srl $s1,21,$acc4
768 and $acc3,2040,$acc3
769 ldx [$tbl+$acc2],$acc2 !
770 srl $s0,13,$acc5
771 and $acc4,2040,$acc4
772 ldx [$tbl+$acc3],$acc3
773 srl $s3,5,$acc6
774 and $acc5,2040,$acc5
775 ldx [$tbl+$acc4],$acc4
776 fmovs %f0,%f0
777 sll $s2,3,$acc7 !
778 and $acc6,2040,$acc6
779 ldx [$tbl+$acc5],$acc5
780 srl $s2,21,$acc8
781 and $acc7,2040,$acc7
782 ldx [$tbl+$acc6],$acc6
783 srl $s1,13,$acc9
784 and $acc8,2040,$acc8
785 ldx [$tbl+$acc7],$acc7 !
786 srl $s0,5,$acc10
787 and $acc9,2040,$acc9
788 ldx [$tbl+$acc8],$acc8
789 sll $s3,3,$acc11
790 and $acc10,2040,$acc10
791 ldx [$tbl+$acc9],$acc9
792 fmovs %f0,%f0
793 srl $s3,21,$acc12 !
794 and $acc11,2040,$acc11
795 ldx [$tbl+$acc10],$acc10
796 srl $s2,13,$acc13
797 and $acc12,2040,$acc12
798 ldx [$tbl+$acc11],$acc11
799 srl $s1,5,$acc14
800 and $acc13,2040,$acc13
801 ldx [$tbl+$acc12],$acc12 !
802 sll $s0,3,$acc15
803 and $acc14,2040,$acc14
804 ldx [$tbl+$acc13],$acc13
805 and $acc15,2040,$acc15
806 add $key,32,$key
807 ldx [$tbl+$acc14],$acc14
808 fmovs %f0,%f0
809 subcc $rounds,1,$rounds !
810 ldx [$tbl+$acc15],$acc15
811 bz,a,pn %icc,.Ldec_last
812 add $tbl,2048,$rounds
813
814 srlx $acc1,8,$acc1
815 xor $acc0,$t0,$t0
816 ld [$key+0],$s0
817 fmovs %f0,%f0
818 srlx $acc2,16,$acc2 !
819 xor $acc1,$t0,$t0
820 ld [$key+4],$s1
821 srlx $acc3,24,$acc3
822 xor $acc2,$t0,$t0
823 ld [$key+8],$s2
824 srlx $acc5,8,$acc5
825 xor $acc3,$t0,$t0
826 ld [$key+12],$s3 !
827 srlx $acc6,16,$acc6
828 xor $acc4,$t1,$t1
829 fmovs %f0,%f0
830 srlx $acc7,24,$acc7
831 xor $acc5,$t1,$t1
832 srlx $acc9,8,$acc9
833 xor $acc6,$t1,$t1
834 srlx $acc10,16,$acc10 !
835 xor $acc7,$t1,$t1
836 srlx $acc11,24,$acc11
837 xor $acc8,$t2,$t2
838 srlx $acc13,8,$acc13
839 xor $acc9,$t2,$t2
840 srlx $acc14,16,$acc14
841 xor $acc10,$t2,$t2
842 srlx $acc15,24,$acc15 !
843 xor $acc11,$t2,$t2
844 xor $acc12,$acc14,$acc14
845 xor $acc13,$t3,$t3
846 srl $t0,21,$acc0
847 xor $acc14,$t3,$t3
848 xor $acc15,$t3,$t3
849 srl $t3,13,$acc1
850
851 and $acc0,2040,$acc0 !
852 srl $t2,5,$acc2
853 and $acc1,2040,$acc1
854 ldx [$tbl+$acc0],$acc0
855 sll $t1,3,$acc3
856 and $acc2,2040,$acc2
857 ldx [$tbl+$acc1],$acc1
858 fmovs %f0,%f0
859 srl $t1,21,$acc4 !
860 and $acc3,2040,$acc3
861 ldx [$tbl+$acc2],$acc2
862 srl $t0,13,$acc5
863 and $acc4,2040,$acc4
864 ldx [$tbl+$acc3],$acc3
865 srl $t3,5,$acc6
866 and $acc5,2040,$acc5
867 ldx [$tbl+$acc4],$acc4 !
868 sll $t2,3,$acc7
869 and $acc6,2040,$acc6
870 ldx [$tbl+$acc5],$acc5
871 srl $t2,21,$acc8
872 and $acc7,2040,$acc7
873 ldx [$tbl+$acc6],$acc6
874 fmovs %f0,%f0
875 srl $t1,13,$acc9 !
876 and $acc8,2040,$acc8
877 ldx [$tbl+$acc7],$acc7
878 srl $t0,5,$acc10
879 and $acc9,2040,$acc9
880 ldx [$tbl+$acc8],$acc8
881 sll $t3,3,$acc11
882 and $acc10,2040,$acc10
883 ldx [$tbl+$acc9],$acc9 !
884 srl $t3,21,$acc12
885 and $acc11,2040,$acc11
886 ldx [$tbl+$acc10],$acc10
887 srl $t2,13,$acc13
888 and $acc12,2040,$acc12
889 ldx [$tbl+$acc11],$acc11
890 fmovs %f0,%f0
891 srl $t1,5,$acc14 !
892 and $acc13,2040,$acc13
893 ldx [$tbl+$acc12],$acc12
894 sll $t0,3,$acc15
895 and $acc14,2040,$acc14
896 ldx [$tbl+$acc13],$acc13
897 srlx $acc1,8,$acc1
898 and $acc15,2040,$acc15
899 ldx [$tbl+$acc14],$acc14 !
900
901 srlx $acc2,16,$acc2
902 xor $acc0,$s0,$s0
903 ldx [$tbl+$acc15],$acc15
904 srlx $acc3,24,$acc3
905 xor $acc1,$s0,$s0
906 ld [$key+16],$t0
907 fmovs %f0,%f0
908 srlx $acc5,8,$acc5 !
909 xor $acc2,$s0,$s0
910 ld [$key+20],$t1
911 srlx $acc6,16,$acc6
912 xor $acc3,$s0,$s0
913 ld [$key+24],$t2
914 srlx $acc7,24,$acc7
915 xor $acc4,$s1,$s1
916 ld [$key+28],$t3 !
917 srlx $acc9,8,$acc9
918 xor $acc5,$s1,$s1
985e4c41 919 ldx [$tbl+2048+0],%g0 ! prefetch td4
7395d852
AP
920 srlx $acc10,16,$acc10
921 xor $acc6,$s1,$s1
985e4c41 922 ldx [$tbl+2048+32],%g0 ! prefetch td4
7395d852
AP
923 srlx $acc11,24,$acc11
924 xor $acc7,$s1,$s1
985e4c41 925 ldx [$tbl+2048+64],%g0 ! prefetch td4
7395d852
AP
926 srlx $acc13,8,$acc13
927 xor $acc8,$s2,$s2
985e4c41 928 ldx [$tbl+2048+96],%g0 ! prefetch td4
7395d852
AP
929 srlx $acc14,16,$acc14 !
930 xor $acc9,$s2,$s2
985e4c41 931 ldx [$tbl+2048+128],%g0 ! prefetch td4
7395d852
AP
932 srlx $acc15,24,$acc15
933 xor $acc10,$s2,$s2
985e4c41 934 ldx [$tbl+2048+160],%g0 ! prefetch td4
7395d852
AP
935 srl $s0,21,$acc0
936 xor $acc11,$s2,$s2
985e4c41 937 ldx [$tbl+2048+192],%g0 ! prefetch td4
7395d852
AP
938 xor $acc12,$acc14,$acc14
939 xor $acc13,$s3,$s3
985e4c41 940 ldx [$tbl+2048+224],%g0 ! prefetch td4
7395d852
AP
941 and $acc0,2040,$acc0 !
942 xor $acc14,$s3,$s3
943 xor $acc15,$s3,$s3
944 ba .Ldec_loop
945 srl $s3,13,$acc1
946
947.align 32
948.Ldec_last:
949 srlx $acc1,8,$acc1 !
950 xor $acc0,$t0,$t0
951 ld [$key+0],$s0
952 srlx $acc2,16,$acc2
953 xor $acc1,$t0,$t0
954 ld [$key+4],$s1
955 srlx $acc3,24,$acc3
956 xor $acc2,$t0,$t0
957 ld [$key+8],$s2 !
958 srlx $acc5,8,$acc5
959 xor $acc3,$t0,$t0
960 ld [$key+12],$s3
961 srlx $acc6,16,$acc6
962 xor $acc4,$t1,$t1
963 srlx $acc7,24,$acc7
964 xor $acc5,$t1,$t1
965 srlx $acc9,8,$acc9 !
966 xor $acc6,$t1,$t1
967 srlx $acc10,16,$acc10
968 xor $acc7,$t1,$t1
969 srlx $acc11,24,$acc11
970 xor $acc8,$t2,$t2
971 srlx $acc13,8,$acc13
972 xor $acc9,$t2,$t2
973 srlx $acc14,16,$acc14 !
974 xor $acc10,$t2,$t2
975 srlx $acc15,24,$acc15
976 xor $acc11,$t2,$t2
977 xor $acc12,$acc14,$acc14
978 xor $acc13,$t3,$t3
979 srl $t0,24,$acc0
980 xor $acc14,$t3,$t3
981 xor $acc15,$t3,$t3 !
982 srl $t3,16,$acc1
983
984 srl $t2,8,$acc2
985 and $acc1,255,$acc1
986 ldub [$rounds+$acc0],$acc0
987 srl $t1,24,$acc4
988 and $acc2,255,$acc2
989 ldub [$rounds+$acc1],$acc1
990 srl $t0,16,$acc5 !
991 and $t1,255,$acc3
992 ldub [$rounds+$acc2],$acc2
993 ldub [$rounds+$acc3],$acc3
994 srl $t3,8,$acc6
995 and $acc5,255,$acc5
996 ldub [$rounds+$acc4],$acc4
997 fmovs %f0,%f0
998 srl $t2,24,$acc8 !
999 and $acc6,255,$acc6
1000 ldub [$rounds+$acc5],$acc5
1001 srl $t1,16,$acc9
1002 and $t2,255,$acc7
1003 ldub [$rounds+$acc6],$acc6
1004 ldub [$rounds+$acc7],$acc7
1005 fmovs %f0,%f0
1006 srl $t0,8,$acc10 !
1007 and $acc9,255,$acc9
1008 ldub [$rounds+$acc8],$acc8
1009 srl $t3,24,$acc12
1010 and $acc10,255,$acc10
1011 ldub [$rounds+$acc9],$acc9
1012 srl $t2,16,$acc13
1013 and $t3,255,$acc11
1014 ldub [$rounds+$acc10],$acc10 !
1015 srl $t1,8,$acc14
1016 and $acc13,255,$acc13
1017 ldub [$rounds+$acc11],$acc11
1018 ldub [$rounds+$acc12],$acc12
1019 and $acc14,255,$acc14
1020 ldub [$rounds+$acc13],$acc13
1021 and $t0,255,$acc15
1022 ldub [$rounds+$acc14],$acc14 !
1023
1024 sll $acc0,24,$acc0
1025 xor $acc3,$s0,$s0
1026 ldub [$rounds+$acc15],$acc15
1027 sll $acc1,16,$acc1
1028 xor $acc0,$s0,$s0
1029 ldx [%sp+$bias+$frame+0],%i7 ! restore return address
1030 fmovs %f0,%f0
1031 sll $acc2,8,$acc2 !
1032 xor $acc1,$s0,$s0
1033 sll $acc4,24,$acc4
1034 xor $acc2,$s0,$s0
1035 sll $acc5,16,$acc5
1036 xor $acc7,$s1,$s1
1037 sll $acc6,8,$acc6
1038 xor $acc4,$s1,$s1
1039 sll $acc8,24,$acc8 !
1040 xor $acc5,$s1,$s1
1041 sll $acc9,16,$acc9
1042 xor $acc11,$s2,$s2
1043 sll $acc10,8,$acc10
1044 xor $acc6,$s1,$s1
1045 sll $acc12,24,$acc12
1046 xor $acc8,$s2,$s2
1047 sll $acc13,16,$acc13 !
1048 xor $acc9,$s2,$s2
1049 sll $acc14,8,$acc14
1050 xor $acc10,$s2,$s2
1051 xor $acc12,$acc14,$acc14
1052 xor $acc13,$s3,$s3
1053 xor $acc14,$s3,$s3
1054 xor $acc15,$s3,$s3
1055
1056 ret
1057 restore
1058.type _sparcv9_AES_decrypt,#function
1059.size _sparcv9_AES_decrypt,(.-_sparcv9_AES_decrypt)
1060
1061.align 32
1062.globl AES_decrypt
1063AES_decrypt:
1064 or %o0,%o1,%g1
1065 andcc %g1,3,%g0
1066 bnz,pn %xcc,.Lunaligned_dec
1067 save %sp,-$frame,%sp
1068
1069 ld [%i0+0],%o0
1070 ld [%i0+4],%o1
1071 ld [%i0+8],%o2
1072 ld [%i0+12],%o3
1073
4c78bc05 10741: call .+8
e22b8648 1075 add %o7,AES_Td-1b,%o4
4c78bc05
AP
1076 call _sparcv9_AES_decrypt
1077 mov %i2,%o5
7395d852
AP
1078
1079 st %o0,[%i1+0]
1080 st %o1,[%i1+4]
1081 st %o2,[%i1+8]
1082 st %o3,[%i1+12]
1083
1084 ret
1085 restore
1086
1087.align 32
1088.Lunaligned_dec:
1089 ldub [%i0+0],%l0
1090 ldub [%i0+1],%l1
1091 ldub [%i0+2],%l2
1092
1093 sll %l0,24,%l0
1094 ldub [%i0+3],%l3
1095 sll %l1,16,%l1
1096 ldub [%i0+4],%l4
1097 sll %l2,8,%l2
1098 or %l1,%l0,%l0
1099 ldub [%i0+5],%l5
1100 sll %l4,24,%l4
1101 or %l3,%l2,%l2
1102 ldub [%i0+6],%l6
1103 sll %l5,16,%l5
1104 or %l0,%l2,%o0
1105 ldub [%i0+7],%l7
1106
1107 sll %l6,8,%l6
1108 or %l5,%l4,%l4
1109 ldub [%i0+8],%l0
1110 or %l7,%l6,%l6
1111 ldub [%i0+9],%l1
1112 or %l4,%l6,%o1
1113 ldub [%i0+10],%l2
1114
1115 sll %l0,24,%l0
1116 ldub [%i0+11],%l3
1117 sll %l1,16,%l1
1118 ldub [%i0+12],%l4
1119 sll %l2,8,%l2
1120 or %l1,%l0,%l0
1121 ldub [%i0+13],%l5
1122 sll %l4,24,%l4
1123 or %l3,%l2,%l2
1124 ldub [%i0+14],%l6
1125 sll %l5,16,%l5
1126 or %l0,%l2,%o2
1127 ldub [%i0+15],%l7
1128
1129 sll %l6,8,%l6
1130 or %l5,%l4,%l4
1131 or %l7,%l6,%l6
1132 or %l4,%l6,%o3
1133
4c78bc05 11341: call .+8
e22b8648 1135 add %o7,AES_Td-1b,%o4
4c78bc05
AP
1136 call _sparcv9_AES_decrypt
1137 mov %i2,%o5
7395d852
AP
1138
1139 srl %o0,24,%l0
1140 srl %o0,16,%l1
1141 stb %l0,[%i1+0]
1142 srl %o0,8,%l2
1143 stb %l1,[%i1+1]
1144 stb %l2,[%i1+2]
1145 srl %o1,24,%l4
1146 stb %o0,[%i1+3]
1147
1148 srl %o1,16,%l5
1149 stb %l4,[%i1+4]
1150 srl %o1,8,%l6
1151 stb %l5,[%i1+5]
1152 stb %l6,[%i1+6]
1153 srl %o2,24,%l0
1154 stb %o1,[%i1+7]
1155
1156 srl %o2,16,%l1
1157 stb %l0,[%i1+8]
1158 srl %o2,8,%l2
1159 stb %l1,[%i1+9]
1160 stb %l2,[%i1+10]
1161 srl %o3,24,%l4
1162 stb %o2,[%i1+11]
1163
1164 srl %o3,16,%l5
1165 stb %l4,[%i1+12]
1166 srl %o3,8,%l6
1167 stb %l5,[%i1+13]
1168 stb %l6,[%i1+14]
1169 stb %o3,[%i1+15]
1170
1171 ret
1172 restore
1173.type AES_decrypt,#function
1174.size AES_decrypt,(.-AES_decrypt)
1175___
1176
1177# fmovs instructions substituting for FP nops were originally added
1178# to meet specific instruction alignment requirements to maximize ILP.
1179# As UltraSPARC T1, a.k.a. Niagara, has a shared FPU, FP nops can have
1180# an undesired effect, so just omit them and sacrifice a fraction of
1181# a percent in performance...
# Strip the FP-nop fillers: on every line of $code, everything from
# "fmovs" to the end of that line is removed.
$code =~ s/fmovs.*$//gm;

print $code;

# Buffered write errors (a full disk, for instance) are only reported
# when the handle is closed, so a failed close must fail the run instead
# of leaving a truncated assembly file behind.
close STDOUT or die "error closing STDOUT: $!";