]> git.ipfire.org Git - thirdparty/openssl.git/blob - crypto/aes/asm/aes-sparcv9.pl
9b9a795dbf0dc68a01873fc5a08994a407de21a1
[thirdparty/openssl.git] / crypto / aes / asm / aes-sparcv9.pl
1 #! /usr/bin/env perl
2 # Copyright 2005-2020 The OpenSSL Project Authors. All Rights Reserved.
3 #
4 # Licensed under the Apache License 2.0 (the "License"). You may not use
5 # this file except in compliance with the License. You can obtain a copy
6 # in the file LICENSE in the source distribution or at
7 # https://www.openssl.org/source/license.html
8
9 #
10 # ====================================================================
11 # Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
12 # project. Rights for redistribution and usage in source and binary
13 # forms are granted according to the License.
14 # ====================================================================
15 #
16 # Version 1.1
17 #
# The main motivation for this effort was to mitigate the hazard of
# cache-timing attacks. This is [currently and initially!] addressed in
20 # two ways. 1. S-boxes are compressed from 5KB to 2KB+256B size each.
21 # 2. References to them are scheduled for L2 cache latency, meaning
22 # that the tables don't have to reside in L1 cache. Once again, this
23 # is an initial draft and one should expect more countermeasures to
24 # be implemented...
25 #
26 # Version 1.1 prefetches T[ed]4 in order to mitigate attack on last
27 # round.
28 #
29 # Even though performance was not the primary goal [on the contrary,
30 # extra shifts "induced" by compressed S-box and longer loop epilogue
31 # "induced" by scheduling for L2 have negative effect on performance],
32 # the code turned out to run in ~23 cycles per processed byte en-/
# decrypted with 128-bit key. This is a pretty good result for code
# with the mentioned qualities on an UltraSPARC core. Compared to Sun C
# generated code my encrypt procedure runs just a few percent faster,
# while the decrypt one is a whole 50% faster [yes, Sun C failed to generate
37 # optimal decrypt procedure]. Compared to GNU C generated code both
38 # procedures are more than 60% faster:-)
39
# The last command-line argument, if present, names the file that receives
# the generated assembly; without it the text goes to the inherited STDOUT.
# Use the 3-argument form of open so the file name cannot be parsed as a
# mode, and fail loudly instead of silently writing to the wrong stream.
$output = pop;
if ($output) {
    open STDOUT, '>', $output
        or die "can't open $output: $!";
}
41
# Stack-frame parameters.  $frame and $bias are emitted as symbolic names
# (presumably resolved by sparc_arch.h, which the generated file includes);
# $locals is the number of extra bytes reserved beyond the frame, used to
# park the return address.
($frame, $bias, $locals) = ("STACK_FRAME", "STACK_BIAS", 16);

# Sixteen accumulator registers, four per output word: each first holds a
# table offset and is then overwritten with the looked-up value.
($acc0,  $acc1,  $acc2,  $acc3)  = qw(%l0 %o0 %o1 %o2);
($acc4,  $acc5,  $acc6,  $acc7)  = qw(%l1 %o3 %o4 %o5);
($acc8,  $acc9,  $acc10, $acc11) = qw(%l2 %o7 %g1 %g2);
($acc12, $acc13, $acc14, $acc15) = qw(%l3 %g3 %g4 %g5);

# Temporaries: loaded with key words and used as the alternate state.
($t0, $t1, $t2, $t3) = qw(%l4 %l5 %l6 %l7);

# The four state words, the table base and the key pointer live in the
# input registers of the internal routines.
($s0, $s1, $s2, $s3) = qw(%i0 %i1 %i2 %i3);
($tbl, $key)         = qw(%i4 %i5);
$rounds = "%i7";    # aliases with return address, which is off-loaded to stack
78
# Append table entries to the global $code.  Every argument becomes one
# ".long" directive that stores the 32-bit value twice, i.e. one 8-byte
# slot per entry (the round code masks offsets with 2040 and fetches the
# slots with 64-bit ldx loads).
#
# Arguments: a list of integers; processing stops at the first undefined
#            element.
# Returns:   nothing; the only effect is extending $code.
#
# Declared without a prototype: the former empty "()" prototype claimed a
# zero-argument sub and only worked because every call site used the
# prototype-bypassing &-form.
sub _data_word
{
    while (defined(my $word = shift)) {
        $code .= sprintf "\t.long\t0x%08x,0x%08x\n", $word, $word;
    }
    return;
}
83
84 $code.=<<___;
85 #include "sparc_arch.h"
86
87 #ifdef __arch64__
88 .register %g2,#scratch
89 .register %g3,#scratch
90 #endif
91 .section ".text",#alloc,#execinstr
92
93 .align 256
94 AES_Te:
95 ___
96 &_data_word(
97 0xc66363a5, 0xf87c7c84, 0xee777799, 0xf67b7b8d,
98 0xfff2f20d, 0xd66b6bbd, 0xde6f6fb1, 0x91c5c554,
99 0x60303050, 0x02010103, 0xce6767a9, 0x562b2b7d,
100 0xe7fefe19, 0xb5d7d762, 0x4dababe6, 0xec76769a,
101 0x8fcaca45, 0x1f82829d, 0x89c9c940, 0xfa7d7d87,
102 0xeffafa15, 0xb25959eb, 0x8e4747c9, 0xfbf0f00b,
103 0x41adadec, 0xb3d4d467, 0x5fa2a2fd, 0x45afafea,
104 0x239c9cbf, 0x53a4a4f7, 0xe4727296, 0x9bc0c05b,
105 0x75b7b7c2, 0xe1fdfd1c, 0x3d9393ae, 0x4c26266a,
106 0x6c36365a, 0x7e3f3f41, 0xf5f7f702, 0x83cccc4f,
107 0x6834345c, 0x51a5a5f4, 0xd1e5e534, 0xf9f1f108,
108 0xe2717193, 0xabd8d873, 0x62313153, 0x2a15153f,
109 0x0804040c, 0x95c7c752, 0x46232365, 0x9dc3c35e,
110 0x30181828, 0x379696a1, 0x0a05050f, 0x2f9a9ab5,
111 0x0e070709, 0x24121236, 0x1b80809b, 0xdfe2e23d,
112 0xcdebeb26, 0x4e272769, 0x7fb2b2cd, 0xea75759f,
113 0x1209091b, 0x1d83839e, 0x582c2c74, 0x341a1a2e,
114 0x361b1b2d, 0xdc6e6eb2, 0xb45a5aee, 0x5ba0a0fb,
115 0xa45252f6, 0x763b3b4d, 0xb7d6d661, 0x7db3b3ce,
116 0x5229297b, 0xdde3e33e, 0x5e2f2f71, 0x13848497,
117 0xa65353f5, 0xb9d1d168, 0x00000000, 0xc1eded2c,
118 0x40202060, 0xe3fcfc1f, 0x79b1b1c8, 0xb65b5bed,
119 0xd46a6abe, 0x8dcbcb46, 0x67bebed9, 0x7239394b,
120 0x944a4ade, 0x984c4cd4, 0xb05858e8, 0x85cfcf4a,
121 0xbbd0d06b, 0xc5efef2a, 0x4faaaae5, 0xedfbfb16,
122 0x864343c5, 0x9a4d4dd7, 0x66333355, 0x11858594,
123 0x8a4545cf, 0xe9f9f910, 0x04020206, 0xfe7f7f81,
124 0xa05050f0, 0x783c3c44, 0x259f9fba, 0x4ba8a8e3,
125 0xa25151f3, 0x5da3a3fe, 0x804040c0, 0x058f8f8a,
126 0x3f9292ad, 0x219d9dbc, 0x70383848, 0xf1f5f504,
127 0x63bcbcdf, 0x77b6b6c1, 0xafdada75, 0x42212163,
128 0x20101030, 0xe5ffff1a, 0xfdf3f30e, 0xbfd2d26d,
129 0x81cdcd4c, 0x180c0c14, 0x26131335, 0xc3ecec2f,
130 0xbe5f5fe1, 0x359797a2, 0x884444cc, 0x2e171739,
131 0x93c4c457, 0x55a7a7f2, 0xfc7e7e82, 0x7a3d3d47,
132 0xc86464ac, 0xba5d5de7, 0x3219192b, 0xe6737395,
133 0xc06060a0, 0x19818198, 0x9e4f4fd1, 0xa3dcdc7f,
134 0x44222266, 0x542a2a7e, 0x3b9090ab, 0x0b888883,
135 0x8c4646ca, 0xc7eeee29, 0x6bb8b8d3, 0x2814143c,
136 0xa7dede79, 0xbc5e5ee2, 0x160b0b1d, 0xaddbdb76,
137 0xdbe0e03b, 0x64323256, 0x743a3a4e, 0x140a0a1e,
138 0x924949db, 0x0c06060a, 0x4824246c, 0xb85c5ce4,
139 0x9fc2c25d, 0xbdd3d36e, 0x43acacef, 0xc46262a6,
140 0x399191a8, 0x319595a4, 0xd3e4e437, 0xf279798b,
141 0xd5e7e732, 0x8bc8c843, 0x6e373759, 0xda6d6db7,
142 0x018d8d8c, 0xb1d5d564, 0x9c4e4ed2, 0x49a9a9e0,
143 0xd86c6cb4, 0xac5656fa, 0xf3f4f407, 0xcfeaea25,
144 0xca6565af, 0xf47a7a8e, 0x47aeaee9, 0x10080818,
145 0x6fbabad5, 0xf0787888, 0x4a25256f, 0x5c2e2e72,
146 0x381c1c24, 0x57a6a6f1, 0x73b4b4c7, 0x97c6c651,
147 0xcbe8e823, 0xa1dddd7c, 0xe874749c, 0x3e1f1f21,
148 0x964b4bdd, 0x61bdbddc, 0x0d8b8b86, 0x0f8a8a85,
149 0xe0707090, 0x7c3e3e42, 0x71b5b5c4, 0xcc6666aa,
150 0x904848d8, 0x06030305, 0xf7f6f601, 0x1c0e0e12,
151 0xc26161a3, 0x6a35355f, 0xae5757f9, 0x69b9b9d0,
152 0x17868691, 0x99c1c158, 0x3a1d1d27, 0x279e9eb9,
153 0xd9e1e138, 0xebf8f813, 0x2b9898b3, 0x22111133,
154 0xd26969bb, 0xa9d9d970, 0x078e8e89, 0x339494a7,
155 0x2d9b9bb6, 0x3c1e1e22, 0x15878792, 0xc9e9e920,
156 0x87cece49, 0xaa5555ff, 0x50282878, 0xa5dfdf7a,
157 0x038c8c8f, 0x59a1a1f8, 0x09898980, 0x1a0d0d17,
158 0x65bfbfda, 0xd7e6e631, 0x844242c6, 0xd06868b8,
159 0x824141c3, 0x299999b0, 0x5a2d2d77, 0x1e0f0f11,
160 0x7bb0b0cb, 0xa85454fc, 0x6dbbbbd6, 0x2c16163a);
161 $code.=<<___;
162 .byte 0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5
163 .byte 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76
164 .byte 0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0
165 .byte 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0
166 .byte 0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc
167 .byte 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15
168 .byte 0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a
169 .byte 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75
170 .byte 0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0
171 .byte 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84
172 .byte 0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b
173 .byte 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf
174 .byte 0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85
175 .byte 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8
176 .byte 0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5
177 .byte 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2
178 .byte 0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17
179 .byte 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73
180 .byte 0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88
181 .byte 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb
182 .byte 0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c
183 .byte 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79
184 .byte 0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9
185 .byte 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08
186 .byte 0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6
187 .byte 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a
188 .byte 0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e
189 .byte 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e
190 .byte 0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94
191 .byte 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf
192 .byte 0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68
193 .byte 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16
194 .type AES_Te,#object
195 .size AES_Te,(.-AES_Te)
196
197 .align 64
198 .skip 16
199 _sparcv9_AES_encrypt:
200 save %sp,-$frame-$locals,%sp
201 stx %i7,[%sp+$bias+$frame+0] ! off-load return address
202 ld [$key+240],$rounds
203 ld [$key+0],$t0
204 ld [$key+4],$t1 !
205 ld [$key+8],$t2
206 srl $rounds,1,$rounds
207 xor $t0,$s0,$s0
208 ld [$key+12],$t3
209 srl $s0,21,$acc0
210 xor $t1,$s1,$s1
211 ld [$key+16],$t0
212 srl $s1,13,$acc1 !
213 xor $t2,$s2,$s2
214 ld [$key+20],$t1
215 xor $t3,$s3,$s3
216 ld [$key+24],$t2
217 and $acc0,2040,$acc0
218 ld [$key+28],$t3
219 nop
220 .Lenc_loop:
221 srl $s2,5,$acc2 !
222 and $acc1,2040,$acc1
223 ldx [$tbl+$acc0],$acc0
224 sll $s3,3,$acc3
225 and $acc2,2040,$acc2
226 ldx [$tbl+$acc1],$acc1
227 srl $s1,21,$acc4
228 and $acc3,2040,$acc3
229 ldx [$tbl+$acc2],$acc2 !
230 srl $s2,13,$acc5
231 and $acc4,2040,$acc4
232 ldx [$tbl+$acc3],$acc3
233 srl $s3,5,$acc6
234 and $acc5,2040,$acc5
235 ldx [$tbl+$acc4],$acc4
236 fmovs %f0,%f0
237 sll $s0,3,$acc7 !
238 and $acc6,2040,$acc6
239 ldx [$tbl+$acc5],$acc5
240 srl $s2,21,$acc8
241 and $acc7,2040,$acc7
242 ldx [$tbl+$acc6],$acc6
243 srl $s3,13,$acc9
244 and $acc8,2040,$acc8
245 ldx [$tbl+$acc7],$acc7 !
246 srl $s0,5,$acc10
247 and $acc9,2040,$acc9
248 ldx [$tbl+$acc8],$acc8
249 sll $s1,3,$acc11
250 and $acc10,2040,$acc10
251 ldx [$tbl+$acc9],$acc9
252 fmovs %f0,%f0
253 srl $s3,21,$acc12 !
254 and $acc11,2040,$acc11
255 ldx [$tbl+$acc10],$acc10
256 srl $s0,13,$acc13
257 and $acc12,2040,$acc12
258 ldx [$tbl+$acc11],$acc11
259 srl $s1,5,$acc14
260 and $acc13,2040,$acc13
261 ldx [$tbl+$acc12],$acc12 !
262 sll $s2,3,$acc15
263 and $acc14,2040,$acc14
264 ldx [$tbl+$acc13],$acc13
265 and $acc15,2040,$acc15
266 add $key,32,$key
267 ldx [$tbl+$acc14],$acc14
268 fmovs %f0,%f0
269 subcc $rounds,1,$rounds !
270 ldx [$tbl+$acc15],$acc15
271 bz,a,pn %icc,.Lenc_last
272 add $tbl,2048,$rounds
273
274 srlx $acc1,8,$acc1
275 xor $acc0,$t0,$t0
276 ld [$key+0],$s0
277 fmovs %f0,%f0
278 srlx $acc2,16,$acc2 !
279 xor $acc1,$t0,$t0
280 ld [$key+4],$s1
281 srlx $acc3,24,$acc3
282 xor $acc2,$t0,$t0
283 ld [$key+8],$s2
284 srlx $acc5,8,$acc5
285 xor $acc3,$t0,$t0
286 ld [$key+12],$s3 !
287 srlx $acc6,16,$acc6
288 xor $acc4,$t1,$t1
289 fmovs %f0,%f0
290 srlx $acc7,24,$acc7
291 xor $acc5,$t1,$t1
292 srlx $acc9,8,$acc9
293 xor $acc6,$t1,$t1
294 srlx $acc10,16,$acc10 !
295 xor $acc7,$t1,$t1
296 srlx $acc11,24,$acc11
297 xor $acc8,$t2,$t2
298 srlx $acc13,8,$acc13
299 xor $acc9,$t2,$t2
300 srlx $acc14,16,$acc14
301 xor $acc10,$t2,$t2
302 srlx $acc15,24,$acc15 !
303 xor $acc11,$t2,$t2
304 xor $acc12,$acc14,$acc14
305 xor $acc13,$t3,$t3
306 srl $t0,21,$acc0
307 xor $acc14,$t3,$t3
308 srl $t1,13,$acc1
309 xor $acc15,$t3,$t3
310
311 and $acc0,2040,$acc0 !
312 srl $t2,5,$acc2
313 and $acc1,2040,$acc1
314 ldx [$tbl+$acc0],$acc0
315 sll $t3,3,$acc3
316 and $acc2,2040,$acc2
317 ldx [$tbl+$acc1],$acc1
318 fmovs %f0,%f0
319 srl $t1,21,$acc4 !
320 and $acc3,2040,$acc3
321 ldx [$tbl+$acc2],$acc2
322 srl $t2,13,$acc5
323 and $acc4,2040,$acc4
324 ldx [$tbl+$acc3],$acc3
325 srl $t3,5,$acc6
326 and $acc5,2040,$acc5
327 ldx [$tbl+$acc4],$acc4 !
328 sll $t0,3,$acc7
329 and $acc6,2040,$acc6
330 ldx [$tbl+$acc5],$acc5
331 srl $t2,21,$acc8
332 and $acc7,2040,$acc7
333 ldx [$tbl+$acc6],$acc6
334 fmovs %f0,%f0
335 srl $t3,13,$acc9 !
336 and $acc8,2040,$acc8
337 ldx [$tbl+$acc7],$acc7
338 srl $t0,5,$acc10
339 and $acc9,2040,$acc9
340 ldx [$tbl+$acc8],$acc8
341 sll $t1,3,$acc11
342 and $acc10,2040,$acc10
343 ldx [$tbl+$acc9],$acc9 !
344 srl $t3,21,$acc12
345 and $acc11,2040,$acc11
346 ldx [$tbl+$acc10],$acc10
347 srl $t0,13,$acc13
348 and $acc12,2040,$acc12
349 ldx [$tbl+$acc11],$acc11
350 fmovs %f0,%f0
351 srl $t1,5,$acc14 !
352 and $acc13,2040,$acc13
353 ldx [$tbl+$acc12],$acc12
354 sll $t2,3,$acc15
355 and $acc14,2040,$acc14
356 ldx [$tbl+$acc13],$acc13
357 srlx $acc1,8,$acc1
358 and $acc15,2040,$acc15
359 ldx [$tbl+$acc14],$acc14 !
360
361 srlx $acc2,16,$acc2
362 xor $acc0,$s0,$s0
363 ldx [$tbl+$acc15],$acc15
364 srlx $acc3,24,$acc3
365 xor $acc1,$s0,$s0
366 ld [$key+16],$t0
367 fmovs %f0,%f0
368 srlx $acc5,8,$acc5 !
369 xor $acc2,$s0,$s0
370 ld [$key+20],$t1
371 srlx $acc6,16,$acc6
372 xor $acc3,$s0,$s0
373 ld [$key+24],$t2
374 srlx $acc7,24,$acc7
375 xor $acc4,$s1,$s1
376 ld [$key+28],$t3 !
377 srlx $acc9,8,$acc9
378 xor $acc5,$s1,$s1
379 ldx [$tbl+2048+0],%g0 ! prefetch te4
380 srlx $acc10,16,$acc10
381 xor $acc6,$s1,$s1
382 ldx [$tbl+2048+32],%g0 ! prefetch te4
383 srlx $acc11,24,$acc11
384 xor $acc7,$s1,$s1
385 ldx [$tbl+2048+64],%g0 ! prefetch te4
386 srlx $acc13,8,$acc13
387 xor $acc8,$s2,$s2
388 ldx [$tbl+2048+96],%g0 ! prefetch te4
389 srlx $acc14,16,$acc14 !
390 xor $acc9,$s2,$s2
391 ldx [$tbl+2048+128],%g0 ! prefetch te4
392 srlx $acc15,24,$acc15
393 xor $acc10,$s2,$s2
394 ldx [$tbl+2048+160],%g0 ! prefetch te4
395 srl $s0,21,$acc0
396 xor $acc11,$s2,$s2
397 ldx [$tbl+2048+192],%g0 ! prefetch te4
398 xor $acc12,$acc14,$acc14
399 xor $acc13,$s3,$s3
400 ldx [$tbl+2048+224],%g0 ! prefetch te4
401 srl $s1,13,$acc1 !
402 xor $acc14,$s3,$s3
403 xor $acc15,$s3,$s3
404 ba .Lenc_loop
405 and $acc0,2040,$acc0
406
407 .align 32
408 .Lenc_last:
409 srlx $acc1,8,$acc1 !
410 xor $acc0,$t0,$t0
411 ld [$key+0],$s0
412 srlx $acc2,16,$acc2
413 xor $acc1,$t0,$t0
414 ld [$key+4],$s1
415 srlx $acc3,24,$acc3
416 xor $acc2,$t0,$t0
417 ld [$key+8],$s2 !
418 srlx $acc5,8,$acc5
419 xor $acc3,$t0,$t0
420 ld [$key+12],$s3
421 srlx $acc6,16,$acc6
422 xor $acc4,$t1,$t1
423 srlx $acc7,24,$acc7
424 xor $acc5,$t1,$t1
425 srlx $acc9,8,$acc9 !
426 xor $acc6,$t1,$t1
427 srlx $acc10,16,$acc10
428 xor $acc7,$t1,$t1
429 srlx $acc11,24,$acc11
430 xor $acc8,$t2,$t2
431 srlx $acc13,8,$acc13
432 xor $acc9,$t2,$t2
433 srlx $acc14,16,$acc14 !
434 xor $acc10,$t2,$t2
435 srlx $acc15,24,$acc15
436 xor $acc11,$t2,$t2
437 xor $acc12,$acc14,$acc14
438 xor $acc13,$t3,$t3
439 srl $t0,24,$acc0
440 xor $acc14,$t3,$t3
441 srl $t1,16,$acc1 !
442 xor $acc15,$t3,$t3
443
444 srl $t2,8,$acc2
445 and $acc1,255,$acc1
446 ldub [$rounds+$acc0],$acc0
447 srl $t1,24,$acc4
448 and $acc2,255,$acc2
449 ldub [$rounds+$acc1],$acc1
450 srl $t2,16,$acc5 !
451 and $t3,255,$acc3
452 ldub [$rounds+$acc2],$acc2
453 ldub [$rounds+$acc3],$acc3
454 srl $t3,8,$acc6
455 and $acc5,255,$acc5
456 ldub [$rounds+$acc4],$acc4
457 fmovs %f0,%f0
458 srl $t2,24,$acc8 !
459 and $acc6,255,$acc6
460 ldub [$rounds+$acc5],$acc5
461 srl $t3,16,$acc9
462 and $t0,255,$acc7
463 ldub [$rounds+$acc6],$acc6
464 ldub [$rounds+$acc7],$acc7
465 fmovs %f0,%f0
466 srl $t0,8,$acc10 !
467 and $acc9,255,$acc9
468 ldub [$rounds+$acc8],$acc8
469 srl $t3,24,$acc12
470 and $acc10,255,$acc10
471 ldub [$rounds+$acc9],$acc9
472 srl $t0,16,$acc13
473 and $t1,255,$acc11
474 ldub [$rounds+$acc10],$acc10 !
475 srl $t1,8,$acc14
476 and $acc13,255,$acc13
477 ldub [$rounds+$acc11],$acc11
478 ldub [$rounds+$acc12],$acc12
479 and $acc14,255,$acc14
480 ldub [$rounds+$acc13],$acc13
481 and $t2,255,$acc15
482 ldub [$rounds+$acc14],$acc14 !
483
484 sll $acc0,24,$acc0
485 xor $acc3,$s0,$s0
486 ldub [$rounds+$acc15],$acc15
487 sll $acc1,16,$acc1
488 xor $acc0,$s0,$s0
489 ldx [%sp+$bias+$frame+0],%i7 ! restore return address
490 fmovs %f0,%f0
491 sll $acc2,8,$acc2 !
492 xor $acc1,$s0,$s0
493 sll $acc4,24,$acc4
494 xor $acc2,$s0,$s0
495 sll $acc5,16,$acc5
496 xor $acc7,$s1,$s1
497 sll $acc6,8,$acc6
498 xor $acc4,$s1,$s1
499 sll $acc8,24,$acc8 !
500 xor $acc5,$s1,$s1
501 sll $acc9,16,$acc9
502 xor $acc11,$s2,$s2
503 sll $acc10,8,$acc10
504 xor $acc6,$s1,$s1
505 sll $acc12,24,$acc12
506 xor $acc8,$s2,$s2
507 sll $acc13,16,$acc13 !
508 xor $acc9,$s2,$s2
509 sll $acc14,8,$acc14
510 xor $acc10,$s2,$s2
511 xor $acc12,$acc14,$acc14
512 xor $acc13,$s3,$s3
513 xor $acc14,$s3,$s3
514 xor $acc15,$s3,$s3
515
516 ret
517 restore
518 .type _sparcv9_AES_encrypt,#function
519 .size _sparcv9_AES_encrypt,(.-_sparcv9_AES_encrypt)
520
521 .align 32
522 .globl AES_encrypt
523 AES_encrypt:
524 or %o0,%o1,%g1
525 andcc %g1,3,%g0
526 bnz,pn %xcc,.Lunaligned_enc
527 save %sp,-$frame,%sp
528
529 ld [%i0+0],%o0
530 ld [%i0+4],%o1
531 ld [%i0+8],%o2
532 ld [%i0+12],%o3
533
534 1: call .+8
535 add %o7,AES_Te-1b,%o4
536 call _sparcv9_AES_encrypt
537 mov %i2,%o5
538
539 st %o0,[%i1+0]
540 st %o1,[%i1+4]
541 st %o2,[%i1+8]
542 st %o3,[%i1+12]
543
544 ret
545 restore
546
547 .align 32
548 .Lunaligned_enc:
549 ldub [%i0+0],%l0
550 ldub [%i0+1],%l1
551 ldub [%i0+2],%l2
552
553 sll %l0,24,%l0
554 ldub [%i0+3],%l3
555 sll %l1,16,%l1
556 ldub [%i0+4],%l4
557 sll %l2,8,%l2
558 or %l1,%l0,%l0
559 ldub [%i0+5],%l5
560 sll %l4,24,%l4
561 or %l3,%l2,%l2
562 ldub [%i0+6],%l6
563 sll %l5,16,%l5
564 or %l0,%l2,%o0
565 ldub [%i0+7],%l7
566
567 sll %l6,8,%l6
568 or %l5,%l4,%l4
569 ldub [%i0+8],%l0
570 or %l7,%l6,%l6
571 ldub [%i0+9],%l1
572 or %l4,%l6,%o1
573 ldub [%i0+10],%l2
574
575 sll %l0,24,%l0
576 ldub [%i0+11],%l3
577 sll %l1,16,%l1
578 ldub [%i0+12],%l4
579 sll %l2,8,%l2
580 or %l1,%l0,%l0
581 ldub [%i0+13],%l5
582 sll %l4,24,%l4
583 or %l3,%l2,%l2
584 ldub [%i0+14],%l6
585 sll %l5,16,%l5
586 or %l0,%l2,%o2
587 ldub [%i0+15],%l7
588
589 sll %l6,8,%l6
590 or %l5,%l4,%l4
591 or %l7,%l6,%l6
592 or %l4,%l6,%o3
593
594 1: call .+8
595 add %o7,AES_Te-1b,%o4
596 call _sparcv9_AES_encrypt
597 mov %i2,%o5
598
599 srl %o0,24,%l0
600 srl %o0,16,%l1
601 stb %l0,[%i1+0]
602 srl %o0,8,%l2
603 stb %l1,[%i1+1]
604 stb %l2,[%i1+2]
605 srl %o1,24,%l4
606 stb %o0,[%i1+3]
607
608 srl %o1,16,%l5
609 stb %l4,[%i1+4]
610 srl %o1,8,%l6
611 stb %l5,[%i1+5]
612 stb %l6,[%i1+6]
613 srl %o2,24,%l0
614 stb %o1,[%i1+7]
615
616 srl %o2,16,%l1
617 stb %l0,[%i1+8]
618 srl %o2,8,%l2
619 stb %l1,[%i1+9]
620 stb %l2,[%i1+10]
621 srl %o3,24,%l4
622 stb %o2,[%i1+11]
623
624 srl %o3,16,%l5
625 stb %l4,[%i1+12]
626 srl %o3,8,%l6
627 stb %l5,[%i1+13]
628 stb %l6,[%i1+14]
629 stb %o3,[%i1+15]
630
631 ret
632 restore
633 .type AES_encrypt,#function
634 .size AES_encrypt,(.-AES_encrypt)
635
636 ___
637
638 $code.=<<___;
639 .align 256
640 AES_Td:
641 ___
642 &_data_word(
643 0x51f4a750, 0x7e416553, 0x1a17a4c3, 0x3a275e96,
644 0x3bab6bcb, 0x1f9d45f1, 0xacfa58ab, 0x4be30393,
645 0x2030fa55, 0xad766df6, 0x88cc7691, 0xf5024c25,
646 0x4fe5d7fc, 0xc52acbd7, 0x26354480, 0xb562a38f,
647 0xdeb15a49, 0x25ba1b67, 0x45ea0e98, 0x5dfec0e1,
648 0xc32f7502, 0x814cf012, 0x8d4697a3, 0x6bd3f9c6,
649 0x038f5fe7, 0x15929c95, 0xbf6d7aeb, 0x955259da,
650 0xd4be832d, 0x587421d3, 0x49e06929, 0x8ec9c844,
651 0x75c2896a, 0xf48e7978, 0x99583e6b, 0x27b971dd,
652 0xbee14fb6, 0xf088ad17, 0xc920ac66, 0x7dce3ab4,
653 0x63df4a18, 0xe51a3182, 0x97513360, 0x62537f45,
654 0xb16477e0, 0xbb6bae84, 0xfe81a01c, 0xf9082b94,
655 0x70486858, 0x8f45fd19, 0x94de6c87, 0x527bf8b7,
656 0xab73d323, 0x724b02e2, 0xe31f8f57, 0x6655ab2a,
657 0xb2eb2807, 0x2fb5c203, 0x86c57b9a, 0xd33708a5,
658 0x302887f2, 0x23bfa5b2, 0x02036aba, 0xed16825c,
659 0x8acf1c2b, 0xa779b492, 0xf307f2f0, 0x4e69e2a1,
660 0x65daf4cd, 0x0605bed5, 0xd134621f, 0xc4a6fe8a,
661 0x342e539d, 0xa2f355a0, 0x058ae132, 0xa4f6eb75,
662 0x0b83ec39, 0x4060efaa, 0x5e719f06, 0xbd6e1051,
663 0x3e218af9, 0x96dd063d, 0xdd3e05ae, 0x4de6bd46,
664 0x91548db5, 0x71c45d05, 0x0406d46f, 0x605015ff,
665 0x1998fb24, 0xd6bde997, 0x894043cc, 0x67d99e77,
666 0xb0e842bd, 0x07898b88, 0xe7195b38, 0x79c8eedb,
667 0xa17c0a47, 0x7c420fe9, 0xf8841ec9, 0x00000000,
668 0x09808683, 0x322bed48, 0x1e1170ac, 0x6c5a724e,
669 0xfd0efffb, 0x0f853856, 0x3daed51e, 0x362d3927,
670 0x0a0fd964, 0x685ca621, 0x9b5b54d1, 0x24362e3a,
671 0x0c0a67b1, 0x9357e70f, 0xb4ee96d2, 0x1b9b919e,
672 0x80c0c54f, 0x61dc20a2, 0x5a774b69, 0x1c121a16,
673 0xe293ba0a, 0xc0a02ae5, 0x3c22e043, 0x121b171d,
674 0x0e090d0b, 0xf28bc7ad, 0x2db6a8b9, 0x141ea9c8,
675 0x57f11985, 0xaf75074c, 0xee99ddbb, 0xa37f60fd,
676 0xf701269f, 0x5c72f5bc, 0x44663bc5, 0x5bfb7e34,
677 0x8b432976, 0xcb23c6dc, 0xb6edfc68, 0xb8e4f163,
678 0xd731dcca, 0x42638510, 0x13972240, 0x84c61120,
679 0x854a247d, 0xd2bb3df8, 0xaef93211, 0xc729a16d,
680 0x1d9e2f4b, 0xdcb230f3, 0x0d8652ec, 0x77c1e3d0,
681 0x2bb3166c, 0xa970b999, 0x119448fa, 0x47e96422,
682 0xa8fc8cc4, 0xa0f03f1a, 0x567d2cd8, 0x223390ef,
683 0x87494ec7, 0xd938d1c1, 0x8ccaa2fe, 0x98d40b36,
684 0xa6f581cf, 0xa57ade28, 0xdab78e26, 0x3fadbfa4,
685 0x2c3a9de4, 0x5078920d, 0x6a5fcc9b, 0x547e4662,
686 0xf68d13c2, 0x90d8b8e8, 0x2e39f75e, 0x82c3aff5,
687 0x9f5d80be, 0x69d0937c, 0x6fd52da9, 0xcf2512b3,
688 0xc8ac993b, 0x10187da7, 0xe89c636e, 0xdb3bbb7b,
689 0xcd267809, 0x6e5918f4, 0xec9ab701, 0x834f9aa8,
690 0xe6956e65, 0xaaffe67e, 0x21bccf08, 0xef15e8e6,
691 0xbae79bd9, 0x4a6f36ce, 0xea9f09d4, 0x29b07cd6,
692 0x31a4b2af, 0x2a3f2331, 0xc6a59430, 0x35a266c0,
693 0x744ebc37, 0xfc82caa6, 0xe090d0b0, 0x33a7d815,
694 0xf104984a, 0x41ecdaf7, 0x7fcd500e, 0x1791f62f,
695 0x764dd68d, 0x43efb04d, 0xccaa4d54, 0xe49604df,
696 0x9ed1b5e3, 0x4c6a881b, 0xc12c1fb8, 0x4665517f,
697 0x9d5eea04, 0x018c355d, 0xfa877473, 0xfb0b412e,
698 0xb3671d5a, 0x92dbd252, 0xe9105633, 0x6dd64713,
699 0x9ad7618c, 0x37a10c7a, 0x59f8148e, 0xeb133c89,
700 0xcea927ee, 0xb761c935, 0xe11ce5ed, 0x7a47b13c,
701 0x9cd2df59, 0x55f2733f, 0x1814ce79, 0x73c737bf,
702 0x53f7cdea, 0x5ffdaa5b, 0xdf3d6f14, 0x7844db86,
703 0xcaaff381, 0xb968c43e, 0x3824342c, 0xc2a3405f,
704 0x161dc372, 0xbce2250c, 0x283c498b, 0xff0d9541,
705 0x39a80171, 0x080cb3de, 0xd8b4e49c, 0x6456c190,
706 0x7bcb8461, 0xd532b670, 0x486c5c74, 0xd0b85742);
707 $code.=<<___;
708 .byte 0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38
709 .byte 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb
710 .byte 0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87
711 .byte 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb
712 .byte 0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d
713 .byte 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e
714 .byte 0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2
715 .byte 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25
716 .byte 0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16
717 .byte 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92
718 .byte 0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda
719 .byte 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84
720 .byte 0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a
721 .byte 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06
722 .byte 0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02
723 .byte 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b
724 .byte 0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea
725 .byte 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73
726 .byte 0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85
727 .byte 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e
728 .byte 0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89
729 .byte 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b
730 .byte 0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20
731 .byte 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4
732 .byte 0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31
733 .byte 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f
734 .byte 0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d
735 .byte 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef
736 .byte 0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0
737 .byte 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61
738 .byte 0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26
739 .byte 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d
740 .type AES_Td,#object
741 .size AES_Td,(.-AES_Td)
742
743 .align 64
744 .skip 16
745 _sparcv9_AES_decrypt:
746 save %sp,-$frame-$locals,%sp
747 stx %i7,[%sp+$bias+$frame+0] ! off-load return address
748 ld [$key+240],$rounds
749 ld [$key+0],$t0
750 ld [$key+4],$t1 !
751 ld [$key+8],$t2
752 ld [$key+12],$t3
753 srl $rounds,1,$rounds
754 xor $t0,$s0,$s0
755 ld [$key+16],$t0
756 xor $t1,$s1,$s1
757 ld [$key+20],$t1
758 srl $s0,21,$acc0 !
759 xor $t2,$s2,$s2
760 ld [$key+24],$t2
761 xor $t3,$s3,$s3
762 and $acc0,2040,$acc0
763 ld [$key+28],$t3
764 srl $s3,13,$acc1
765 nop
766 .Ldec_loop:
767 srl $s2,5,$acc2 !
768 and $acc1,2040,$acc1
769 ldx [$tbl+$acc0],$acc0
770 sll $s1,3,$acc3
771 and $acc2,2040,$acc2
772 ldx [$tbl+$acc1],$acc1
773 srl $s1,21,$acc4
774 and $acc3,2040,$acc3
775 ldx [$tbl+$acc2],$acc2 !
776 srl $s0,13,$acc5
777 and $acc4,2040,$acc4
778 ldx [$tbl+$acc3],$acc3
779 srl $s3,5,$acc6
780 and $acc5,2040,$acc5
781 ldx [$tbl+$acc4],$acc4
782 fmovs %f0,%f0
783 sll $s2,3,$acc7 !
784 and $acc6,2040,$acc6
785 ldx [$tbl+$acc5],$acc5
786 srl $s2,21,$acc8
787 and $acc7,2040,$acc7
788 ldx [$tbl+$acc6],$acc6
789 srl $s1,13,$acc9
790 and $acc8,2040,$acc8
791 ldx [$tbl+$acc7],$acc7 !
792 srl $s0,5,$acc10
793 and $acc9,2040,$acc9
794 ldx [$tbl+$acc8],$acc8
795 sll $s3,3,$acc11
796 and $acc10,2040,$acc10
797 ldx [$tbl+$acc9],$acc9
798 fmovs %f0,%f0
799 srl $s3,21,$acc12 !
800 and $acc11,2040,$acc11
801 ldx [$tbl+$acc10],$acc10
802 srl $s2,13,$acc13
803 and $acc12,2040,$acc12
804 ldx [$tbl+$acc11],$acc11
805 srl $s1,5,$acc14
806 and $acc13,2040,$acc13
807 ldx [$tbl+$acc12],$acc12 !
808 sll $s0,3,$acc15
809 and $acc14,2040,$acc14
810 ldx [$tbl+$acc13],$acc13
811 and $acc15,2040,$acc15
812 add $key,32,$key
813 ldx [$tbl+$acc14],$acc14
814 fmovs %f0,%f0
815 subcc $rounds,1,$rounds !
816 ldx [$tbl+$acc15],$acc15
817 bz,a,pn %icc,.Ldec_last
818 add $tbl,2048,$rounds
819
820 srlx $acc1,8,$acc1
821 xor $acc0,$t0,$t0
822 ld [$key+0],$s0
823 fmovs %f0,%f0
824 srlx $acc2,16,$acc2 !
825 xor $acc1,$t0,$t0
826 ld [$key+4],$s1
827 srlx $acc3,24,$acc3
828 xor $acc2,$t0,$t0
829 ld [$key+8],$s2
830 srlx $acc5,8,$acc5
831 xor $acc3,$t0,$t0
832 ld [$key+12],$s3 !
833 srlx $acc6,16,$acc6
834 xor $acc4,$t1,$t1
835 fmovs %f0,%f0
836 srlx $acc7,24,$acc7
837 xor $acc5,$t1,$t1
838 srlx $acc9,8,$acc9
839 xor $acc6,$t1,$t1
840 srlx $acc10,16,$acc10 !
841 xor $acc7,$t1,$t1
842 srlx $acc11,24,$acc11
843 xor $acc8,$t2,$t2
844 srlx $acc13,8,$acc13
845 xor $acc9,$t2,$t2
846 srlx $acc14,16,$acc14
847 xor $acc10,$t2,$t2
848 srlx $acc15,24,$acc15 !
849 xor $acc11,$t2,$t2
850 xor $acc12,$acc14,$acc14
851 xor $acc13,$t3,$t3
852 srl $t0,21,$acc0
853 xor $acc14,$t3,$t3
854 xor $acc15,$t3,$t3
855 srl $t3,13,$acc1
856
857 and $acc0,2040,$acc0 !
858 srl $t2,5,$acc2
859 and $acc1,2040,$acc1
860 ldx [$tbl+$acc0],$acc0
861 sll $t1,3,$acc3
862 and $acc2,2040,$acc2
863 ldx [$tbl+$acc1],$acc1
864 fmovs %f0,%f0
865 srl $t1,21,$acc4 !
866 and $acc3,2040,$acc3
867 ldx [$tbl+$acc2],$acc2
868 srl $t0,13,$acc5
869 and $acc4,2040,$acc4
870 ldx [$tbl+$acc3],$acc3
871 srl $t3,5,$acc6
872 and $acc5,2040,$acc5
873 ldx [$tbl+$acc4],$acc4 !
874 sll $t2,3,$acc7
875 and $acc6,2040,$acc6
876 ldx [$tbl+$acc5],$acc5
877 srl $t2,21,$acc8
878 and $acc7,2040,$acc7
879 ldx [$tbl+$acc6],$acc6
880 fmovs %f0,%f0
881 srl $t1,13,$acc9 !
882 and $acc8,2040,$acc8
883 ldx [$tbl+$acc7],$acc7
884 srl $t0,5,$acc10
885 and $acc9,2040,$acc9
886 ldx [$tbl+$acc8],$acc8
887 sll $t3,3,$acc11
888 and $acc10,2040,$acc10
889 ldx [$tbl+$acc9],$acc9 !
890 srl $t3,21,$acc12
891 and $acc11,2040,$acc11
892 ldx [$tbl+$acc10],$acc10
893 srl $t2,13,$acc13
894 and $acc12,2040,$acc12
895 ldx [$tbl+$acc11],$acc11
896 fmovs %f0,%f0
897 srl $t1,5,$acc14 !
898 and $acc13,2040,$acc13
899 ldx [$tbl+$acc12],$acc12
900 sll $t0,3,$acc15
901 and $acc14,2040,$acc14
902 ldx [$tbl+$acc13],$acc13
903 srlx $acc1,8,$acc1
904 and $acc15,2040,$acc15
905 ldx [$tbl+$acc14],$acc14 !
906
907 srlx $acc2,16,$acc2
908 xor $acc0,$s0,$s0
909 ldx [$tbl+$acc15],$acc15
910 srlx $acc3,24,$acc3
911 xor $acc1,$s0,$s0
912 ld [$key+16],$t0
913 fmovs %f0,%f0
914 srlx $acc5,8,$acc5 !
915 xor $acc2,$s0,$s0
916 ld [$key+20],$t1
917 srlx $acc6,16,$acc6
918 xor $acc3,$s0,$s0
919 ld [$key+24],$t2
920 srlx $acc7,24,$acc7
921 xor $acc4,$s1,$s1
922 ld [$key+28],$t3 !
923 srlx $acc9,8,$acc9
924 xor $acc5,$s1,$s1
925 ldx [$tbl+2048+0],%g0 ! prefetch td4
926 srlx $acc10,16,$acc10
927 xor $acc6,$s1,$s1
928 ldx [$tbl+2048+32],%g0 ! prefetch td4
929 srlx $acc11,24,$acc11
930 xor $acc7,$s1,$s1
931 ldx [$tbl+2048+64],%g0 ! prefetch td4
932 srlx $acc13,8,$acc13
933 xor $acc8,$s2,$s2
934 ldx [$tbl+2048+96],%g0 ! prefetch td4
935 srlx $acc14,16,$acc14 !
936 xor $acc9,$s2,$s2
937 ldx [$tbl+2048+128],%g0 ! prefetch td4
938 srlx $acc15,24,$acc15
939 xor $acc10,$s2,$s2
940 ldx [$tbl+2048+160],%g0 ! prefetch td4
941 srl $s0,21,$acc0
942 xor $acc11,$s2,$s2
943 ldx [$tbl+2048+192],%g0 ! prefetch td4
944 xor $acc12,$acc14,$acc14
945 xor $acc13,$s3,$s3
946 ldx [$tbl+2048+224],%g0 ! prefetch td4
947 and $acc0,2040,$acc0 !
948 xor $acc14,$s3,$s3
949 xor $acc15,$s3,$s3
950 ba .Ldec_loop
951 srl $s3,13,$acc1
952
953 .align 32
954 .Ldec_last:
955 srlx $acc1,8,$acc1 !
956 xor $acc0,$t0,$t0
957 ld [$key+0],$s0
958 srlx $acc2,16,$acc2
959 xor $acc1,$t0,$t0
960 ld [$key+4],$s1
961 srlx $acc3,24,$acc3
962 xor $acc2,$t0,$t0
963 ld [$key+8],$s2 !
964 srlx $acc5,8,$acc5
965 xor $acc3,$t0,$t0
966 ld [$key+12],$s3
967 srlx $acc6,16,$acc6
968 xor $acc4,$t1,$t1
969 srlx $acc7,24,$acc7
970 xor $acc5,$t1,$t1
971 srlx $acc9,8,$acc9 !
972 xor $acc6,$t1,$t1
973 srlx $acc10,16,$acc10
974 xor $acc7,$t1,$t1
975 srlx $acc11,24,$acc11
976 xor $acc8,$t2,$t2
977 srlx $acc13,8,$acc13
978 xor $acc9,$t2,$t2
979 srlx $acc14,16,$acc14 !
980 xor $acc10,$t2,$t2
981 srlx $acc15,24,$acc15
982 xor $acc11,$t2,$t2
983 xor $acc12,$acc14,$acc14
984 xor $acc13,$t3,$t3
985 srl $t0,24,$acc0
986 xor $acc14,$t3,$t3
987 xor $acc15,$t3,$t3 !
988 srl $t3,16,$acc1
989
990 srl $t2,8,$acc2
991 and $acc1,255,$acc1
992 ldub [$rounds+$acc0],$acc0
993 srl $t1,24,$acc4
994 and $acc2,255,$acc2
995 ldub [$rounds+$acc1],$acc1
996 srl $t0,16,$acc5 !
997 and $t1,255,$acc3
998 ldub [$rounds+$acc2],$acc2
999 ldub [$rounds+$acc3],$acc3
1000 srl $t3,8,$acc6
1001 and $acc5,255,$acc5
1002 ldub [$rounds+$acc4],$acc4
1003 fmovs %f0,%f0
1004 srl $t2,24,$acc8 !
1005 and $acc6,255,$acc6
1006 ldub [$rounds+$acc5],$acc5
1007 srl $t1,16,$acc9
1008 and $t2,255,$acc7
1009 ldub [$rounds+$acc6],$acc6
1010 ldub [$rounds+$acc7],$acc7
1011 fmovs %f0,%f0
1012 srl $t0,8,$acc10 !
1013 and $acc9,255,$acc9
1014 ldub [$rounds+$acc8],$acc8
1015 srl $t3,24,$acc12
1016 and $acc10,255,$acc10
1017 ldub [$rounds+$acc9],$acc9
1018 srl $t2,16,$acc13
1019 and $t3,255,$acc11
1020 ldub [$rounds+$acc10],$acc10 !
1021 srl $t1,8,$acc14
1022 and $acc13,255,$acc13
1023 ldub [$rounds+$acc11],$acc11
1024 ldub [$rounds+$acc12],$acc12
1025 and $acc14,255,$acc14
1026 ldub [$rounds+$acc13],$acc13
1027 and $t0,255,$acc15
1028 ldub [$rounds+$acc14],$acc14 !
1029
1030 sll $acc0,24,$acc0
1031 xor $acc3,$s0,$s0
1032 ldub [$rounds+$acc15],$acc15
1033 sll $acc1,16,$acc1
1034 xor $acc0,$s0,$s0
1035 ldx [%sp+$bias+$frame+0],%i7 ! restore return address
1036 fmovs %f0,%f0
1037 sll $acc2,8,$acc2 !
1038 xor $acc1,$s0,$s0
1039 sll $acc4,24,$acc4
1040 xor $acc2,$s0,$s0
1041 sll $acc5,16,$acc5
1042 xor $acc7,$s1,$s1
1043 sll $acc6,8,$acc6
1044 xor $acc4,$s1,$s1
1045 sll $acc8,24,$acc8 !
1046 xor $acc5,$s1,$s1
1047 sll $acc9,16,$acc9
1048 xor $acc11,$s2,$s2
1049 sll $acc10,8,$acc10
1050 xor $acc6,$s1,$s1
1051 sll $acc12,24,$acc12
1052 xor $acc8,$s2,$s2
1053 sll $acc13,16,$acc13 !
1054 xor $acc9,$s2,$s2
1055 sll $acc14,8,$acc14
1056 xor $acc10,$s2,$s2
1057 xor $acc12,$acc14,$acc14
1058 xor $acc13,$s3,$s3
1059 xor $acc14,$s3,$s3
1060 xor $acc15,$s3,$s3
1061
1062 ret
1063 restore
1064 .type _sparcv9_AES_decrypt,#function
1065 .size _sparcv9_AES_decrypt,(.-_sparcv9_AES_decrypt)
1066
1067 .align 32
1068 .globl AES_decrypt
1069 AES_decrypt:
1070 or %o0,%o1,%g1
1071 andcc %g1,3,%g0
1072 bnz,pn %xcc,.Lunaligned_dec
1073 save %sp,-$frame,%sp
1074
1075 ld [%i0+0],%o0
1076 ld [%i0+4],%o1
1077 ld [%i0+8],%o2
1078 ld [%i0+12],%o3
1079
1080 1: call .+8
1081 add %o7,AES_Td-1b,%o4
1082 call _sparcv9_AES_decrypt
1083 mov %i2,%o5
1084
1085 st %o0,[%i1+0]
1086 st %o1,[%i1+4]
1087 st %o2,[%i1+8]
1088 st %o3,[%i1+12]
1089
1090 ret
1091 restore
1092
1093 .align 32
1094 .Lunaligned_dec:
1095 ldub [%i0+0],%l0
1096 ldub [%i0+1],%l1
1097 ldub [%i0+2],%l2
1098
1099 sll %l0,24,%l0
1100 ldub [%i0+3],%l3
1101 sll %l1,16,%l1
1102 ldub [%i0+4],%l4
1103 sll %l2,8,%l2
1104 or %l1,%l0,%l0
1105 ldub [%i0+5],%l5
1106 sll %l4,24,%l4
1107 or %l3,%l2,%l2
1108 ldub [%i0+6],%l6
1109 sll %l5,16,%l5
1110 or %l0,%l2,%o0
1111 ldub [%i0+7],%l7
1112
1113 sll %l6,8,%l6
1114 or %l5,%l4,%l4
1115 ldub [%i0+8],%l0
1116 or %l7,%l6,%l6
1117 ldub [%i0+9],%l1
1118 or %l4,%l6,%o1
1119 ldub [%i0+10],%l2
1120
1121 sll %l0,24,%l0
1122 ldub [%i0+11],%l3
1123 sll %l1,16,%l1
1124 ldub [%i0+12],%l4
1125 sll %l2,8,%l2
1126 or %l1,%l0,%l0
1127 ldub [%i0+13],%l5
1128 sll %l4,24,%l4
1129 or %l3,%l2,%l2
1130 ldub [%i0+14],%l6
1131 sll %l5,16,%l5
1132 or %l0,%l2,%o2
1133 ldub [%i0+15],%l7
1134
1135 sll %l6,8,%l6
1136 or %l5,%l4,%l4
1137 or %l7,%l6,%l6
1138 or %l4,%l6,%o3
1139
1140 1: call .+8
1141 add %o7,AES_Td-1b,%o4
1142 call _sparcv9_AES_decrypt
1143 mov %i2,%o5
1144
1145 srl %o0,24,%l0
1146 srl %o0,16,%l1
1147 stb %l0,[%i1+0]
1148 srl %o0,8,%l2
1149 stb %l1,[%i1+1]
1150 stb %l2,[%i1+2]
1151 srl %o1,24,%l4
1152 stb %o0,[%i1+3]
1153
1154 srl %o1,16,%l5
1155 stb %l4,[%i1+4]
1156 srl %o1,8,%l6
1157 stb %l5,[%i1+5]
1158 stb %l6,[%i1+6]
1159 srl %o2,24,%l0
1160 stb %o1,[%i1+7]
1161
1162 srl %o2,16,%l1
1163 stb %l0,[%i1+8]
1164 srl %o2,8,%l2
1165 stb %l1,[%i1+9]
1166 stb %l2,[%i1+10]
1167 srl %o3,24,%l4
1168 stb %o2,[%i1+11]
1169
1170 srl %o3,16,%l5
1171 stb %l4,[%i1+12]
1172 srl %o3,8,%l6
1173 stb %l5,[%i1+13]
1174 stb %l6,[%i1+14]
1175 stb %o3,[%i1+15]
1176
1177 ret
1178 restore
1179 .type AES_decrypt,#function
1180 .size AES_decrypt,(.-AES_decrypt)
1181 ___
1182
# The fmovs instructions in the templates are FP nops that were put there
# purely to satisfy instruction alignment requirements and so maximize
# ILP.  UltraSPARC T1 (a.k.a. Niagara) has a shared FPU, where such FP
# nops can have an undesired effect, so drop every one of them (from the
# mnemonic to the end of its line) and accept a small performance loss.
$code =~ s/fmovs[^\n]*//g;

# Write out the finished assembly; checking close makes sure buffered
# output really reached its destination.
print $code;
close STDOUT or die "error closing STDOUT: $!"; # ensure flush