]> git.ipfire.org Git - thirdparty/openssl.git/blame - crypto/aes/asm/aes-sparcv9.pl
Do not silently truncate files on perlasm errors
[thirdparty/openssl.git] / crypto / aes / asm / aes-sparcv9.pl
CommitLineData
6aa36e8e
RS
1#! /usr/bin/env perl
2# Copyright 2005-2016 The OpenSSL Project Authors. All Rights Reserved.
3#
4# Licensed under the OpenSSL license (the "License"). You may not use
5# this file except in compliance with the License. You can obtain a copy
6# in the file LICENSE in the source distribution or at
7# https://www.openssl.org/source/license.html
8
7395d852
AP
9#
10# ====================================================================
e3713c36 11# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
7395d852
AP
12# project. Rights for redistribution and usage in source and binary
13# forms are granted according to the OpenSSL license.
14# ====================================================================
15#
985e4c41 16# Version 1.1
7395d852
AP
17#
# The major reason for undertaking this effort was to mitigate the hazard
# of cache-timing attacks. This is [currently and initially!] addressed in
# two ways. 1. S-boxes are compressed from 5KB to 2KB+256B size each.
# 2. References to them are scheduled for L2 cache latency, meaning
# that the tables don't have to reside in L1 cache. Once again, this
# is an initial draft and one should expect more countermeasures to
# be implemented...
25#
985e4c41
AP
# Version 1.1 prefetches T[ed]4 in order to mitigate attacks on the
# last round.
28#
7395d852
AP
# Even though performance was not the primary goal [on the contrary,
# extra shifts "induced" by compressed S-box and longer loop epilogue
# "induced" by scheduling for L2 have a negative effect on performance],
# the code turned out to run in ~23 cycles per processed byte en-/
# decrypted with a 128-bit key. This is a pretty good result for code
# with the mentioned qualities and an UltraSPARC core. Compared to Sun C
# generated code my encrypt procedure runs just a few percent faster,
# while the decrypt one is a whole 50% faster [yes, Sun C failed to
# generate an optimal decrypt procedure]. Compared to GNU C generated
# code both procedures are more than 60% faster:-)
39
eb77e888
AP
# The last command-line argument names the file that receives the generated
# assembly; STDOUT is redirected to it so that the final `print $code`
# lands there.
#
# The three-argument form of open keeps characters in the path from being
# interpreted as part of the mode, and the result is checked so that an
# unwritable path aborts here instead of surfacing only when STDOUT is
# closed at the end of the script.
#
# The defined() guard matters: open(FH, '>', undef) would quietly create an
# anonymous temporary file.  Without an argument the script simply keeps
# writing to the STDOUT it was started with.
$output = pop;
if (defined($output)) {
    open(STDOUT, '>', $output) or die "can't open $output: $!";
}
42
# Stack-frame parameters.  $frame and $bias are interpolated into the
# assembly as the bare symbols STACK_FRAME / STACK_BIAS (presumably macros
# supplied by sparc_arch.h, which the generated file #includes -- confirm
# there).  $locals is the extra room the inner routines reserve in their
# `save`; the return address is parked at [%sp+$bias+$frame+0].
($frame, $bias, $locals) = ("STACK_FRAME", "STACK_BIAS", 16);

# Sixteen accumulators that receive the table look-ups of one round.
# Each row of four is folded into one word of the next state.
($acc0,  $acc1,  $acc2,  $acc3 ) = ("%l0", "%o0", "%o1", "%o2");
($acc4,  $acc5,  $acc6,  $acc7 ) = ("%l1", "%o3", "%o4", "%o5");
($acc8,  $acc9,  $acc10, $acc11) = ("%l2", "%o7", "%g1", "%g2");
($acc12, $acc13, $acc14, $acc15) = ("%l3", "%g3", "%g4", "%g5");

# Two sets of four state words; the round code alternates between them.
($t0, $t1, $t2, $t3) = ("%l4", "%l5", "%l6", "%l7");
($s0, $s1, $s2, $s3) = ("%i0", "%i1", "%i2", "%i3");

# Table base pointer and key-schedule pointer.
($tbl, $key) = ("%i4", "%i5");

# Round counter.  %i7 normally holds the return address, so the inner
# routines spill that to the stack on entry and reload it before `ret`.
$rounds = "%i7";
79
# Append one ".long w,w" line to $code for every 32-bit word passed in.
#
# Each word is written twice, so every table entry occupies eight bytes.
# The round code fetches entries with 64-bit `ldx` loads at offsets masked
# to multiples of 8 and then shifts them right with `srlx`, which only
# works because both halves of an entry hold the same word.
#
# Arguments: a list of integers.  As in the original loop, processing
#            stops at the first undefined element.
# Effect:    appends to the global $code; the return value is not
#            meaningful.
#
# The former empty prototype "()" claimed the sub takes no arguments and
# only worked because every caller used the prototype-bypassing
# &_data_word(...) form.  Without it both call styles are valid.
sub _data_word
{
    foreach my $word (@_) {
        last unless defined($word);
        $code .= sprintf("\t.long\t0x%08x,0x%08x\n", $word, $word);
    }
}
84
eb77e888
AP
85$code.=<<___;
86#include "sparc_arch.h"
87
88#ifdef __arch64__
7395d852
AP
89.register %g2,#scratch
90.register %g3,#scratch
eb77e888 91#endif
7395d852
AP
92.section ".text",#alloc,#execinstr
93
985e4c41 94.align 256
7395d852
AP
95AES_Te:
96___
97&_data_word(
98 0xc66363a5, 0xf87c7c84, 0xee777799, 0xf67b7b8d,
99 0xfff2f20d, 0xd66b6bbd, 0xde6f6fb1, 0x91c5c554,
100 0x60303050, 0x02010103, 0xce6767a9, 0x562b2b7d,
101 0xe7fefe19, 0xb5d7d762, 0x4dababe6, 0xec76769a,
102 0x8fcaca45, 0x1f82829d, 0x89c9c940, 0xfa7d7d87,
103 0xeffafa15, 0xb25959eb, 0x8e4747c9, 0xfbf0f00b,
104 0x41adadec, 0xb3d4d467, 0x5fa2a2fd, 0x45afafea,
105 0x239c9cbf, 0x53a4a4f7, 0xe4727296, 0x9bc0c05b,
106 0x75b7b7c2, 0xe1fdfd1c, 0x3d9393ae, 0x4c26266a,
107 0x6c36365a, 0x7e3f3f41, 0xf5f7f702, 0x83cccc4f,
108 0x6834345c, 0x51a5a5f4, 0xd1e5e534, 0xf9f1f108,
109 0xe2717193, 0xabd8d873, 0x62313153, 0x2a15153f,
110 0x0804040c, 0x95c7c752, 0x46232365, 0x9dc3c35e,
111 0x30181828, 0x379696a1, 0x0a05050f, 0x2f9a9ab5,
112 0x0e070709, 0x24121236, 0x1b80809b, 0xdfe2e23d,
113 0xcdebeb26, 0x4e272769, 0x7fb2b2cd, 0xea75759f,
114 0x1209091b, 0x1d83839e, 0x582c2c74, 0x341a1a2e,
115 0x361b1b2d, 0xdc6e6eb2, 0xb45a5aee, 0x5ba0a0fb,
116 0xa45252f6, 0x763b3b4d, 0xb7d6d661, 0x7db3b3ce,
117 0x5229297b, 0xdde3e33e, 0x5e2f2f71, 0x13848497,
118 0xa65353f5, 0xb9d1d168, 0x00000000, 0xc1eded2c,
119 0x40202060, 0xe3fcfc1f, 0x79b1b1c8, 0xb65b5bed,
120 0xd46a6abe, 0x8dcbcb46, 0x67bebed9, 0x7239394b,
121 0x944a4ade, 0x984c4cd4, 0xb05858e8, 0x85cfcf4a,
122 0xbbd0d06b, 0xc5efef2a, 0x4faaaae5, 0xedfbfb16,
123 0x864343c5, 0x9a4d4dd7, 0x66333355, 0x11858594,
124 0x8a4545cf, 0xe9f9f910, 0x04020206, 0xfe7f7f81,
125 0xa05050f0, 0x783c3c44, 0x259f9fba, 0x4ba8a8e3,
126 0xa25151f3, 0x5da3a3fe, 0x804040c0, 0x058f8f8a,
127 0x3f9292ad, 0x219d9dbc, 0x70383848, 0xf1f5f504,
128 0x63bcbcdf, 0x77b6b6c1, 0xafdada75, 0x42212163,
129 0x20101030, 0xe5ffff1a, 0xfdf3f30e, 0xbfd2d26d,
130 0x81cdcd4c, 0x180c0c14, 0x26131335, 0xc3ecec2f,
131 0xbe5f5fe1, 0x359797a2, 0x884444cc, 0x2e171739,
132 0x93c4c457, 0x55a7a7f2, 0xfc7e7e82, 0x7a3d3d47,
133 0xc86464ac, 0xba5d5de7, 0x3219192b, 0xe6737395,
134 0xc06060a0, 0x19818198, 0x9e4f4fd1, 0xa3dcdc7f,
135 0x44222266, 0x542a2a7e, 0x3b9090ab, 0x0b888883,
136 0x8c4646ca, 0xc7eeee29, 0x6bb8b8d3, 0x2814143c,
137 0xa7dede79, 0xbc5e5ee2, 0x160b0b1d, 0xaddbdb76,
138 0xdbe0e03b, 0x64323256, 0x743a3a4e, 0x140a0a1e,
139 0x924949db, 0x0c06060a, 0x4824246c, 0xb85c5ce4,
140 0x9fc2c25d, 0xbdd3d36e, 0x43acacef, 0xc46262a6,
141 0x399191a8, 0x319595a4, 0xd3e4e437, 0xf279798b,
142 0xd5e7e732, 0x8bc8c843, 0x6e373759, 0xda6d6db7,
143 0x018d8d8c, 0xb1d5d564, 0x9c4e4ed2, 0x49a9a9e0,
144 0xd86c6cb4, 0xac5656fa, 0xf3f4f407, 0xcfeaea25,
145 0xca6565af, 0xf47a7a8e, 0x47aeaee9, 0x10080818,
146 0x6fbabad5, 0xf0787888, 0x4a25256f, 0x5c2e2e72,
147 0x381c1c24, 0x57a6a6f1, 0x73b4b4c7, 0x97c6c651,
148 0xcbe8e823, 0xa1dddd7c, 0xe874749c, 0x3e1f1f21,
149 0x964b4bdd, 0x61bdbddc, 0x0d8b8b86, 0x0f8a8a85,
150 0xe0707090, 0x7c3e3e42, 0x71b5b5c4, 0xcc6666aa,
151 0x904848d8, 0x06030305, 0xf7f6f601, 0x1c0e0e12,
152 0xc26161a3, 0x6a35355f, 0xae5757f9, 0x69b9b9d0,
153 0x17868691, 0x99c1c158, 0x3a1d1d27, 0x279e9eb9,
154 0xd9e1e138, 0xebf8f813, 0x2b9898b3, 0x22111133,
155 0xd26969bb, 0xa9d9d970, 0x078e8e89, 0x339494a7,
156 0x2d9b9bb6, 0x3c1e1e22, 0x15878792, 0xc9e9e920,
157 0x87cece49, 0xaa5555ff, 0x50282878, 0xa5dfdf7a,
158 0x038c8c8f, 0x59a1a1f8, 0x09898980, 0x1a0d0d17,
159 0x65bfbfda, 0xd7e6e631, 0x844242c6, 0xd06868b8,
160 0x824141c3, 0x299999b0, 0x5a2d2d77, 0x1e0f0f11,
161 0x7bb0b0cb, 0xa85454fc, 0x6dbbbbd6, 0x2c16163a);
162$code.=<<___;
163 .byte 0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5
164 .byte 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76
165 .byte 0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0
166 .byte 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0
167 .byte 0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc
168 .byte 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15
169 .byte 0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a
170 .byte 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75
171 .byte 0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0
172 .byte 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84
173 .byte 0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b
174 .byte 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf
175 .byte 0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85
176 .byte 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8
177 .byte 0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5
178 .byte 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2
179 .byte 0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17
180 .byte 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73
181 .byte 0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88
182 .byte 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb
183 .byte 0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c
184 .byte 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79
185 .byte 0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9
186 .byte 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08
187 .byte 0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6
188 .byte 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a
189 .byte 0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e
190 .byte 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e
191 .byte 0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94
192 .byte 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf
193 .byte 0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68
194 .byte 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16
195.type AES_Te,#object
196.size AES_Te,(.-AES_Te)
197
198.align 64
199.skip 16
200_sparcv9_AES_encrypt:
201 save %sp,-$frame-$locals,%sp
202 stx %i7,[%sp+$bias+$frame+0] ! off-load return address
203 ld [$key+240],$rounds
204 ld [$key+0],$t0
205 ld [$key+4],$t1 !
206 ld [$key+8],$t2
207 srl $rounds,1,$rounds
208 xor $t0,$s0,$s0
209 ld [$key+12],$t3
210 srl $s0,21,$acc0
211 xor $t1,$s1,$s1
212 ld [$key+16],$t0
213 srl $s1,13,$acc1 !
214 xor $t2,$s2,$s2
215 ld [$key+20],$t1
216 xor $t3,$s3,$s3
217 ld [$key+24],$t2
218 and $acc0,2040,$acc0
219 ld [$key+28],$t3
220 nop
221.Lenc_loop:
222 srl $s2,5,$acc2 !
223 and $acc1,2040,$acc1
224 ldx [$tbl+$acc0],$acc0
225 sll $s3,3,$acc3
226 and $acc2,2040,$acc2
227 ldx [$tbl+$acc1],$acc1
228 srl $s1,21,$acc4
229 and $acc3,2040,$acc3
230 ldx [$tbl+$acc2],$acc2 !
231 srl $s2,13,$acc5
232 and $acc4,2040,$acc4
233 ldx [$tbl+$acc3],$acc3
234 srl $s3,5,$acc6
235 and $acc5,2040,$acc5
236 ldx [$tbl+$acc4],$acc4
237 fmovs %f0,%f0
238 sll $s0,3,$acc7 !
239 and $acc6,2040,$acc6
240 ldx [$tbl+$acc5],$acc5
241 srl $s2,21,$acc8
242 and $acc7,2040,$acc7
243 ldx [$tbl+$acc6],$acc6
244 srl $s3,13,$acc9
245 and $acc8,2040,$acc8
246 ldx [$tbl+$acc7],$acc7 !
247 srl $s0,5,$acc10
248 and $acc9,2040,$acc9
249 ldx [$tbl+$acc8],$acc8
250 sll $s1,3,$acc11
251 and $acc10,2040,$acc10
252 ldx [$tbl+$acc9],$acc9
253 fmovs %f0,%f0
254 srl $s3,21,$acc12 !
255 and $acc11,2040,$acc11
256 ldx [$tbl+$acc10],$acc10
257 srl $s0,13,$acc13
258 and $acc12,2040,$acc12
259 ldx [$tbl+$acc11],$acc11
260 srl $s1,5,$acc14
261 and $acc13,2040,$acc13
262 ldx [$tbl+$acc12],$acc12 !
263 sll $s2,3,$acc15
264 and $acc14,2040,$acc14
265 ldx [$tbl+$acc13],$acc13
266 and $acc15,2040,$acc15
267 add $key,32,$key
268 ldx [$tbl+$acc14],$acc14
269 fmovs %f0,%f0
270 subcc $rounds,1,$rounds !
271 ldx [$tbl+$acc15],$acc15
272 bz,a,pn %icc,.Lenc_last
273 add $tbl,2048,$rounds
274
275 srlx $acc1,8,$acc1
276 xor $acc0,$t0,$t0
277 ld [$key+0],$s0
278 fmovs %f0,%f0
279 srlx $acc2,16,$acc2 !
280 xor $acc1,$t0,$t0
281 ld [$key+4],$s1
282 srlx $acc3,24,$acc3
283 xor $acc2,$t0,$t0
284 ld [$key+8],$s2
285 srlx $acc5,8,$acc5
286 xor $acc3,$t0,$t0
287 ld [$key+12],$s3 !
288 srlx $acc6,16,$acc6
289 xor $acc4,$t1,$t1
290 fmovs %f0,%f0
291 srlx $acc7,24,$acc7
292 xor $acc5,$t1,$t1
293 srlx $acc9,8,$acc9
294 xor $acc6,$t1,$t1
295 srlx $acc10,16,$acc10 !
296 xor $acc7,$t1,$t1
297 srlx $acc11,24,$acc11
298 xor $acc8,$t2,$t2
299 srlx $acc13,8,$acc13
300 xor $acc9,$t2,$t2
301 srlx $acc14,16,$acc14
302 xor $acc10,$t2,$t2
303 srlx $acc15,24,$acc15 !
304 xor $acc11,$t2,$t2
305 xor $acc12,$acc14,$acc14
306 xor $acc13,$t3,$t3
307 srl $t0,21,$acc0
308 xor $acc14,$t3,$t3
309 srl $t1,13,$acc1
310 xor $acc15,$t3,$t3
311
312 and $acc0,2040,$acc0 !
313 srl $t2,5,$acc2
314 and $acc1,2040,$acc1
315 ldx [$tbl+$acc0],$acc0
316 sll $t3,3,$acc3
317 and $acc2,2040,$acc2
318 ldx [$tbl+$acc1],$acc1
319 fmovs %f0,%f0
320 srl $t1,21,$acc4 !
321 and $acc3,2040,$acc3
322 ldx [$tbl+$acc2],$acc2
323 srl $t2,13,$acc5
324 and $acc4,2040,$acc4
325 ldx [$tbl+$acc3],$acc3
326 srl $t3,5,$acc6
327 and $acc5,2040,$acc5
328 ldx [$tbl+$acc4],$acc4 !
329 sll $t0,3,$acc7
330 and $acc6,2040,$acc6
331 ldx [$tbl+$acc5],$acc5
332 srl $t2,21,$acc8
333 and $acc7,2040,$acc7
334 ldx [$tbl+$acc6],$acc6
335 fmovs %f0,%f0
336 srl $t3,13,$acc9 !
337 and $acc8,2040,$acc8
338 ldx [$tbl+$acc7],$acc7
339 srl $t0,5,$acc10
340 and $acc9,2040,$acc9
341 ldx [$tbl+$acc8],$acc8
342 sll $t1,3,$acc11
343 and $acc10,2040,$acc10
344 ldx [$tbl+$acc9],$acc9 !
345 srl $t3,21,$acc12
346 and $acc11,2040,$acc11
347 ldx [$tbl+$acc10],$acc10
348 srl $t0,13,$acc13
349 and $acc12,2040,$acc12
350 ldx [$tbl+$acc11],$acc11
351 fmovs %f0,%f0
352 srl $t1,5,$acc14 !
353 and $acc13,2040,$acc13
354 ldx [$tbl+$acc12],$acc12
355 sll $t2,3,$acc15
356 and $acc14,2040,$acc14
357 ldx [$tbl+$acc13],$acc13
358 srlx $acc1,8,$acc1
359 and $acc15,2040,$acc15
360 ldx [$tbl+$acc14],$acc14 !
361
362 srlx $acc2,16,$acc2
363 xor $acc0,$s0,$s0
364 ldx [$tbl+$acc15],$acc15
365 srlx $acc3,24,$acc3
366 xor $acc1,$s0,$s0
367 ld [$key+16],$t0
368 fmovs %f0,%f0
369 srlx $acc5,8,$acc5 !
370 xor $acc2,$s0,$s0
371 ld [$key+20],$t1
372 srlx $acc6,16,$acc6
373 xor $acc3,$s0,$s0
374 ld [$key+24],$t2
375 srlx $acc7,24,$acc7
376 xor $acc4,$s1,$s1
377 ld [$key+28],$t3 !
378 srlx $acc9,8,$acc9
379 xor $acc5,$s1,$s1
985e4c41 380 ldx [$tbl+2048+0],%g0 ! prefetch te4
7395d852
AP
381 srlx $acc10,16,$acc10
382 xor $acc6,$s1,$s1
985e4c41 383 ldx [$tbl+2048+32],%g0 ! prefetch te4
7395d852
AP
384 srlx $acc11,24,$acc11
385 xor $acc7,$s1,$s1
985e4c41 386 ldx [$tbl+2048+64],%g0 ! prefetch te4
7395d852
AP
387 srlx $acc13,8,$acc13
388 xor $acc8,$s2,$s2
985e4c41 389 ldx [$tbl+2048+96],%g0 ! prefetch te4
7395d852
AP
390 srlx $acc14,16,$acc14 !
391 xor $acc9,$s2,$s2
985e4c41 392 ldx [$tbl+2048+128],%g0 ! prefetch te4
7395d852
AP
393 srlx $acc15,24,$acc15
394 xor $acc10,$s2,$s2
985e4c41 395 ldx [$tbl+2048+160],%g0 ! prefetch te4
7395d852
AP
396 srl $s0,21,$acc0
397 xor $acc11,$s2,$s2
985e4c41 398 ldx [$tbl+2048+192],%g0 ! prefetch te4
7395d852
AP
399 xor $acc12,$acc14,$acc14
400 xor $acc13,$s3,$s3
985e4c41 401 ldx [$tbl+2048+224],%g0 ! prefetch te4
7395d852
AP
402 srl $s1,13,$acc1 !
403 xor $acc14,$s3,$s3
404 xor $acc15,$s3,$s3
405 ba .Lenc_loop
406 and $acc0,2040,$acc0
407
408.align 32
409.Lenc_last:
410 srlx $acc1,8,$acc1 !
411 xor $acc0,$t0,$t0
412 ld [$key+0],$s0
413 srlx $acc2,16,$acc2
414 xor $acc1,$t0,$t0
415 ld [$key+4],$s1
416 srlx $acc3,24,$acc3
417 xor $acc2,$t0,$t0
418 ld [$key+8],$s2 !
419 srlx $acc5,8,$acc5
420 xor $acc3,$t0,$t0
421 ld [$key+12],$s3
422 srlx $acc6,16,$acc6
423 xor $acc4,$t1,$t1
424 srlx $acc7,24,$acc7
425 xor $acc5,$t1,$t1
426 srlx $acc9,8,$acc9 !
427 xor $acc6,$t1,$t1
428 srlx $acc10,16,$acc10
429 xor $acc7,$t1,$t1
430 srlx $acc11,24,$acc11
431 xor $acc8,$t2,$t2
432 srlx $acc13,8,$acc13
433 xor $acc9,$t2,$t2
434 srlx $acc14,16,$acc14 !
435 xor $acc10,$t2,$t2
436 srlx $acc15,24,$acc15
437 xor $acc11,$t2,$t2
438 xor $acc12,$acc14,$acc14
439 xor $acc13,$t3,$t3
440 srl $t0,24,$acc0
441 xor $acc14,$t3,$t3
442 srl $t1,16,$acc1 !
443 xor $acc15,$t3,$t3
444
445 srl $t2,8,$acc2
446 and $acc1,255,$acc1
447 ldub [$rounds+$acc0],$acc0
448 srl $t1,24,$acc4
449 and $acc2,255,$acc2
450 ldub [$rounds+$acc1],$acc1
451 srl $t2,16,$acc5 !
452 and $t3,255,$acc3
453 ldub [$rounds+$acc2],$acc2
454 ldub [$rounds+$acc3],$acc3
455 srl $t3,8,$acc6
456 and $acc5,255,$acc5
457 ldub [$rounds+$acc4],$acc4
458 fmovs %f0,%f0
459 srl $t2,24,$acc8 !
460 and $acc6,255,$acc6
461 ldub [$rounds+$acc5],$acc5
462 srl $t3,16,$acc9
463 and $t0,255,$acc7
464 ldub [$rounds+$acc6],$acc6
465 ldub [$rounds+$acc7],$acc7
466 fmovs %f0,%f0
467 srl $t0,8,$acc10 !
468 and $acc9,255,$acc9
469 ldub [$rounds+$acc8],$acc8
470 srl $t3,24,$acc12
471 and $acc10,255,$acc10
472 ldub [$rounds+$acc9],$acc9
473 srl $t0,16,$acc13
474 and $t1,255,$acc11
475 ldub [$rounds+$acc10],$acc10 !
476 srl $t1,8,$acc14
477 and $acc13,255,$acc13
478 ldub [$rounds+$acc11],$acc11
479 ldub [$rounds+$acc12],$acc12
480 and $acc14,255,$acc14
481 ldub [$rounds+$acc13],$acc13
482 and $t2,255,$acc15
483 ldub [$rounds+$acc14],$acc14 !
484
485 sll $acc0,24,$acc0
486 xor $acc3,$s0,$s0
487 ldub [$rounds+$acc15],$acc15
488 sll $acc1,16,$acc1
489 xor $acc0,$s0,$s0
490 ldx [%sp+$bias+$frame+0],%i7 ! restore return address
491 fmovs %f0,%f0
492 sll $acc2,8,$acc2 !
493 xor $acc1,$s0,$s0
494 sll $acc4,24,$acc4
495 xor $acc2,$s0,$s0
496 sll $acc5,16,$acc5
497 xor $acc7,$s1,$s1
498 sll $acc6,8,$acc6
499 xor $acc4,$s1,$s1
500 sll $acc8,24,$acc8 !
501 xor $acc5,$s1,$s1
502 sll $acc9,16,$acc9
503 xor $acc11,$s2,$s2
504 sll $acc10,8,$acc10
505 xor $acc6,$s1,$s1
506 sll $acc12,24,$acc12
507 xor $acc8,$s2,$s2
508 sll $acc13,16,$acc13 !
509 xor $acc9,$s2,$s2
510 sll $acc14,8,$acc14
511 xor $acc10,$s2,$s2
512 xor $acc12,$acc14,$acc14
513 xor $acc13,$s3,$s3
514 xor $acc14,$s3,$s3
515 xor $acc15,$s3,$s3
516
517 ret
518 restore
519.type _sparcv9_AES_encrypt,#function
520.size _sparcv9_AES_encrypt,(.-_sparcv9_AES_encrypt)
521
522.align 32
523.globl AES_encrypt
524AES_encrypt:
525 or %o0,%o1,%g1
526 andcc %g1,3,%g0
527 bnz,pn %xcc,.Lunaligned_enc
528 save %sp,-$frame,%sp
529
530 ld [%i0+0],%o0
531 ld [%i0+4],%o1
532 ld [%i0+8],%o2
533 ld [%i0+12],%o3
534
4c78bc05 5351: call .+8
e22b8648 536 add %o7,AES_Te-1b,%o4
4c78bc05
AP
537 call _sparcv9_AES_encrypt
538 mov %i2,%o5
7395d852
AP
539
540 st %o0,[%i1+0]
541 st %o1,[%i1+4]
542 st %o2,[%i1+8]
543 st %o3,[%i1+12]
544
545 ret
546 restore
547
548.align 32
549.Lunaligned_enc:
550 ldub [%i0+0],%l0
551 ldub [%i0+1],%l1
552 ldub [%i0+2],%l2
553
554 sll %l0,24,%l0
555 ldub [%i0+3],%l3
556 sll %l1,16,%l1
557 ldub [%i0+4],%l4
558 sll %l2,8,%l2
559 or %l1,%l0,%l0
560 ldub [%i0+5],%l5
561 sll %l4,24,%l4
562 or %l3,%l2,%l2
563 ldub [%i0+6],%l6
564 sll %l5,16,%l5
565 or %l0,%l2,%o0
566 ldub [%i0+7],%l7
567
568 sll %l6,8,%l6
569 or %l5,%l4,%l4
570 ldub [%i0+8],%l0
571 or %l7,%l6,%l6
572 ldub [%i0+9],%l1
573 or %l4,%l6,%o1
574 ldub [%i0+10],%l2
575
576 sll %l0,24,%l0
577 ldub [%i0+11],%l3
578 sll %l1,16,%l1
579 ldub [%i0+12],%l4
580 sll %l2,8,%l2
581 or %l1,%l0,%l0
582 ldub [%i0+13],%l5
583 sll %l4,24,%l4
584 or %l3,%l2,%l2
585 ldub [%i0+14],%l6
586 sll %l5,16,%l5
587 or %l0,%l2,%o2
588 ldub [%i0+15],%l7
589
590 sll %l6,8,%l6
591 or %l5,%l4,%l4
592 or %l7,%l6,%l6
593 or %l4,%l6,%o3
594
4c78bc05 5951: call .+8
e22b8648 596 add %o7,AES_Te-1b,%o4
4c78bc05
AP
597 call _sparcv9_AES_encrypt
598 mov %i2,%o5
7395d852
AP
599
600 srl %o0,24,%l0
601 srl %o0,16,%l1
602 stb %l0,[%i1+0]
603 srl %o0,8,%l2
604 stb %l1,[%i1+1]
605 stb %l2,[%i1+2]
606 srl %o1,24,%l4
607 stb %o0,[%i1+3]
608
609 srl %o1,16,%l5
610 stb %l4,[%i1+4]
611 srl %o1,8,%l6
612 stb %l5,[%i1+5]
613 stb %l6,[%i1+6]
614 srl %o2,24,%l0
615 stb %o1,[%i1+7]
616
617 srl %o2,16,%l1
618 stb %l0,[%i1+8]
619 srl %o2,8,%l2
620 stb %l1,[%i1+9]
621 stb %l2,[%i1+10]
622 srl %o3,24,%l4
623 stb %o2,[%i1+11]
624
625 srl %o3,16,%l5
626 stb %l4,[%i1+12]
627 srl %o3,8,%l6
628 stb %l5,[%i1+13]
629 stb %l6,[%i1+14]
630 stb %o3,[%i1+15]
631
632 ret
633 restore
634.type AES_encrypt,#function
635.size AES_encrypt,(.-AES_encrypt)
636
637___
638
639$code.=<<___;
985e4c41 640.align 256
7395d852
AP
641AES_Td:
642___
643&_data_word(
644 0x51f4a750, 0x7e416553, 0x1a17a4c3, 0x3a275e96,
645 0x3bab6bcb, 0x1f9d45f1, 0xacfa58ab, 0x4be30393,
646 0x2030fa55, 0xad766df6, 0x88cc7691, 0xf5024c25,
647 0x4fe5d7fc, 0xc52acbd7, 0x26354480, 0xb562a38f,
648 0xdeb15a49, 0x25ba1b67, 0x45ea0e98, 0x5dfec0e1,
649 0xc32f7502, 0x814cf012, 0x8d4697a3, 0x6bd3f9c6,
650 0x038f5fe7, 0x15929c95, 0xbf6d7aeb, 0x955259da,
651 0xd4be832d, 0x587421d3, 0x49e06929, 0x8ec9c844,
652 0x75c2896a, 0xf48e7978, 0x99583e6b, 0x27b971dd,
653 0xbee14fb6, 0xf088ad17, 0xc920ac66, 0x7dce3ab4,
654 0x63df4a18, 0xe51a3182, 0x97513360, 0x62537f45,
655 0xb16477e0, 0xbb6bae84, 0xfe81a01c, 0xf9082b94,
656 0x70486858, 0x8f45fd19, 0x94de6c87, 0x527bf8b7,
657 0xab73d323, 0x724b02e2, 0xe31f8f57, 0x6655ab2a,
658 0xb2eb2807, 0x2fb5c203, 0x86c57b9a, 0xd33708a5,
659 0x302887f2, 0x23bfa5b2, 0x02036aba, 0xed16825c,
660 0x8acf1c2b, 0xa779b492, 0xf307f2f0, 0x4e69e2a1,
661 0x65daf4cd, 0x0605bed5, 0xd134621f, 0xc4a6fe8a,
662 0x342e539d, 0xa2f355a0, 0x058ae132, 0xa4f6eb75,
663 0x0b83ec39, 0x4060efaa, 0x5e719f06, 0xbd6e1051,
664 0x3e218af9, 0x96dd063d, 0xdd3e05ae, 0x4de6bd46,
665 0x91548db5, 0x71c45d05, 0x0406d46f, 0x605015ff,
666 0x1998fb24, 0xd6bde997, 0x894043cc, 0x67d99e77,
667 0xb0e842bd, 0x07898b88, 0xe7195b38, 0x79c8eedb,
668 0xa17c0a47, 0x7c420fe9, 0xf8841ec9, 0x00000000,
669 0x09808683, 0x322bed48, 0x1e1170ac, 0x6c5a724e,
670 0xfd0efffb, 0x0f853856, 0x3daed51e, 0x362d3927,
671 0x0a0fd964, 0x685ca621, 0x9b5b54d1, 0x24362e3a,
672 0x0c0a67b1, 0x9357e70f, 0xb4ee96d2, 0x1b9b919e,
673 0x80c0c54f, 0x61dc20a2, 0x5a774b69, 0x1c121a16,
674 0xe293ba0a, 0xc0a02ae5, 0x3c22e043, 0x121b171d,
675 0x0e090d0b, 0xf28bc7ad, 0x2db6a8b9, 0x141ea9c8,
676 0x57f11985, 0xaf75074c, 0xee99ddbb, 0xa37f60fd,
677 0xf701269f, 0x5c72f5bc, 0x44663bc5, 0x5bfb7e34,
678 0x8b432976, 0xcb23c6dc, 0xb6edfc68, 0xb8e4f163,
679 0xd731dcca, 0x42638510, 0x13972240, 0x84c61120,
680 0x854a247d, 0xd2bb3df8, 0xaef93211, 0xc729a16d,
681 0x1d9e2f4b, 0xdcb230f3, 0x0d8652ec, 0x77c1e3d0,
682 0x2bb3166c, 0xa970b999, 0x119448fa, 0x47e96422,
683 0xa8fc8cc4, 0xa0f03f1a, 0x567d2cd8, 0x223390ef,
684 0x87494ec7, 0xd938d1c1, 0x8ccaa2fe, 0x98d40b36,
685 0xa6f581cf, 0xa57ade28, 0xdab78e26, 0x3fadbfa4,
686 0x2c3a9de4, 0x5078920d, 0x6a5fcc9b, 0x547e4662,
687 0xf68d13c2, 0x90d8b8e8, 0x2e39f75e, 0x82c3aff5,
688 0x9f5d80be, 0x69d0937c, 0x6fd52da9, 0xcf2512b3,
689 0xc8ac993b, 0x10187da7, 0xe89c636e, 0xdb3bbb7b,
690 0xcd267809, 0x6e5918f4, 0xec9ab701, 0x834f9aa8,
691 0xe6956e65, 0xaaffe67e, 0x21bccf08, 0xef15e8e6,
692 0xbae79bd9, 0x4a6f36ce, 0xea9f09d4, 0x29b07cd6,
693 0x31a4b2af, 0x2a3f2331, 0xc6a59430, 0x35a266c0,
694 0x744ebc37, 0xfc82caa6, 0xe090d0b0, 0x33a7d815,
695 0xf104984a, 0x41ecdaf7, 0x7fcd500e, 0x1791f62f,
696 0x764dd68d, 0x43efb04d, 0xccaa4d54, 0xe49604df,
697 0x9ed1b5e3, 0x4c6a881b, 0xc12c1fb8, 0x4665517f,
698 0x9d5eea04, 0x018c355d, 0xfa877473, 0xfb0b412e,
699 0xb3671d5a, 0x92dbd252, 0xe9105633, 0x6dd64713,
700 0x9ad7618c, 0x37a10c7a, 0x59f8148e, 0xeb133c89,
701 0xcea927ee, 0xb761c935, 0xe11ce5ed, 0x7a47b13c,
702 0x9cd2df59, 0x55f2733f, 0x1814ce79, 0x73c737bf,
703 0x53f7cdea, 0x5ffdaa5b, 0xdf3d6f14, 0x7844db86,
704 0xcaaff381, 0xb968c43e, 0x3824342c, 0xc2a3405f,
705 0x161dc372, 0xbce2250c, 0x283c498b, 0xff0d9541,
706 0x39a80171, 0x080cb3de, 0xd8b4e49c, 0x6456c190,
707 0x7bcb8461, 0xd532b670, 0x486c5c74, 0xd0b85742);
708$code.=<<___;
709 .byte 0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38
710 .byte 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb
711 .byte 0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87
712 .byte 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb
713 .byte 0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d
714 .byte 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e
715 .byte 0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2
716 .byte 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25
717 .byte 0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16
718 .byte 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92
719 .byte 0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda
720 .byte 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84
721 .byte 0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a
722 .byte 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06
723 .byte 0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02
724 .byte 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b
725 .byte 0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea
726 .byte 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73
727 .byte 0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85
728 .byte 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e
729 .byte 0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89
730 .byte 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b
731 .byte 0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20
732 .byte 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4
733 .byte 0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31
734 .byte 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f
735 .byte 0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d
736 .byte 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef
737 .byte 0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0
738 .byte 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61
739 .byte 0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26
740 .byte 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d
741.type AES_Td,#object
742.size AES_Td,(.-AES_Td)
743
744.align 64
745.skip 16
746_sparcv9_AES_decrypt:
747 save %sp,-$frame-$locals,%sp
748 stx %i7,[%sp+$bias+$frame+0] ! off-load return address
749 ld [$key+240],$rounds
750 ld [$key+0],$t0
751 ld [$key+4],$t1 !
752 ld [$key+8],$t2
753 ld [$key+12],$t3
754 srl $rounds,1,$rounds
755 xor $t0,$s0,$s0
756 ld [$key+16],$t0
757 xor $t1,$s1,$s1
758 ld [$key+20],$t1
759 srl $s0,21,$acc0 !
760 xor $t2,$s2,$s2
761 ld [$key+24],$t2
762 xor $t3,$s3,$s3
763 and $acc0,2040,$acc0
764 ld [$key+28],$t3
765 srl $s3,13,$acc1
766 nop
767.Ldec_loop:
768 srl $s2,5,$acc2 !
769 and $acc1,2040,$acc1
770 ldx [$tbl+$acc0],$acc0
771 sll $s1,3,$acc3
772 and $acc2,2040,$acc2
773 ldx [$tbl+$acc1],$acc1
774 srl $s1,21,$acc4
775 and $acc3,2040,$acc3
776 ldx [$tbl+$acc2],$acc2 !
777 srl $s0,13,$acc5
778 and $acc4,2040,$acc4
779 ldx [$tbl+$acc3],$acc3
780 srl $s3,5,$acc6
781 and $acc5,2040,$acc5
782 ldx [$tbl+$acc4],$acc4
783 fmovs %f0,%f0
784 sll $s2,3,$acc7 !
785 and $acc6,2040,$acc6
786 ldx [$tbl+$acc5],$acc5
787 srl $s2,21,$acc8
788 and $acc7,2040,$acc7
789 ldx [$tbl+$acc6],$acc6
790 srl $s1,13,$acc9
791 and $acc8,2040,$acc8
792 ldx [$tbl+$acc7],$acc7 !
793 srl $s0,5,$acc10
794 and $acc9,2040,$acc9
795 ldx [$tbl+$acc8],$acc8
796 sll $s3,3,$acc11
797 and $acc10,2040,$acc10
798 ldx [$tbl+$acc9],$acc9
799 fmovs %f0,%f0
800 srl $s3,21,$acc12 !
801 and $acc11,2040,$acc11
802 ldx [$tbl+$acc10],$acc10
803 srl $s2,13,$acc13
804 and $acc12,2040,$acc12
805 ldx [$tbl+$acc11],$acc11
806 srl $s1,5,$acc14
807 and $acc13,2040,$acc13
808 ldx [$tbl+$acc12],$acc12 !
809 sll $s0,3,$acc15
810 and $acc14,2040,$acc14
811 ldx [$tbl+$acc13],$acc13
812 and $acc15,2040,$acc15
813 add $key,32,$key
814 ldx [$tbl+$acc14],$acc14
815 fmovs %f0,%f0
816 subcc $rounds,1,$rounds !
817 ldx [$tbl+$acc15],$acc15
818 bz,a,pn %icc,.Ldec_last
819 add $tbl,2048,$rounds
820
821 srlx $acc1,8,$acc1
822 xor $acc0,$t0,$t0
823 ld [$key+0],$s0
824 fmovs %f0,%f0
825 srlx $acc2,16,$acc2 !
826 xor $acc1,$t0,$t0
827 ld [$key+4],$s1
828 srlx $acc3,24,$acc3
829 xor $acc2,$t0,$t0
830 ld [$key+8],$s2
831 srlx $acc5,8,$acc5
832 xor $acc3,$t0,$t0
833 ld [$key+12],$s3 !
834 srlx $acc6,16,$acc6
835 xor $acc4,$t1,$t1
836 fmovs %f0,%f0
837 srlx $acc7,24,$acc7
838 xor $acc5,$t1,$t1
839 srlx $acc9,8,$acc9
840 xor $acc6,$t1,$t1
841 srlx $acc10,16,$acc10 !
842 xor $acc7,$t1,$t1
843 srlx $acc11,24,$acc11
844 xor $acc8,$t2,$t2
845 srlx $acc13,8,$acc13
846 xor $acc9,$t2,$t2
847 srlx $acc14,16,$acc14
848 xor $acc10,$t2,$t2
849 srlx $acc15,24,$acc15 !
850 xor $acc11,$t2,$t2
851 xor $acc12,$acc14,$acc14
852 xor $acc13,$t3,$t3
853 srl $t0,21,$acc0
854 xor $acc14,$t3,$t3
855 xor $acc15,$t3,$t3
856 srl $t3,13,$acc1
857
858 and $acc0,2040,$acc0 !
859 srl $t2,5,$acc2
860 and $acc1,2040,$acc1
861 ldx [$tbl+$acc0],$acc0
862 sll $t1,3,$acc3
863 and $acc2,2040,$acc2
864 ldx [$tbl+$acc1],$acc1
865 fmovs %f0,%f0
866 srl $t1,21,$acc4 !
867 and $acc3,2040,$acc3
868 ldx [$tbl+$acc2],$acc2
869 srl $t0,13,$acc5
870 and $acc4,2040,$acc4
871 ldx [$tbl+$acc3],$acc3
872 srl $t3,5,$acc6
873 and $acc5,2040,$acc5
874 ldx [$tbl+$acc4],$acc4 !
875 sll $t2,3,$acc7
876 and $acc6,2040,$acc6
877 ldx [$tbl+$acc5],$acc5
878 srl $t2,21,$acc8
879 and $acc7,2040,$acc7
880 ldx [$tbl+$acc6],$acc6
881 fmovs %f0,%f0
882 srl $t1,13,$acc9 !
883 and $acc8,2040,$acc8
884 ldx [$tbl+$acc7],$acc7
885 srl $t0,5,$acc10
886 and $acc9,2040,$acc9
887 ldx [$tbl+$acc8],$acc8
888 sll $t3,3,$acc11
889 and $acc10,2040,$acc10
890 ldx [$tbl+$acc9],$acc9 !
891 srl $t3,21,$acc12
892 and $acc11,2040,$acc11
893 ldx [$tbl+$acc10],$acc10
894 srl $t2,13,$acc13
895 and $acc12,2040,$acc12
896 ldx [$tbl+$acc11],$acc11
897 fmovs %f0,%f0
898 srl $t1,5,$acc14 !
899 and $acc13,2040,$acc13
900 ldx [$tbl+$acc12],$acc12
901 sll $t0,3,$acc15
902 and $acc14,2040,$acc14
903 ldx [$tbl+$acc13],$acc13
904 srlx $acc1,8,$acc1
905 and $acc15,2040,$acc15
906 ldx [$tbl+$acc14],$acc14 !
907
908 srlx $acc2,16,$acc2
909 xor $acc0,$s0,$s0
910 ldx [$tbl+$acc15],$acc15
911 srlx $acc3,24,$acc3
912 xor $acc1,$s0,$s0
913 ld [$key+16],$t0
914 fmovs %f0,%f0
915 srlx $acc5,8,$acc5 !
916 xor $acc2,$s0,$s0
917 ld [$key+20],$t1
918 srlx $acc6,16,$acc6
919 xor $acc3,$s0,$s0
920 ld [$key+24],$t2
921 srlx $acc7,24,$acc7
922 xor $acc4,$s1,$s1
923 ld [$key+28],$t3 !
924 srlx $acc9,8,$acc9
925 xor $acc5,$s1,$s1
985e4c41 926 ldx [$tbl+2048+0],%g0 ! prefetch td4
7395d852
AP
927 srlx $acc10,16,$acc10
928 xor $acc6,$s1,$s1
985e4c41 929 ldx [$tbl+2048+32],%g0 ! prefetch td4
7395d852
AP
930 srlx $acc11,24,$acc11
931 xor $acc7,$s1,$s1
985e4c41 932 ldx [$tbl+2048+64],%g0 ! prefetch td4
7395d852
AP
933 srlx $acc13,8,$acc13
934 xor $acc8,$s2,$s2
985e4c41 935 ldx [$tbl+2048+96],%g0 ! prefetch td4
7395d852
AP
936 srlx $acc14,16,$acc14 !
937 xor $acc9,$s2,$s2
985e4c41 938 ldx [$tbl+2048+128],%g0 ! prefetch td4
7395d852
AP
939 srlx $acc15,24,$acc15
940 xor $acc10,$s2,$s2
985e4c41 941 ldx [$tbl+2048+160],%g0 ! prefetch td4
7395d852
AP
942 srl $s0,21,$acc0
943 xor $acc11,$s2,$s2
985e4c41 944 ldx [$tbl+2048+192],%g0 ! prefetch td4
7395d852
AP
945 xor $acc12,$acc14,$acc14
946 xor $acc13,$s3,$s3
985e4c41 947 ldx [$tbl+2048+224],%g0 ! prefetch td4
7395d852
AP
948 and $acc0,2040,$acc0 !
949 xor $acc14,$s3,$s3
950 xor $acc15,$s3,$s3
951 ba .Ldec_loop
952 srl $s3,13,$acc1
953
954.align 32
955.Ldec_last:
956 srlx $acc1,8,$acc1 !
957 xor $acc0,$t0,$t0
958 ld [$key+0],$s0
959 srlx $acc2,16,$acc2
960 xor $acc1,$t0,$t0
961 ld [$key+4],$s1
962 srlx $acc3,24,$acc3
963 xor $acc2,$t0,$t0
964 ld [$key+8],$s2 !
965 srlx $acc5,8,$acc5
966 xor $acc3,$t0,$t0
967 ld [$key+12],$s3
968 srlx $acc6,16,$acc6
969 xor $acc4,$t1,$t1
970 srlx $acc7,24,$acc7
971 xor $acc5,$t1,$t1
972 srlx $acc9,8,$acc9 !
973 xor $acc6,$t1,$t1
974 srlx $acc10,16,$acc10
975 xor $acc7,$t1,$t1
976 srlx $acc11,24,$acc11
977 xor $acc8,$t2,$t2
978 srlx $acc13,8,$acc13
979 xor $acc9,$t2,$t2
980 srlx $acc14,16,$acc14 !
981 xor $acc10,$t2,$t2
982 srlx $acc15,24,$acc15
983 xor $acc11,$t2,$t2
984 xor $acc12,$acc14,$acc14
985 xor $acc13,$t3,$t3
986 srl $t0,24,$acc0
987 xor $acc14,$t3,$t3
988 xor $acc15,$t3,$t3 !
989 srl $t3,16,$acc1
990
991 srl $t2,8,$acc2
992 and $acc1,255,$acc1
993 ldub [$rounds+$acc0],$acc0
994 srl $t1,24,$acc4
995 and $acc2,255,$acc2
996 ldub [$rounds+$acc1],$acc1
997 srl $t0,16,$acc5 !
998 and $t1,255,$acc3
999 ldub [$rounds+$acc2],$acc2
1000 ldub [$rounds+$acc3],$acc3
1001 srl $t3,8,$acc6
1002 and $acc5,255,$acc5
1003 ldub [$rounds+$acc4],$acc4
1004 fmovs %f0,%f0
1005 srl $t2,24,$acc8 !
1006 and $acc6,255,$acc6
1007 ldub [$rounds+$acc5],$acc5
1008 srl $t1,16,$acc9
1009 and $t2,255,$acc7
1010 ldub [$rounds+$acc6],$acc6
1011 ldub [$rounds+$acc7],$acc7
1012 fmovs %f0,%f0
1013 srl $t0,8,$acc10 !
1014 and $acc9,255,$acc9
1015 ldub [$rounds+$acc8],$acc8
1016 srl $t3,24,$acc12
1017 and $acc10,255,$acc10
1018 ldub [$rounds+$acc9],$acc9
1019 srl $t2,16,$acc13
1020 and $t3,255,$acc11
1021 ldub [$rounds+$acc10],$acc10 !
1022 srl $t1,8,$acc14
1023 and $acc13,255,$acc13
1024 ldub [$rounds+$acc11],$acc11
1025 ldub [$rounds+$acc12],$acc12
1026 and $acc14,255,$acc14
1027 ldub [$rounds+$acc13],$acc13
1028 and $t0,255,$acc15
1029 ldub [$rounds+$acc14],$acc14 !
1030
1031 sll $acc0,24,$acc0
1032 xor $acc3,$s0,$s0
1033 ldub [$rounds+$acc15],$acc15
1034 sll $acc1,16,$acc1
1035 xor $acc0,$s0,$s0
1036 ldx [%sp+$bias+$frame+0],%i7 ! restore return address
1037 fmovs %f0,%f0
1038 sll $acc2,8,$acc2 !
1039 xor $acc1,$s0,$s0
1040 sll $acc4,24,$acc4
1041 xor $acc2,$s0,$s0
1042 sll $acc5,16,$acc5
1043 xor $acc7,$s1,$s1
1044 sll $acc6,8,$acc6
1045 xor $acc4,$s1,$s1
1046 sll $acc8,24,$acc8 !
1047 xor $acc5,$s1,$s1
1048 sll $acc9,16,$acc9
1049 xor $acc11,$s2,$s2
1050 sll $acc10,8,$acc10
1051 xor $acc6,$s1,$s1
1052 sll $acc12,24,$acc12
1053 xor $acc8,$s2,$s2
1054 sll $acc13,16,$acc13 !
1055 xor $acc9,$s2,$s2
1056 sll $acc14,8,$acc14
1057 xor $acc10,$s2,$s2
1058 xor $acc12,$acc14,$acc14
1059 xor $acc13,$s3,$s3
1060 xor $acc14,$s3,$s3
1061 xor $acc15,$s3,$s3
1062
1063 ret
1064 restore
1065.type _sparcv9_AES_decrypt,#function
1066.size _sparcv9_AES_decrypt,(.-_sparcv9_AES_decrypt)
1067
1068.align 32
1069.globl AES_decrypt
1070AES_decrypt:
1071 or %o0,%o1,%g1
1072 andcc %g1,3,%g0
1073 bnz,pn %xcc,.Lunaligned_dec
1074 save %sp,-$frame,%sp
1075
1076 ld [%i0+0],%o0
1077 ld [%i0+4],%o1
1078 ld [%i0+8],%o2
1079 ld [%i0+12],%o3
1080
4c78bc05 10811: call .+8
e22b8648 1082 add %o7,AES_Td-1b,%o4
4c78bc05
AP
1083 call _sparcv9_AES_decrypt
1084 mov %i2,%o5
7395d852
AP
1085
1086 st %o0,[%i1+0]
1087 st %o1,[%i1+4]
1088 st %o2,[%i1+8]
1089 st %o3,[%i1+12]
1090
1091 ret
1092 restore
1093
1094.align 32
1095.Lunaligned_dec:
1096 ldub [%i0+0],%l0
1097 ldub [%i0+1],%l1
1098 ldub [%i0+2],%l2
1099
1100 sll %l0,24,%l0
1101 ldub [%i0+3],%l3
1102 sll %l1,16,%l1
1103 ldub [%i0+4],%l4
1104 sll %l2,8,%l2
1105 or %l1,%l0,%l0
1106 ldub [%i0+5],%l5
1107 sll %l4,24,%l4
1108 or %l3,%l2,%l2
1109 ldub [%i0+6],%l6
1110 sll %l5,16,%l5
1111 or %l0,%l2,%o0
1112 ldub [%i0+7],%l7
1113
1114 sll %l6,8,%l6
1115 or %l5,%l4,%l4
1116 ldub [%i0+8],%l0
1117 or %l7,%l6,%l6
1118 ldub [%i0+9],%l1
1119 or %l4,%l6,%o1
1120 ldub [%i0+10],%l2
1121
1122 sll %l0,24,%l0
1123 ldub [%i0+11],%l3
1124 sll %l1,16,%l1
1125 ldub [%i0+12],%l4
1126 sll %l2,8,%l2
1127 or %l1,%l0,%l0
1128 ldub [%i0+13],%l5
1129 sll %l4,24,%l4
1130 or %l3,%l2,%l2
1131 ldub [%i0+14],%l6
1132 sll %l5,16,%l5
1133 or %l0,%l2,%o2
1134 ldub [%i0+15],%l7
1135
1136 sll %l6,8,%l6
1137 or %l5,%l4,%l4
1138 or %l7,%l6,%l6
1139 or %l4,%l6,%o3
1140
4c78bc05 11411: call .+8
e22b8648 1142 add %o7,AES_Td-1b,%o4
4c78bc05
AP
1143 call _sparcv9_AES_decrypt
1144 mov %i2,%o5
7395d852
AP
1145
1146 srl %o0,24,%l0
1147 srl %o0,16,%l1
1148 stb %l0,[%i1+0]
1149 srl %o0,8,%l2
1150 stb %l1,[%i1+1]
1151 stb %l2,[%i1+2]
1152 srl %o1,24,%l4
1153 stb %o0,[%i1+3]
1154
1155 srl %o1,16,%l5
1156 stb %l4,[%i1+4]
1157 srl %o1,8,%l6
1158 stb %l5,[%i1+5]
1159 stb %l6,[%i1+6]
1160 srl %o2,24,%l0
1161 stb %o1,[%i1+7]
1162
1163 srl %o2,16,%l1
1164 stb %l0,[%i1+8]
1165 srl %o2,8,%l2
1166 stb %l1,[%i1+9]
1167 stb %l2,[%i1+10]
1168 srl %o3,24,%l4
1169 stb %o2,[%i1+11]
1170
1171 srl %o3,16,%l5
1172 stb %l4,[%i1+12]
1173 srl %o3,8,%l6
1174 stb %l5,[%i1+13]
1175 stb %l6,[%i1+14]
1176 stb %o3,[%i1+15]
1177
1178 ret
1179 restore
1180.type AES_decrypt,#function
1181.size AES_decrypt,(.-AES_decrypt)
1182___
1183
# The "fmovs %f0,%f0" instructions in the templates act as floating-point
# no-ops.  They were originally inserted to satisfy specific instruction
# alignment requirements and so maximize ILP.  On UltraSPARC T1, a.k.a.
# Niagara, the FPU is shared, so FP nops can have an undesired effect;
# they are therefore dropped, at the cost of a fraction of a percent in
# performance.  Each match runs from the mnemonic to the end of its line.
for ($code) {
    s/fmovs.*$//gm;
}

# Write out the finished assembly.  Closing STDOUT explicitly turns a
# failed flush into a fatal error instead of a silently truncated file.
print $code;
close(STDOUT) or die "error closing STDOUT: $!";	# ensure flush