]> git.ipfire.org Git - thirdparty/openssl.git/blob - crypto/aes/asm/aes-mips.pl
Update copyright year
[thirdparty/openssl.git] / crypto / aes / asm / aes-mips.pl
1 #! /usr/bin/env perl
2 # Copyright 2010-2020 The OpenSSL Project Authors. All Rights Reserved.
3 #
4 # Licensed under the Apache License 2.0 (the "License"). You may not use
5 # this file except in compliance with the License. You can obtain a copy
6 # in the file LICENSE in the source distribution or at
7 # https://www.openssl.org/source/license.html
8
9
10 # ====================================================================
11 # Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
12 # project. The module is, however, dual licensed under OpenSSL and
13 # CRYPTOGAMS licenses depending on where you obtain it. For further
14 # details see http://www.openssl.org/~appro/cryptogams/.
15 # ====================================================================
16
17 # AES for MIPS
18
19 # October 2010
20 #
21 # Code uses 1K[+256B] S-box and on single-issue core [such as R5000]
22 # spends ~68 cycles per byte processed with 128-bit key. This is ~16%
23 # faster than gcc-generated code, which is not very impressive. But
24 # recall that compressed S-box requires extra processing, namely
25 # additional rotations. Rotations are implemented with lwl/lwr pairs,
26 # which are normally used for loading unaligned data. Another cool
27 # thing about this module is its endian neutrality, which means that
28 # it processes data without ever changing byte order...
29
30 # September 2012
31 #
32 # Add MIPS32R2 (~10% fewer instructions) and SmartMIPS ASE (a further
33 # ~25% fewer instructions) code. Note that there is no run-time switch;
34 # instead, the code path is chosen at preprocessing time: pass -mips32r2
35 # and/or -msmartmips.
36
37 # February 2019
38 #
39 # Normalize MIPS32R2 AES table address calculation by always using EXT
40 # instruction. This reduces the standard codebase by another 10%.
41
42 ######################################################################
43 # There are a number of MIPS ABIs in use; O32 and N32/64 are the most
44 # widely used. Then there is a new contender: NUBI. It appears that if
45 # one picks the latter, it's possible to arrange code in an ABI-neutral
46 # manner. Therefore let's stick to the NUBI register layout:
47 #
# Symbolic register names for the NUBI layout.  Each variable holds the
# literal operand text "$<n>" that is interpolated into the assembly
# templates further down.
($zero, $at, $t0, $t1, $t2) = map { '$' . $_ } (0, 1, 2, 24, 25);
($a0, $a1, $a2, $a3, $a4, $a5, $a6, $a7) = map { '$' . $_ } (4 .. 11);
($s0, $s1, $s2, $s3, $s4, $s5, $s6, $s7, $s8, $s9, $s10, $s11)
    = map { '$' . $_ } (12 .. 23);
($gp, $tp, $sp, $fp, $ra) = map { '$' . $_ } (3, 28, 29, 30, 31);
52 #
53 # The return value is placed in $a0. Following coding rules facilitate
54 # interoperability:
55 #
56 # - never ever touch $tp, "thread pointer", former $gp;
57 # - copy return value to $t0, former $v0 [or to $a0 if you're adapting
58 # old code];
59 # - on O32 populate $a4-$a7 with 'lw $aN,4*N($sp)' if necessary;
60 #
61 # For reference here is register layout for N32/64 MIPS ABIs:
62 #
63 # ($zero,$at,$v0,$v1)=map("\$$_",(0..3));
64 # ($a0,$a1,$a2,$a3,$a4,$a5,$a6,$a7)=map("\$$_",(4..11));
65 # ($t0,$t1,$t2,$t3,$t8,$t9)=map("\$$_",(12..15,24,25));
66 # ($s0,$s1,$s2,$s3,$s4,$s5,$s6,$s7)=map("\$$_",(16..23));
67 # ($gp,$sp,$fp,$ra)=map("\$$_",(28..31));
68
69 # $output is the last argument if it looks like a file (it has an extension)
70 # $flavour is the first argument if it doesn't look like a file
# Command line: "[flavour] [output]".
#   - the last argument is taken as the output file when it ends in an
#     extension (".something");
#   - the first remaining argument is taken as the flavour when it contains
#     no dot at all;
#   - the flavour falls back to "o32" when none was supplied.
$output  = (@ARGV && $ARGV[-1] =~ m|\.\w+$|) ? pop : undef;
$flavour = (@ARGV && $ARGV[0] !~ m|\.|) ? shift : undef;
$flavour = "o32" unless $flavour;    # supported flavours are o32,n32,64,nubi32,nubi64
74
# Pick pointer-sized mnemonics, the register save/restore instructions and
# the register width in bytes for the requested flavour.  Flavours matching
# /64|n32/ get the 64-bit forms; per the original notes, daddu, dsubu and
# dsll "incidentally work even on n32".  Everything else gets the 32-bit
# forms.
($PTR_LA, $PTR_ADD, $PTR_SUB, $PTR_INS, $REG_S, $REG_L, $PTR_SLL, $SZREG) =
    ($flavour =~ /64|n32/i)
    ? ("dla", "daddu", "dsubu", "dins", "sd", "ld", "dsll", 8)
    : ("la", "addu", "subu", "ins", "sw", "lw", "sll", 4);

# Register handed to .cpload/.cpsetup by the public entry points:
# $t0 for the nubi flavours, $t2 for all others.
if ($flavour =~ /nubi/i) {
    $pf = $t0;
}
else {
    $pf = $t2;
}
95 #
96 # <appro@openssl.org>
97 #
98 ######################################################################
99
# Byte-order probe.  When $CC is set, the word MIPSEB is piped through the
# compiler's preprocessor: if the text comes back unchanged $big_endian is
# set to 0, otherwise to 1.
# NOTE(review): presumably big-endian MIPS toolchains predefine MIPSEB as a
# macro, which is what makes the word disappear - confirm.
if ($ENV{CC}) {
    my $preprocessed = `echo MIPSEB | $ENV{CC} -E -`;
    $big_endian = ($preprocessed =~ /MIPSEB/) ? 0 : 1;
}

# Without $CC, fall back to the byte order of the machine running this script.
unless (defined($big_endian)) {
    $big_endian = (unpack('L', pack('N', 1)) == 1);
}

# Byte offsets used with lwl/lwr/swl/swr in the templates below.
my ($MSB, $LSB) = (0, 3);    # automatically converted to little-endian
106
# Redirect STDOUT to the output file named on the command line, if any.
# The three-argument form of open keeps the file name from being parsed as
# part of the mode, and a failure is fatal instead of silently leaving the
# generated assembly on the original STDOUT.
if ($output) {
    open(STDOUT, '>', $output) or die "can't open $output: $!";
}
108
# Start of the generated assembly: include mips_arch.h, switch to the .text
# section, request ".option pic2" unless building for EABI or for non-PIC
# VxWorks, and issue ".set noat".  The emitted text contains C-preprocessor
# directives, so the output is evidently meant to be preprocessed before it
# is assembled.  The templates that follow use $at ($1) as an ordinary
# scratch register.
109 $code.=<<___;
110 #include "mips_arch.h"
111
112 .text
113 #if !defined(__mips_eabi) && (!defined(__vxworks) || defined(__pic__))
114 .option pic2
115 #endif
116 .set noat
117 ___
118 \f
119 {{{
# Stack frame of the public AES_encrypt/AES_decrypt wrappers: sixteen
# register-sized slots.  The .mask value is larger for nubi flavours,
# which save additional registers.
my $FRAMESIZE       = 16 * $SZREG;
my $SAVED_REGS_MASK = ($flavour !~ /nubi/i) ? "0xc0ff0000" : "0xc0fff008";

# Arguments and table base live in $a0-$a3; the four state words are
# kept in $a4-$a7 (these lexicals shadow the global $s0-$s3).
my ($inp, $out, $key, $Tbl) = ($a0, $a1, $a2, $a3);
my ($s0, $s1, $s2, $s3)     = ($a4, $a5, $a6, $a7);

# Index/address scratch registers.  This line must stay ahead of the
# $t0..$t11 redefinition below: it reads the *global* $t0,$t1,$t2.
my ($i0, $i1, $i2, $i3) = ($at, $t0, $t1, $t2);

# Twelve temporaries mapped onto registers 12..23 (shadowing the globals).
my ($t0, $t1, $t2, $t3, $t4, $t5, $t6, $t7, $t8, $t9, $t10, $t11)
    = map { '$' . $_ } (12 .. 23);

# Round-key cursor and round counter.
my ($key0, $cnt) = ($gp, $fp);
127
128 # instruction ordering is "stolen" from output from MIPSpro assembler
129 # invoked with -mips3 -O3 arguments...
# Template for _mips_AES_encrypt, the internal block-encryption routine,
# followed by the opening directives of the public AES_encrypt wrapper
# (label, .frame, .mask, ".set noreorder").
#
# What the template shows:
#   - on entry the state is expected in $s0-$s3 and the table base in $Tbl;
#     the state is XORed with the first 16 bytes at $key;
#   - $cnt is loaded from offset 240 of $key and decremented once before
#     the loop; $key0 starts at $key+16 and advances 16 bytes per round;
#   - .Loop_enc exists in three preprocessor-selected variants:
#       __mips_smartmips          ext + lwxs indexed loads + rotr
#       _MIPS_ARCH_MIPS{32,64}R2  ext/$PTR_INS address forming + rotr
#       otherwise                 shift/mask/add addressing, rotations done
#                                 with lwl/lwr pairs
#   - the code after the loop handles the last round with byte loads (lbu)
#     and recombines the bytes with _ins/_ins2 before the final key XOR;
#   - the result is left in $s0-$s3 and the routine returns with "jr $ra".
#
# Everything between the heredoc markers is emitted text, so the "#if" and
# "# Te..." lines in it are part of the output, not Perl comments.
# NOTE(review): _xtr, _ins and _ins2 are not real MIPS mnemonics; presumably
# they are rewritten by post-processing later in this file - confirm.
130 $code.=<<___;
131 .align 5
132 .ent _mips_AES_encrypt
133 _mips_AES_encrypt:
134 .frame $sp,0,$ra
135 .set reorder
136 lw $t0,0($key)
137 lw $t1,4($key)
138 lw $t2,8($key)
139 lw $t3,12($key)
140 lw $cnt,240($key)
141 $PTR_ADD $key0,$key,16
142
143 xor $s0,$t0
144 xor $s1,$t1
145 xor $s2,$t2
146 xor $s3,$t3
147
148 subu $cnt,1
149 #if defined(__mips_smartmips)
150 ext $i0,$s1,16,8
151 .Loop_enc:
152 ext $i1,$s2,16,8
153 ext $i2,$s3,16,8
154 ext $i3,$s0,16,8
155 lwxs $t0,$i0($Tbl) # Te1[s1>>16]
156 ext $i0,$s2,8,8
157 lwxs $t1,$i1($Tbl) # Te1[s2>>16]
158 ext $i1,$s3,8,8
159 lwxs $t2,$i2($Tbl) # Te1[s3>>16]
160 ext $i2,$s0,8,8
161 lwxs $t3,$i3($Tbl) # Te1[s0>>16]
162 ext $i3,$s1,8,8
163
164 lwxs $t4,$i0($Tbl) # Te2[s2>>8]
165 ext $i0,$s3,0,8
166 lwxs $t5,$i1($Tbl) # Te2[s3>>8]
167 ext $i1,$s0,0,8
168 lwxs $t6,$i2($Tbl) # Te2[s0>>8]
169 ext $i2,$s1,0,8
170 lwxs $t7,$i3($Tbl) # Te2[s1>>8]
171 ext $i3,$s2,0,8
172
173 lwxs $t8,$i0($Tbl) # Te3[s3]
174 ext $i0,$s0,24,8
175 lwxs $t9,$i1($Tbl) # Te3[s0]
176 ext $i1,$s1,24,8
177 lwxs $t10,$i2($Tbl) # Te3[s1]
178 ext $i2,$s2,24,8
179 lwxs $t11,$i3($Tbl) # Te3[s2]
180 ext $i3,$s3,24,8
181
182 rotr $t0,$t0,8
183 rotr $t1,$t1,8
184 rotr $t2,$t2,8
185 rotr $t3,$t3,8
186
187 rotr $t4,$t4,16
188 rotr $t5,$t5,16
189 rotr $t6,$t6,16
190 rotr $t7,$t7,16
191
192 xor $t0,$t4
193 lwxs $t4,$i0($Tbl) # Te0[s0>>24]
194 xor $t1,$t5
195 lwxs $t5,$i1($Tbl) # Te0[s1>>24]
196 xor $t2,$t6
197 lwxs $t6,$i2($Tbl) # Te0[s2>>24]
198 xor $t3,$t7
199 lwxs $t7,$i3($Tbl) # Te0[s3>>24]
200
201 rotr $t8,$t8,24
202 lw $s0,0($key0)
203 rotr $t9,$t9,24
204 lw $s1,4($key0)
205 rotr $t10,$t10,24
206 lw $s2,8($key0)
207 rotr $t11,$t11,24
208 lw $s3,12($key0)
209
210 xor $t0,$t8
211 xor $t1,$t9
212 xor $t2,$t10
213 xor $t3,$t11
214
215 xor $t0,$t4
216 xor $t1,$t5
217 xor $t2,$t6
218 xor $t3,$t7
219
220 subu $cnt,1
221 $PTR_ADD $key0,16
222 xor $s0,$t0
223 xor $s1,$t1
224 xor $s2,$t2
225 xor $s3,$t3
226 .set noreorder
227 bnez $cnt,.Loop_enc
228 ext $i0,$s1,16,8
229
230 _xtr $i0,$s1,16-2
231 #else
232 #if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
233 move $i0,$Tbl
234 move $i1,$Tbl
235 move $i2,$Tbl
236 move $i3,$Tbl
237 ext $t0,$s1,16,8
238 .Loop_enc:
239 ext $t1,$s2,16,8
240 ext $t2,$s3,16,8
241 ext $t3,$s0,16,8
242 $PTR_INS $i0,$t0,2,8
243 $PTR_INS $i1,$t1,2,8
244 $PTR_INS $i2,$t2,2,8
245 $PTR_INS $i3,$t3,2,8
246 lw $t0,0($i0) # Te1[s1>>16]
247 ext $t4,$s2,8,8
248 lw $t1,0($i1) # Te1[s2>>16]
249 ext $t5,$s3,8,8
250 lw $t2,0($i2) # Te1[s3>>16]
251 ext $t6,$s0,8,8
252 lw $t3,0($i3) # Te1[s0>>16]
253 ext $t7,$s1,8,8
254 $PTR_INS $i0,$t4,2,8
255 $PTR_INS $i1,$t5,2,8
256 $PTR_INS $i2,$t6,2,8
257 $PTR_INS $i3,$t7,2,8
258 #else
259 _xtr $i0,$s1,16-2
260 .Loop_enc:
261 _xtr $i1,$s2,16-2
262 _xtr $i2,$s3,16-2
263 _xtr $i3,$s0,16-2
264 and $i0,0x3fc
265 and $i1,0x3fc
266 and $i2,0x3fc
267 and $i3,0x3fc
268 $PTR_ADD $i0,$Tbl
269 $PTR_ADD $i1,$Tbl
270 $PTR_ADD $i2,$Tbl
271 $PTR_ADD $i3,$Tbl
272 lwl $t0,3($i0) # Te1[s1>>16]
273 lwl $t1,3($i1) # Te1[s2>>16]
274 lwl $t2,3($i2) # Te1[s3>>16]
275 lwl $t3,3($i3) # Te1[s0>>16]
276 lwr $t0,2($i0) # Te1[s1>>16]
277 _xtr $i0,$s2,8-2
278 lwr $t1,2($i1) # Te1[s2>>16]
279 _xtr $i1,$s3,8-2
280 lwr $t2,2($i2) # Te1[s3>>16]
281 _xtr $i2,$s0,8-2
282 lwr $t3,2($i3) # Te1[s0>>16]
283 _xtr $i3,$s1,8-2
284 and $i0,0x3fc
285 and $i1,0x3fc
286 and $i2,0x3fc
287 and $i3,0x3fc
288 $PTR_ADD $i0,$Tbl
289 $PTR_ADD $i1,$Tbl
290 $PTR_ADD $i2,$Tbl
291 $PTR_ADD $i3,$Tbl
292 #endif
293 #if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
294 rotr $t0,$t0,8
295 rotr $t1,$t1,8
296 rotr $t2,$t2,8
297 rotr $t3,$t3,8
298 # if defined(_MIPSEL)
299 lw $t4,0($i0) # Te2[s2>>8]
300 ext $t8,$s3,0,8
301 lw $t5,0($i1) # Te2[s3>>8]
302 ext $t9,$s0,0,8
303 lw $t6,0($i2) # Te2[s0>>8]
304 ext $t10,$s1,0,8
305 lw $t7,0($i3) # Te2[s1>>8]
306 ext $t11,$s2,0,8
307 $PTR_INS $i0,$t8,2,8
308 $PTR_INS $i1,$t9,2,8
309 $PTR_INS $i2,$t10,2,8
310 $PTR_INS $i3,$t11,2,8
311
312 lw $t8,0($i0) # Te3[s3]
313 $PTR_INS $i0,$s0,2,8
314 lw $t9,0($i1) # Te3[s0]
315 $PTR_INS $i1,$s1,2,8
316 lw $t10,0($i2) # Te3[s1]
317 $PTR_INS $i2,$s2,2,8
318 lw $t11,0($i3) # Te3[s2]
319 $PTR_INS $i3,$s3,2,8
320 # else
321 lw $t4,0($i0) # Te2[s2>>8]
322 $PTR_INS $i0,$s3,2,8
323 lw $t5,0($i1) # Te2[s3>>8]
324 $PTR_INS $i1,$s0,2,8
325 lw $t6,0($i2) # Te2[s0>>8]
326 $PTR_INS $i2,$s1,2,8
327 lw $t7,0($i3) # Te2[s1>>8]
328 $PTR_INS $i3,$s2,2,8
329
330 lw $t8,0($i0) # Te3[s3]
331 _xtr $i0,$s0,24-2
332 lw $t9,0($i1) # Te3[s0]
333 _xtr $i1,$s1,24-2
334 lw $t10,0($i2) # Te3[s1]
335 _xtr $i2,$s2,24-2
336 lw $t11,0($i3) # Te3[s2]
337 _xtr $i3,$s3,24-2
338
339 and $i0,0x3fc
340 and $i1,0x3fc
341 and $i2,0x3fc
342 and $i3,0x3fc
343 $PTR_ADD $i0,$Tbl
344 $PTR_ADD $i1,$Tbl
345 $PTR_ADD $i2,$Tbl
346 $PTR_ADD $i3,$Tbl
347 # endif
348 rotr $t4,$t4,16
349 rotr $t5,$t5,16
350 rotr $t6,$t6,16
351 rotr $t7,$t7,16
352
353 rotr $t8,$t8,24
354 rotr $t9,$t9,24
355 rotr $t10,$t10,24
356 rotr $t11,$t11,24
357 #else
358 lwl $t4,2($i0) # Te2[s2>>8]
359 lwl $t5,2($i1) # Te2[s3>>8]
360 lwl $t6,2($i2) # Te2[s0>>8]
361 lwl $t7,2($i3) # Te2[s1>>8]
362 lwr $t4,1($i0) # Te2[s2>>8]
363 _xtr $i0,$s3,0-2
364 lwr $t5,1($i1) # Te2[s3>>8]
365 _xtr $i1,$s0,0-2
366 lwr $t6,1($i2) # Te2[s0>>8]
367 _xtr $i2,$s1,0-2
368 lwr $t7,1($i3) # Te2[s1>>8]
369 _xtr $i3,$s2,0-2
370
371 and $i0,0x3fc
372 and $i1,0x3fc
373 and $i2,0x3fc
374 and $i3,0x3fc
375 $PTR_ADD $i0,$Tbl
376 $PTR_ADD $i1,$Tbl
377 $PTR_ADD $i2,$Tbl
378 $PTR_ADD $i3,$Tbl
379 lwl $t8,1($i0) # Te3[s3]
380 lwl $t9,1($i1) # Te3[s0]
381 lwl $t10,1($i2) # Te3[s1]
382 lwl $t11,1($i3) # Te3[s2]
383 lwr $t8,0($i0) # Te3[s3]
384 _xtr $i0,$s0,24-2
385 lwr $t9,0($i1) # Te3[s0]
386 _xtr $i1,$s1,24-2
387 lwr $t10,0($i2) # Te3[s1]
388 _xtr $i2,$s2,24-2
389 lwr $t11,0($i3) # Te3[s2]
390 _xtr $i3,$s3,24-2
391
392 and $i0,0x3fc
393 and $i1,0x3fc
394 and $i2,0x3fc
395 and $i3,0x3fc
396 $PTR_ADD $i0,$Tbl
397 $PTR_ADD $i1,$Tbl
398 $PTR_ADD $i2,$Tbl
399 $PTR_ADD $i3,$Tbl
400 #endif
401 xor $t0,$t4
402 lw $t4,0($i0) # Te0[s0>>24]
403 xor $t1,$t5
404 lw $t5,0($i1) # Te0[s1>>24]
405 xor $t2,$t6
406 lw $t6,0($i2) # Te0[s2>>24]
407 xor $t3,$t7
408 lw $t7,0($i3) # Te0[s3>>24]
409
410 xor $t0,$t8
411 lw $s0,0($key0)
412 xor $t1,$t9
413 lw $s1,4($key0)
414 xor $t2,$t10
415 lw $s2,8($key0)
416 xor $t3,$t11
417 lw $s3,12($key0)
418
419 xor $t0,$t4
420 xor $t1,$t5
421 xor $t2,$t6
422 xor $t3,$t7
423
424 subu $cnt,1
425 $PTR_ADD $key0,16
426 xor $s0,$t0
427 xor $s1,$t1
428 xor $s2,$t2
429 xor $s3,$t3
430 .set noreorder
431 bnez $cnt,.Loop_enc
432 #if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
433 ext $t0,$s1,16,8
434 #endif
435 _xtr $i0,$s1,16-2
436 #endif
437
438 .set reorder
439 _xtr $i1,$s2,16-2
440 _xtr $i2,$s3,16-2
441 _xtr $i3,$s0,16-2
442 and $i0,0x3fc
443 and $i1,0x3fc
444 and $i2,0x3fc
445 and $i3,0x3fc
446 $PTR_ADD $i0,$Tbl
447 $PTR_ADD $i1,$Tbl
448 $PTR_ADD $i2,$Tbl
449 $PTR_ADD $i3,$Tbl
450 lbu $t0,2($i0) # Te4[s1>>16]
451 _xtr $i0,$s2,8-2
452 lbu $t1,2($i1) # Te4[s2>>16]
453 _xtr $i1,$s3,8-2
454 lbu $t2,2($i2) # Te4[s3>>16]
455 _xtr $i2,$s0,8-2
456 lbu $t3,2($i3) # Te4[s0>>16]
457 _xtr $i3,$s1,8-2
458
459 and $i0,0x3fc
460 and $i1,0x3fc
461 and $i2,0x3fc
462 and $i3,0x3fc
463 $PTR_ADD $i0,$Tbl
464 $PTR_ADD $i1,$Tbl
465 $PTR_ADD $i2,$Tbl
466 $PTR_ADD $i3,$Tbl
467 #if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
468 # if defined(_MIPSEL)
469 lbu $t4,2($i0) # Te4[s2>>8]
470 $PTR_INS $i0,$s0,2,8
471 lbu $t5,2($i1) # Te4[s3>>8]
472 $PTR_INS $i1,$s1,2,8
473 lbu $t6,2($i2) # Te4[s0>>8]
474 $PTR_INS $i2,$s2,2,8
475 lbu $t7,2($i3) # Te4[s1>>8]
476 $PTR_INS $i3,$s3,2,8
477
478 lbu $t8,2($i0) # Te4[s0>>24]
479 _xtr $i0,$s3,0-2
480 lbu $t9,2($i1) # Te4[s1>>24]
481 _xtr $i1,$s0,0-2
482 lbu $t10,2($i2) # Te4[s2>>24]
483 _xtr $i2,$s1,0-2
484 lbu $t11,2($i3) # Te4[s3>>24]
485 _xtr $i3,$s2,0-2
486
487 and $i0,0x3fc
488 and $i1,0x3fc
489 and $i2,0x3fc
490 and $i3,0x3fc
491 $PTR_ADD $i0,$Tbl
492 $PTR_ADD $i1,$Tbl
493 $PTR_ADD $i2,$Tbl
494 $PTR_ADD $i3,$Tbl
495 # else
496 lbu $t4,2($i0) # Te4[s2>>8]
497 _xtr $i0,$s0,24-2
498 lbu $t5,2($i1) # Te4[s3>>8]
499 _xtr $i1,$s1,24-2
500 lbu $t6,2($i2) # Te4[s0>>8]
501 _xtr $i2,$s2,24-2
502 lbu $t7,2($i3) # Te4[s1>>8]
503 _xtr $i3,$s3,24-2
504
505 and $i0,0x3fc
506 and $i1,0x3fc
507 and $i2,0x3fc
508 and $i3,0x3fc
509 $PTR_ADD $i0,$Tbl
510 $PTR_ADD $i1,$Tbl
511 $PTR_ADD $i2,$Tbl
512 $PTR_ADD $i3,$Tbl
513 lbu $t8,2($i0) # Te4[s0>>24]
514 $PTR_INS $i0,$s3,2,8
515 lbu $t9,2($i1) # Te4[s1>>24]
516 $PTR_INS $i1,$s0,2,8
517 lbu $t10,2($i2) # Te4[s2>>24]
518 $PTR_INS $i2,$s1,2,8
519 lbu $t11,2($i3) # Te4[s3>>24]
520 $PTR_INS $i3,$s2,2,8
521 # endif
522 _ins $t0,16
523 _ins $t1,16
524 _ins $t2,16
525 _ins $t3,16
526
527 _ins2 $t0,$t4,8
528 lbu $t4,2($i0) # Te4[s3]
529 _ins2 $t1,$t5,8
530 lbu $t5,2($i1) # Te4[s0]
531 _ins2 $t2,$t6,8
532 lbu $t6,2($i2) # Te4[s1]
533 _ins2 $t3,$t7,8
534 lbu $t7,2($i3) # Te4[s2]
535
536 _ins2 $t0,$t8,24
537 lw $s0,0($key0)
538 _ins2 $t1,$t9,24
539 lw $s1,4($key0)
540 _ins2 $t2,$t10,24
541 lw $s2,8($key0)
542 _ins2 $t3,$t11,24
543 lw $s3,12($key0)
544
545 _ins2 $t0,$t4,0
546 _ins2 $t1,$t5,0
547 _ins2 $t2,$t6,0
548 _ins2 $t3,$t7,0
549 #else
550 lbu $t4,2($i0) # Te4[s2>>8]
551 _xtr $i0,$s0,24-2
552 lbu $t5,2($i1) # Te4[s3>>8]
553 _xtr $i1,$s1,24-2
554 lbu $t6,2($i2) # Te4[s0>>8]
555 _xtr $i2,$s2,24-2
556 lbu $t7,2($i3) # Te4[s1>>8]
557 _xtr $i3,$s3,24-2
558
559 and $i0,0x3fc
560 and $i1,0x3fc
561 and $i2,0x3fc
562 and $i3,0x3fc
563 $PTR_ADD $i0,$Tbl
564 $PTR_ADD $i1,$Tbl
565 $PTR_ADD $i2,$Tbl
566 $PTR_ADD $i3,$Tbl
567 lbu $t8,2($i0) # Te4[s0>>24]
568 _xtr $i0,$s3,0-2
569 lbu $t9,2($i1) # Te4[s1>>24]
570 _xtr $i1,$s0,0-2
571 lbu $t10,2($i2) # Te4[s2>>24]
572 _xtr $i2,$s1,0-2
573 lbu $t11,2($i3) # Te4[s3>>24]
574 _xtr $i3,$s2,0-2
575
576 and $i0,0x3fc
577 and $i1,0x3fc
578 and $i2,0x3fc
579 and $i3,0x3fc
580 $PTR_ADD $i0,$Tbl
581 $PTR_ADD $i1,$Tbl
582 $PTR_ADD $i2,$Tbl
583 $PTR_ADD $i3,$Tbl
584
585 _ins $t0,16
586 _ins $t1,16
587 _ins $t2,16
588 _ins $t3,16
589
590 _ins $t4,8
591 _ins $t5,8
592 _ins $t6,8
593 _ins $t7,8
594
595 xor $t0,$t4
596 lbu $t4,2($i0) # Te4[s3]
597 xor $t1,$t5
598 lbu $t5,2($i1) # Te4[s0]
599 xor $t2,$t6
600 lbu $t6,2($i2) # Te4[s1]
601 xor $t3,$t7
602 lbu $t7,2($i3) # Te4[s2]
603
604 _ins $t8,24
605 lw $s0,0($key0)
606 _ins $t9,24
607 lw $s1,4($key0)
608 _ins $t10,24
609 lw $s2,8($key0)
610 _ins $t11,24
611 lw $s3,12($key0)
612
613 xor $t0,$t8
614 xor $t1,$t9
615 xor $t2,$t10
616 xor $t3,$t11
617
618 _ins $t4,0
619 _ins $t5,0
620 _ins $t6,0
621 _ins $t7,0
622
623 xor $t0,$t4
624 xor $t1,$t5
625 xor $t2,$t6
626 xor $t3,$t7
627 #endif
628 xor $s0,$t0
629 xor $s1,$t1
630 xor $s2,$t2
631 xor $s3,$t3
632
633 jr $ra
634 .end _mips_AES_encrypt
635
636 .align 5
637 .globl AES_encrypt
638 .ent AES_encrypt
639 AES_encrypt:
640 .frame $sp,$FRAMESIZE,$ra
641 .mask $SAVED_REGS_MASK,-$SZREG
642 .set noreorder
643 ___
# Body of the public AES_encrypt entry point.  Its label and .frame/.mask
# directives were already appended by the preceding heredoc.  The pieces
# below are appended conditionally depending on $flavour.

# o32 only: PIC setup with .cpload on $pf, placed before the stack frame
# is created.
644 $code.=<<___ if ($flavour =~ /o32/i); # o32 PIC-ification
645 .cpload $pf
646 ___
# Allocate the frame and save $ra, $fp and $s11..$s4 from the top of the
# frame downwards, one $SZREG-sized slot each.
647 $code.=<<___;
648 $PTR_SUB $sp,$FRAMESIZE
649 $REG_S $ra,$FRAMESIZE-1*$SZREG($sp)
650 $REG_S $fp,$FRAMESIZE-2*$SZREG($sp)
651 $REG_S $s11,$FRAMESIZE-3*$SZREG($sp)
652 $REG_S $s10,$FRAMESIZE-4*$SZREG($sp)
653 $REG_S $s9,$FRAMESIZE-5*$SZREG($sp)
654 $REG_S $s8,$FRAMESIZE-6*$SZREG($sp)
655 $REG_S $s7,$FRAMESIZE-7*$SZREG($sp)
656 $REG_S $s6,$FRAMESIZE-8*$SZREG($sp)
657 $REG_S $s5,$FRAMESIZE-9*$SZREG($sp)
658 $REG_S $s4,$FRAMESIZE-10*$SZREG($sp)
659 ___
# nubi flavours additionally save registers 15..12 and $gp; the other
# flavours skip these five stores.
660 $code.=<<___ if ($flavour =~ /nubi/i); # optimize non-nubi prologue
661 $REG_S \$15,$FRAMESIZE-11*$SZREG($sp)
662 $REG_S \$14,$FRAMESIZE-12*$SZREG($sp)
663 $REG_S \$13,$FRAMESIZE-13*$SZREG($sp)
664 $REG_S \$12,$FRAMESIZE-14*$SZREG($sp)
665 $REG_S $gp,$FRAMESIZE-15*$SZREG($sp)
666 ___
# Every flavour except o32: PIC setup with .cplocal/.cpsetup, using $Tbl
# as the local GP register.
667 $code.=<<___ if ($flavour !~ /o32/i); # non-o32 PIC-ification
668 .cplocal $Tbl
669 .cpsetup $pf,$zero,AES_encrypt
670 ___
# Main part: point $Tbl at AES_Te, load the 16-byte input block from $inp
# into $s0-$s3, call _mips_AES_encrypt, store $s0-$s3 to $out, then start
# restoring the saved registers.  R6 builds use plain lw/sw; all others use
# lwl/lwr and swl/swr pairs at the $MSB/$LSB byte offsets.
# NOTE(review): presumably the pairs are there to tolerate unaligned
# buffers, and R6 avoids them because it lacks those instructions - confirm.
671 $code.=<<___;
672 .set reorder
673 $PTR_LA $Tbl,AES_Te # PIC-ified 'load address'
674
675 #if defined(_MIPS_ARCH_MIPS32R6) || defined(_MIPS_ARCH_MIPS64R6)
676 lw $s0,0($inp)
677 lw $s1,4($inp)
678 lw $s2,8($inp)
679 lw $s3,12($inp)
680 #else
681 lwl $s0,0+$MSB($inp)
682 lwl $s1,4+$MSB($inp)
683 lwl $s2,8+$MSB($inp)
684 lwl $s3,12+$MSB($inp)
685 lwr $s0,0+$LSB($inp)
686 lwr $s1,4+$LSB($inp)
687 lwr $s2,8+$LSB($inp)
688 lwr $s3,12+$LSB($inp)
689 #endif
690
691 bal _mips_AES_encrypt
692
693 #if defined(_MIPS_ARCH_MIPS32R6) || defined(_MIPS_ARCH_MIPS64R6)
694 sw $s0,0($out)
695 sw $s1,4($out)
696 sw $s2,8($out)
697 sw $s3,12($out)
698 #else
699 swr $s0,0+$LSB($out)
700 swr $s1,4+$LSB($out)
701 swr $s2,8+$LSB($out)
702 swr $s3,12+$LSB($out)
703 swl $s0,0+$MSB($out)
704 swl $s1,4+$MSB($out)
705 swl $s2,8+$MSB($out)
706 swl $s3,12+$MSB($out)
707 #endif
708
709 .set noreorder
710 $REG_L $ra,$FRAMESIZE-1*$SZREG($sp)
711 $REG_L $fp,$FRAMESIZE-2*$SZREG($sp)
712 $REG_L $s11,$FRAMESIZE-3*$SZREG($sp)
713 $REG_L $s10,$FRAMESIZE-4*$SZREG($sp)
714 $REG_L $s9,$FRAMESIZE-5*$SZREG($sp)
715 $REG_L $s8,$FRAMESIZE-6*$SZREG($sp)
716 $REG_L $s7,$FRAMESIZE-7*$SZREG($sp)
717 $REG_L $s6,$FRAMESIZE-8*$SZREG($sp)
718 $REG_L $s5,$FRAMESIZE-9*$SZREG($sp)
719 $REG_L $s4,$FRAMESIZE-10*$SZREG($sp)
720 ___
# nubi flavours restore the five extra registers saved in the prologue.
721 $code.=<<___ if ($flavour =~ /nubi/i);
722 $REG_L \$15,$FRAMESIZE-11*$SZREG($sp)
723 $REG_L \$14,$FRAMESIZE-12*$SZREG($sp)
724 $REG_L \$13,$FRAMESIZE-13*$SZREG($sp)
725 $REG_L \$12,$FRAMESIZE-14*$SZREG($sp)
726 $REG_L $gp,$FRAMESIZE-15*$SZREG($sp)
727 ___
# Return.  ".set noreorder" is still in effect, so the stack-pointer
# adjustment written after "jr" sits in the jump's delay slot.
728 $code.=<<___;
729 jr $ra
730 $PTR_ADD $sp,$FRAMESIZE
731 .end AES_encrypt
732 ___
733 \f
# Template for _mips_AES_decrypt, the internal block-decryption routine,
# followed by the opening directives of the public AES_decrypt wrapper
# (label, .frame, .mask, ".set noreorder").
#
# What the template shows:
#   - on entry the state is expected in $s0-$s3 and the table base in $Tbl;
#     the state is XORed with the first 16 bytes at $key;
#   - $cnt is loaded from offset 240 of $key and decremented once before
#     the loop; $key0 starts at $key+16 and advances 16 bytes per round;
#   - .Loop_dec exists in the same three preprocessor-selected variants as
#     the encrypt loop (__mips_smartmips, MIPS32R2/MIPS64R2, generic), with
#     the state words picked in the order the "Td" comments indicate;
#   - after the loop, eight words at 1024+0 .. 1024+224 from $Tbl are
#     loaded (labelled "prefetch Td4"), and the last round indexes bytes at
#     $Tbl+1024 with 8-bit indices (mask 0xff), recombining them with
#     _ins/_ins2 before the final key XOR;
#   - the result is left in $s0-$s3 and the routine returns with "jr $ra".
#
# Everything between the heredoc markers is emitted text, so the "#if" and
# "# Td..." lines in it are part of the output, not Perl comments.
# NOTE(review): _xtr, _ins and _ins2 are not real MIPS mnemonics; presumably
# they are rewritten by post-processing later in this file - confirm.
734 $code.=<<___;
735 .align 5
736 .ent _mips_AES_decrypt
737 _mips_AES_decrypt:
738 .frame $sp,0,$ra
739 .set reorder
740 lw $t0,0($key)
741 lw $t1,4($key)
742 lw $t2,8($key)
743 lw $t3,12($key)
744 lw $cnt,240($key)
745 $PTR_ADD $key0,$key,16
746
747 xor $s0,$t0
748 xor $s1,$t1
749 xor $s2,$t2
750 xor $s3,$t3
751
752 subu $cnt,1
753 #if defined(__mips_smartmips)
754 ext $i0,$s3,16,8
755 .Loop_dec:
756 ext $i1,$s0,16,8
757 ext $i2,$s1,16,8
758 ext $i3,$s2,16,8
759 lwxs $t0,$i0($Tbl) # Td1[s3>>16]
760 ext $i0,$s2,8,8
761 lwxs $t1,$i1($Tbl) # Td1[s0>>16]
762 ext $i1,$s3,8,8
763 lwxs $t2,$i2($Tbl) # Td1[s1>>16]
764 ext $i2,$s0,8,8
765 lwxs $t3,$i3($Tbl) # Td1[s2>>16]
766 ext $i3,$s1,8,8
767
768 lwxs $t4,$i0($Tbl) # Td2[s2>>8]
769 ext $i0,$s1,0,8
770 lwxs $t5,$i1($Tbl) # Td2[s3>>8]
771 ext $i1,$s2,0,8
772 lwxs $t6,$i2($Tbl) # Td2[s0>>8]
773 ext $i2,$s3,0,8
774 lwxs $t7,$i3($Tbl) # Td2[s1>>8]
775 ext $i3,$s0,0,8
776
777 lwxs $t8,$i0($Tbl) # Td3[s1]
778 ext $i0,$s0,24,8
779 lwxs $t9,$i1($Tbl) # Td3[s2]
780 ext $i1,$s1,24,8
781 lwxs $t10,$i2($Tbl) # Td3[s3]
782 ext $i2,$s2,24,8
783 lwxs $t11,$i3($Tbl) # Td3[s0]
784 ext $i3,$s3,24,8
785
786 rotr $t0,$t0,8
787 rotr $t1,$t1,8
788 rotr $t2,$t2,8
789 rotr $t3,$t3,8
790
791 rotr $t4,$t4,16
792 rotr $t5,$t5,16
793 rotr $t6,$t6,16
794 rotr $t7,$t7,16
795
796 xor $t0,$t4
797 lwxs $t4,$i0($Tbl) # Td0[s0>>24]
798 xor $t1,$t5
799 lwxs $t5,$i1($Tbl) # Td0[s1>>24]
800 xor $t2,$t6
801 lwxs $t6,$i2($Tbl) # Td0[s2>>24]
802 xor $t3,$t7
803 lwxs $t7,$i3($Tbl) # Td0[s3>>24]
804
805 rotr $t8,$t8,24
806 lw $s0,0($key0)
807 rotr $t9,$t9,24
808 lw $s1,4($key0)
809 rotr $t10,$t10,24
810 lw $s2,8($key0)
811 rotr $t11,$t11,24
812 lw $s3,12($key0)
813
814 xor $t0,$t8
815 xor $t1,$t9
816 xor $t2,$t10
817 xor $t3,$t11
818
819 xor $t0,$t4
820 xor $t1,$t5
821 xor $t2,$t6
822 xor $t3,$t7
823
824 subu $cnt,1
825 $PTR_ADD $key0,16
826 xor $s0,$t0
827 xor $s1,$t1
828 xor $s2,$t2
829 xor $s3,$t3
830 .set noreorder
831 bnez $cnt,.Loop_dec
832 ext $i0,$s3,16,8
833
834 _xtr $i0,$s3,16-2
835 #else
836 #if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
837 move $i0,$Tbl
838 move $i1,$Tbl
839 move $i2,$Tbl
840 move $i3,$Tbl
841 ext $t0,$s3,16,8
842 .Loop_dec:
843 ext $t1,$s0,16,8
844 ext $t2,$s1,16,8
845 ext $t3,$s2,16,8
846 $PTR_INS $i0,$t0,2,8
847 $PTR_INS $i1,$t1,2,8
848 $PTR_INS $i2,$t2,2,8
849 $PTR_INS $i3,$t3,2,8
850 lw $t0,0($i0) # Td1[s3>>16]
851 ext $t4,$s2,8,8
852 lw $t1,0($i1) # Td1[s0>>16]
853 ext $t5,$s3,8,8
854 lw $t2,0($i2) # Td1[s1>>16]
855 ext $t6,$s0,8,8
856 lw $t3,0($i3) # Td1[s2>>16]
857 ext $t7,$s1,8,8
858 $PTR_INS $i0,$t4,2,8
859 $PTR_INS $i1,$t5,2,8
860 $PTR_INS $i2,$t6,2,8
861 $PTR_INS $i3,$t7,2,8
862 #else
863 _xtr $i0,$s3,16-2
864 .Loop_dec:
865 _xtr $i1,$s0,16-2
866 _xtr $i2,$s1,16-2
867 _xtr $i3,$s2,16-2
868 and $i0,0x3fc
869 and $i1,0x3fc
870 and $i2,0x3fc
871 and $i3,0x3fc
872 $PTR_ADD $i0,$Tbl
873 $PTR_ADD $i1,$Tbl
874 $PTR_ADD $i2,$Tbl
875 $PTR_ADD $i3,$Tbl
876 lwl $t0,3($i0) # Td1[s3>>16]
877 lwl $t1,3($i1) # Td1[s0>>16]
878 lwl $t2,3($i2) # Td1[s1>>16]
879 lwl $t3,3($i3) # Td1[s2>>16]
880 lwr $t0,2($i0) # Td1[s3>>16]
881 _xtr $i0,$s2,8-2
882 lwr $t1,2($i1) # Td1[s0>>16]
883 _xtr $i1,$s3,8-2
884 lwr $t2,2($i2) # Td1[s1>>16]
885 _xtr $i2,$s0,8-2
886 lwr $t3,2($i3) # Td1[s2>>16]
887 _xtr $i3,$s1,8-2
888 and $i0,0x3fc
889 and $i1,0x3fc
890 and $i2,0x3fc
891 and $i3,0x3fc
892 $PTR_ADD $i0,$Tbl
893 $PTR_ADD $i1,$Tbl
894 $PTR_ADD $i2,$Tbl
895 $PTR_ADD $i3,$Tbl
896 #endif
897 #if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
898 rotr $t0,$t0,8
899 rotr $t1,$t1,8
900 rotr $t2,$t2,8
901 rotr $t3,$t3,8
902 # if defined(_MIPSEL)
903 lw $t4,0($i0) # Td2[s2>>8]
904 ext $t8,$s1,0,8
905 lw $t5,0($i1) # Td2[s3>>8]
906 ext $t9,$s2,0,8
907 lw $t6,0($i2) # Td2[s0>>8]
908 ext $t10,$s3,0,8
909 lw $t7,0($i3) # Td2[s1>>8]
910 ext $t11,$s0,0,8
911 $PTR_INS $i0,$t8,2,8
912 $PTR_INS $i1,$t9,2,8
913 $PTR_INS $i2,$t10,2,8
914 $PTR_INS $i3,$t11,2,8
915 lw $t8,0($i0) # Td3[s1]
916 $PTR_INS $i0,$s0,2,8
917 lw $t9,0($i1) # Td3[s2]
918 $PTR_INS $i1,$s1,2,8
919 lw $t10,0($i2) # Td3[s3]
920 $PTR_INS $i2,$s2,2,8
921 lw $t11,0($i3) # Td3[s0]
922 $PTR_INS $i3,$s3,2,8
923 #else
924 lw $t4,0($i0) # Td2[s2>>8]
925 $PTR_INS $i0,$s1,2,8
926 lw $t5,0($i1) # Td2[s3>>8]
927 $PTR_INS $i1,$s2,2,8
928 lw $t6,0($i2) # Td2[s0>>8]
929 $PTR_INS $i2,$s3,2,8
930 lw $t7,0($i3) # Td2[s1>>8]
931 $PTR_INS $i3,$s0,2,8
932
933 lw $t8,0($i0) # Td3[s1]
934 _xtr $i0,$s0,24-2
935 lw $t9,0($i1) # Td3[s2]
936 _xtr $i1,$s1,24-2
937 lw $t10,0($i2) # Td3[s3]
938 _xtr $i2,$s2,24-2
939 lw $t11,0($i3) # Td3[s0]
940 _xtr $i3,$s3,24-2
941
942 and $i0,0x3fc
943 and $i1,0x3fc
944 and $i2,0x3fc
945 and $i3,0x3fc
946 $PTR_ADD $i0,$Tbl
947 $PTR_ADD $i1,$Tbl
948 $PTR_ADD $i2,$Tbl
949 $PTR_ADD $i3,$Tbl
950 #endif
951 rotr $t4,$t4,16
952 rotr $t5,$t5,16
953 rotr $t6,$t6,16
954 rotr $t7,$t7,16
955
956 rotr $t8,$t8,24
957 rotr $t9,$t9,24
958 rotr $t10,$t10,24
959 rotr $t11,$t11,24
960 #else
961 lwl $t4,2($i0) # Td2[s2>>8]
962 lwl $t5,2($i1) # Td2[s3>>8]
963 lwl $t6,2($i2) # Td2[s0>>8]
964 lwl $t7,2($i3) # Td2[s1>>8]
965 lwr $t4,1($i0) # Td2[s2>>8]
966 _xtr $i0,$s1,0-2
967 lwr $t5,1($i1) # Td2[s3>>8]
968 _xtr $i1,$s2,0-2
969 lwr $t6,1($i2) # Td2[s0>>8]
970 _xtr $i2,$s3,0-2
971 lwr $t7,1($i3) # Td2[s1>>8]
972 _xtr $i3,$s0,0-2
973
974 and $i0,0x3fc
975 and $i1,0x3fc
976 and $i2,0x3fc
977 and $i3,0x3fc
978 $PTR_ADD $i0,$Tbl
979 $PTR_ADD $i1,$Tbl
980 $PTR_ADD $i2,$Tbl
981 $PTR_ADD $i3,$Tbl
982 lwl $t8,1($i0) # Td3[s1]
983 lwl $t9,1($i1) # Td3[s2]
984 lwl $t10,1($i2) # Td3[s3]
985 lwl $t11,1($i3) # Td3[s0]
986 lwr $t8,0($i0) # Td3[s1]
987 _xtr $i0,$s0,24-2
988 lwr $t9,0($i1) # Td3[s2]
989 _xtr $i1,$s1,24-2
990 lwr $t10,0($i2) # Td3[s3]
991 _xtr $i2,$s2,24-2
992 lwr $t11,0($i3) # Td3[s0]
993 _xtr $i3,$s3,24-2
994
995 and $i0,0x3fc
996 and $i1,0x3fc
997 and $i2,0x3fc
998 and $i3,0x3fc
999 $PTR_ADD $i0,$Tbl
1000 $PTR_ADD $i1,$Tbl
1001 $PTR_ADD $i2,$Tbl
1002 $PTR_ADD $i3,$Tbl
1003 #endif
1004
1005 xor $t0,$t4
1006 lw $t4,0($i0) # Td0[s0>>24]
1007 xor $t1,$t5
1008 lw $t5,0($i1) # Td0[s1>>24]
1009 xor $t2,$t6
1010 lw $t6,0($i2) # Td0[s2>>24]
1011 xor $t3,$t7
1012 lw $t7,0($i3) # Td0[s3>>24]
1013
1014 xor $t0,$t8
1015 lw $s0,0($key0)
1016 xor $t1,$t9
1017 lw $s1,4($key0)
1018 xor $t2,$t10
1019 lw $s2,8($key0)
1020 xor $t3,$t11
1021 lw $s3,12($key0)
1022
1023 xor $t0,$t4
1024 xor $t1,$t5
1025 xor $t2,$t6
1026 xor $t3,$t7
1027
1028 subu $cnt,1
1029 $PTR_ADD $key0,16
1030 xor $s0,$t0
1031 xor $s1,$t1
1032 xor $s2,$t2
1033 xor $s3,$t3
1034 .set noreorder
1035 bnez $cnt,.Loop_dec
1036 #if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
1037 ext $t0,$s3,16,8
1038 #endif
1039
1040 _xtr $i0,$s3,16-2
1041 #endif
1042
1043 .set reorder
1044 lw $t4,1024($Tbl) # prefetch Td4
1045 _xtr $i0,$s3,16
1046 lw $t5,1024+32($Tbl)
1047 _xtr $i1,$s0,16
1048 lw $t6,1024+64($Tbl)
1049 _xtr $i2,$s1,16
1050 lw $t7,1024+96($Tbl)
1051 _xtr $i3,$s2,16
1052 lw $t8,1024+128($Tbl)
1053 and $i0,0xff
1054 lw $t9,1024+160($Tbl)
1055 and $i1,0xff
1056 lw $t10,1024+192($Tbl)
1057 and $i2,0xff
1058 lw $t11,1024+224($Tbl)
1059 and $i3,0xff
1060
1061 $PTR_ADD $i0,$Tbl
1062 $PTR_ADD $i1,$Tbl
1063 $PTR_ADD $i2,$Tbl
1064 $PTR_ADD $i3,$Tbl
1065 lbu $t0,1024($i0) # Td4[s3>>16]
1066 _xtr $i0,$s2,8
1067 lbu $t1,1024($i1) # Td4[s0>>16]
1068 _xtr $i1,$s3,8
1069 lbu $t2,1024($i2) # Td4[s1>>16]
1070 _xtr $i2,$s0,8
1071 lbu $t3,1024($i3) # Td4[s2>>16]
1072 _xtr $i3,$s1,8
1073
1074 and $i0,0xff
1075 and $i1,0xff
1076 and $i2,0xff
1077 and $i3,0xff
1078 $PTR_ADD $i0,$Tbl
1079 $PTR_ADD $i1,$Tbl
1080 $PTR_ADD $i2,$Tbl
1081 $PTR_ADD $i3,$Tbl
1082 #if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
1083 # if defined(_MIPSEL)
1084 lbu $t4,1024($i0) # Td4[s2>>8]
1085 $PTR_INS $i0,$s0,0,8
1086 lbu $t5,1024($i1) # Td4[s3>>8]
1087 $PTR_INS $i1,$s1,0,8
1088 lbu $t6,1024($i2) # Td4[s0>>8]
1089 $PTR_INS $i2,$s2,0,8
1090 lbu $t7,1024($i3) # Td4[s1>>8]
1091 $PTR_INS $i3,$s3,0,8
1092
1093 lbu $t8,1024($i0) # Td4[s0>>24]
1094 _xtr $i0,$s1,0
1095 lbu $t9,1024($i1) # Td4[s1>>24]
1096 _xtr $i1,$s2,0
1097 lbu $t10,1024($i2) # Td4[s2>>24]
1098 _xtr $i2,$s3,0
1099 lbu $t11,1024($i3) # Td4[s3>>24]
1100 _xtr $i3,$s0,0
1101
1102 $PTR_ADD $i0,$Tbl
1103 $PTR_ADD $i1,$Tbl
1104 $PTR_ADD $i2,$Tbl
1105 $PTR_ADD $i3,$Tbl
1106 # else
1107 lbu $t4,1024($i0) # Td4[s2>>8]
1108 _xtr $i0,$s0,24
1109 lbu $t5,1024($i1) # Td4[s3>>8]
1110 _xtr $i1,$s1,24
1111 lbu $t6,1024($i2) # Td4[s0>>8]
1112 _xtr $i2,$s2,24
1113 lbu $t7,1024($i3) # Td4[s1>>8]
1114 _xtr $i3,$s3,24
1115
1116 $PTR_ADD $i0,$Tbl
1117 $PTR_ADD $i1,$Tbl
1118 $PTR_ADD $i2,$Tbl
1119 $PTR_ADD $i3,$Tbl
1120 lbu $t8,1024($i0) # Td4[s0>>24]
1121 $PTR_INS $i0,$s1,0,8
1122 lbu $t9,1024($i1) # Td4[s1>>24]
1123 $PTR_INS $i1,$s2,0,8
1124 lbu $t10,1024($i2) # Td4[s2>>24]
1125 $PTR_INS $i2,$s3,0,8
1126 lbu $t11,1024($i3) # Td4[s3>>24]
1127 $PTR_INS $i3,$s0,0,8
1128 # endif
1129 _ins $t0,16
1130 _ins $t1,16
1131 _ins $t2,16
1132 _ins $t3,16
1133
1134 _ins2 $t0,$t4,8
1135 lbu $t4,1024($i0) # Td4[s1]
1136 _ins2 $t1,$t5,8
1137 lbu $t5,1024($i1) # Td4[s2]
1138 _ins2 $t2,$t6,8
1139 lbu $t6,1024($i2) # Td4[s3]
1140 _ins2 $t3,$t7,8
1141 lbu $t7,1024($i3) # Td4[s0]
1142
1143 _ins2 $t0,$t8,24
1144 lw $s0,0($key0)
1145 _ins2 $t1,$t9,24
1146 lw $s1,4($key0)
1147 _ins2 $t2,$t10,24
1148 lw $s2,8($key0)
1149 _ins2 $t3,$t11,24
1150 lw $s3,12($key0)
1151
1152 _ins2 $t0,$t4,0
1153 _ins2 $t1,$t5,0
1154 _ins2 $t2,$t6,0
1155 _ins2 $t3,$t7,0
1156 #else
1157 lbu $t4,1024($i0) # Td4[s2>>8]
1158 _xtr $i0,$s0,24
1159 lbu $t5,1024($i1) # Td4[s3>>8]
1160 _xtr $i1,$s1,24
1161 lbu $t6,1024($i2) # Td4[s0>>8]
1162 _xtr $i2,$s2,24
1163 lbu $t7,1024($i3) # Td4[s1>>8]
1164 _xtr $i3,$s3,24
1165
1166 $PTR_ADD $i0,$Tbl
1167 $PTR_ADD $i1,$Tbl
1168 $PTR_ADD $i2,$Tbl
1169 $PTR_ADD $i3,$Tbl
1170 lbu $t8,1024($i0) # Td4[s0>>24]
1171 _xtr $i0,$s1,0
1172 lbu $t9,1024($i1) # Td4[s1>>24]
1173 _xtr $i1,$s2,0
1174 lbu $t10,1024($i2) # Td4[s2>>24]
1175 _xtr $i2,$s3,0
1176 lbu $t11,1024($i3) # Td4[s3>>24]
1177 _xtr $i3,$s0,0
1178
1179 $PTR_ADD $i0,$Tbl
1180 $PTR_ADD $i1,$Tbl
1181 $PTR_ADD $i2,$Tbl
1182 $PTR_ADD $i3,$Tbl
1183
1184 _ins $t0,16
1185 _ins $t1,16
1186 _ins $t2,16
1187 _ins $t3,16
1188
1189 _ins $t4,8
1190 _ins $t5,8
1191 _ins $t6,8
1192 _ins $t7,8
1193
1194 xor $t0,$t4
1195 lbu $t4,1024($i0) # Td4[s1]
1196 xor $t1,$t5
1197 lbu $t5,1024($i1) # Td4[s2]
1198 xor $t2,$t6
1199 lbu $t6,1024($i2) # Td4[s3]
1200 xor $t3,$t7
1201 lbu $t7,1024($i3) # Td4[s0]
1202
1203 _ins $t8,24
1204 lw $s0,0($key0)
1205 _ins $t9,24
1206 lw $s1,4($key0)
1207 _ins $t10,24
1208 lw $s2,8($key0)
1209 _ins $t11,24
1210 lw $s3,12($key0)
1211
1212 xor $t0,$t8
1213 xor $t1,$t9
1214 xor $t2,$t10
1215 xor $t3,$t11
1216
1217 _ins $t4,0
1218 _ins $t5,0
1219 _ins $t6,0
1220 _ins $t7,0
1221
1222 xor $t0,$t4
1223 xor $t1,$t5
1224 xor $t2,$t6
1225 xor $t3,$t7
1226 #endif
1227
1228 xor $s0,$t0
1229 xor $s1,$t1
1230 xor $s2,$t2
1231 xor $s3,$t3
1232
1233 jr $ra
1234 .end _mips_AES_decrypt
1235
1236 .align 5
1237 .globl AES_decrypt
1238 .ent AES_decrypt
1239 AES_decrypt:
1240 .frame $sp,$FRAMESIZE,$ra
1241 .mask $SAVED_REGS_MASK,-$SZREG
1242 .set noreorder
1243 ___
# Body of the public AES_decrypt entry point.  Its label and .frame/.mask
# directives were already appended by the preceding heredoc.  The pieces
# below are appended conditionally depending on $flavour.

# o32 only: PIC setup with .cpload on $pf, placed before the stack frame
# is created.
1244 $code.=<<___ if ($flavour =~ /o32/i); # o32 PIC-ification
1245 .cpload $pf
1246 ___
# Allocate the frame and save $ra, $fp and $s11..$s4 from the top of the
# frame downwards, one $SZREG-sized slot each.
1247 $code.=<<___;
1248 $PTR_SUB $sp,$FRAMESIZE
1249 $REG_S $ra,$FRAMESIZE-1*$SZREG($sp)
1250 $REG_S $fp,$FRAMESIZE-2*$SZREG($sp)
1251 $REG_S $s11,$FRAMESIZE-3*$SZREG($sp)
1252 $REG_S $s10,$FRAMESIZE-4*$SZREG($sp)
1253 $REG_S $s9,$FRAMESIZE-5*$SZREG($sp)
1254 $REG_S $s8,$FRAMESIZE-6*$SZREG($sp)
1255 $REG_S $s7,$FRAMESIZE-7*$SZREG($sp)
1256 $REG_S $s6,$FRAMESIZE-8*$SZREG($sp)
1257 $REG_S $s5,$FRAMESIZE-9*$SZREG($sp)
1258 $REG_S $s4,$FRAMESIZE-10*$SZREG($sp)
1259 ___
# nubi flavours additionally save registers 15..12 and $gp; the other
# flavours skip these five stores.
1260 $code.=<<___ if ($flavour =~ /nubi/i); # optimize non-nubi prologue
1261 $REG_S \$15,$FRAMESIZE-11*$SZREG($sp)
1262 $REG_S \$14,$FRAMESIZE-12*$SZREG($sp)
1263 $REG_S \$13,$FRAMESIZE-13*$SZREG($sp)
1264 $REG_S \$12,$FRAMESIZE-14*$SZREG($sp)
1265 $REG_S $gp,$FRAMESIZE-15*$SZREG($sp)
1266 ___
# Every flavour except o32: PIC setup with .cplocal/.cpsetup, using $Tbl
# as the local GP register.
1267 $code.=<<___ if ($flavour !~ /o32/i); # non-o32 PIC-ification
1268 .cplocal $Tbl
1269 .cpsetup $pf,$zero,AES_decrypt
1270 ___
# Main part: point $Tbl at AES_Td, load the 16-byte input block from $inp
# into $s0-$s3, call _mips_AES_decrypt, store $s0-$s3 to $out, then start
# restoring the saved registers.  R6 builds use plain lw/sw; all others use
# lwl/lwr and swl/swr pairs at the $MSB/$LSB byte offsets.
# NOTE(review): presumably the pairs are there to tolerate unaligned
# buffers, and R6 avoids them because it lacks those instructions - confirm.
1271 $code.=<<___;
1272 .set reorder
1273 $PTR_LA $Tbl,AES_Td # PIC-ified 'load address'
1274
1275 #if defined(_MIPS_ARCH_MIPS32R6) || defined(_MIPS_ARCH_MIPS64R6)
1276 lw $s0,0($inp)
1277 lw $s1,4($inp)
1278 lw $s2,8($inp)
1279 lw $s3,12($inp)
1280 #else
1281 lwl $s0,0+$MSB($inp)
1282 lwl $s1,4+$MSB($inp)
1283 lwl $s2,8+$MSB($inp)
1284 lwl $s3,12+$MSB($inp)
1285 lwr $s0,0+$LSB($inp)
1286 lwr $s1,4+$LSB($inp)
1287 lwr $s2,8+$LSB($inp)
1288 lwr $s3,12+$LSB($inp)
1289 #endif
1290
1291 bal _mips_AES_decrypt
1292
1293 #if defined(_MIPS_ARCH_MIPS32R6) || defined(_MIPS_ARCH_MIPS64R6)
1294 sw $s0,0($out)
1295 sw $s1,4($out)
1296 sw $s2,8($out)
1297 sw $s3,12($out)
1298 #else
1299 swr $s0,0+$LSB($out)
1300 swr $s1,4+$LSB($out)
1301 swr $s2,8+$LSB($out)
1302 swr $s3,12+$LSB($out)
1303 swl $s0,0+$MSB($out)
1304 swl $s1,4+$MSB($out)
1305 swl $s2,8+$MSB($out)
1306 swl $s3,12+$MSB($out)
1307 #endif
1308
1309 .set noreorder
1310 $REG_L $ra,$FRAMESIZE-1*$SZREG($sp)
1311 $REG_L $fp,$FRAMESIZE-2*$SZREG($sp)
1312 $REG_L $s11,$FRAMESIZE-3*$SZREG($sp)
1313 $REG_L $s10,$FRAMESIZE-4*$SZREG($sp)
1314 $REG_L $s9,$FRAMESIZE-5*$SZREG($sp)
1315 $REG_L $s8,$FRAMESIZE-6*$SZREG($sp)
1316 $REG_L $s7,$FRAMESIZE-7*$SZREG($sp)
1317 $REG_L $s6,$FRAMESIZE-8*$SZREG($sp)
1318 $REG_L $s5,$FRAMESIZE-9*$SZREG($sp)
1319 $REG_L $s4,$FRAMESIZE-10*$SZREG($sp)
1320 ___
# nubi flavours restore the five extra registers saved in the prologue.
1321 $code.=<<___ if ($flavour =~ /nubi/i);
1322 $REG_L \$15,$FRAMESIZE-11*$SZREG($sp)
1323 $REG_L \$14,$FRAMESIZE-12*$SZREG($sp)
1324 $REG_L \$13,$FRAMESIZE-13*$SZREG($sp)
1325 $REG_L \$12,$FRAMESIZE-14*$SZREG($sp)
1326 $REG_L $gp,$FRAMESIZE-15*$SZREG($sp)
1327 ___
# Return.  ".set noreorder" is still in effect, so the stack-pointer
# adjustment written after "jr" sits in the jump's delay slot.
1328 $code.=<<___;
1329 jr $ra
1330 $PTR_ADD $sp,$FRAMESIZE
1331 .end AES_decrypt
1332 ___
1333 }}}
1334 \f
{{{
# Key schedule: emits _mips_AES_set_encrypt_key (internal worker) and the
# public AES_set_encrypt_key / AES_set_decrypt_key entry points.

# Stack frame of the public entry points: eight register-sized slots.
# $ra and $fp are always saved; the nubi flavour additionally saves
# $s3-$s0 and $gp (slots 3..7 counted down from the top of the frame).
my $FRAMESIZE=8*$SZREG;
my $SAVED_REGS_MASK = ($flavour =~ /nubi/i) ? "0xc000f008" : "0xc0000000";

# Arguments of the worker: user key, key length in bits, key schedule
# and the S-box pointer ($Tbl is loaded with AES_Te4 by the callers).
my ($inp,$bits,$key,$Tbl)=($a0,$a1,$a2,$a3);
# Up to eight 32-bit words of key material being expanded.
my ($rk0,$rk1,$rk2,$rk3,$rk4,$rk5,$rk6,$rk7)=($a4,$a5,$a6,$a7,$s0,$s1,$s2,$s3);
# S-box lookup temporaries. Note that $i1 shares $t0 with the status
# code, which is why every exit path sets $t0 explicitly.
my ($i0,$i1,$i2,$i3)=($at,$t0,$t1,$t2);
# $rcon walks the round-constant table, $cnt counts loop iterations and
# carries the number of rounds back to the caller.
my ($rcon,$cnt)=($gp,$fp);

# _mips_AES_set_encrypt_key: expands the user key into the schedule.
# Status is left in $t0: -1 if $inp or $key is zero, -2 if $bits is not
# 128, 192 or 256, and 0 on success. On success $cnt holds the number of
# rounds (10, 12 or 14), which is also stored at byte offset 240 of the
# schedule, and $key is rewound to the start of the schedule.
# $rcon is set to $Tbl+256, i.e. the rcon table that follows the 256-byte
# Te4 table.
# The same heredoc also opens the public AES_set_encrypt_key wrapper.
$code.=<<___;
.align	5
.ent	_mips_AES_set_encrypt_key
_mips_AES_set_encrypt_key:
	.frame	$sp,0,$ra
	.set	noreorder
	beqz	$inp,.Lekey_done
	li	$t0,-1
	beqz	$key,.Lekey_done
	$PTR_ADD $rcon,$Tbl,256

	.set	reorder
#if defined(_MIPS_ARCH_MIPS32R6) || defined(_MIPS_ARCH_MIPS64R6)
	lw	$rk0,0($inp)		# load 128 bits
	lw	$rk1,4($inp)
	lw	$rk2,8($inp)
	lw	$rk3,12($inp)
#else
	lwl	$rk0,0+$MSB($inp)	# load 128 bits
	lwl	$rk1,4+$MSB($inp)
	lwl	$rk2,8+$MSB($inp)
	lwl	$rk3,12+$MSB($inp)
	lwr	$rk0,0+$LSB($inp)
	lwr	$rk1,4+$LSB($inp)
	lwr	$rk2,8+$LSB($inp)
	lwr	$rk3,12+$LSB($inp)
#endif
	li	$at,128
	.set	noreorder
	beq	$bits,$at,.L128bits
	li	$cnt,10

	.set	reorder
#if defined(_MIPS_ARCH_MIPS32R6) || defined(_MIPS_ARCH_MIPS64R6)
	lw	$rk4,16($inp)		# load 192 bits
	lw	$rk5,20($inp)
#else
	lwl	$rk4,16+$MSB($inp)	# load 192 bits
	lwl	$rk5,20+$MSB($inp)
	lwr	$rk4,16+$LSB($inp)
	lwr	$rk5,20+$LSB($inp)
#endif
	li	$at,192
	.set	noreorder
	beq	$bits,$at,.L192bits
	li	$cnt,8

	.set	reorder
#if defined(_MIPS_ARCH_MIPS32R6) || defined(_MIPS_ARCH_MIPS64R6)
	lw	$rk6,24($inp)		# load 256 bits
	lw	$rk7,28($inp)
#else
	lwl	$rk6,24+$MSB($inp)	# load 256 bits
	lwl	$rk7,28+$MSB($inp)
	lwr	$rk6,24+$LSB($inp)
	lwr	$rk7,28+$LSB($inp)
#endif
	li	$at,256
	.set	noreorder
	beq	$bits,$at,.L256bits
	li	$cnt,7

	b	.Lekey_done
	li	$t0,-2

.align	4
.L128bits:
	.set	reorder
	srl	$i0,$rk3,16
	srl	$i1,$rk3,8
	and	$i0,0xff
	and	$i1,0xff
	and	$i2,$rk3,0xff
	srl	$i3,$rk3,24
	$PTR_ADD $i0,$Tbl
	$PTR_ADD $i1,$Tbl
	$PTR_ADD $i2,$Tbl
	$PTR_ADD $i3,$Tbl
	lbu	$i0,0($i0)
	lbu	$i1,0($i1)
	lbu	$i2,0($i2)
	lbu	$i3,0($i3)

	sw	$rk0,0($key)
	sw	$rk1,4($key)
	sw	$rk2,8($key)
	sw	$rk3,12($key)
	subu	$cnt,1
	$PTR_ADD $key,16

	_bias	$i0,24
	_bias	$i1,16
	_bias	$i2,8
	_bias	$i3,0

	xor	$rk0,$i0
	lw	$i0,0($rcon)
	xor	$rk0,$i1
	xor	$rk0,$i2
	xor	$rk0,$i3
	xor	$rk0,$i0

	xor	$rk1,$rk0
	xor	$rk2,$rk1
	xor	$rk3,$rk2

	.set	noreorder
	bnez	$cnt,.L128bits
	$PTR_ADD $rcon,4

	sw	$rk0,0($key)
	sw	$rk1,4($key)
	sw	$rk2,8($key)
	li	$cnt,10
	sw	$rk3,12($key)
	li	$t0,0
	sw	$cnt,80($key)
	b	.Lekey_done
	$PTR_SUB $key,10*16

.align	4
.L192bits:
	.set	reorder
	srl	$i0,$rk5,16
	srl	$i1,$rk5,8
	and	$i0,0xff
	and	$i1,0xff
	and	$i2,$rk5,0xff
	srl	$i3,$rk5,24
	$PTR_ADD $i0,$Tbl
	$PTR_ADD $i1,$Tbl
	$PTR_ADD $i2,$Tbl
	$PTR_ADD $i3,$Tbl
	lbu	$i0,0($i0)
	lbu	$i1,0($i1)
	lbu	$i2,0($i2)
	lbu	$i3,0($i3)

	sw	$rk0,0($key)
	sw	$rk1,4($key)
	sw	$rk2,8($key)
	sw	$rk3,12($key)
	sw	$rk4,16($key)
	sw	$rk5,20($key)
	subu	$cnt,1
	$PTR_ADD $key,24

	_bias	$i0,24
	_bias	$i1,16
	_bias	$i2,8
	_bias	$i3,0

	xor	$rk0,$i0
	lw	$i0,0($rcon)
	xor	$rk0,$i1
	xor	$rk0,$i2
	xor	$rk0,$i3
	xor	$rk0,$i0

	xor	$rk1,$rk0
	xor	$rk2,$rk1
	xor	$rk3,$rk2
	xor	$rk4,$rk3
	xor	$rk5,$rk4

	.set	noreorder
	bnez	$cnt,.L192bits
	$PTR_ADD $rcon,4

	sw	$rk0,0($key)
	sw	$rk1,4($key)
	sw	$rk2,8($key)
	li	$cnt,12
	sw	$rk3,12($key)
	li	$t0,0
	sw	$cnt,48($key)
	b	.Lekey_done
	$PTR_SUB $key,12*16

.align	4
.L256bits:
	.set	reorder
	srl	$i0,$rk7,16
	srl	$i1,$rk7,8
	and	$i0,0xff
	and	$i1,0xff
	and	$i2,$rk7,0xff
	srl	$i3,$rk7,24
	$PTR_ADD $i0,$Tbl
	$PTR_ADD $i1,$Tbl
	$PTR_ADD $i2,$Tbl
	$PTR_ADD $i3,$Tbl
	lbu	$i0,0($i0)
	lbu	$i1,0($i1)
	lbu	$i2,0($i2)
	lbu	$i3,0($i3)

	sw	$rk0,0($key)
	sw	$rk1,4($key)
	sw	$rk2,8($key)
	sw	$rk3,12($key)
	sw	$rk4,16($key)
	sw	$rk5,20($key)
	sw	$rk6,24($key)
	sw	$rk7,28($key)
	subu	$cnt,1

	_bias	$i0,24
	_bias	$i1,16
	_bias	$i2,8
	_bias	$i3,0

	xor	$rk0,$i0
	lw	$i0,0($rcon)
	xor	$rk0,$i1
	xor	$rk0,$i2
	xor	$rk0,$i3
	xor	$rk0,$i0

	xor	$rk1,$rk0
	xor	$rk2,$rk1
	xor	$rk3,$rk2
	beqz	$cnt,.L256bits_done

	srl	$i0,$rk3,24
	srl	$i1,$rk3,16
	srl	$i2,$rk3,8
	and	$i3,$rk3,0xff
	and	$i1,0xff
	and	$i2,0xff
	$PTR_ADD $i0,$Tbl
	$PTR_ADD $i1,$Tbl
	$PTR_ADD $i2,$Tbl
	$PTR_ADD $i3,$Tbl
	lbu	$i0,0($i0)
	lbu	$i1,0($i1)
	lbu	$i2,0($i2)
	lbu	$i3,0($i3)
	sll	$i0,24
	sll	$i1,16
	sll	$i2,8

	xor	$rk4,$i0
	xor	$rk4,$i1
	xor	$rk4,$i2
	xor	$rk4,$i3

	xor	$rk5,$rk4
	xor	$rk6,$rk5
	xor	$rk7,$rk6

	$PTR_ADD $key,32
	.set	noreorder
	b	.L256bits
	$PTR_ADD $rcon,4

.L256bits_done:
	sw	$rk0,32($key)
	sw	$rk1,36($key)
	sw	$rk2,40($key)
	li	$cnt,14
	sw	$rk3,44($key)
	li	$t0,0
	sw	$cnt,48($key)
	$PTR_SUB $key,12*16

.Lekey_done:
	jr	$ra
	nop
.end	_mips_AES_set_encrypt_key

.globl	AES_set_encrypt_key
.ent	AES_set_encrypt_key
AES_set_encrypt_key:
	.frame	$sp,$FRAMESIZE,$ra
	.mask	$SAVED_REGS_MASK,-$SZREG
	.set	noreorder
___
$code.=<<___ if ($flavour =~ /o32/i);	# o32 PIC-ification
	.cpload	$pf
___
# AES_set_encrypt_key prologue: allocate the frame, save $ra and $fp.
$code.=<<___;
	$PTR_SUB $sp,$FRAMESIZE
	$REG_S	$ra,$FRAMESIZE-1*$SZREG($sp)
	$REG_S	$fp,$FRAMESIZE-2*$SZREG($sp)
___
$code.=<<___ if ($flavour =~ /nubi/i);	# optimize non-nubi prologue
	$REG_S	$s3,$FRAMESIZE-3*$SZREG($sp)
	$REG_S	$s2,$FRAMESIZE-4*$SZREG($sp)
	$REG_S	$s1,$FRAMESIZE-5*$SZREG($sp)
	$REG_S	$s0,$FRAMESIZE-6*$SZREG($sp)
	$REG_S	$gp,$FRAMESIZE-7*$SZREG($sp)
___
$code.=<<___ if ($flavour !~ /o32/i);	# non-o32 PIC-ification
	.cplocal	$Tbl
	.cpsetup	$pf,$zero,AES_set_encrypt_key
___
# Point $Tbl at AES_Te4, run the worker and copy its status from $t0
# into $a0 before the epilogue.
$code.=<<___;
	.set	reorder
	$PTR_LA	$Tbl,AES_Te4		# PIC-ified 'load address'

	bal	_mips_AES_set_encrypt_key

	.set	noreorder
	move	$a0,$t0
	$REG_L	$ra,$FRAMESIZE-1*$SZREG($sp)
	$REG_L	$fp,$FRAMESIZE-2*$SZREG($sp)
___
# Restore from the very slots the nubi prologue saved to (3..7). The
# offsets used to be 11..15, which with an 8-slot frame lie below $sp
# and reloaded the registers with unrelated stack contents.
$code.=<<___ if ($flavour =~ /nubi/i);
	$REG_L	$s3,$FRAMESIZE-3*$SZREG($sp)
	$REG_L	$s2,$FRAMESIZE-4*$SZREG($sp)
	$REG_L	$s1,$FRAMESIZE-5*$SZREG($sp)
	$REG_L	$s0,$FRAMESIZE-6*$SZREG($sp)
	$REG_L	$gp,$FRAMESIZE-7*$SZREG($sp)
___
$code.=<<___;
	jr	$ra
	$PTR_ADD $sp,$FRAMESIZE
.end	AES_set_encrypt_key
___

# AES_set_decrypt_key reuses the argument registers once the encryption
# schedule has been built: $head/$tail walk the schedule from both ends,
# $tp1..$tpe hold the multiples of a schedule word used by the .Lmix
# loop, and $m plus the three constants implement the byte-wise doubling.
my ($head,$tail)=($inp,$bits);
my ($tp1,$tp2,$tp4,$tp8,$tp9,$tpb,$tpd,$tpe)=($a4,$a5,$a6,$a7,$s0,$s1,$s2,$s3);
my ($m,$x80808080,$x7f7f7f7f,$x1b1b1b1b)=($at,$t0,$t1,$t2);
$code.=<<___;
.align	5
.globl	AES_set_decrypt_key
.ent	AES_set_decrypt_key
AES_set_decrypt_key:
	.frame	$sp,$FRAMESIZE,$ra
	.mask	$SAVED_REGS_MASK,-$SZREG
	.set	noreorder
___
$code.=<<___ if ($flavour =~ /o32/i);	# o32 PIC-ification
	.cpload	$pf
___
$code.=<<___;
	$PTR_SUB $sp,$FRAMESIZE
	$REG_S	$ra,$FRAMESIZE-1*$SZREG($sp)
	$REG_S	$fp,$FRAMESIZE-2*$SZREG($sp)
___
$code.=<<___ if ($flavour =~ /nubi/i);	# optimize non-nubi prologue
	$REG_S	$s3,$FRAMESIZE-3*$SZREG($sp)
	$REG_S	$s2,$FRAMESIZE-4*$SZREG($sp)
	$REG_S	$s1,$FRAMESIZE-5*$SZREG($sp)
	$REG_S	$s0,$FRAMESIZE-6*$SZREG($sp)
	$REG_S	$gp,$FRAMESIZE-7*$SZREG($sp)
___
$code.=<<___ if ($flavour !~ /o32/i);	# non-o32 PIC-ification
	.cplocal	$Tbl
	.cpsetup	$pf,$zero,AES_set_decrypt_key
___
# Build the encryption schedule first and bail out if the worker left a
# negative status in $t0. Then (.Lswap) exchange 16-byte round keys from
# both ends of the schedule until $head meets $tail, and (.Lmix)
# transform the (rounds-1)*4 words starting at offset 16 in place.
# $cnt still holds the number of rounds set by the worker.
$code.=<<___;
	.set	reorder
	$PTR_LA	$Tbl,AES_Te4		# PIC-ified 'load address'

	bal	_mips_AES_set_encrypt_key

	bltz	$t0,.Ldkey_done

	sll	$at,$cnt,4
	$PTR_ADD $head,$key,0
	$PTR_ADD $tail,$key,$at
.align	4
.Lswap:
	lw	$rk0,0($head)
	lw	$rk1,4($head)
	lw	$rk2,8($head)
	lw	$rk3,12($head)
	lw	$rk4,0($tail)
	lw	$rk5,4($tail)
	lw	$rk6,8($tail)
	lw	$rk7,12($tail)
	sw	$rk0,0($tail)
	sw	$rk1,4($tail)
	sw	$rk2,8($tail)
	sw	$rk3,12($tail)
	$PTR_ADD $head,16
	$PTR_SUB $tail,16
	sw	$rk4,-16($head)
	sw	$rk5,-12($head)
	sw	$rk6,-8($head)
	sw	$rk7,-4($head)
	bne	$head,$tail,.Lswap

	lw	$tp1,16($key)		# modulo-scheduled
	lui	$x80808080,0x8080
	subu	$cnt,1
	or	$x80808080,0x8080
	sll	$cnt,2
	$PTR_ADD $key,16
	lui	$x1b1b1b1b,0x1b1b
	nor	$x7f7f7f7f,$zero,$x80808080
	or	$x1b1b1b1b,0x1b1b
.align	4
.Lmix:
	and	$m,$tp1,$x80808080
	and	$tp2,$tp1,$x7f7f7f7f
	srl	$tp4,$m,7
	addu	$tp2,$tp2		# tp2<<1
	subu	$m,$tp4
	and	$m,$x1b1b1b1b
	xor	$tp2,$m

	and	$m,$tp2,$x80808080
	and	$tp4,$tp2,$x7f7f7f7f
	srl	$tp8,$m,7
	addu	$tp4,$tp4		# tp4<<1
	subu	$m,$tp8
	and	$m,$x1b1b1b1b
	xor	$tp4,$m

	and	$m,$tp4,$x80808080
	and	$tp8,$tp4,$x7f7f7f7f
	srl	$tp9,$m,7
	addu	$tp8,$tp8		# tp8<<1
	subu	$m,$tp9
	and	$m,$x1b1b1b1b
	xor	$tp8,$m

	xor	$tp9,$tp8,$tp1
	xor	$tpe,$tp8,$tp4
	xor	$tpb,$tp9,$tp2
	xor	$tpd,$tp9,$tp4

#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
	rotr	$tp1,$tpd,16
	 xor	$tpe,$tp2
	rotr	$tp2,$tp9,8
	xor	$tpe,$tp1
	rotr	$tp4,$tpb,24
	xor	$tpe,$tp2
	lw	$tp1,4($key)		# modulo-scheduled
	xor	$tpe,$tp4
#else
	_ror	$tp1,$tpd,16
	 xor	$tpe,$tp2
	_ror	$tp2,$tpd,-16
	xor	$tpe,$tp1
	_ror	$tp1,$tp9,8
	xor	$tpe,$tp2
	_ror	$tp2,$tp9,-24
	xor	$tpe,$tp1
	_ror	$tp1,$tpb,24
	xor	$tpe,$tp2
	_ror	$tp2,$tpb,-8
	xor	$tpe,$tp1
	lw	$tp1,4($key)		# modulo-scheduled
	xor	$tpe,$tp2
#endif
	subu	$cnt,1
	sw	$tpe,0($key)
	$PTR_ADD $key,4
	bnez	$cnt,.Lmix

	li	$t0,0
.Ldkey_done:
	.set	noreorder
	move	$a0,$t0
	$REG_L	$ra,$FRAMESIZE-1*$SZREG($sp)
	$REG_L	$fp,$FRAMESIZE-2*$SZREG($sp)
___
# Same correction as in AES_set_encrypt_key: reload the nubi registers
# from slots 3..7, where the prologue above stored them.
$code.=<<___ if ($flavour =~ /nubi/i);
	$REG_L	$s3,$FRAMESIZE-3*$SZREG($sp)
	$REG_L	$s2,$FRAMESIZE-4*$SZREG($sp)
	$REG_L	$s1,$FRAMESIZE-5*$SZREG($sp)
	$REG_L	$s0,$FRAMESIZE-6*$SZREG($sp)
	$REG_L	$gp,$FRAMESIZE-7*$SZREG($sp)
___
$code.=<<___;
	jr	$ra
	$PTR_ADD $sp,$FRAMESIZE
.end	AES_set_decrypt_key
___
}}}
1819
1820 ######################################################################
# Tables are kept in an endian-neutral manner, i.e. spelled out byte by byte
1822 $code.=<<___;
1823 .rdata
1824 .align 10
1825 AES_Te:
1826 .byte 0xc6,0x63,0x63,0xa5, 0xf8,0x7c,0x7c,0x84 # Te0
1827 .byte 0xee,0x77,0x77,0x99, 0xf6,0x7b,0x7b,0x8d
1828 .byte 0xff,0xf2,0xf2,0x0d, 0xd6,0x6b,0x6b,0xbd
1829 .byte 0xde,0x6f,0x6f,0xb1, 0x91,0xc5,0xc5,0x54
1830 .byte 0x60,0x30,0x30,0x50, 0x02,0x01,0x01,0x03
1831 .byte 0xce,0x67,0x67,0xa9, 0x56,0x2b,0x2b,0x7d
1832 .byte 0xe7,0xfe,0xfe,0x19, 0xb5,0xd7,0xd7,0x62
1833 .byte 0x4d,0xab,0xab,0xe6, 0xec,0x76,0x76,0x9a
1834 .byte 0x8f,0xca,0xca,0x45, 0x1f,0x82,0x82,0x9d
1835 .byte 0x89,0xc9,0xc9,0x40, 0xfa,0x7d,0x7d,0x87
1836 .byte 0xef,0xfa,0xfa,0x15, 0xb2,0x59,0x59,0xeb
1837 .byte 0x8e,0x47,0x47,0xc9, 0xfb,0xf0,0xf0,0x0b
1838 .byte 0x41,0xad,0xad,0xec, 0xb3,0xd4,0xd4,0x67
1839 .byte 0x5f,0xa2,0xa2,0xfd, 0x45,0xaf,0xaf,0xea
1840 .byte 0x23,0x9c,0x9c,0xbf, 0x53,0xa4,0xa4,0xf7
1841 .byte 0xe4,0x72,0x72,0x96, 0x9b,0xc0,0xc0,0x5b
1842 .byte 0x75,0xb7,0xb7,0xc2, 0xe1,0xfd,0xfd,0x1c
1843 .byte 0x3d,0x93,0x93,0xae, 0x4c,0x26,0x26,0x6a
1844 .byte 0x6c,0x36,0x36,0x5a, 0x7e,0x3f,0x3f,0x41
1845 .byte 0xf5,0xf7,0xf7,0x02, 0x83,0xcc,0xcc,0x4f
1846 .byte 0x68,0x34,0x34,0x5c, 0x51,0xa5,0xa5,0xf4
1847 .byte 0xd1,0xe5,0xe5,0x34, 0xf9,0xf1,0xf1,0x08
1848 .byte 0xe2,0x71,0x71,0x93, 0xab,0xd8,0xd8,0x73
1849 .byte 0x62,0x31,0x31,0x53, 0x2a,0x15,0x15,0x3f
1850 .byte 0x08,0x04,0x04,0x0c, 0x95,0xc7,0xc7,0x52
1851 .byte 0x46,0x23,0x23,0x65, 0x9d,0xc3,0xc3,0x5e
1852 .byte 0x30,0x18,0x18,0x28, 0x37,0x96,0x96,0xa1
1853 .byte 0x0a,0x05,0x05,0x0f, 0x2f,0x9a,0x9a,0xb5
1854 .byte 0x0e,0x07,0x07,0x09, 0x24,0x12,0x12,0x36
1855 .byte 0x1b,0x80,0x80,0x9b, 0xdf,0xe2,0xe2,0x3d
1856 .byte 0xcd,0xeb,0xeb,0x26, 0x4e,0x27,0x27,0x69
1857 .byte 0x7f,0xb2,0xb2,0xcd, 0xea,0x75,0x75,0x9f
1858 .byte 0x12,0x09,0x09,0x1b, 0x1d,0x83,0x83,0x9e
1859 .byte 0x58,0x2c,0x2c,0x74, 0x34,0x1a,0x1a,0x2e
1860 .byte 0x36,0x1b,0x1b,0x2d, 0xdc,0x6e,0x6e,0xb2
1861 .byte 0xb4,0x5a,0x5a,0xee, 0x5b,0xa0,0xa0,0xfb
1862 .byte 0xa4,0x52,0x52,0xf6, 0x76,0x3b,0x3b,0x4d
1863 .byte 0xb7,0xd6,0xd6,0x61, 0x7d,0xb3,0xb3,0xce
1864 .byte 0x52,0x29,0x29,0x7b, 0xdd,0xe3,0xe3,0x3e
1865 .byte 0x5e,0x2f,0x2f,0x71, 0x13,0x84,0x84,0x97
1866 .byte 0xa6,0x53,0x53,0xf5, 0xb9,0xd1,0xd1,0x68
1867 .byte 0x00,0x00,0x00,0x00, 0xc1,0xed,0xed,0x2c
1868 .byte 0x40,0x20,0x20,0x60, 0xe3,0xfc,0xfc,0x1f
1869 .byte 0x79,0xb1,0xb1,0xc8, 0xb6,0x5b,0x5b,0xed
1870 .byte 0xd4,0x6a,0x6a,0xbe, 0x8d,0xcb,0xcb,0x46
1871 .byte 0x67,0xbe,0xbe,0xd9, 0x72,0x39,0x39,0x4b
1872 .byte 0x94,0x4a,0x4a,0xde, 0x98,0x4c,0x4c,0xd4
1873 .byte 0xb0,0x58,0x58,0xe8, 0x85,0xcf,0xcf,0x4a
1874 .byte 0xbb,0xd0,0xd0,0x6b, 0xc5,0xef,0xef,0x2a
1875 .byte 0x4f,0xaa,0xaa,0xe5, 0xed,0xfb,0xfb,0x16
1876 .byte 0x86,0x43,0x43,0xc5, 0x9a,0x4d,0x4d,0xd7
1877 .byte 0x66,0x33,0x33,0x55, 0x11,0x85,0x85,0x94
1878 .byte 0x8a,0x45,0x45,0xcf, 0xe9,0xf9,0xf9,0x10
1879 .byte 0x04,0x02,0x02,0x06, 0xfe,0x7f,0x7f,0x81
1880 .byte 0xa0,0x50,0x50,0xf0, 0x78,0x3c,0x3c,0x44
1881 .byte 0x25,0x9f,0x9f,0xba, 0x4b,0xa8,0xa8,0xe3
1882 .byte 0xa2,0x51,0x51,0xf3, 0x5d,0xa3,0xa3,0xfe
1883 .byte 0x80,0x40,0x40,0xc0, 0x05,0x8f,0x8f,0x8a
1884 .byte 0x3f,0x92,0x92,0xad, 0x21,0x9d,0x9d,0xbc
1885 .byte 0x70,0x38,0x38,0x48, 0xf1,0xf5,0xf5,0x04
1886 .byte 0x63,0xbc,0xbc,0xdf, 0x77,0xb6,0xb6,0xc1
1887 .byte 0xaf,0xda,0xda,0x75, 0x42,0x21,0x21,0x63
1888 .byte 0x20,0x10,0x10,0x30, 0xe5,0xff,0xff,0x1a
1889 .byte 0xfd,0xf3,0xf3,0x0e, 0xbf,0xd2,0xd2,0x6d
1890 .byte 0x81,0xcd,0xcd,0x4c, 0x18,0x0c,0x0c,0x14
1891 .byte 0x26,0x13,0x13,0x35, 0xc3,0xec,0xec,0x2f
1892 .byte 0xbe,0x5f,0x5f,0xe1, 0x35,0x97,0x97,0xa2
1893 .byte 0x88,0x44,0x44,0xcc, 0x2e,0x17,0x17,0x39
1894 .byte 0x93,0xc4,0xc4,0x57, 0x55,0xa7,0xa7,0xf2
1895 .byte 0xfc,0x7e,0x7e,0x82, 0x7a,0x3d,0x3d,0x47
1896 .byte 0xc8,0x64,0x64,0xac, 0xba,0x5d,0x5d,0xe7
1897 .byte 0x32,0x19,0x19,0x2b, 0xe6,0x73,0x73,0x95
1898 .byte 0xc0,0x60,0x60,0xa0, 0x19,0x81,0x81,0x98
1899 .byte 0x9e,0x4f,0x4f,0xd1, 0xa3,0xdc,0xdc,0x7f
1900 .byte 0x44,0x22,0x22,0x66, 0x54,0x2a,0x2a,0x7e
1901 .byte 0x3b,0x90,0x90,0xab, 0x0b,0x88,0x88,0x83
1902 .byte 0x8c,0x46,0x46,0xca, 0xc7,0xee,0xee,0x29
1903 .byte 0x6b,0xb8,0xb8,0xd3, 0x28,0x14,0x14,0x3c
1904 .byte 0xa7,0xde,0xde,0x79, 0xbc,0x5e,0x5e,0xe2
1905 .byte 0x16,0x0b,0x0b,0x1d, 0xad,0xdb,0xdb,0x76
1906 .byte 0xdb,0xe0,0xe0,0x3b, 0x64,0x32,0x32,0x56
1907 .byte 0x74,0x3a,0x3a,0x4e, 0x14,0x0a,0x0a,0x1e
1908 .byte 0x92,0x49,0x49,0xdb, 0x0c,0x06,0x06,0x0a
1909 .byte 0x48,0x24,0x24,0x6c, 0xb8,0x5c,0x5c,0xe4
1910 .byte 0x9f,0xc2,0xc2,0x5d, 0xbd,0xd3,0xd3,0x6e
1911 .byte 0x43,0xac,0xac,0xef, 0xc4,0x62,0x62,0xa6
1912 .byte 0x39,0x91,0x91,0xa8, 0x31,0x95,0x95,0xa4
1913 .byte 0xd3,0xe4,0xe4,0x37, 0xf2,0x79,0x79,0x8b
1914 .byte 0xd5,0xe7,0xe7,0x32, 0x8b,0xc8,0xc8,0x43
1915 .byte 0x6e,0x37,0x37,0x59, 0xda,0x6d,0x6d,0xb7
1916 .byte 0x01,0x8d,0x8d,0x8c, 0xb1,0xd5,0xd5,0x64
1917 .byte 0x9c,0x4e,0x4e,0xd2, 0x49,0xa9,0xa9,0xe0
1918 .byte 0xd8,0x6c,0x6c,0xb4, 0xac,0x56,0x56,0xfa
1919 .byte 0xf3,0xf4,0xf4,0x07, 0xcf,0xea,0xea,0x25
1920 .byte 0xca,0x65,0x65,0xaf, 0xf4,0x7a,0x7a,0x8e
1921 .byte 0x47,0xae,0xae,0xe9, 0x10,0x08,0x08,0x18
1922 .byte 0x6f,0xba,0xba,0xd5, 0xf0,0x78,0x78,0x88
1923 .byte 0x4a,0x25,0x25,0x6f, 0x5c,0x2e,0x2e,0x72
1924 .byte 0x38,0x1c,0x1c,0x24, 0x57,0xa6,0xa6,0xf1
1925 .byte 0x73,0xb4,0xb4,0xc7, 0x97,0xc6,0xc6,0x51
1926 .byte 0xcb,0xe8,0xe8,0x23, 0xa1,0xdd,0xdd,0x7c
1927 .byte 0xe8,0x74,0x74,0x9c, 0x3e,0x1f,0x1f,0x21
1928 .byte 0x96,0x4b,0x4b,0xdd, 0x61,0xbd,0xbd,0xdc
1929 .byte 0x0d,0x8b,0x8b,0x86, 0x0f,0x8a,0x8a,0x85
1930 .byte 0xe0,0x70,0x70,0x90, 0x7c,0x3e,0x3e,0x42
1931 .byte 0x71,0xb5,0xb5,0xc4, 0xcc,0x66,0x66,0xaa
1932 .byte 0x90,0x48,0x48,0xd8, 0x06,0x03,0x03,0x05
1933 .byte 0xf7,0xf6,0xf6,0x01, 0x1c,0x0e,0x0e,0x12
1934 .byte 0xc2,0x61,0x61,0xa3, 0x6a,0x35,0x35,0x5f
1935 .byte 0xae,0x57,0x57,0xf9, 0x69,0xb9,0xb9,0xd0
1936 .byte 0x17,0x86,0x86,0x91, 0x99,0xc1,0xc1,0x58
1937 .byte 0x3a,0x1d,0x1d,0x27, 0x27,0x9e,0x9e,0xb9
1938 .byte 0xd9,0xe1,0xe1,0x38, 0xeb,0xf8,0xf8,0x13
1939 .byte 0x2b,0x98,0x98,0xb3, 0x22,0x11,0x11,0x33
1940 .byte 0xd2,0x69,0x69,0xbb, 0xa9,0xd9,0xd9,0x70
1941 .byte 0x07,0x8e,0x8e,0x89, 0x33,0x94,0x94,0xa7
1942 .byte 0x2d,0x9b,0x9b,0xb6, 0x3c,0x1e,0x1e,0x22
1943 .byte 0x15,0x87,0x87,0x92, 0xc9,0xe9,0xe9,0x20
1944 .byte 0x87,0xce,0xce,0x49, 0xaa,0x55,0x55,0xff
1945 .byte 0x50,0x28,0x28,0x78, 0xa5,0xdf,0xdf,0x7a
1946 .byte 0x03,0x8c,0x8c,0x8f, 0x59,0xa1,0xa1,0xf8
1947 .byte 0x09,0x89,0x89,0x80, 0x1a,0x0d,0x0d,0x17
1948 .byte 0x65,0xbf,0xbf,0xda, 0xd7,0xe6,0xe6,0x31
1949 .byte 0x84,0x42,0x42,0xc6, 0xd0,0x68,0x68,0xb8
1950 .byte 0x82,0x41,0x41,0xc3, 0x29,0x99,0x99,0xb0
1951 .byte 0x5a,0x2d,0x2d,0x77, 0x1e,0x0f,0x0f,0x11
1952 .byte 0x7b,0xb0,0xb0,0xcb, 0xa8,0x54,0x54,0xfc
1953 .byte 0x6d,0xbb,0xbb,0xd6, 0x2c,0x16,0x16,0x3a
1954
1955 AES_Td:
1956 .byte 0x51,0xf4,0xa7,0x50, 0x7e,0x41,0x65,0x53 # Td0
1957 .byte 0x1a,0x17,0xa4,0xc3, 0x3a,0x27,0x5e,0x96
1958 .byte 0x3b,0xab,0x6b,0xcb, 0x1f,0x9d,0x45,0xf1
1959 .byte 0xac,0xfa,0x58,0xab, 0x4b,0xe3,0x03,0x93
1960 .byte 0x20,0x30,0xfa,0x55, 0xad,0x76,0x6d,0xf6
1961 .byte 0x88,0xcc,0x76,0x91, 0xf5,0x02,0x4c,0x25
1962 .byte 0x4f,0xe5,0xd7,0xfc, 0xc5,0x2a,0xcb,0xd7
1963 .byte 0x26,0x35,0x44,0x80, 0xb5,0x62,0xa3,0x8f
1964 .byte 0xde,0xb1,0x5a,0x49, 0x25,0xba,0x1b,0x67
1965 .byte 0x45,0xea,0x0e,0x98, 0x5d,0xfe,0xc0,0xe1
1966 .byte 0xc3,0x2f,0x75,0x02, 0x81,0x4c,0xf0,0x12
1967 .byte 0x8d,0x46,0x97,0xa3, 0x6b,0xd3,0xf9,0xc6
1968 .byte 0x03,0x8f,0x5f,0xe7, 0x15,0x92,0x9c,0x95
1969 .byte 0xbf,0x6d,0x7a,0xeb, 0x95,0x52,0x59,0xda
1970 .byte 0xd4,0xbe,0x83,0x2d, 0x58,0x74,0x21,0xd3
1971 .byte 0x49,0xe0,0x69,0x29, 0x8e,0xc9,0xc8,0x44
1972 .byte 0x75,0xc2,0x89,0x6a, 0xf4,0x8e,0x79,0x78
1973 .byte 0x99,0x58,0x3e,0x6b, 0x27,0xb9,0x71,0xdd
1974 .byte 0xbe,0xe1,0x4f,0xb6, 0xf0,0x88,0xad,0x17
1975 .byte 0xc9,0x20,0xac,0x66, 0x7d,0xce,0x3a,0xb4
1976 .byte 0x63,0xdf,0x4a,0x18, 0xe5,0x1a,0x31,0x82
1977 .byte 0x97,0x51,0x33,0x60, 0x62,0x53,0x7f,0x45
1978 .byte 0xb1,0x64,0x77,0xe0, 0xbb,0x6b,0xae,0x84
1979 .byte 0xfe,0x81,0xa0,0x1c, 0xf9,0x08,0x2b,0x94
1980 .byte 0x70,0x48,0x68,0x58, 0x8f,0x45,0xfd,0x19
1981 .byte 0x94,0xde,0x6c,0x87, 0x52,0x7b,0xf8,0xb7
1982 .byte 0xab,0x73,0xd3,0x23, 0x72,0x4b,0x02,0xe2
1983 .byte 0xe3,0x1f,0x8f,0x57, 0x66,0x55,0xab,0x2a
1984 .byte 0xb2,0xeb,0x28,0x07, 0x2f,0xb5,0xc2,0x03
1985 .byte 0x86,0xc5,0x7b,0x9a, 0xd3,0x37,0x08,0xa5
1986 .byte 0x30,0x28,0x87,0xf2, 0x23,0xbf,0xa5,0xb2
1987 .byte 0x02,0x03,0x6a,0xba, 0xed,0x16,0x82,0x5c
1988 .byte 0x8a,0xcf,0x1c,0x2b, 0xa7,0x79,0xb4,0x92
1989 .byte 0xf3,0x07,0xf2,0xf0, 0x4e,0x69,0xe2,0xa1
1990 .byte 0x65,0xda,0xf4,0xcd, 0x06,0x05,0xbe,0xd5
1991 .byte 0xd1,0x34,0x62,0x1f, 0xc4,0xa6,0xfe,0x8a
1992 .byte 0x34,0x2e,0x53,0x9d, 0xa2,0xf3,0x55,0xa0
1993 .byte 0x05,0x8a,0xe1,0x32, 0xa4,0xf6,0xeb,0x75
1994 .byte 0x0b,0x83,0xec,0x39, 0x40,0x60,0xef,0xaa
1995 .byte 0x5e,0x71,0x9f,0x06, 0xbd,0x6e,0x10,0x51
1996 .byte 0x3e,0x21,0x8a,0xf9, 0x96,0xdd,0x06,0x3d
1997 .byte 0xdd,0x3e,0x05,0xae, 0x4d,0xe6,0xbd,0x46
1998 .byte 0x91,0x54,0x8d,0xb5, 0x71,0xc4,0x5d,0x05
1999 .byte 0x04,0x06,0xd4,0x6f, 0x60,0x50,0x15,0xff
2000 .byte 0x19,0x98,0xfb,0x24, 0xd6,0xbd,0xe9,0x97
2001 .byte 0x89,0x40,0x43,0xcc, 0x67,0xd9,0x9e,0x77
2002 .byte 0xb0,0xe8,0x42,0xbd, 0x07,0x89,0x8b,0x88
2003 .byte 0xe7,0x19,0x5b,0x38, 0x79,0xc8,0xee,0xdb
2004 .byte 0xa1,0x7c,0x0a,0x47, 0x7c,0x42,0x0f,0xe9
2005 .byte 0xf8,0x84,0x1e,0xc9, 0x00,0x00,0x00,0x00
2006 .byte 0x09,0x80,0x86,0x83, 0x32,0x2b,0xed,0x48
2007 .byte 0x1e,0x11,0x70,0xac, 0x6c,0x5a,0x72,0x4e
2008 .byte 0xfd,0x0e,0xff,0xfb, 0x0f,0x85,0x38,0x56
2009 .byte 0x3d,0xae,0xd5,0x1e, 0x36,0x2d,0x39,0x27
2010 .byte 0x0a,0x0f,0xd9,0x64, 0x68,0x5c,0xa6,0x21
2011 .byte 0x9b,0x5b,0x54,0xd1, 0x24,0x36,0x2e,0x3a
2012 .byte 0x0c,0x0a,0x67,0xb1, 0x93,0x57,0xe7,0x0f
2013 .byte 0xb4,0xee,0x96,0xd2, 0x1b,0x9b,0x91,0x9e
2014 .byte 0x80,0xc0,0xc5,0x4f, 0x61,0xdc,0x20,0xa2
2015 .byte 0x5a,0x77,0x4b,0x69, 0x1c,0x12,0x1a,0x16
2016 .byte 0xe2,0x93,0xba,0x0a, 0xc0,0xa0,0x2a,0xe5
2017 .byte 0x3c,0x22,0xe0,0x43, 0x12,0x1b,0x17,0x1d
2018 .byte 0x0e,0x09,0x0d,0x0b, 0xf2,0x8b,0xc7,0xad
2019 .byte 0x2d,0xb6,0xa8,0xb9, 0x14,0x1e,0xa9,0xc8
2020 .byte 0x57,0xf1,0x19,0x85, 0xaf,0x75,0x07,0x4c
2021 .byte 0xee,0x99,0xdd,0xbb, 0xa3,0x7f,0x60,0xfd
2022 .byte 0xf7,0x01,0x26,0x9f, 0x5c,0x72,0xf5,0xbc
2023 .byte 0x44,0x66,0x3b,0xc5, 0x5b,0xfb,0x7e,0x34
2024 .byte 0x8b,0x43,0x29,0x76, 0xcb,0x23,0xc6,0xdc
2025 .byte 0xb6,0xed,0xfc,0x68, 0xb8,0xe4,0xf1,0x63
2026 .byte 0xd7,0x31,0xdc,0xca, 0x42,0x63,0x85,0x10
2027 .byte 0x13,0x97,0x22,0x40, 0x84,0xc6,0x11,0x20
2028 .byte 0x85,0x4a,0x24,0x7d, 0xd2,0xbb,0x3d,0xf8
2029 .byte 0xae,0xf9,0x32,0x11, 0xc7,0x29,0xa1,0x6d
2030 .byte 0x1d,0x9e,0x2f,0x4b, 0xdc,0xb2,0x30,0xf3
2031 .byte 0x0d,0x86,0x52,0xec, 0x77,0xc1,0xe3,0xd0
2032 .byte 0x2b,0xb3,0x16,0x6c, 0xa9,0x70,0xb9,0x99
2033 .byte 0x11,0x94,0x48,0xfa, 0x47,0xe9,0x64,0x22
2034 .byte 0xa8,0xfc,0x8c,0xc4, 0xa0,0xf0,0x3f,0x1a
2035 .byte 0x56,0x7d,0x2c,0xd8, 0x22,0x33,0x90,0xef
2036 .byte 0x87,0x49,0x4e,0xc7, 0xd9,0x38,0xd1,0xc1
2037 .byte 0x8c,0xca,0xa2,0xfe, 0x98,0xd4,0x0b,0x36
2038 .byte 0xa6,0xf5,0x81,0xcf, 0xa5,0x7a,0xde,0x28
2039 .byte 0xda,0xb7,0x8e,0x26, 0x3f,0xad,0xbf,0xa4
2040 .byte 0x2c,0x3a,0x9d,0xe4, 0x50,0x78,0x92,0x0d
2041 .byte 0x6a,0x5f,0xcc,0x9b, 0x54,0x7e,0x46,0x62
2042 .byte 0xf6,0x8d,0x13,0xc2, 0x90,0xd8,0xb8,0xe8
2043 .byte 0x2e,0x39,0xf7,0x5e, 0x82,0xc3,0xaf,0xf5
2044 .byte 0x9f,0x5d,0x80,0xbe, 0x69,0xd0,0x93,0x7c
2045 .byte 0x6f,0xd5,0x2d,0xa9, 0xcf,0x25,0x12,0xb3
2046 .byte 0xc8,0xac,0x99,0x3b, 0x10,0x18,0x7d,0xa7
2047 .byte 0xe8,0x9c,0x63,0x6e, 0xdb,0x3b,0xbb,0x7b
2048 .byte 0xcd,0x26,0x78,0x09, 0x6e,0x59,0x18,0xf4
2049 .byte 0xec,0x9a,0xb7,0x01, 0x83,0x4f,0x9a,0xa8
2050 .byte 0xe6,0x95,0x6e,0x65, 0xaa,0xff,0xe6,0x7e
2051 .byte 0x21,0xbc,0xcf,0x08, 0xef,0x15,0xe8,0xe6
2052 .byte 0xba,0xe7,0x9b,0xd9, 0x4a,0x6f,0x36,0xce
2053 .byte 0xea,0x9f,0x09,0xd4, 0x29,0xb0,0x7c,0xd6
2054 .byte 0x31,0xa4,0xb2,0xaf, 0x2a,0x3f,0x23,0x31
2055 .byte 0xc6,0xa5,0x94,0x30, 0x35,0xa2,0x66,0xc0
2056 .byte 0x74,0x4e,0xbc,0x37, 0xfc,0x82,0xca,0xa6
2057 .byte 0xe0,0x90,0xd0,0xb0, 0x33,0xa7,0xd8,0x15
2058 .byte 0xf1,0x04,0x98,0x4a, 0x41,0xec,0xda,0xf7
2059 .byte 0x7f,0xcd,0x50,0x0e, 0x17,0x91,0xf6,0x2f
2060 .byte 0x76,0x4d,0xd6,0x8d, 0x43,0xef,0xb0,0x4d
2061 .byte 0xcc,0xaa,0x4d,0x54, 0xe4,0x96,0x04,0xdf
2062 .byte 0x9e,0xd1,0xb5,0xe3, 0x4c,0x6a,0x88,0x1b
2063 .byte 0xc1,0x2c,0x1f,0xb8, 0x46,0x65,0x51,0x7f
2064 .byte 0x9d,0x5e,0xea,0x04, 0x01,0x8c,0x35,0x5d
2065 .byte 0xfa,0x87,0x74,0x73, 0xfb,0x0b,0x41,0x2e
2066 .byte 0xb3,0x67,0x1d,0x5a, 0x92,0xdb,0xd2,0x52
2067 .byte 0xe9,0x10,0x56,0x33, 0x6d,0xd6,0x47,0x13
2068 .byte 0x9a,0xd7,0x61,0x8c, 0x37,0xa1,0x0c,0x7a
2069 .byte 0x59,0xf8,0x14,0x8e, 0xeb,0x13,0x3c,0x89
2070 .byte 0xce,0xa9,0x27,0xee, 0xb7,0x61,0xc9,0x35
2071 .byte 0xe1,0x1c,0xe5,0xed, 0x7a,0x47,0xb1,0x3c
2072 .byte 0x9c,0xd2,0xdf,0x59, 0x55,0xf2,0x73,0x3f
2073 .byte 0x18,0x14,0xce,0x79, 0x73,0xc7,0x37,0xbf
2074 .byte 0x53,0xf7,0xcd,0xea, 0x5f,0xfd,0xaa,0x5b
2075 .byte 0xdf,0x3d,0x6f,0x14, 0x78,0x44,0xdb,0x86
2076 .byte 0xca,0xaf,0xf3,0x81, 0xb9,0x68,0xc4,0x3e
2077 .byte 0x38,0x24,0x34,0x2c, 0xc2,0xa3,0x40,0x5f
2078 .byte 0x16,0x1d,0xc3,0x72, 0xbc,0xe2,0x25,0x0c
2079 .byte 0x28,0x3c,0x49,0x8b, 0xff,0x0d,0x95,0x41
2080 .byte 0x39,0xa8,0x01,0x71, 0x08,0x0c,0xb3,0xde
2081 .byte 0xd8,0xb4,0xe4,0x9c, 0x64,0x56,0xc1,0x90
2082 .byte 0x7b,0xcb,0x84,0x61, 0xd5,0x32,0xb6,0x70
2083 .byte 0x48,0x6c,0x5c,0x74, 0xd0,0xb8,0x57,0x42
2084
2085 .byte 0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38 # Td4
2086 .byte 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb
2087 .byte 0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87
2088 .byte 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb
2089 .byte 0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d
2090 .byte 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e
2091 .byte 0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2
2092 .byte 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25
2093 .byte 0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16
2094 .byte 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92
2095 .byte 0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda
2096 .byte 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84
2097 .byte 0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a
2098 .byte 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06
2099 .byte 0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02
2100 .byte 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b
2101 .byte 0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea
2102 .byte 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73
2103 .byte 0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85
2104 .byte 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e
2105 .byte 0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89
2106 .byte 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b
2107 .byte 0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20
2108 .byte 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4
2109 .byte 0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31
2110 .byte 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f
2111 .byte 0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d
2112 .byte 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef
2113 .byte 0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0
2114 .byte 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61
2115 .byte 0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26
2116 .byte 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d
2117
2118 AES_Te4:
2119 .byte 0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5 # Te4
2120 .byte 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76
2121 .byte 0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0
2122 .byte 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0
2123 .byte 0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc
2124 .byte 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15
2125 .byte 0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a
2126 .byte 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75
2127 .byte 0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0
2128 .byte 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84
2129 .byte 0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b
2130 .byte 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf
2131 .byte 0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85
2132 .byte 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8
2133 .byte 0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5
2134 .byte 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2
2135 .byte 0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17
2136 .byte 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73
2137 .byte 0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88
2138 .byte 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb
2139 .byte 0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c
2140 .byte 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79
2141 .byte 0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9
2142 .byte 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08
2143 .byte 0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6
2144 .byte 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a
2145 .byte 0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e
2146 .byte 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e
2147 .byte 0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94
2148 .byte 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf
2149 .byte 0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68
2150 .byte 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16
2151
2152 .byte 0x01,0x00,0x00,0x00, 0x02,0x00,0x00,0x00 # rcon
2153 .byte 0x04,0x00,0x00,0x00, 0x08,0x00,0x00,0x00
2154 .byte 0x10,0x00,0x00,0x00, 0x20,0x00,0x00,0x00
2155 .byte 0x40,0x00,0x00,0x00, 0x80,0x00,0x00,0x00
2156 .byte 0x1B,0x00,0x00,0x00, 0x36,0x00,0x00,0x00
2157 ___
2158 \f
# Final pass over the accumulated code: resolve embedded expressions and
# byte-order dependent constructs on each line, then print the line.
for (split(/\n/,$code)) {
	# replace every `...` with the value of the Perl expression inside
	s/\`([^\`]*)\`/eval $1/ge;

	# Lines starting with one of the made-up instructions (_xtr, _ins,
	# _ins2, _ror, _bias) are mapped onto real instructions whose shift
	# amount depends on the target byte order.
	if (/^\s+_/) {
	    # two-operand form: use the destination as the source as well
	    s/(_[a-z]+\s+)(\$[0-9]+),([^,]+)(#.*)*$/$1$2,$2,$3/;

	    # only the first substitution that matches is applied
	    s/_xtr\s+(\$[0-9]+),(\$[0-9]+),([0-9]+(\-2)*)/
		sprintf("srl\t$1,$2,%d",$big_endian ?	eval($3)
					:		eval("24-$3"))/e
	    or
	    s/_ins\s+(\$[0-9]+),(\$[0-9]+),([0-9]+)/
		sprintf("sll\t$1,$2,%d",$big_endian ?	eval($3)
					:		eval("24-$3"))/e
	    or
	    s/_ins2\s+(\$[0-9]+),(\$[0-9]+),([0-9]+)/
		sprintf("ins\t$1,$2,%d,8",$big_endian ?	eval($3)
					:		eval("24-$3"))/e
	    or
	    s/_ror\s+(\$[0-9]+),(\$[0-9]+),(\-?[0-9]+)/
		sprintf("srl\t$1,$2,%d",$big_endian ?	eval($3)
					:		eval("$3*-1"))/e
	    or
	    s/_bias\s+(\$[0-9]+),(\$[0-9]+),([0-9]+)/
		sprintf("sll\t$1,$2,%d",$big_endian ?	eval($3)
					:		eval("($3-16)&31"))/e;

	    # clean-up: a negative right shift becomes a left shift, a
	    # right shift by 0 becomes "and ...,0xff", and a left shift
	    # by 0 is commented out
	    s/srl\s+(\$[0-9]+),(\$[0-9]+),\-([0-9]+)/
		sprintf("sll\t$1,$2,$3")/e
	    or
	    s/srl\s+(\$[0-9]+),(\$[0-9]+),0/
		sprintf("and\t$1,$2,0xff")/e
	    or
	    s/(sll\s+\$[0-9]+,\$[0-9]+,0)/#$1/;
	}

	unless ($big_endian) {
	    # convert lwl/lwr and swr/swl offsets to little-endian order
	    if (/^\s+[sl]w[lr]\s+/) {
		s/([sl]wl.*)([0-9]+)\((\$[0-9]+)\)/
		    sprintf("$1%d($3)",eval("$2-$2%4+($2%4-1)&3"))/e
		or
		s/([sl]wr.*)([0-9]+)\((\$[0-9]+)\)/
		    sprintf("$1%d($3)",eval("$2-$2%4+($2%4+1)&3"))/e;
	    }

	    # mirror rotate amounts and ext bit positions
	    s/(rotr\s+\$[0-9]+,\$[0-9]+),([0-9]+)/sprintf("$1,%d",32-$2)/e;
	    s/(ext\s+\$[0-9]+,\$[0-9]+),([0-9]+),8/sprintf("$1,%d,8",24-$2)/e;
	}

	print $_,"\n";
}

# buffered write errors only surface when the handle is closed
close STDOUT or die "error closing STDOUT: $!";