#!/usr/bin/env perl
#
# ====================================================================
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
# project. The module is, however, dual licensed under OpenSSL and
# CRYPTOGAMS licenses depending on where you obtain it. For further
# details see http://www.openssl.org/~appro/cryptogams/.
# ====================================================================
#
# This module implements support for AES instructions as per PowerISA
# specification version 2.07, first implemented by POWER8 processor.
# The module is endian-agnostic in the sense that it supports both
# big- and little-endian cases. Data alignment in parallelizable modes
# is handled with VSX loads and stores, which implies MSR.VSX flag
# being set. It should also be noted that the ISA specification
# doesn't prohibit alignment exceptions for these instructions on
# page boundaries. Initially alignment was handled in pure AltiVec/VMX
# way [when data is aligned programmatically, which in turn guarantees
# exception-free execution], but it turned out to hamper performance
# when vcipher instructions are interleaved. It's reckoned that
# eventual misalignment penalties at page boundaries are on average
# lower than the additional overhead of the pure AltiVec approach.
$flavour = shift;

if ($flavour =~ /64/) {
	$SIZE_T	=8;
	$LRSAVE	=2*$SIZE_T;
	$STU	="stdu";
	$POP	="ld";
	$PUSH	="std";
	$UCMP	="cmpld";
} elsif ($flavour =~ /32/) {
	$SIZE_T	=4;
	$LRSAVE	=$SIZE_T;
	$STU	="stwu";
	$POP	="lwz";
	$PUSH	="stw";
	$UCMP	="cmplw";
} else { die "nonsense $flavour"; }

$LITTLE_ENDIAN = ($flavour=~/le$/) ? $SIZE_T : 0;

$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or
( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or
die "can't locate ppc-xlate.pl";

open STDOUT,"| $^X $xlate $flavour ".shift || die "can't call $xlate: $!";

$FRAME=8*$SIZE_T;
$prefix="aes_p8";

$sp="r1";
$vrsave="r12";
#########################################################################
{{{	# Key setup procedures						#
my ($inp,$bits,$out,$ptr,$cnt,$rounds)=map("r$_",(3..8));
my ($zero,$in0,$in1,$key,$rcon,$mask,$tmp)=map("v$_",(0..6));
my ($stage,$outperm,$outmask,$outhead,$outtail)=map("v$_",(7..11));

$code.=<<___;
.machine	"any"

.text

.align	7
rcon:
.long	0x01000000, 0x01000000, 0x01000000, 0x01000000	?rev
.long	0x1b000000, 0x1b000000, 0x1b000000, 0x1b000000	?rev
.long	0x0d0e0f0c, 0x0d0e0f0c, 0x0d0e0f0c, 0x0d0e0f0c	?rev
.long	0,0,0,0						?asis
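# Lconsts returns the address of the rcon table above in the pointer
# register: a branch-and-link to the very next instruction fills the
# link register with the current address, and the table starts 0x48
# bytes back from it (0x40 of table data plus the two instructions in
# between). The caller's LR is preserved in r0.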
Lconsts:
	mflr	r0
	bcl	20,31,\$+4
	mflr	$ptr	 #vvvvv "distance between . and rcon
	addi	$ptr,$ptr,-0x48
	mtlr	r0
	blr
	.long	0
	.byte	0,12,0x14,0,0,0,0,0
.asciz	"AES for PowerISA 2.07, CRYPTOGAMS by <appro\@openssl.org>"

.globl	.${prefix}_set_encrypt_key
.align	5
.${prefix}_set_encrypt_key:
Lset_encrypt_key:
	mflr		r11
	lis		r0,0xfff0
	$PUSH		r11,$LRSAVE($sp)
	mfspr		$vrsave,256
	mtspr		256,r0

	bl		Lconsts
	mtlr		r11

	neg		r9,$inp
	lvx		$in0,0,$inp
	addi		$inp,$inp,15		# 15 is not typo
	lvsr		$key,0,r9		# borrow $key
	li		r8,0x20
	cmpwi		$bits,192
	lvx		$in1,0,$inp
	le?vspltisb	$mask,0x0f		# borrow $mask
	lvx		$rcon,0,$ptr
	le?vxor		$key,$key,$mask		# adjust for byte swap
	lvx		$mask,r8,$ptr
	addi		$ptr,$ptr,0x10
	vperm		$in0,$in0,$in1,$key	# align [and byte swap in LE]
	li		$cnt,8
	vxor		$zero,$zero,$zero
	mtctr		$cnt

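	# Prepare for a possibly unaligned output stream: each round key
	# is rotated into store alignment with vperm/$outperm, merged
	# with the previously stored quadword through vsel/$outmask, and
	# written with an aligned stvx; $outhead carries the overlap
	# forward to the next store.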
	?lvsr		$outperm,0,$out
	vspltisb	$outmask,-1
	lvx		$outhead,0,$out
	?vperm		$outmask,$zero,$outmask,$outperm

	blt		Loop128
	addi		$inp,$inp,8
	beq		L192
	addi		$inp,$inp,8
	b		L256

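	# Each Loop128 iteration derives one round key in-register: the
	# vperm with the 0x0d0e0f0c mask rotate-n-splats the last word,
	# and vcipherlast against $rcon then leaves
	# SubWord(RotWord(w)) xor rcon in every lane (with all four
	# columns equal, ShiftRows is in effect a no-op). The
	# vsldoi/vxor ladder folds the result across the remaining
	# words, and vadduwm doubles rcon for the next round.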
.align	4
Loop128:
	vperm		$key,$in0,$in0,$mask	# rotate-n-splat
	vsldoi		$tmp,$zero,$in0,12	# >>32
	vperm		$outtail,$in0,$in0,$outperm	# rotate
	vsel		$stage,$outhead,$outtail,$outmask
	vmr		$outhead,$outtail
	vcipherlast	$key,$key,$rcon
	stvx		$stage,0,$out
	addi		$out,$out,16

	vxor		$in0,$in0,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in0,$in0,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in0,$in0,$tmp
	vadduwm		$rcon,$rcon,$rcon
	vxor		$in0,$in0,$key
	bdnz		Loop128

	lvx		$rcon,0,$ptr		# last two round keys

	vperm		$key,$in0,$in0,$mask	# rotate-n-splat
	vsldoi		$tmp,$zero,$in0,12	# >>32
	vperm		$outtail,$in0,$in0,$outperm	# rotate
	vsel		$stage,$outhead,$outtail,$outmask
	vmr		$outhead,$outtail
	vcipherlast	$key,$key,$rcon
	stvx		$stage,0,$out
	addi		$out,$out,16

	vxor		$in0,$in0,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in0,$in0,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in0,$in0,$tmp
	vadduwm		$rcon,$rcon,$rcon
	vxor		$in0,$in0,$key

	vperm		$key,$in0,$in0,$mask	# rotate-n-splat
	vsldoi		$tmp,$zero,$in0,12	# >>32
	vperm		$outtail,$in0,$in0,$outperm	# rotate
	vsel		$stage,$outhead,$outtail,$outmask
	vmr		$outhead,$outtail
	vcipherlast	$key,$key,$rcon
	stvx		$stage,0,$out
	addi		$out,$out,16

	vxor		$in0,$in0,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in0,$in0,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in0,$in0,$tmp
	vxor		$in0,$in0,$key
	vperm		$outtail,$in0,$in0,$outperm	# rotate
	vsel		$stage,$outhead,$outtail,$outmask
	vmr		$outhead,$outtail
	stvx		$stage,0,$out

	addi		$inp,$out,15		# 15 is not typo
	addi		$out,$out,0x50

	li		$rounds,10
	b		Ldone

.align	4
L192:
	lvx		$tmp,0,$inp
	li		$cnt,4
	vperm		$outtail,$in0,$in0,$outperm	# rotate
	vsel		$stage,$outhead,$outtail,$outmask
	vmr		$outhead,$outtail
	stvx		$stage,0,$out
	addi		$out,$out,16
	vperm		$in1,$in1,$tmp,$key	# align [and byte swap in LE]
	vspltisb	$key,8			# borrow $key
	mtctr		$cnt
	vsububm		$mask,$mask,$key	# adjust the mask

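	# AES-192 expands six words per step, so round keys straddle the
	# 16-byte vectors; $stage stitches halves of $in0/$in1 into whole
	# round keys on the way out. The rotate-n-splat mask was lowered
	# by 8 above because the word that feeds SubWord(RotWord()) is
	# now word 1 of $in1 instead of word 3.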
Loop192:
	vperm		$key,$in1,$in1,$mask	# rotate-n-splat
	vsldoi		$tmp,$zero,$in0,12	# >>32
	vcipherlast	$key,$key,$rcon

	vxor		$in0,$in0,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in0,$in0,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in0,$in0,$tmp

	vsldoi		$stage,$zero,$in1,8
	vspltw		$tmp,$in0,3
	vxor		$tmp,$tmp,$in1
	vsldoi		$in1,$zero,$in1,12	# >>32
	vadduwm		$rcon,$rcon,$rcon
	vxor		$in1,$in1,$tmp
	vxor		$in0,$in0,$key
	vxor		$in1,$in1,$key
	vsldoi		$stage,$stage,$in0,8

	vperm		$key,$in1,$in1,$mask	# rotate-n-splat
	vsldoi		$tmp,$zero,$in0,12	# >>32
	vperm		$outtail,$stage,$stage,$outperm	# rotate
	vsel		$stage,$outhead,$outtail,$outmask
	vmr		$outhead,$outtail
	vcipherlast	$key,$key,$rcon
	stvx		$stage,0,$out
	addi		$out,$out,16

	vsldoi		$stage,$in0,$in1,8
	vxor		$in0,$in0,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vperm		$outtail,$stage,$stage,$outperm	# rotate
	vsel		$stage,$outhead,$outtail,$outmask
	vmr		$outhead,$outtail
	vxor		$in0,$in0,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in0,$in0,$tmp
	stvx		$stage,0,$out
	addi		$out,$out,16

	vspltw		$tmp,$in0,3
	vxor		$tmp,$tmp,$in1
	vsldoi		$in1,$zero,$in1,12	# >>32
	vadduwm		$rcon,$rcon,$rcon
	vxor		$in1,$in1,$tmp
	vxor		$in0,$in0,$key
	vxor		$in1,$in1,$key
	vperm		$outtail,$in0,$in0,$outperm	# rotate
	vsel		$stage,$outhead,$outtail,$outmask
	vmr		$outhead,$outtail
	stvx		$stage,0,$out
	addi		$inp,$out,15		# 15 is not typo
	addi		$out,$out,16
	bdnz		Loop192

	li		$rounds,12
	addi		$out,$out,0x20
	b		Ldone

.align	4
L256:
	lvx		$tmp,0,$inp
	li		$cnt,7
	li		$rounds,14
	vperm		$outtail,$in0,$in0,$outperm	# rotate
	vsel		$stage,$outhead,$outtail,$outmask
	vmr		$outhead,$outtail
	stvx		$stage,0,$out
	addi		$out,$out,16
	vperm		$in1,$in1,$tmp,$key	# align [and byte swap in LE]
	mtctr		$cnt

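	# AES-256 alternates two derivations: every other 4-word group
	# takes SubWord(RotWord(w)) xor rcon via rotate-n-splat and
	# vcipherlast, while the groups in between need SubWord only,
	# done below with vspltw (plain splat, no rotate) and vsbox.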
Loop256:
	vperm		$key,$in1,$in1,$mask	# rotate-n-splat
	vsldoi		$tmp,$zero,$in0,12	# >>32
	vperm		$outtail,$in1,$in1,$outperm	# rotate
	vsel		$stage,$outhead,$outtail,$outmask
	vmr		$outhead,$outtail
	vcipherlast	$key,$key,$rcon
	stvx		$stage,0,$out
	addi		$out,$out,16

	vxor		$in0,$in0,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in0,$in0,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in0,$in0,$tmp
	vadduwm		$rcon,$rcon,$rcon
	vxor		$in0,$in0,$key
	vperm		$outtail,$in0,$in0,$outperm	# rotate
	vsel		$stage,$outhead,$outtail,$outmask
	vmr		$outhead,$outtail
	stvx		$stage,0,$out
	addi		$inp,$out,15		# 15 is not typo
	addi		$out,$out,16
	bdz		Ldone

	vspltw		$key,$in0,3		# just splat
	vsldoi		$tmp,$zero,$in1,12	# >>32
	vsbox		$key,$key

	vxor		$in1,$in1,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in1,$in1,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in1,$in1,$tmp

	vxor		$in1,$in1,$key
	b		Loop256

.align	4
Ldone:
	lvx		$in1,0,$inp		# redundant in aligned case
	vsel		$in1,$outhead,$in1,$outmask
	stvx		$in1,0,$inp
	xor		r3,r3,r3		# return value
	mtspr		256,$vrsave
	stw		$rounds,0($out)

	blr
	.long	0
	.byte	0,12,0x14,1,0,0,3,0
	.long	0
.size	.${prefix}_set_encrypt_key,.-.${prefix}_set_encrypt_key

.globl	.${prefix}_set_decrypt_key
.align	5
.${prefix}_set_decrypt_key:
	$STU		$sp,-$FRAME($sp)
	mflr		r10
	$PUSH		r10,$FRAME+$LRSAVE($sp)
	bl		Lset_encrypt_key
	mtlr		r10

	slwi		$cnt,$rounds,4
	subi		$inp,$out,240		# first round key
	srwi		$rounds,$rounds,1
	add		$out,$inp,$cnt		# last round key
	mtctr		$rounds

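	# vncipher is defined to work with the regular encryption round
	# keys, so the decryption schedule is simply the encryption
	# schedule in reverse order: swap 16-byte entries from both
	# ends, half the round count of iterations, meeting in the
	# middle.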
Ldeckey:
	lwz		r0, 0($inp)
	lwz		r6, 4($inp)
	lwz		r7, 8($inp)
	lwz		r8, 12($inp)
	addi		$inp,$inp,16
	lwz		r9, 0($out)
	lwz		r10,4($out)
	lwz		r11,8($out)
	lwz		r12,12($out)
	stw		r0, 0($out)
	stw		r6, 4($out)
	stw		r7, 8($out)
	stw		r8, 12($out)
	subi		$out,$out,16
	stw		r9, -16($inp)
	stw		r10,-12($inp)
	stw		r11,-8($inp)
	stw		r12,-4($inp)
	bdnz		Ldeckey

	xor		r3,r3,r3		# return value
	addi		$sp,$sp,$FRAME
	blr
	.long	0
	.byte	0,12,4,1,0x80,0,3,0
	.long	0
.size	.${prefix}_set_decrypt_key,.-.${prefix}_set_decrypt_key
___
}}}
#########################################################################
{{{	# Single block en- and decrypt procedures			#
sub gen_block () {
my $dir = shift;
my $n   = $dir eq "de" ? "n" : "";
my ($inp,$out,$key,$rounds,$idx)=map("r$_",(3..7));

$code.=<<___;
.globl	.${prefix}_${dir}crypt
.align	5
.${prefix}_${dir}crypt:
	lwz		$rounds,240($key)
	lis		r0,0xfc00
	mfspr		$vrsave,256
	li		$idx,15			# 15 is not typo
	mtspr		256,r0

	lvx		v0,0,$inp
	neg		r11,$out
	lvx		v1,$idx,$inp
	lvsl		v2,0,$inp		# inpperm
	le?vspltisb	v4,0x0f
	?lvsl		v3,0,r11		# outperm
	le?vxor		v2,v2,v4
	li		$idx,16
	vperm		v0,v0,v1,v2		# align [and byte swap in LE]
	lvx		v1,0,$key
	?lvsl		v5,0,$key		# keyperm
	srwi		$rounds,$rounds,1
	lvx		v2,$idx,$key
	addi		$idx,$idx,16
	subi		$rounds,$rounds,1
	?vperm		v1,v1,v2,v5		# align round key

	vxor		v0,v0,v1
	lvx		v1,$idx,$key
	addi		$idx,$idx,16
	mtctr		$rounds

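	# The counter was set to rounds/2-1, so each pass below applies
	# two rounds with alternately re-aligned round keys; the final
	# v${n}cipher/v${n}cipherlast pair follows the loop.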
Loop_${dir}c:
	?vperm		v2,v2,v1,v5
	v${n}cipher	v0,v0,v2
	lvx		v2,$idx,$key
	addi		$idx,$idx,16
	?vperm		v1,v1,v2,v5
	v${n}cipher	v0,v0,v1
	lvx		v1,$idx,$key
	addi		$idx,$idx,16
	bdnz		Loop_${dir}c

	?vperm		v2,v2,v1,v5
	v${n}cipher	v0,v0,v2
	lvx		v2,$idx,$key
	?vperm		v1,v1,v2,v5
	v${n}cipherlast	v0,v0,v1

	vspltisb	v2,-1
	vxor		v1,v1,v1
	li		$idx,15			# 15 is not typo
	?vperm		v2,v1,v2,v3		# outmask
	le?vxor		v3,v3,v4
	lvx		v1,0,$out		# outhead
	vperm		v0,v0,v0,v3		# rotate [and byte swap in LE]
	vsel		v1,v1,v0,v2
	lvx		v4,$idx,$out
	stvx		v1,0,$out
	vsel		v0,v0,v4,v2
	stvx		v0,$idx,$out

	mtspr		256,$vrsave
	blr
	.long	0
	.byte	0,12,0x14,0,0,0,3,0
	.long	0
.size	.${prefix}_${dir}crypt,.-.${prefix}_${dir}crypt
___
}
&gen_block("en");
&gen_block("de");
}}}
#########################################################################
{{{	# CBC en- and decrypt procedures				#
my ($inp,$out,$len,$key,$ivp,$enc,$rounds,$idx)=map("r$_",(3..10));
my ($rndkey0,$rndkey1,$inout,$tmp)=		map("v$_",(0..3));
my ($ivec,$inptail,$inpperm,$outhead,$outperm,$outmask,$keyperm)=
						map("v$_",(4..10));
$code.=<<___;
.globl	.${prefix}_cbc_encrypt
.align	5
.${prefix}_cbc_encrypt:
	${UCMP}i	$len,16
	bltlr-

	cmpwi		$enc,0			# test direction
	lis		r0,0xffe0
	mfspr		$vrsave,256
	mtspr		256,r0

	li		$idx,15
	vxor		$rndkey0,$rndkey0,$rndkey0
	le?vspltisb	$tmp,0x0f

	lvx		$ivec,0,$ivp		# load [unaligned] iv
	lvsl		$inpperm,0,$ivp
	lvx		$inptail,$idx,$ivp
	le?vxor		$inpperm,$inpperm,$tmp
	vperm		$ivec,$ivec,$inptail,$inpperm

	neg		r11,$inp
	?lvsl		$keyperm,0,$key		# prepare for unaligned key
	lwz		$rounds,240($key)

	lvsr		$inpperm,0,r11		# prepare for unaligned load
	lvx		$inptail,0,$inp
	addi		$inp,$inp,15		# 15 is not typo
	le?vxor		$inpperm,$inpperm,$tmp

	?lvsr		$outperm,0,$out		# prepare for unaligned store
	vspltisb	$outmask,-1
	lvx		$outhead,0,$out
	?vperm		$outmask,$rndkey0,$outmask,$outperm
	le?vxor		$outperm,$outperm,$tmp

	srwi		$rounds,$rounds,1
	li		$idx,16
	subi		$rounds,$rounds,1
	beq		Lcbc_dec

Lcbc_enc:
	vmr		$inout,$inptail
	lvx		$inptail,0,$inp
	addi		$inp,$inp,16
	mtctr		$rounds
	subi		$len,$len,16		# len-=16

	lvx		$rndkey0,0,$key
	vperm		$inout,$inout,$inptail,$inpperm
	lvx		$rndkey1,$idx,$key
	addi		$idx,$idx,16
	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
	vxor		$inout,$inout,$rndkey0
	lvx		$rndkey0,$idx,$key
	addi		$idx,$idx,16
	vxor		$inout,$inout,$ivec

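	# CBC encryption is inherently serial: the fresh plaintext block
	# was XORed with the previous ciphertext (kept in $ivec) above,
	# so blocks go through the cipher one at a time, two rounds per
	# loop iteration.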
Loop_cbc_enc:
	?vperm		$rndkey1,$rndkey1,$rndkey0,$keyperm
	vcipher		$inout,$inout,$rndkey1
	lvx		$rndkey1,$idx,$key
	addi		$idx,$idx,16
	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
	vcipher		$inout,$inout,$rndkey0
	lvx		$rndkey0,$idx,$key
	addi		$idx,$idx,16
	bdnz		Loop_cbc_enc

	?vperm		$rndkey1,$rndkey1,$rndkey0,$keyperm
	vcipher		$inout,$inout,$rndkey1
	lvx		$rndkey1,$idx,$key
	li		$idx,16
	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
	vcipherlast	$ivec,$inout,$rndkey0
	${UCMP}i	$len,16

	vperm		$tmp,$ivec,$ivec,$outperm
	vsel		$inout,$outhead,$tmp,$outmask
	vmr		$outhead,$tmp
	stvx		$inout,0,$out
	addi		$out,$out,16
	bge		Lcbc_enc

	b		Lcbc_done

.align	4
Lcbc_dec:
	${UCMP}i	$len,128
	bge		_aesp8_cbc_decrypt8x
	vmr		$tmp,$inptail
	lvx		$inptail,0,$inp
	addi		$inp,$inp,16
	mtctr		$rounds
	subi		$len,$len,16		# len-=16

	lvx		$rndkey0,0,$key
	vperm		$tmp,$tmp,$inptail,$inpperm
	lvx		$rndkey1,$idx,$key
	addi		$idx,$idx,16
	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
	vxor		$inout,$tmp,$rndkey0
	lvx		$rndkey0,$idx,$key
	addi		$idx,$idx,16

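	# Serial CBC decrypt path for inputs shorter than 128 bytes
	# (longer ones took the eight-way branch above). Each block is
	# deciphered and then XORed with the previous ciphertext, which
	# was parked in $ivec via $tmp.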
Loop_cbc_dec:
	?vperm		$rndkey1,$rndkey1,$rndkey0,$keyperm
	vncipher	$inout,$inout,$rndkey1
	lvx		$rndkey1,$idx,$key
	addi		$idx,$idx,16
	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
	vncipher	$inout,$inout,$rndkey0
	lvx		$rndkey0,$idx,$key
	addi		$idx,$idx,16
	bdnz		Loop_cbc_dec

	?vperm		$rndkey1,$rndkey1,$rndkey0,$keyperm
	vncipher	$inout,$inout,$rndkey1
	lvx		$rndkey1,$idx,$key
	li		$idx,16
	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
	vncipherlast	$inout,$inout,$rndkey0
	${UCMP}i	$len,16

	vxor		$inout,$inout,$ivec
	vmr		$ivec,$tmp
	vperm		$tmp,$inout,$inout,$outperm
	vsel		$inout,$outhead,$tmp,$outmask
	vmr		$outhead,$tmp
	stvx		$inout,0,$out
	addi		$out,$out,16
	bge		Lcbc_dec

Lcbc_done:
	addi		$out,$out,-1
	lvx		$inout,0,$out		# redundant in aligned case
	vsel		$inout,$outhead,$inout,$outmask
	stvx		$inout,0,$out

	neg		$enc,$ivp		# write [unaligned] iv
	li		$idx,15			# 15 is not typo
	vxor		$rndkey0,$rndkey0,$rndkey0
	vspltisb	$outmask,-1
	le?vspltisb	$tmp,0x0f
	?lvsl		$outperm,0,$enc
	?vperm		$outmask,$rndkey0,$outmask,$outperm
	le?vxor		$outperm,$outperm,$tmp
	lvx		$outhead,0,$ivp
	vperm		$ivec,$ivec,$ivec,$outperm
	vsel		$inout,$outhead,$ivec,$outmask
	lvx		$inptail,$idx,$ivp
	stvx		$inout,0,$ivp
	vsel		$inout,$ivec,$inptail,$outmask
	stvx		$inout,$idx,$ivp

	mtspr		256,$vrsave
	blr
	.long	0
	.byte	0,12,0x14,0,0,0,6,0
	.long	0
___
#########################################################################
{{	# Optimized CBC decrypt procedure				#
my $key_="r11";
my ($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,8,26..31));
my ($in0, $in1, $in2, $in3, $in4, $in5, $in6, $in7 )=map("v$_",(0..3,10..13));
my ($out0,$out1,$out2,$out3,$out4,$out5,$out6,$out7)=map("v$_",(14..21));
my $rndkey0="v23";	# v24-v25 rotating buffer for first round keys
			# v26-v31 last 6 round keys
my ($tmp,$keyperm)=($in3,$in4);	# aliases with "caller", redundant assignment

$code.=<<___;
.align	5
_aesp8_cbc_decrypt8x:
	$STU		$sp,-`($FRAME+21*16+6*$SIZE_T)`($sp)
	li		r10,`$FRAME+8*16+15`
	li		r11,`$FRAME+8*16+31`
	stvx		v20,r10,$sp		# ABI says so
	addi		r10,r10,32
	stvx		v21,r11,$sp
	addi		r11,r11,32
	stvx		v22,r10,$sp
	addi		r10,r10,32
	stvx		v23,r11,$sp
	addi		r11,r11,32
	stvx		v24,r10,$sp
	addi		r10,r10,32
	stvx		v25,r11,$sp
	addi		r11,r11,32
	stvx		v26,r10,$sp
	addi		r10,r10,32
	stvx		v27,r11,$sp
	addi		r11,r11,32
	stvx		v28,r10,$sp
	addi		r10,r10,32
	stvx		v29,r11,$sp
	addi		r11,r11,32
	stvx		v30,r10,$sp
	stvx		v31,r11,$sp
	li		r0,-1
	stw		$vrsave,`$FRAME+21*16-4`($sp)	# save vrsave
	li		$x10,0x10
	$PUSH		r26,`$FRAME+21*16+0*$SIZE_T`($sp)
	li		$x20,0x20
	$PUSH		r27,`$FRAME+21*16+1*$SIZE_T`($sp)
	li		$x30,0x30
	$PUSH		r28,`$FRAME+21*16+2*$SIZE_T`($sp)
	li		$x40,0x40
	$PUSH		r29,`$FRAME+21*16+3*$SIZE_T`($sp)
	li		$x50,0x50
	$PUSH		r30,`$FRAME+21*16+4*$SIZE_T`($sp)
	li		$x60,0x60
	$PUSH		r31,`$FRAME+21*16+5*$SIZE_T`($sp)
	li		$x70,0x70
	mtspr		256,r0

	subi		$rounds,$rounds,3	# -4 in total
	subi		$len,$len,128		# bias

	lvx		$rndkey0,$x00,$key	# load key schedule
	lvx		v30,$x10,$key
	addi		$key,$key,0x20
	lvx		v31,$x00,$key
	?vperm		$rndkey0,$rndkey0,v30,$keyperm
	addi		$key_,$sp,$FRAME+15
	mtctr		$rounds

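	# Round keys are aligned once here rather than in the loop: each
	# pair is permuted into register order and spilled to the
	# aligned stack area at $key_, so the decrypt loop below can
	# stream them back with plain lvx.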
Load_cbc_dec_key:
	?vperm		v24,v30,v31,$keyperm
	lvx		v30,$x10,$key
	addi		$key,$key,0x20
	stvx		v24,$x00,$key_		# off-load round[1]
	?vperm		v25,v31,v30,$keyperm
	lvx		v31,$x00,$key
	stvx		v25,$x10,$key_		# off-load round[2]
	addi		$key_,$key_,0x20
	bdnz		Load_cbc_dec_key

	lvx		v26,$x10,$key
	?vperm		v24,v30,v31,$keyperm
	lvx		v27,$x20,$key
	stvx		v24,$x00,$key_		# off-load round[3]
	?vperm		v25,v31,v26,$keyperm
	lvx		v28,$x30,$key
	stvx		v25,$x10,$key_		# off-load round[4]
	addi		$key_,$sp,$FRAME+15	# rewind $key_
	?vperm		v26,v26,v27,$keyperm
	lvx		v29,$x40,$key
	?vperm		v27,v27,v28,$keyperm
	lvx		v30,$x50,$key
	?vperm		v28,v28,v29,$keyperm
	lvx		v31,$x60,$key
	?vperm		v29,v29,v30,$keyperm
	lvx		$out0,$x70,$key		# borrow $out0
	?vperm		v30,v30,v31,$keyperm
	lvx		v24,$x00,$key_		# pre-load round[1]
	?vperm		v31,v31,$out0,$keyperm
	lvx		v25,$x10,$key_		# pre-load round[2]

	#lvx		$inptail,0,$inp		# "caller" already did this
	#addi		$inp,$inp,15		# 15 is not typo
	subi		$inp,$inp,15		# undo "caller"

	le?li		$idx,8
	lvx_u		$in0,$x00,$inp		# load first 8 "words"
	le?lvsl		$inpperm,0,$idx
	le?vspltisb	$tmp,0x0f
	lvx_u		$in1,$x10,$inp
	le?vxor		$inpperm,$inpperm,$tmp	# transform for lvx_u/stvx_u
	lvx_u		$in2,$x20,$inp
	le?vperm	$in0,$in0,$in0,$inpperm
	lvx_u		$in3,$x30,$inp
	le?vperm	$in1,$in1,$in1,$inpperm
	lvx_u		$in4,$x40,$inp
	le?vperm	$in2,$in2,$in2,$inpperm
	vxor		$out0,$in0,$rndkey0
	lvx_u		$in5,$x50,$inp
	le?vperm	$in3,$in3,$in3,$inpperm
	vxor		$out1,$in1,$rndkey0
	lvx_u		$in6,$x60,$inp
	le?vperm	$in4,$in4,$in4,$inpperm
	vxor		$out2,$in2,$rndkey0
	lvx_u		$in7,$x70,$inp
	addi		$inp,$inp,0x80
	le?vperm	$in5,$in5,$in5,$inpperm
	vxor		$out3,$in3,$rndkey0
	le?vperm	$in6,$in6,$in6,$inpperm
	vxor		$out4,$in4,$rndkey0
	le?vperm	$in7,$in7,$in7,$inpperm
	vxor		$out5,$in5,$rndkey0
	vxor		$out6,$in6,$rndkey0
	vxor		$out7,$in7,$rndkey0

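	# Eight blocks travel through the rounds together so that the
	# long-latency vncipher results are never consumed back-to-back;
	# each loop iteration applies two rounds (v24/v25) while the
	# next two round keys stream in from the stack copy.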
	mtctr		$rounds
	b		Loop_cbc_dec8x
.align	5
Loop_cbc_dec8x:
	vncipher	$out0,$out0,v24
	vncipher	$out1,$out1,v24
	vncipher	$out2,$out2,v24
	vncipher	$out3,$out3,v24
	vncipher	$out4,$out4,v24
	vncipher	$out5,$out5,v24
	vncipher	$out6,$out6,v24
	vncipher	$out7,$out7,v24
	lvx		v24,$x20,$key_		# round[3]
	addi		$key_,$key_,0x20

	vncipher	$out0,$out0,v25
	vncipher	$out1,$out1,v25
	vncipher	$out2,$out2,v25
	vncipher	$out3,$out3,v25
	vncipher	$out4,$out4,v25
	vncipher	$out5,$out5,v25
	vncipher	$out6,$out6,v25
	vncipher	$out7,$out7,v25
	lvx		v25,$x10,$key_		# round[4]
	bdnz		Loop_cbc_dec8x

	subic		$len,$len,128		# $len-=128
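	# Tail accounting without a branch: subic records a borrow iff
	# fewer than 128 bytes remain, subfe below expands that borrow
	# into an all-ones mask (zero otherwise), and adding the masked
	# (negative) length to the input pointer backs it up just enough
	# that the final eight loads end exactly at the end of the input.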
	vncipher	$out0,$out0,v24
	vncipher	$out1,$out1,v24
	vncipher	$out2,$out2,v24
	vncipher	$out3,$out3,v24
	vncipher	$out4,$out4,v24
	vncipher	$out5,$out5,v24
	vncipher	$out6,$out6,v24
	vncipher	$out7,$out7,v24

	subfe.		r0,r0,r0		# borrow?-1:0
	vncipher	$out0,$out0,v25
	vncipher	$out1,$out1,v25
	vncipher	$out2,$out2,v25
	vncipher	$out3,$out3,v25
	vncipher	$out4,$out4,v25
	vncipher	$out5,$out5,v25
	vncipher	$out6,$out6,v25
	vncipher	$out7,$out7,v25

	and		r0,r0,$len
	vncipher	$out0,$out0,v26
	vncipher	$out1,$out1,v26
	vncipher	$out2,$out2,v26
	vncipher	$out3,$out3,v26
	vncipher	$out4,$out4,v26
	vncipher	$out5,$out5,v26
	vncipher	$out6,$out6,v26
	vncipher	$out7,$out7,v26

	add		$inp,$inp,r0		# $inp is adjusted in such
						# way that at exit from the
						# loop inX-in7 are loaded
						# with last "words"
	vncipher	$out0,$out0,v27
	vncipher	$out1,$out1,v27
	vncipher	$out2,$out2,v27
	vncipher	$out3,$out3,v27
	vncipher	$out4,$out4,v27
	vncipher	$out5,$out5,v27
	vncipher	$out6,$out6,v27
	vncipher	$out7,$out7,v27

	addi		$key_,$sp,$FRAME+15	# rewind $key_
	vncipher	$out0,$out0,v28
	vncipher	$out1,$out1,v28
	vncipher	$out2,$out2,v28
	vncipher	$out3,$out3,v28
	vncipher	$out4,$out4,v28
	vncipher	$out5,$out5,v28
	vncipher	$out6,$out6,v28
	vncipher	$out7,$out7,v28
	lvx		v24,$x00,$key_		# re-pre-load round[1]

	vncipher	$out0,$out0,v29
	vncipher	$out1,$out1,v29
	vncipher	$out2,$out2,v29
	vncipher	$out3,$out3,v29
	vncipher	$out4,$out4,v29
	vncipher	$out5,$out5,v29
	vncipher	$out6,$out6,v29
	vncipher	$out7,$out7,v29
	lvx		v25,$x10,$key_		# re-pre-load round[2]

	vncipher	$out0,$out0,v30
	vxor		$ivec,$ivec,v31		# xor with last round key
	vncipher	$out1,$out1,v30
	vxor		$in0,$in0,v31
	vncipher	$out2,$out2,v30
	vxor		$in1,$in1,v31
	vncipher	$out3,$out3,v30
	vxor		$in2,$in2,v31
	vncipher	$out4,$out4,v30
	vxor		$in3,$in3,v31
	vncipher	$out5,$out5,v30
	vxor		$in4,$in4,v31
	vncipher	$out6,$out6,v30
	vxor		$in5,$in5,v31
	vncipher	$out7,$out7,v30
	vxor		$in6,$in6,v31

	vncipherlast	$out0,$out0,$ivec
	vncipherlast	$out1,$out1,$in0
	lvx_u		$in0,$x00,$inp		# load next input block
	vncipherlast	$out2,$out2,$in1
	lvx_u		$in1,$x10,$inp
	vncipherlast	$out3,$out3,$in2
	le?vperm	$in0,$in0,$in0,$inpperm
	lvx_u		$in2,$x20,$inp
	vncipherlast	$out4,$out4,$in3
	le?vperm	$in1,$in1,$in1,$inpperm
	lvx_u		$in3,$x30,$inp
	vncipherlast	$out5,$out5,$in4
	le?vperm	$in2,$in2,$in2,$inpperm
	lvx_u		$in4,$x40,$inp
	vncipherlast	$out6,$out6,$in5
	le?vperm	$in3,$in3,$in3,$inpperm
	lvx_u		$in5,$x50,$inp
	vncipherlast	$out7,$out7,$in6
	le?vperm	$in4,$in4,$in4,$inpperm
	lvx_u		$in6,$x60,$inp
	vmr		$ivec,$in7
	le?vperm	$in5,$in5,$in5,$inpperm
	lvx_u		$in7,$x70,$inp
	addi		$inp,$inp,0x80

	le?vperm	$out0,$out0,$out0,$inpperm
	le?vperm	$out1,$out1,$out1,$inpperm
	stvx_u		$out0,$x00,$out
	le?vperm	$in6,$in6,$in6,$inpperm
	vxor		$out0,$in0,$rndkey0
	le?vperm	$out2,$out2,$out2,$inpperm
	stvx_u		$out1,$x10,$out
	le?vperm	$in7,$in7,$in7,$inpperm
	vxor		$out1,$in1,$rndkey0
	le?vperm	$out3,$out3,$out3,$inpperm
	stvx_u		$out2,$x20,$out
	vxor		$out2,$in2,$rndkey0
	le?vperm	$out4,$out4,$out4,$inpperm
	stvx_u		$out3,$x30,$out
	vxor		$out3,$in3,$rndkey0
	le?vperm	$out5,$out5,$out5,$inpperm
	stvx_u		$out4,$x40,$out
	vxor		$out4,$in4,$rndkey0
	le?vperm	$out6,$out6,$out6,$inpperm
	stvx_u		$out5,$x50,$out
	vxor		$out5,$in5,$rndkey0
	le?vperm	$out7,$out7,$out7,$inpperm
	stvx_u		$out6,$x60,$out
	vxor		$out6,$in6,$rndkey0
	stvx_u		$out7,$x70,$out
	addi		$out,$out,0x80
	vxor		$out7,$in7,$rndkey0

	mtctr		$rounds
	beq		Loop_cbc_dec8x		# did $len-=128 borrow?

	addic.		$len,$len,128
	beq		Lcbc_dec8x_done
	nop
	nop

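	# Between 1 and 7 blocks remain; run the same two-rounds-per-
	# iteration schedule over lanes $out1-$out7 only, then branch on
	# the exact residue below to finish and store the live lanes.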
Loop_cbc_dec8x_tail:				# up to 7 "words" tail...
	vncipher	$out1,$out1,v24
	vncipher	$out2,$out2,v24
	vncipher	$out3,$out3,v24
	vncipher	$out4,$out4,v24
	vncipher	$out5,$out5,v24
	vncipher	$out6,$out6,v24
	vncipher	$out7,$out7,v24
	lvx		v24,$x20,$key_		# round[3]
	addi		$key_,$key_,0x20

	vncipher	$out1,$out1,v25
	vncipher	$out2,$out2,v25
	vncipher	$out3,$out3,v25
	vncipher	$out4,$out4,v25
	vncipher	$out5,$out5,v25
	vncipher	$out6,$out6,v25
	vncipher	$out7,$out7,v25
	lvx		v25,$x10,$key_		# round[4]
	bdnz		Loop_cbc_dec8x_tail

	vncipher	$out1,$out1,v24
	vncipher	$out2,$out2,v24
	vncipher	$out3,$out3,v24
	vncipher	$out4,$out4,v24
	vncipher	$out5,$out5,v24
	vncipher	$out6,$out6,v24
	vncipher	$out7,$out7,v24

	vncipher	$out1,$out1,v25
	vncipher	$out2,$out2,v25
	vncipher	$out3,$out3,v25
	vncipher	$out4,$out4,v25
	vncipher	$out5,$out5,v25
	vncipher	$out6,$out6,v25
	vncipher	$out7,$out7,v25

	vncipher	$out1,$out1,v26
	vncipher	$out2,$out2,v26
	vncipher	$out3,$out3,v26
	vncipher	$out4,$out4,v26
	vncipher	$out5,$out5,v26
	vncipher	$out6,$out6,v26
	vncipher	$out7,$out7,v26

	vncipher	$out1,$out1,v27
	vncipher	$out2,$out2,v27
	vncipher	$out3,$out3,v27
	vncipher	$out4,$out4,v27
	vncipher	$out5,$out5,v27
	vncipher	$out6,$out6,v27
	vncipher	$out7,$out7,v27

	vncipher	$out1,$out1,v28
	vncipher	$out2,$out2,v28
	vncipher	$out3,$out3,v28
	vncipher	$out4,$out4,v28
	vncipher	$out5,$out5,v28
	vncipher	$out6,$out6,v28
	vncipher	$out7,$out7,v28

	vncipher	$out1,$out1,v29
	vncipher	$out2,$out2,v29
	vncipher	$out3,$out3,v29
	vncipher	$out4,$out4,v29
	vncipher	$out5,$out5,v29
	vncipher	$out6,$out6,v29
	vncipher	$out7,$out7,v29

	vncipher	$out1,$out1,v30
	vxor		$ivec,$ivec,v31		# last round key
	vncipher	$out2,$out2,v30
	vxor		$in1,$in1,v31
	vncipher	$out3,$out3,v30
	vxor		$in2,$in2,v31
	vncipher	$out4,$out4,v30
	vxor		$in3,$in3,v31
	vncipher	$out5,$out5,v30
	vxor		$in4,$in4,v31
	vncipher	$out6,$out6,v30
	vxor		$in5,$in5,v31
	vncipher	$out7,$out7,v30
	vxor		$in6,$in6,v31

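	# Dispatch on the residue: the length register now holds the
	# tail byte count, so 16 means one block left, 32 two, and so on
	# up to 112 for seven.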
	cmplwi		$len,32			# switch($len)
	blt		Lcbc_dec8x_one
	nop
	beq		Lcbc_dec8x_two
	cmplwi		$len,64
	blt		Lcbc_dec8x_three
	nop
	beq		Lcbc_dec8x_four
	cmplwi		$len,96
	blt		Lcbc_dec8x_five
	nop
	beq		Lcbc_dec8x_six

Lcbc_dec8x_seven:
	vncipherlast	$out1,$out1,$ivec
	vncipherlast	$out2,$out2,$in1
	vncipherlast	$out3,$out3,$in2
	vncipherlast	$out4,$out4,$in3
	vncipherlast	$out5,$out5,$in4
	vncipherlast	$out6,$out6,$in5
	vncipherlast	$out7,$out7,$in6
	vmr		$ivec,$in7

	le?vperm	$out1,$out1,$out1,$inpperm
	le?vperm	$out2,$out2,$out2,$inpperm
	stvx_u		$out1,$x00,$out
	le?vperm	$out3,$out3,$out3,$inpperm
	stvx_u		$out2,$x10,$out
	le?vperm	$out4,$out4,$out4,$inpperm
	stvx_u		$out3,$x20,$out
	le?vperm	$out5,$out5,$out5,$inpperm
	stvx_u		$out4,$x30,$out
	le?vperm	$out6,$out6,$out6,$inpperm
	stvx_u		$out5,$x40,$out
	le?vperm	$out7,$out7,$out7,$inpperm
	stvx_u		$out6,$x50,$out
	stvx_u		$out7,$x60,$out
	addi		$out,$out,0x70
	b		Lcbc_dec8x_done

.align	5
Lcbc_dec8x_six:
	vncipherlast	$out2,$out2,$ivec
	vncipherlast	$out3,$out3,$in2
	vncipherlast	$out4,$out4,$in3
	vncipherlast	$out5,$out5,$in4
	vncipherlast	$out6,$out6,$in5
	vncipherlast	$out7,$out7,$in6
	vmr		$ivec,$in7

	le?vperm	$out2,$out2,$out2,$inpperm
	le?vperm	$out3,$out3,$out3,$inpperm
	stvx_u		$out2,$x00,$out
	le?vperm	$out4,$out4,$out4,$inpperm
	stvx_u		$out3,$x10,$out
	le?vperm	$out5,$out5,$out5,$inpperm
	stvx_u		$out4,$x20,$out
	le?vperm	$out6,$out6,$out6,$inpperm
	stvx_u		$out5,$x30,$out
	le?vperm	$out7,$out7,$out7,$inpperm
	stvx_u		$out6,$x40,$out
	stvx_u		$out7,$x50,$out
	addi		$out,$out,0x60
	b		Lcbc_dec8x_done

.align	5
Lcbc_dec8x_five:
	vncipherlast	$out3,$out3,$ivec
	vncipherlast	$out4,$out4,$in3
	vncipherlast	$out5,$out5,$in4
	vncipherlast	$out6,$out6,$in5
	vncipherlast	$out7,$out7,$in6
	vmr		$ivec,$in7

	le?vperm	$out3,$out3,$out3,$inpperm
	le?vperm	$out4,$out4,$out4,$inpperm
	stvx_u		$out3,$x00,$out
	le?vperm	$out5,$out5,$out5,$inpperm
	stvx_u		$out4,$x10,$out
	le?vperm	$out6,$out6,$out6,$inpperm
	stvx_u		$out5,$x20,$out
	le?vperm	$out7,$out7,$out7,$inpperm
	stvx_u		$out6,$x30,$out
	stvx_u		$out7,$x40,$out
	addi		$out,$out,0x50
	b		Lcbc_dec8x_done

.align	5
Lcbc_dec8x_four:
	vncipherlast	$out4,$out4,$ivec
	vncipherlast	$out5,$out5,$in4
	vncipherlast	$out6,$out6,$in5
	vncipherlast	$out7,$out7,$in6
	vmr		$ivec,$in7

	le?vperm	$out4,$out4,$out4,$inpperm
	le?vperm	$out5,$out5,$out5,$inpperm
	stvx_u		$out4,$x00,$out
	le?vperm	$out6,$out6,$out6,$inpperm
	stvx_u		$out5,$x10,$out
	le?vperm	$out7,$out7,$out7,$inpperm
	stvx_u		$out6,$x20,$out
	stvx_u		$out7,$x30,$out
	addi		$out,$out,0x40
	b		Lcbc_dec8x_done

.align	5
Lcbc_dec8x_three:
	vncipherlast	$out5,$out5,$ivec
	vncipherlast	$out6,$out6,$in5
	vncipherlast	$out7,$out7,$in6
	vmr		$ivec,$in7

	le?vperm	$out5,$out5,$out5,$inpperm
	le?vperm	$out6,$out6,$out6,$inpperm
	stvx_u		$out5,$x00,$out
	le?vperm	$out7,$out7,$out7,$inpperm
	stvx_u		$out6,$x10,$out
	stvx_u		$out7,$x20,$out
	addi		$out,$out,0x30
	b		Lcbc_dec8x_done

.align	5
Lcbc_dec8x_two:
	vncipherlast	$out6,$out6,$ivec
	vncipherlast	$out7,$out7,$in6
	vmr		$ivec,$in7

	le?vperm	$out6,$out6,$out6,$inpperm
	le?vperm	$out7,$out7,$out7,$inpperm
	stvx_u		$out6,$x00,$out
	stvx_u		$out7,$x10,$out
	addi		$out,$out,0x20
	b		Lcbc_dec8x_done

.align	5
Lcbc_dec8x_one:
	vncipherlast	$out7,$out7,$ivec
	vmr		$ivec,$in7

	le?vperm	$out7,$out7,$out7,$inpperm
	stvx_u		$out7,0,$out
	addi		$out,$out,0x10

Lcbc_dec8x_done:
	le?vperm	$ivec,$ivec,$ivec,$inpperm
	stvx_u		$ivec,0,$ivp		# write [unaligned] iv

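	# Scrub the spill area before returning: overwrite the stack
	# copies of the round keys with $inpperm, which holds no key
	# material.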
	li		r10,`$FRAME+15`
	li		r11,`$FRAME+31`
	stvx		$inpperm,r10,$sp	# wipe copies of round keys
	addi		r10,r10,32
	stvx		$inpperm,r11,$sp
	addi		r11,r11,32
	stvx		$inpperm,r10,$sp
	addi		r10,r10,32
	stvx		$inpperm,r11,$sp
	addi		r11,r11,32
	stvx		$inpperm,r10,$sp
	addi		r10,r10,32
	stvx		$inpperm,r11,$sp
	addi		r11,r11,32
	stvx		$inpperm,r10,$sp
	addi		r10,r10,32
	stvx		$inpperm,r11,$sp
	addi		r11,r11,32

	mtspr		256,$vrsave
	lvx		v20,r10,$sp		# ABI says so
	addi		r10,r10,32
	lvx		v21,r11,$sp
	addi		r11,r11,32
	lvx		v22,r10,$sp
	addi		r10,r10,32
	lvx		v23,r11,$sp
	addi		r11,r11,32
	lvx		v24,r10,$sp
	addi		r10,r10,32
	lvx		v25,r11,$sp
	addi		r11,r11,32
	lvx		v26,r10,$sp
	addi		r10,r10,32
	lvx		v27,r11,$sp
	addi		r11,r11,32
	lvx		v28,r10,$sp
	addi		r10,r10,32
	lvx		v29,r11,$sp
	addi		r11,r11,32
	lvx		v30,r10,$sp
	lvx		v31,r11,$sp
	$POP		r26,`$FRAME+21*16+0*$SIZE_T`($sp)
	$POP		r27,`$FRAME+21*16+1*$SIZE_T`($sp)
	$POP		r28,`$FRAME+21*16+2*$SIZE_T`($sp)
	$POP		r29,`$FRAME+21*16+3*$SIZE_T`($sp)
	$POP		r30,`$FRAME+21*16+4*$SIZE_T`($sp)
	$POP		r31,`$FRAME+21*16+5*$SIZE_T`($sp)
	addi		$sp,$sp,`$FRAME+21*16+6*$SIZE_T`
	blr
	.long	0
	.byte	0,12,0x14,0,0x80,6,6,0
	.long	0
.size	.${prefix}_cbc_encrypt,.-.${prefix}_cbc_encrypt
___
}}	}}}
1213
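# Post-processing pass over the generated code: while $consts is set,
# constants flagged ?rev/?asis are re-emitted as raw .byte data in the
# byte order the target flavour expects; after Lconsts, instructions
# carrying a '?' prefix are rewritten for the target endianness, e.g.
# on little-endian "?vperm vD,vA,vB,vC" has vA and vB swapped, and
# ?lvsl/?lvsr trade places.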
my $consts=1;
foreach(split("\n",$code)) {
	s/\`([^\`]*)\`/eval($1)/geo;

	# constants table endian-specific conversion
	if ($consts && m/\.(long|byte)\s+(.+)\s+(\?[a-z]*)$/o) {
	    my $conv=$3;
	    my @bytes=();

	    # convert to endian-agnostic format
	    if ($1 eq "long") {
	      foreach (split(/,\s*/,$2)) {
		my $l = /^0/?oct:int;
		push @bytes,($l>>24)&0xff,($l>>16)&0xff,($l>>8)&0xff,$l&0xff;
	      }
	    } else {
		@bytes = map(/^0/?oct:int,split(/,\s*/,$2));
	    }

	    # little-endian conversion
	    if ($flavour =~ /le$/o) {
		SWITCH: for($conv)  {
		    /\?inv/ && do { @bytes=map($_^0xf,@bytes); last; };
		    /\?rev/ && do { @bytes=reverse(@bytes);    last; };
		}
	    }

	    #emit
	    print ".byte\t",join(',',map (sprintf("0x%02x",$_),@bytes)),"\n";
	    next;
	}
	$consts=0 if (m/Lconsts:/o);	# end of table

	# instructions prefixed with '?' are endian-specific and need
	# to be adjusted accordingly...
	if ($flavour =~ /le$/o) {	# little-endian
	    s/le\?//o		or
	    s/be\?/#be#/o	or
	    s/\?lvsr/lvsl/o	or
	    s/\?lvsl/lvsr/o	or
	    s/\?(vperm\s+v[0-9]+,\s*)(v[0-9]+,\s*)(v[0-9]+,\s*)(v[0-9]+)/$1$3$2$4/o or
	    s/\?(vsldoi\s+v[0-9]+,\s*)(v[0-9]+,)\s*(v[0-9]+,\s*)([0-9]+)/$1$3$2 16-$4/o or
	    s/\?(vspltw\s+v[0-9]+,\s*)(v[0-9]+,)\s*([0-9])/$1$2 3-$3/o;
	} else {			# big-endian
	    s/le\?/#le#/o	or
	    s/be\?//o		or
	    s/\?([a-z]+)/$1/o;
	}

	print $_,"\n";
}

close STDOUT;