#! /usr/bin/env perl
# Copyright 2014-2016 The OpenSSL Project Authors. All Rights Reserved.
#
# Licensed under the OpenSSL license (the "License"). You may not use
# this file except in compliance with the License. You can obtain a copy
# in the file LICENSE in the source distribution or at
# https://www.openssl.org/source/license.html

#
# ====================================================================
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
# project. The module is, however, dual licensed under OpenSSL and
# CRYPTOGAMS licenses depending on where you obtain it. For further
# details see http://www.openssl.org/~appro/cryptogams/.
# ====================================================================
#
# This module implements support for AES instructions as per PowerISA
# specification version 2.07, first implemented by the POWER8 processor.
# The module is endian-agnostic in the sense that it supports both big-
# and little-endian cases. Data alignment in parallelizable modes is
# handled with VSX loads and stores, which implies that the MSR.VSX flag
# must be set. It should also be noted that the ISA specification doesn't
# prohibit alignment exceptions for these instructions on page boundaries.
# Initially alignment was handled in a pure AltiVec/VMX way [data is
# aligned programmatically, which in turn guarantees exception-free
# execution], but that turned out to hamper performance when vcipher
# instructions are interleaved. It's reckoned that eventual misalignment
# penalties at page boundaries are on average lower than the additional
# overhead of the pure AltiVec approach.
$flavour = shift;

if ($flavour =~ /64/) {
	$SIZE_T	=8;
	$LRSAVE	=2*$SIZE_T;
	$STU	="stdu";
	$POP	="ld";
	$PUSH	="std";
	$UCMP	="cmpld";
	$SHL	="sldi";
} elsif ($flavour =~ /32/) {
	$SIZE_T	=4;
	$LRSAVE	=$SIZE_T;
	$STU	="stwu";
	$POP	="lwz";
	$PUSH	="stw";
	$UCMP	="cmplw";
	$SHL	="slwi";
} else { die "nonsense $flavour"; }

$LITTLE_ENDIAN = ($flavour=~/le$/) ? $SIZE_T : 0;

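# Usage sketch (an assumption, based on the argument handling above and
# on how other CRYPTOGAMS modules are driven): the first argument picks
# the ABI/endianness "flavour" and the second names the output file,
# which is piped through ppc-xlate.pl, e.g.
#
#	perl aesp8-ppc.pl linux64le aesp8-ppc.s
#	perl aesp8-ppc.pl linux32   aesp8-ppc.s
#
# Flavour strings containing "64" select the 64-bit ABI macros above,
# and a trailing "le" enables the little-endian code paths.
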
$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or
( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or
die "can't locate ppc-xlate.pl";

open STDOUT,"| $^X $xlate $flavour ".shift or die "can't call $xlate: $!";

$FRAME=8*$SIZE_T;
$prefix="aes_p8";

$sp="r1";
$vrsave="r12";

#########################################################################
{{{	# Key setup procedures						#
my ($inp,$bits,$out,$ptr,$cnt,$rounds)=map("r$_",(3..8));
my ($zero,$in0,$in1,$key,$rcon,$mask,$tmp)=map("v$_",(0..6));
my ($stage,$outperm,$outmask,$outhead,$outtail)=map("v$_",(7..11));

$code.=<<___;
.machine	"any"

.text

.align	7
rcon:
.long	0x01000000, 0x01000000, 0x01000000, 0x01000000	?rev
.long	0x1b000000, 0x1b000000, 0x1b000000, 0x1b000000	?rev
.long	0x0d0e0f0c, 0x0d0e0f0c, 0x0d0e0f0c, 0x0d0e0f0c	?rev
.long	0,0,0,0						?asis
Lconsts:
	mflr	r0
	bcl	20,31,\$+4
	mflr	$ptr	# "distance between . and rcon" is 0x48
	addi	$ptr,$ptr,-0x48
	mtlr	r0
	blr
	.long	0
	.byte	0,12,0x14,0,0,0,0,0
.asciz	"AES for PowerISA 2.07, CRYPTOGAMS by <appro\@openssl.org>"

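# Sketch of the C-level contract implemented below (an assumption based
# on the argument checks and return values in this file; it mirrors
# AES_set_encrypt_key):
#
#	int aes_p8_set_encrypt_key(const unsigned char *userKey,
#	                           const int bits, AES_KEY *key);
#
# It returns 0 on success, -1 on NULL arguments and -2 on an
# unsupported key size (anything other than 128/192/256 bits).
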
.globl	.${prefix}_set_encrypt_key
.align	5
.${prefix}_set_encrypt_key:
Lset_encrypt_key:
	mflr		r11
	$PUSH		r11,$LRSAVE($sp)

	li		$ptr,-1
	${UCMP}i	$inp,0
	beq-		Lenc_key_abort		# if ($inp==0) return -1;
	${UCMP}i	$out,0
	beq-		Lenc_key_abort		# if ($out==0) return -1;
	li		$ptr,-2
	cmpwi		$bits,128
	blt-		Lenc_key_abort
	cmpwi		$bits,256
	bgt-		Lenc_key_abort
	andi.		r0,$bits,0x3f
	bne-		Lenc_key_abort

	lis		r0,0xfff0
	mfspr		$vrsave,256
	mtspr		256,r0

	bl		Lconsts
	mtlr		r11

	neg		r9,$inp
	lvx		$in0,0,$inp
	addi		$inp,$inp,15		# 15 is not typo
	lvsr		$key,0,r9		# borrow $key
	li		r8,0x20
	cmpwi		$bits,192
	lvx		$in1,0,$inp
	le?vspltisb	$mask,0x0f		# borrow $mask
	lvx		$rcon,0,$ptr
	le?vxor		$key,$key,$mask		# adjust for byte swap
	lvx		$mask,r8,$ptr
	addi		$ptr,$ptr,0x10
	vperm		$in0,$in0,$in1,$key	# align [and byte swap in LE]
	li		$cnt,8
	vxor		$zero,$zero,$zero
	mtctr		$cnt

	?lvsr		$outperm,0,$out
	vspltisb	$outmask,-1
	lvx		$outhead,0,$out
	?vperm		$outmask,$zero,$outmask,$outperm

	blt		Loop128
	addi		$inp,$inp,8
	beq		L192
	addi		$inp,$inp,8
	b		L256

.align	4
Loop128:
	vperm		$key,$in0,$in0,$mask	# rotate-n-splat
	vsldoi		$tmp,$zero,$in0,12	# >>32
	vperm		$outtail,$in0,$in0,$outperm	# rotate
	vsel		$stage,$outhead,$outtail,$outmask
	vmr		$outhead,$outtail
	vcipherlast	$key,$key,$rcon
	stvx		$stage,0,$out
	addi		$out,$out,16

	vxor		$in0,$in0,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in0,$in0,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in0,$in0,$tmp
	vadduwm		$rcon,$rcon,$rcon
	vxor		$in0,$in0,$key
	bdnz		Loop128

	lvx		$rcon,0,$ptr		# last two round keys

	vperm		$key,$in0,$in0,$mask	# rotate-n-splat
	vsldoi		$tmp,$zero,$in0,12	# >>32
	vperm		$outtail,$in0,$in0,$outperm	# rotate
	vsel		$stage,$outhead,$outtail,$outmask
	vmr		$outhead,$outtail
	vcipherlast	$key,$key,$rcon
	stvx		$stage,0,$out
	addi		$out,$out,16

	vxor		$in0,$in0,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in0,$in0,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in0,$in0,$tmp
	vadduwm		$rcon,$rcon,$rcon
	vxor		$in0,$in0,$key

	vperm		$key,$in0,$in0,$mask	# rotate-n-splat
	vsldoi		$tmp,$zero,$in0,12	# >>32
	vperm		$outtail,$in0,$in0,$outperm	# rotate
	vsel		$stage,$outhead,$outtail,$outmask
	vmr		$outhead,$outtail
	vcipherlast	$key,$key,$rcon
	stvx		$stage,0,$out
	addi		$out,$out,16

	vxor		$in0,$in0,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in0,$in0,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in0,$in0,$tmp
	vxor		$in0,$in0,$key
	vperm		$outtail,$in0,$in0,$outperm	# rotate
	vsel		$stage,$outhead,$outtail,$outmask
	vmr		$outhead,$outtail
	stvx		$stage,0,$out

	addi		$inp,$out,15		# 15 is not typo
	addi		$out,$out,0x50

	li		$rounds,10
	b		Ldone

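# A note on the Loop128 trick above (descriptive comment, not from the
# original): the "rotate-n-splat" vperm broadcasts the last schedule
# word, pre-rotated so that the ShiftRows step inside vcipherlast is
# neutralized; vcipherlast then supplies SubBytes and xors in the round
# constant held in rcon, i.e. one instruction computes
# SubWord(RotWord(w)) ^ rcon in every lane. The vsldoi/vxor ladder that
# follows implements the w[i] ^= w[i-1] recurrence 32 bits at a time.
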
.align	4
L192:
	lvx		$tmp,0,$inp
	li		$cnt,4
	vperm		$outtail,$in0,$in0,$outperm	# rotate
	vsel		$stage,$outhead,$outtail,$outmask
	vmr		$outhead,$outtail
	stvx		$stage,0,$out
	addi		$out,$out,16
	vperm		$in1,$in1,$tmp,$key	# align [and byte swap in LE]
	vspltisb	$key,8			# borrow $key
	mtctr		$cnt
	vsububm		$mask,$mask,$key	# adjust the mask

Loop192:
	vperm		$key,$in1,$in1,$mask	# rotate-n-splat
	vsldoi		$tmp,$zero,$in0,12	# >>32
	vcipherlast	$key,$key,$rcon

	vxor		$in0,$in0,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in0,$in0,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in0,$in0,$tmp

	vsldoi		$stage,$zero,$in1,8
	vspltw		$tmp,$in0,3
	vxor		$tmp,$tmp,$in1
	vsldoi		$in1,$zero,$in1,12	# >>32
	vadduwm		$rcon,$rcon,$rcon
	vxor		$in1,$in1,$tmp
	vxor		$in0,$in0,$key
	vxor		$in1,$in1,$key
	vsldoi		$stage,$stage,$in0,8

	vperm		$key,$in1,$in1,$mask	# rotate-n-splat
	vsldoi		$tmp,$zero,$in0,12	# >>32
	vperm		$outtail,$stage,$stage,$outperm	# rotate
	vsel		$stage,$outhead,$outtail,$outmask
	vmr		$outhead,$outtail
	vcipherlast	$key,$key,$rcon
	stvx		$stage,0,$out
	addi		$out,$out,16

	vsldoi		$stage,$in0,$in1,8
	vxor		$in0,$in0,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vperm		$outtail,$stage,$stage,$outperm	# rotate
	vsel		$stage,$outhead,$outtail,$outmask
	vmr		$outhead,$outtail
	vxor		$in0,$in0,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in0,$in0,$tmp
	stvx		$stage,0,$out
	addi		$out,$out,16

	vspltw		$tmp,$in0,3
	vxor		$tmp,$tmp,$in1
	vsldoi		$in1,$zero,$in1,12	# >>32
	vadduwm		$rcon,$rcon,$rcon
	vxor		$in1,$in1,$tmp
	vxor		$in0,$in0,$key
	vxor		$in1,$in1,$key
	vperm		$outtail,$in0,$in0,$outperm	# rotate
	vsel		$stage,$outhead,$outtail,$outmask
	vmr		$outhead,$outtail
	stvx		$stage,0,$out
	addi		$inp,$out,15		# 15 is not typo
	addi		$out,$out,16
	bdnz		Loop192

	li		$rounds,12
	addi		$out,$out,0x20
	b		Ldone

.align	4
L256:
	lvx		$tmp,0,$inp
	li		$cnt,7
	li		$rounds,14
	vperm		$outtail,$in0,$in0,$outperm	# rotate
	vsel		$stage,$outhead,$outtail,$outmask
	vmr		$outhead,$outtail
	stvx		$stage,0,$out
	addi		$out,$out,16
	vperm		$in1,$in1,$tmp,$key	# align [and byte swap in LE]
	mtctr		$cnt

Loop256:
	vperm		$key,$in1,$in1,$mask	# rotate-n-splat
	vsldoi		$tmp,$zero,$in0,12	# >>32
	vperm		$outtail,$in1,$in1,$outperm	# rotate
	vsel		$stage,$outhead,$outtail,$outmask
	vmr		$outhead,$outtail
	vcipherlast	$key,$key,$rcon
	stvx		$stage,0,$out
	addi		$out,$out,16

	vxor		$in0,$in0,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in0,$in0,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in0,$in0,$tmp
	vadduwm		$rcon,$rcon,$rcon
	vxor		$in0,$in0,$key
	vperm		$outtail,$in0,$in0,$outperm	# rotate
	vsel		$stage,$outhead,$outtail,$outmask
	vmr		$outhead,$outtail
	stvx		$stage,0,$out
	addi		$inp,$out,15		# 15 is not typo
	addi		$out,$out,16
	bdz		Ldone

	vspltw		$key,$in0,3		# just splat
	vsldoi		$tmp,$zero,$in1,12	# >>32
	vsbox		$key,$key

	vxor		$in1,$in1,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in1,$in1,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in1,$in1,$tmp

	vxor		$in1,$in1,$key
	b		Loop256

.align	4
Ldone:
	lvx		$in1,0,$inp		# redundant in aligned case
	vsel		$in1,$outhead,$in1,$outmask
	stvx		$in1,0,$inp
	li		$ptr,0
	mtspr		256,$vrsave
	stw		$rounds,0($out)

Lenc_key_abort:
	mr		r3,$ptr
	blr
	.long		0
	.byte		0,12,0x14,1,0,0,3,0
	.long		0
.size	.${prefix}_set_encrypt_key,.-.${prefix}_set_encrypt_key

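# Decryption key setup (descriptive note, not from the original): the
# routine below first builds the encryption schedule in place via
# Lset_encrypt_key, then swaps round key 0 with round key N, 1 with
# N-1, and so on, because vncipher consumes the schedule in reverse
# order. No InvMixColumns transform of the round keys is applied; the
# plain reversed schedule is what the vncipher formulation expects.
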
.globl	.${prefix}_set_decrypt_key
.align	5
.${prefix}_set_decrypt_key:
	$STU		$sp,-$FRAME($sp)
	mflr		r10
	$PUSH		r10,$FRAME+$LRSAVE($sp)
	bl		Lset_encrypt_key
	mtlr		r10

	cmpwi		r3,0
	bne-		Ldec_key_abort

	slwi		$cnt,$rounds,4
	subi		$inp,$out,240		# first round key
	srwi		$rounds,$rounds,1
	add		$out,$inp,$cnt		# last round key
	mtctr		$rounds

Ldeckey:
	lwz		r0, 0($inp)
	lwz		r6, 4($inp)
	lwz		r7, 8($inp)
	lwz		r8, 12($inp)
	addi		$inp,$inp,16
	lwz		r9, 0($out)
	lwz		r10,4($out)
	lwz		r11,8($out)
	lwz		r12,12($out)
	stw		r0, 0($out)
	stw		r6, 4($out)
	stw		r7, 8($out)
	stw		r8, 12($out)
	subi		$out,$out,16
	stw		r9, -16($inp)
	stw		r10,-12($inp)
	stw		r11,-8($inp)
	stw		r12,-4($inp)
	bdnz		Ldeckey

	xor		r3,r3,r3		# return value
Ldec_key_abort:
	addi		$sp,$sp,$FRAME
	blr
	.long		0
	.byte		0,12,4,1,0x80,0,3,0
	.long		0
.size	.${prefix}_set_decrypt_key,.-.${prefix}_set_decrypt_key
___
}}}
#########################################################################
{{{	# Single block en- and decrypt procedures			#
sub gen_block () {
my $dir = shift;
my $n   = $dir eq "de" ? "n" : "";
my ($inp,$out,$key,$rounds,$idx)=map("r$_",(3..7));

$code.=<<___;
.globl	.${prefix}_${dir}crypt
.align	5
.${prefix}_${dir}crypt:
	lwz		$rounds,240($key)
	lis		r0,0xfc00
	mfspr		$vrsave,256
	li		$idx,15			# 15 is not typo
	mtspr		256,r0

	lvx		v0,0,$inp
	neg		r11,$out
	lvx		v1,$idx,$inp
	lvsl		v2,0,$inp		# inpperm
	le?vspltisb	v4,0x0f
	?lvsl		v3,0,r11		# outperm
	le?vxor		v2,v2,v4
	li		$idx,16
	vperm		v0,v0,v1,v2		# align [and byte swap in LE]
	lvx		v1,0,$key
	?lvsl		v5,0,$key		# keyperm
	srwi		$rounds,$rounds,1
	lvx		v2,$idx,$key
	addi		$idx,$idx,16
	subi		$rounds,$rounds,1
	?vperm		v1,v1,v2,v5		# align round key

	vxor		v0,v0,v1
	lvx		v1,$idx,$key
	addi		$idx,$idx,16
	mtctr		$rounds

Loop_${dir}c:
	?vperm		v2,v2,v1,v5
	v${n}cipher	v0,v0,v2
	lvx		v2,$idx,$key
	addi		$idx,$idx,16
	?vperm		v1,v1,v2,v5
	v${n}cipher	v0,v0,v1
	lvx		v1,$idx,$key
	addi		$idx,$idx,16
	bdnz		Loop_${dir}c

	?vperm		v2,v2,v1,v5
	v${n}cipher	v0,v0,v2
	lvx		v2,$idx,$key
	?vperm		v1,v1,v2,v5
	v${n}cipherlast	v0,v0,v1

	vspltisb	v2,-1
	vxor		v1,v1,v1
	li		$idx,15			# 15 is not typo
	?vperm		v2,v1,v2,v3		# outmask
	le?vxor		v3,v3,v4
	lvx		v1,0,$out		# outhead
	vperm		v0,v0,v0,v3		# rotate [and byte swap in LE]
	vsel		v1,v1,v0,v2
	lvx		v4,$idx,$out
	stvx		v1,0,$out
	vsel		v0,v0,v4,v2
	stvx		v0,$idx,$out

	mtspr		256,$vrsave
	blr
	.long		0
	.byte		0,12,0x14,0,0,0,3,0
	.long		0
.size	.${prefix}_${dir}crypt,.-.${prefix}_${dir}crypt
___
}
&gen_block("en");
&gen_block("de");
}}}
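
# gen_block() stamps out two near-identical routines: with $dir="en" it
# emits vcipher/vcipherlast (aes_p8_encrypt), while $dir="de" sets the
# "n" infix and produces vncipher/vncipherlast (aes_p8_decrypt). A
# sketch of the C-level contract (an assumption; it mirrors
# AES_encrypt/AES_decrypt):
#
#	void aes_p8_encrypt(const unsigned char in[16],
#	                    unsigned char out[16], const AES_KEY *key);
#	void aes_p8_decrypt(const unsigned char in[16],
#	                    unsigned char out[16], const AES_KEY *key);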
#########################################################################
{{{	# CBC en- and decrypt procedures				#
my ($inp,$out,$len,$key,$ivp,$enc,$rounds,$idx)=map("r$_",(3..10));
my ($rndkey0,$rndkey1,$inout,$tmp)=		map("v$_",(0..3));
my ($ivec,$inptail,$inpperm,$outhead,$outperm,$outmask,$keyperm)=
						map("v$_",(4..10));
$code.=<<___;
.globl	.${prefix}_cbc_encrypt
.align	5
.${prefix}_cbc_encrypt:
	${UCMP}i	$len,16
	bltlr-

	cmpwi		$enc,0			# test direction
	lis		r0,0xffe0
	mfspr		$vrsave,256
	mtspr		256,r0

	li		$idx,15
	vxor		$rndkey0,$rndkey0,$rndkey0
	le?vspltisb	$tmp,0x0f

	lvx		$ivec,0,$ivp		# load [unaligned] iv
	lvsl		$inpperm,0,$ivp
	lvx		$inptail,$idx,$ivp
	le?vxor		$inpperm,$inpperm,$tmp
	vperm		$ivec,$ivec,$inptail,$inpperm

	neg		r11,$inp
	?lvsl		$keyperm,0,$key		# prepare for unaligned key
	lwz		$rounds,240($key)

	lvsr		$inpperm,0,r11		# prepare for unaligned load
	lvx		$inptail,0,$inp
	addi		$inp,$inp,15		# 15 is not typo
	le?vxor		$inpperm,$inpperm,$tmp

	?lvsr		$outperm,0,$out		# prepare for unaligned store
	vspltisb	$outmask,-1
	lvx		$outhead,0,$out
	?vperm		$outmask,$rndkey0,$outmask,$outperm
	le?vxor		$outperm,$outperm,$tmp

	srwi		$rounds,$rounds,1
	li		$idx,16
	subi		$rounds,$rounds,1
	beq		Lcbc_dec

Lcbc_enc:
	vmr		$inout,$inptail
	lvx		$inptail,0,$inp
	addi		$inp,$inp,16
	mtctr		$rounds
	subi		$len,$len,16		# len-=16

	lvx		$rndkey0,0,$key
	vperm		$inout,$inout,$inptail,$inpperm
	lvx		$rndkey1,$idx,$key
	addi		$idx,$idx,16
	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
	vxor		$inout,$inout,$rndkey0
	lvx		$rndkey0,$idx,$key
	addi		$idx,$idx,16
	vxor		$inout,$inout,$ivec

Loop_cbc_enc:
	?vperm		$rndkey1,$rndkey1,$rndkey0,$keyperm
	vcipher		$inout,$inout,$rndkey1
	lvx		$rndkey1,$idx,$key
	addi		$idx,$idx,16
	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
	vcipher		$inout,$inout,$rndkey0
	lvx		$rndkey0,$idx,$key
	addi		$idx,$idx,16
	bdnz		Loop_cbc_enc

	?vperm		$rndkey1,$rndkey1,$rndkey0,$keyperm
	vcipher		$inout,$inout,$rndkey1
	lvx		$rndkey1,$idx,$key
	li		$idx,16
	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
	vcipherlast	$ivec,$inout,$rndkey0
	${UCMP}i	$len,16

	vperm		$tmp,$ivec,$ivec,$outperm
	vsel		$inout,$outhead,$tmp,$outmask
	vmr		$outhead,$tmp
	stvx		$inout,0,$out
	addi		$out,$out,16
	bge		Lcbc_enc

	b		Lcbc_done

.align	4
Lcbc_dec:
	${UCMP}i	$len,128
	bge		_aesp8_cbc_decrypt8x
	vmr		$tmp,$inptail
	lvx		$inptail,0,$inp
	addi		$inp,$inp,16
	mtctr		$rounds
	subi		$len,$len,16		# len-=16

	lvx		$rndkey0,0,$key
	vperm		$tmp,$tmp,$inptail,$inpperm
	lvx		$rndkey1,$idx,$key
	addi		$idx,$idx,16
	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
	vxor		$inout,$tmp,$rndkey0
	lvx		$rndkey0,$idx,$key
	addi		$idx,$idx,16

Loop_cbc_dec:
	?vperm		$rndkey1,$rndkey1,$rndkey0,$keyperm
	vncipher	$inout,$inout,$rndkey1
	lvx		$rndkey1,$idx,$key
	addi		$idx,$idx,16
	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
	vncipher	$inout,$inout,$rndkey0
	lvx		$rndkey0,$idx,$key
	addi		$idx,$idx,16
	bdnz		Loop_cbc_dec

	?vperm		$rndkey1,$rndkey1,$rndkey0,$keyperm
	vncipher	$inout,$inout,$rndkey1
	lvx		$rndkey1,$idx,$key
	li		$idx,16
	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
	vncipherlast	$inout,$inout,$rndkey0
	${UCMP}i	$len,16

	vxor		$inout,$inout,$ivec
	vmr		$ivec,$tmp
	vperm		$tmp,$inout,$inout,$outperm
	vsel		$inout,$outhead,$tmp,$outmask
	vmr		$outhead,$tmp
	stvx		$inout,0,$out
	addi		$out,$out,16
	bge		Lcbc_dec

Lcbc_done:
	addi		$out,$out,-1
	lvx		$inout,0,$out		# redundant in aligned case
	vsel		$inout,$outhead,$inout,$outmask
	stvx		$inout,0,$out

	neg		$enc,$ivp		# write [unaligned] iv
	li		$idx,15			# 15 is not typo
	vxor		$rndkey0,$rndkey0,$rndkey0
	vspltisb	$outmask,-1
	le?vspltisb	$tmp,0x0f
	?lvsl		$outperm,0,$enc
	?vperm		$outmask,$rndkey0,$outmask,$outperm
	le?vxor		$outperm,$outperm,$tmp
	lvx		$outhead,0,$ivp
	vperm		$ivec,$ivec,$ivec,$outperm
	vsel		$inout,$outhead,$ivec,$outmask
	lvx		$inptail,$idx,$ivp
	stvx		$inout,0,$ivp
	vsel		$inout,$ivec,$inptail,$outmask
	stvx		$inout,$idx,$ivp

	mtspr		256,$vrsave
	blr
	.long		0
	.byte		0,12,0x14,0,0,0,6,0
	.long		0
___
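
# CBC encryption above is inherently serial -- each block's input is
# chained through the previous ciphertext -- so only one vcipher stream
# can be in flight. CBC decryption has no such dependency, which is why
# lengths of 128 bytes and up are diverted to the eight-way interleaved
# _aesp8_cbc_decrypt8x below.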
#########################################################################
{{	# Optimized CBC decrypt procedure				#
my $key_="r11";
my ($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,8,26..31));
	$x00=0 if ($flavour =~ /osx/);
my ($in0, $in1, $in2, $in3, $in4, $in5, $in6, $in7 )=map("v$_",(0..3,10..13));
my ($out0,$out1,$out2,$out3,$out4,$out5,$out6,$out7)=map("v$_",(14..21));
my $rndkey0="v23";	# v24-v25 rotating buffer for first found keys
			# v26-v31 last 6 round keys
my ($tmp,$keyperm)=($in3,$in4);	# aliases with "caller", redundant assignment
$code.=<<___;
.align	5
_aesp8_cbc_decrypt8x:
	$STU		$sp,-`($FRAME+21*16+6*$SIZE_T)`($sp)
	li		r10,`$FRAME+8*16+15`
	li		r11,`$FRAME+8*16+31`
	stvx		v20,r10,$sp		# ABI says so
	addi		r10,r10,32
	stvx		v21,r11,$sp
	addi		r11,r11,32
	stvx		v22,r10,$sp
	addi		r10,r10,32
	stvx		v23,r11,$sp
	addi		r11,r11,32
	stvx		v24,r10,$sp
	addi		r10,r10,32
	stvx		v25,r11,$sp
	addi		r11,r11,32
	stvx		v26,r10,$sp
	addi		r10,r10,32
	stvx		v27,r11,$sp
	addi		r11,r11,32
	stvx		v28,r10,$sp
	addi		r10,r10,32
	stvx		v29,r11,$sp
	addi		r11,r11,32
	stvx		v30,r10,$sp
	stvx		v31,r11,$sp
	li		r0,-1
	stw		$vrsave,`$FRAME+21*16-4`($sp)	# save vrsave
	li		$x10,0x10
	$PUSH		r26,`$FRAME+21*16+0*$SIZE_T`($sp)
	li		$x20,0x20
	$PUSH		r27,`$FRAME+21*16+1*$SIZE_T`($sp)
	li		$x30,0x30
	$PUSH		r28,`$FRAME+21*16+2*$SIZE_T`($sp)
	li		$x40,0x40
	$PUSH		r29,`$FRAME+21*16+3*$SIZE_T`($sp)
	li		$x50,0x50
	$PUSH		r30,`$FRAME+21*16+4*$SIZE_T`($sp)
	li		$x60,0x60
	$PUSH		r31,`$FRAME+21*16+5*$SIZE_T`($sp)
	li		$x70,0x70
	mtspr		256,r0

	subi		$rounds,$rounds,3	# -4 in total
	subi		$len,$len,128		# bias

	lvx		$rndkey0,$x00,$key	# load key schedule
	lvx		v30,$x10,$key
	addi		$key,$key,0x20
	lvx		v31,$x00,$key
	?vperm		$rndkey0,$rndkey0,v30,$keyperm
	addi		$key_,$sp,$FRAME+15
	mtctr		$rounds

Load_cbc_dec_key:
	?vperm		v24,v30,v31,$keyperm
	lvx		v30,$x10,$key
	addi		$key,$key,0x20
	stvx		v24,$x00,$key_		# off-load round[1]
	?vperm		v25,v31,v30,$keyperm
	lvx		v31,$x00,$key
	stvx		v25,$x10,$key_		# off-load round[2]
	addi		$key_,$key_,0x20
	bdnz		Load_cbc_dec_key

	lvx		v26,$x10,$key
	?vperm		v24,v30,v31,$keyperm
	lvx		v27,$x20,$key
	stvx		v24,$x00,$key_		# off-load round[3]
	?vperm		v25,v31,v26,$keyperm
	lvx		v28,$x30,$key
	stvx		v25,$x10,$key_		# off-load round[4]
	addi		$key_,$sp,$FRAME+15	# rewind $key_
	?vperm		v26,v26,v27,$keyperm
	lvx		v29,$x40,$key
	?vperm		v27,v27,v28,$keyperm
	lvx		v30,$x50,$key
	?vperm		v28,v28,v29,$keyperm
	lvx		v31,$x60,$key
	?vperm		v29,v29,v30,$keyperm
	lvx		$out0,$x70,$key		# borrow $out0
	?vperm		v30,v30,v31,$keyperm
	lvx		v24,$x00,$key_		# pre-load round[1]
	?vperm		v31,v31,$out0,$keyperm
	lvx		v25,$x10,$key_		# pre-load round[2]

	#lvx		$inptail,0,$inp		# "caller" already did this
	#addi		$inp,$inp,15		# 15 is not typo
	subi		$inp,$inp,15		# undo "caller"

	le?li		$idx,8
	lvx_u		$in0,$x00,$inp		# load first 8 "words"
	le?lvsl		$inpperm,0,$idx
	le?vspltisb	$tmp,0x0f
	lvx_u		$in1,$x10,$inp
	le?vxor		$inpperm,$inpperm,$tmp	# transform for lvx_u/stvx_u
	lvx_u		$in2,$x20,$inp
	le?vperm	$in0,$in0,$in0,$inpperm
	lvx_u		$in3,$x30,$inp
	le?vperm	$in1,$in1,$in1,$inpperm
	lvx_u		$in4,$x40,$inp
	le?vperm	$in2,$in2,$in2,$inpperm
	vxor		$out0,$in0,$rndkey0
	lvx_u		$in5,$x50,$inp
	le?vperm	$in3,$in3,$in3,$inpperm
	vxor		$out1,$in1,$rndkey0
	lvx_u		$in6,$x60,$inp
	le?vperm	$in4,$in4,$in4,$inpperm
	vxor		$out2,$in2,$rndkey0
	lvx_u		$in7,$x70,$inp
	addi		$inp,$inp,0x80
	le?vperm	$in5,$in5,$in5,$inpperm
	vxor		$out3,$in3,$rndkey0
	le?vperm	$in6,$in6,$in6,$inpperm
	vxor		$out4,$in4,$rndkey0
	le?vperm	$in7,$in7,$in7,$inpperm
	vxor		$out5,$in5,$rndkey0
	vxor		$out6,$in6,$rndkey0
	vxor		$out7,$in7,$rndkey0

	mtctr		$rounds
	b		Loop_cbc_dec8x
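
# Descriptive note (not from the original): the loop below keeps eight
# independent vncipher chains in flight, so the multi-cycle latency of
# each vncipher is hidden behind the other seven blocks. Round keys are
# streamed two at a time from the on-stack copy through the rotating
# v24/v25 pair, while v26-v31 hold the last six rounds permanently.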
.align	5
Loop_cbc_dec8x:
	vncipher	$out0,$out0,v24
	vncipher	$out1,$out1,v24
	vncipher	$out2,$out2,v24
	vncipher	$out3,$out3,v24
	vncipher	$out4,$out4,v24
	vncipher	$out5,$out5,v24
	vncipher	$out6,$out6,v24
	vncipher	$out7,$out7,v24
	lvx		v24,$x20,$key_		# round[3]
	addi		$key_,$key_,0x20

	vncipher	$out0,$out0,v25
	vncipher	$out1,$out1,v25
	vncipher	$out2,$out2,v25
	vncipher	$out3,$out3,v25
	vncipher	$out4,$out4,v25
	vncipher	$out5,$out5,v25
	vncipher	$out6,$out6,v25
	vncipher	$out7,$out7,v25
	lvx		v25,$x10,$key_		# round[4]
	bdnz		Loop_cbc_dec8x

	subic		$len,$len,128		# $len-=128
	vncipher	$out0,$out0,v24
	vncipher	$out1,$out1,v24
	vncipher	$out2,$out2,v24
	vncipher	$out3,$out3,v24
	vncipher	$out4,$out4,v24
	vncipher	$out5,$out5,v24
	vncipher	$out6,$out6,v24
	vncipher	$out7,$out7,v24

	subfe.		r0,r0,r0		# borrow?-1:0
	vncipher	$out0,$out0,v25
	vncipher	$out1,$out1,v25
	vncipher	$out2,$out2,v25
	vncipher	$out3,$out3,v25
	vncipher	$out4,$out4,v25
	vncipher	$out5,$out5,v25
	vncipher	$out6,$out6,v25
	vncipher	$out7,$out7,v25

	and		r0,r0,$len
	vncipher	$out0,$out0,v26
	vncipher	$out1,$out1,v26
	vncipher	$out2,$out2,v26
	vncipher	$out3,$out3,v26
	vncipher	$out4,$out4,v26
	vncipher	$out5,$out5,v26
	vncipher	$out6,$out6,v26
	vncipher	$out7,$out7,v26

	add		$inp,$inp,r0		# $inp is adjusted in such
						# way that at exit from the
						# loop inX-in7 are loaded
						# with last "words"
	vncipher	$out0,$out0,v27
	vncipher	$out1,$out1,v27
	vncipher	$out2,$out2,v27
	vncipher	$out3,$out3,v27
	vncipher	$out4,$out4,v27
	vncipher	$out5,$out5,v27
	vncipher	$out6,$out6,v27
	vncipher	$out7,$out7,v27

	addi		$key_,$sp,$FRAME+15	# rewind $key_
	vncipher	$out0,$out0,v28
	vncipher	$out1,$out1,v28
	vncipher	$out2,$out2,v28
	vncipher	$out3,$out3,v28
	vncipher	$out4,$out4,v28
	vncipher	$out5,$out5,v28
	vncipher	$out6,$out6,v28
	vncipher	$out7,$out7,v28
	lvx		v24,$x00,$key_		# re-pre-load round[1]

	vncipher	$out0,$out0,v29
	vncipher	$out1,$out1,v29
	vncipher	$out2,$out2,v29
	vncipher	$out3,$out3,v29
	vncipher	$out4,$out4,v29
	vncipher	$out5,$out5,v29
	vncipher	$out6,$out6,v29
	vncipher	$out7,$out7,v29
	lvx		v25,$x10,$key_		# re-pre-load round[2]

	vncipher	$out0,$out0,v30
	vxor		$ivec,$ivec,v31		# xor with last round key
	vncipher	$out1,$out1,v30
	vxor		$in0,$in0,v31
	vncipher	$out2,$out2,v30
	vxor		$in1,$in1,v31
	vncipher	$out3,$out3,v30
	vxor		$in2,$in2,v31
	vncipher	$out4,$out4,v30
	vxor		$in3,$in3,v31
	vncipher	$out5,$out5,v30
	vxor		$in4,$in4,v31
	vncipher	$out6,$out6,v30
	vxor		$in5,$in5,v31
	vncipher	$out7,$out7,v30
	vxor		$in6,$in6,v31

	vncipherlast	$out0,$out0,$ivec
	vncipherlast	$out1,$out1,$in0
	lvx_u		$in0,$x00,$inp		# load next input block
	vncipherlast	$out2,$out2,$in1
	lvx_u		$in1,$x10,$inp
	vncipherlast	$out3,$out3,$in2
	le?vperm	$in0,$in0,$in0,$inpperm
	lvx_u		$in2,$x20,$inp
	vncipherlast	$out4,$out4,$in3
	le?vperm	$in1,$in1,$in1,$inpperm
	lvx_u		$in3,$x30,$inp
	vncipherlast	$out5,$out5,$in4
	le?vperm	$in2,$in2,$in2,$inpperm
	lvx_u		$in4,$x40,$inp
	vncipherlast	$out6,$out6,$in5
	le?vperm	$in3,$in3,$in3,$inpperm
	lvx_u		$in5,$x50,$inp
	vncipherlast	$out7,$out7,$in6
	le?vperm	$in4,$in4,$in4,$inpperm
	lvx_u		$in6,$x60,$inp
	vmr		$ivec,$in7
	le?vperm	$in5,$in5,$in5,$inpperm
	lvx_u		$in7,$x70,$inp
	addi		$inp,$inp,0x80

	le?vperm	$out0,$out0,$out0,$inpperm
	le?vperm	$out1,$out1,$out1,$inpperm
	stvx_u		$out0,$x00,$out
	le?vperm	$in6,$in6,$in6,$inpperm
	vxor		$out0,$in0,$rndkey0
	le?vperm	$out2,$out2,$out2,$inpperm
	stvx_u		$out1,$x10,$out
	le?vperm	$in7,$in7,$in7,$inpperm
	vxor		$out1,$in1,$rndkey0
	le?vperm	$out3,$out3,$out3,$inpperm
	stvx_u		$out2,$x20,$out
	vxor		$out2,$in2,$rndkey0
	le?vperm	$out4,$out4,$out4,$inpperm
	stvx_u		$out3,$x30,$out
	vxor		$out3,$in3,$rndkey0
	le?vperm	$out5,$out5,$out5,$inpperm
	stvx_u		$out4,$x40,$out
	vxor		$out4,$in4,$rndkey0
	le?vperm	$out6,$out6,$out6,$inpperm
	stvx_u		$out5,$x50,$out
	vxor		$out5,$in5,$rndkey0
	le?vperm	$out7,$out7,$out7,$inpperm
	stvx_u		$out6,$x60,$out
	vxor		$out6,$in6,$rndkey0
	stvx_u		$out7,$x70,$out
	addi		$out,$out,0x80
	vxor		$out7,$in7,$rndkey0

	mtctr		$rounds
	beq		Loop_cbc_dec8x		# did $len-=128 borrow?

	addic.		$len,$len,128
	beq		Lcbc_dec8x_done
	nop
	nop

Loop_cbc_dec8x_tail:				# up to 7 "words" tail...
	vncipher	$out1,$out1,v24
	vncipher	$out2,$out2,v24
	vncipher	$out3,$out3,v24
	vncipher	$out4,$out4,v24
	vncipher	$out5,$out5,v24
	vncipher	$out6,$out6,v24
	vncipher	$out7,$out7,v24
	lvx		v24,$x20,$key_		# round[3]
	addi		$key_,$key_,0x20

	vncipher	$out1,$out1,v25
	vncipher	$out2,$out2,v25
	vncipher	$out3,$out3,v25
	vncipher	$out4,$out4,v25
	vncipher	$out5,$out5,v25
	vncipher	$out6,$out6,v25
	vncipher	$out7,$out7,v25
	lvx		v25,$x10,$key_		# round[4]
	bdnz		Loop_cbc_dec8x_tail

	vncipher	$out1,$out1,v24
	vncipher	$out2,$out2,v24
	vncipher	$out3,$out3,v24
	vncipher	$out4,$out4,v24
	vncipher	$out5,$out5,v24
	vncipher	$out6,$out6,v24
	vncipher	$out7,$out7,v24

	vncipher	$out1,$out1,v25
	vncipher	$out2,$out2,v25
	vncipher	$out3,$out3,v25
	vncipher	$out4,$out4,v25
	vncipher	$out5,$out5,v25
	vncipher	$out6,$out6,v25
	vncipher	$out7,$out7,v25

	vncipher	$out1,$out1,v26
	vncipher	$out2,$out2,v26
	vncipher	$out3,$out3,v26
	vncipher	$out4,$out4,v26
	vncipher	$out5,$out5,v26
	vncipher	$out6,$out6,v26
	vncipher	$out7,$out7,v26

	vncipher	$out1,$out1,v27
	vncipher	$out2,$out2,v27
	vncipher	$out3,$out3,v27
	vncipher	$out4,$out4,v27
	vncipher	$out5,$out5,v27
	vncipher	$out6,$out6,v27
	vncipher	$out7,$out7,v27

	vncipher	$out1,$out1,v28
	vncipher	$out2,$out2,v28
	vncipher	$out3,$out3,v28
	vncipher	$out4,$out4,v28
	vncipher	$out5,$out5,v28
	vncipher	$out6,$out6,v28
	vncipher	$out7,$out7,v28

	vncipher	$out1,$out1,v29
	vncipher	$out2,$out2,v29
	vncipher	$out3,$out3,v29
	vncipher	$out4,$out4,v29
	vncipher	$out5,$out5,v29
	vncipher	$out6,$out6,v29
	vncipher	$out7,$out7,v29

	vncipher	$out1,$out1,v30
	vxor		$ivec,$ivec,v31		# last round key
	vncipher	$out2,$out2,v30
	vxor		$in1,$in1,v31
	vncipher	$out3,$out3,v30
	vxor		$in2,$in2,v31
	vncipher	$out4,$out4,v30
	vxor		$in3,$in3,v31
	vncipher	$out5,$out5,v30
	vxor		$in4,$in4,v31
	vncipher	$out6,$out6,v30
	vxor		$in5,$in5,v31
	vncipher	$out7,$out7,v30
	vxor		$in6,$in6,v31

	cmplwi		$len,32			# switch($len)
	blt		Lcbc_dec8x_one
	nop
	beq		Lcbc_dec8x_two
	cmplwi		$len,64
	blt		Lcbc_dec8x_three
	nop
	beq		Lcbc_dec8x_four
	cmplwi		$len,96
	blt		Lcbc_dec8x_five
	nop
	beq		Lcbc_dec8x_six

Lcbc_dec8x_seven:
	vncipherlast	$out1,$out1,$ivec
	vncipherlast	$out2,$out2,$in1
	vncipherlast	$out3,$out3,$in2
	vncipherlast	$out4,$out4,$in3
	vncipherlast	$out5,$out5,$in4
	vncipherlast	$out6,$out6,$in5
	vncipherlast	$out7,$out7,$in6
	vmr		$ivec,$in7

	le?vperm	$out1,$out1,$out1,$inpperm
	le?vperm	$out2,$out2,$out2,$inpperm
	stvx_u		$out1,$x00,$out
	le?vperm	$out3,$out3,$out3,$inpperm
	stvx_u		$out2,$x10,$out
	le?vperm	$out4,$out4,$out4,$inpperm
	stvx_u		$out3,$x20,$out
	le?vperm	$out5,$out5,$out5,$inpperm
	stvx_u		$out4,$x30,$out
	le?vperm	$out6,$out6,$out6,$inpperm
	stvx_u		$out5,$x40,$out
	le?vperm	$out7,$out7,$out7,$inpperm
	stvx_u		$out6,$x50,$out
	stvx_u		$out7,$x60,$out
	addi		$out,$out,0x70
	b		Lcbc_dec8x_done

.align	5
Lcbc_dec8x_six:
	vncipherlast	$out2,$out2,$ivec
	vncipherlast	$out3,$out3,$in2
	vncipherlast	$out4,$out4,$in3
	vncipherlast	$out5,$out5,$in4
	vncipherlast	$out6,$out6,$in5
	vncipherlast	$out7,$out7,$in6
	vmr		$ivec,$in7

	le?vperm	$out2,$out2,$out2,$inpperm
	le?vperm	$out3,$out3,$out3,$inpperm
	stvx_u		$out2,$x00,$out
	le?vperm	$out4,$out4,$out4,$inpperm
	stvx_u		$out3,$x10,$out
	le?vperm	$out5,$out5,$out5,$inpperm
	stvx_u		$out4,$x20,$out
	le?vperm	$out6,$out6,$out6,$inpperm
	stvx_u		$out5,$x30,$out
	le?vperm	$out7,$out7,$out7,$inpperm
	stvx_u		$out6,$x40,$out
	stvx_u		$out7,$x50,$out
	addi		$out,$out,0x60
	b		Lcbc_dec8x_done

.align	5
Lcbc_dec8x_five:
	vncipherlast	$out3,$out3,$ivec
	vncipherlast	$out4,$out4,$in3
	vncipherlast	$out5,$out5,$in4
	vncipherlast	$out6,$out6,$in5
	vncipherlast	$out7,$out7,$in6
	vmr		$ivec,$in7

	le?vperm	$out3,$out3,$out3,$inpperm
	le?vperm	$out4,$out4,$out4,$inpperm
	stvx_u		$out3,$x00,$out
	le?vperm	$out5,$out5,$out5,$inpperm
	stvx_u		$out4,$x10,$out
	le?vperm	$out6,$out6,$out6,$inpperm
	stvx_u		$out5,$x20,$out
	le?vperm	$out7,$out7,$out7,$inpperm
	stvx_u		$out6,$x30,$out
	stvx_u		$out7,$x40,$out
	addi		$out,$out,0x50
	b		Lcbc_dec8x_done

.align	5
Lcbc_dec8x_four:
	vncipherlast	$out4,$out4,$ivec
	vncipherlast	$out5,$out5,$in4
	vncipherlast	$out6,$out6,$in5
	vncipherlast	$out7,$out7,$in6
	vmr		$ivec,$in7

	le?vperm	$out4,$out4,$out4,$inpperm
	le?vperm	$out5,$out5,$out5,$inpperm
	stvx_u		$out4,$x00,$out
	le?vperm	$out6,$out6,$out6,$inpperm
	stvx_u		$out5,$x10,$out
	le?vperm	$out7,$out7,$out7,$inpperm
	stvx_u		$out6,$x20,$out
	stvx_u		$out7,$x30,$out
	addi		$out,$out,0x40
	b		Lcbc_dec8x_done

.align	5
Lcbc_dec8x_three:
	vncipherlast	$out5,$out5,$ivec
	vncipherlast	$out6,$out6,$in5
	vncipherlast	$out7,$out7,$in6
	vmr		$ivec,$in7

	le?vperm	$out5,$out5,$out5,$inpperm
	le?vperm	$out6,$out6,$out6,$inpperm
	stvx_u		$out5,$x00,$out
	le?vperm	$out7,$out7,$out7,$inpperm
	stvx_u		$out6,$x10,$out
	stvx_u		$out7,$x20,$out
	addi		$out,$out,0x30
	b		Lcbc_dec8x_done

.align	5
Lcbc_dec8x_two:
	vncipherlast	$out6,$out6,$ivec
	vncipherlast	$out7,$out7,$in6
	vmr		$ivec,$in7

	le?vperm	$out6,$out6,$out6,$inpperm
	le?vperm	$out7,$out7,$out7,$inpperm
	stvx_u		$out6,$x00,$out
	stvx_u		$out7,$x10,$out
	addi		$out,$out,0x20
	b		Lcbc_dec8x_done

.align	5
Lcbc_dec8x_one:
	vncipherlast	$out7,$out7,$ivec
	vmr		$ivec,$in7

	le?vperm	$out7,$out7,$out7,$inpperm
	stvx_u		$out7,0,$out
	addi		$out,$out,0x10

Lcbc_dec8x_done:
	le?vperm	$ivec,$ivec,$ivec,$inpperm
	stvx_u		$ivec,0,$ivp		# write [unaligned] iv

	li		r10,`$FRAME+15`
	li		r11,`$FRAME+31`
	stvx		$inpperm,r10,$sp	# wipe copies of round keys
	addi		r10,r10,32
	stvx		$inpperm,r11,$sp
	addi		r11,r11,32
	stvx		$inpperm,r10,$sp
	addi		r10,r10,32
	stvx		$inpperm,r11,$sp
	addi		r11,r11,32
	stvx		$inpperm,r10,$sp
	addi		r10,r10,32
	stvx		$inpperm,r11,$sp
	addi		r11,r11,32
	stvx		$inpperm,r10,$sp
	addi		r10,r10,32
	stvx		$inpperm,r11,$sp
	addi		r11,r11,32

	mtspr		256,$vrsave
	lvx		v20,r10,$sp		# ABI says so
	addi		r10,r10,32
	lvx		v21,r11,$sp
	addi		r11,r11,32
	lvx		v22,r10,$sp
	addi		r10,r10,32
	lvx		v23,r11,$sp
	addi		r11,r11,32
	lvx		v24,r10,$sp
	addi		r10,r10,32
	lvx		v25,r11,$sp
	addi		r11,r11,32
	lvx		v26,r10,$sp
	addi		r10,r10,32
	lvx		v27,r11,$sp
	addi		r11,r11,32
	lvx		v28,r10,$sp
	addi		r10,r10,32
	lvx		v29,r11,$sp
	addi		r11,r11,32
	lvx		v30,r10,$sp
	lvx		v31,r11,$sp
	$POP		r26,`$FRAME+21*16+0*$SIZE_T`($sp)
	$POP		r27,`$FRAME+21*16+1*$SIZE_T`($sp)
	$POP		r28,`$FRAME+21*16+2*$SIZE_T`($sp)
	$POP		r29,`$FRAME+21*16+3*$SIZE_T`($sp)
	$POP		r30,`$FRAME+21*16+4*$SIZE_T`($sp)
	$POP		r31,`$FRAME+21*16+5*$SIZE_T`($sp)
	addi		$sp,$sp,`$FRAME+21*16+6*$SIZE_T`
	blr
	.long		0
	.byte		0,12,0x04,0,0x80,6,6,0
	.long		0
.size	.${prefix}_cbc_encrypt,.-.${prefix}_cbc_encrypt
___
}}	}}}

#########################################################################
{{{	# CTR procedure[s]						#
my ($inp,$out,$len,$key,$ivp,$x10,$rounds,$idx)=map("r$_",(3..10));
my ($rndkey0,$rndkey1,$inout,$tmp)=		map("v$_",(0..3));
my ($ivec,$inptail,$inpperm,$outhead,$outperm,$outmask,$keyperm,$one)=
						map("v$_",(4..11));
my $dat=$tmp;

$code.=<<___;
.globl	.${prefix}_ctr32_encrypt_blocks
.align	5
.${prefix}_ctr32_encrypt_blocks:
	${UCMP}i	$len,1
	bltlr-

	lis		r0,0xfff0
	mfspr		$vrsave,256
	mtspr		256,r0

	li		$idx,15
	vxor		$rndkey0,$rndkey0,$rndkey0
	le?vspltisb	$tmp,0x0f

	lvx		$ivec,0,$ivp		# load [unaligned] iv
	lvsl		$inpperm,0,$ivp
	lvx		$inptail,$idx,$ivp
	vspltisb	$one,1
	le?vxor		$inpperm,$inpperm,$tmp
	vperm		$ivec,$ivec,$inptail,$inpperm
	vsldoi		$one,$rndkey0,$one,1

	neg		r11,$inp
	?lvsl		$keyperm,0,$key		# prepare for unaligned key
	lwz		$rounds,240($key)

	lvsr		$inpperm,0,r11		# prepare for unaligned load
	lvx		$inptail,0,$inp
	addi		$inp,$inp,15		# 15 is not typo
	le?vxor		$inpperm,$inpperm,$tmp

	srwi		$rounds,$rounds,1
	li		$idx,16
	subi		$rounds,$rounds,1

	${UCMP}i	$len,8
	bge		_aesp8_ctr32_encrypt8x

	?lvsr		$outperm,0,$out		# prepare for unaligned store
	vspltisb	$outmask,-1
	lvx		$outhead,0,$out
	?vperm		$outmask,$rndkey0,$outmask,$outperm
	le?vxor		$outperm,$outperm,$tmp

	lvx		$rndkey0,0,$key
	mtctr		$rounds
	lvx		$rndkey1,$idx,$key
	addi		$idx,$idx,16
	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
	vxor		$inout,$ivec,$rndkey0
	lvx		$rndkey0,$idx,$key
	addi		$idx,$idx,16
	b		Loop_ctr32_enc

.align	5
Loop_ctr32_enc:
	?vperm		$rndkey1,$rndkey1,$rndkey0,$keyperm
	vcipher		$inout,$inout,$rndkey1
	lvx		$rndkey1,$idx,$key
	addi		$idx,$idx,16
	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
	vcipher		$inout,$inout,$rndkey0
	lvx		$rndkey0,$idx,$key
	addi		$idx,$idx,16
	bdnz		Loop_ctr32_enc

	vadduwm		$ivec,$ivec,$one
	vmr		$dat,$inptail
	lvx		$inptail,0,$inp
	addi		$inp,$inp,16
	subic.		$len,$len,1		# blocks--

	?vperm		$rndkey1,$rndkey1,$rndkey0,$keyperm
	vcipher		$inout,$inout,$rndkey1
	lvx		$rndkey1,$idx,$key
	vperm		$dat,$dat,$inptail,$inpperm
	li		$idx,16
	?vperm		$rndkey1,$rndkey0,$rndkey1,$keyperm
	lvx		$rndkey0,0,$key
	vxor		$dat,$dat,$rndkey1	# last round key
	vcipherlast	$inout,$inout,$dat

	lvx		$rndkey1,$idx,$key
	addi		$idx,$idx,16
	vperm		$inout,$inout,$inout,$outperm
	vsel		$dat,$outhead,$inout,$outmask
	mtctr		$rounds
	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
	vmr		$outhead,$inout
	vxor		$inout,$ivec,$rndkey0
	lvx		$rndkey0,$idx,$key
	addi		$idx,$idx,16
	stvx		$dat,0,$out
	addi		$out,$out,16
	bne		Loop_ctr32_enc

	addi		$out,$out,-1
	lvx		$inout,0,$out		# redundant in aligned case
	vsel		$inout,$outhead,$inout,$outmask
	stvx		$inout,0,$out

	mtspr		256,$vrsave
	blr
	.long		0
	.byte		0,12,0x14,0,0,0,6,0
	.long		0
___
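
# Note on the counter arithmetic (descriptive, not from the original):
# $one is built via vsldoi as the 128-bit value 1, i.e. {0,0,0,1} as
# words, so vadduwm -- a word-wise add with no carry between words --
# only ever increments the last 32-bit word of the counter block. That
# matches the ctr32 contract, where the counter wraps modulo 2^32 and
# the caller handles the wrap of the remaining 96 bits.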
#########################################################################
{{	# Optimized CTR procedure					#
my $key_="r11";
my ($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,8,26..31));
	$x00=0 if ($flavour =~ /osx/);
my ($in0, $in1, $in2, $in3, $in4, $in5, $in6, $in7 )=map("v$_",(0..3,10,12..14));
my ($out0,$out1,$out2,$out3,$out4,$out5,$out6,$out7)=map("v$_",(15..22));
my $rndkey0="v23";	# v24-v25 rotating buffer for first found keys
			# v26-v31 last 6 round keys
my ($tmp,$keyperm)=($in3,$in4);	# aliases with "caller", redundant assignment
my ($two,$three,$four)=($outhead,$outperm,$outmask);

$code.=<<___;
.align	5
_aesp8_ctr32_encrypt8x:
	$STU		$sp,-`($FRAME+21*16+6*$SIZE_T)`($sp)
	li		r10,`$FRAME+8*16+15`
	li		r11,`$FRAME+8*16+31`
	stvx		v20,r10,$sp		# ABI says so
	addi		r10,r10,32
	stvx		v21,r11,$sp
	addi		r11,r11,32
	stvx		v22,r10,$sp
	addi		r10,r10,32
	stvx		v23,r11,$sp
	addi		r11,r11,32
	stvx		v24,r10,$sp
	addi		r10,r10,32
	stvx		v25,r11,$sp
	addi		r11,r11,32
	stvx		v26,r10,$sp
	addi		r10,r10,32
	stvx		v27,r11,$sp
	addi		r11,r11,32
	stvx		v28,r10,$sp
	addi		r10,r10,32
	stvx		v29,r11,$sp
	addi		r11,r11,32
	stvx		v30,r10,$sp
	stvx		v31,r11,$sp
	li		r0,-1
	stw		$vrsave,`$FRAME+21*16-4`($sp)	# save vrsave
	li		$x10,0x10
	$PUSH		r26,`$FRAME+21*16+0*$SIZE_T`($sp)
	li		$x20,0x20
	$PUSH		r27,`$FRAME+21*16+1*$SIZE_T`($sp)
	li		$x30,0x30
	$PUSH		r28,`$FRAME+21*16+2*$SIZE_T`($sp)
	li		$x40,0x40
	$PUSH		r29,`$FRAME+21*16+3*$SIZE_T`($sp)
	li		$x50,0x50
	$PUSH		r30,`$FRAME+21*16+4*$SIZE_T`($sp)
	li		$x60,0x60
	$PUSH		r31,`$FRAME+21*16+5*$SIZE_T`($sp)
	li		$x70,0x70
	mtspr		256,r0

	subi		$rounds,$rounds,3	# -4 in total

	lvx		$rndkey0,$x00,$key	# load key schedule
	lvx		v30,$x10,$key
	addi		$key,$key,0x20
	lvx		v31,$x00,$key
	?vperm		$rndkey0,$rndkey0,v30,$keyperm
	addi		$key_,$sp,$FRAME+15
	mtctr		$rounds

Load_ctr32_enc_key:
	?vperm		v24,v30,v31,$keyperm
	lvx		v30,$x10,$key
	addi		$key,$key,0x20
	stvx		v24,$x00,$key_		# off-load round[1]
	?vperm		v25,v31,v30,$keyperm
	lvx		v31,$x00,$key
	stvx		v25,$x10,$key_		# off-load round[2]
	addi		$key_,$key_,0x20
	bdnz		Load_ctr32_enc_key

	lvx		v26,$x10,$key
	?vperm		v24,v30,v31,$keyperm
	lvx		v27,$x20,$key
	stvx		v24,$x00,$key_		# off-load round[3]
	?vperm		v25,v31,v26,$keyperm
	lvx		v28,$x30,$key
	stvx		v25,$x10,$key_		# off-load round[4]
	addi		$key_,$sp,$FRAME+15	# rewind $key_
	?vperm		v26,v26,v27,$keyperm
	lvx		v29,$x40,$key
	?vperm		v27,v27,v28,$keyperm
	lvx		v30,$x50,$key
	?vperm		v28,v28,v29,$keyperm
	lvx		v31,$x60,$key
	?vperm		v29,v29,v30,$keyperm
	lvx		$out0,$x70,$key		# borrow $out0
	?vperm		v30,v30,v31,$keyperm
	lvx		v24,$x00,$key_		# pre-load round[1]
	?vperm		v31,v31,$out0,$keyperm
	lvx		v25,$x10,$key_		# pre-load round[2]

	vadduwm		$two,$one,$one
	subi		$inp,$inp,15		# undo "caller"
	$SHL		$len,$len,4

	vadduwm		$out1,$ivec,$one	# counter values ...
	vadduwm		$out2,$ivec,$two
	vxor		$out0,$ivec,$rndkey0	# ... xored with rndkey[0]
	le?li		$idx,8
	vadduwm		$out3,$out1,$two
	vxor		$out1,$out1,$rndkey0
	le?lvsl		$inpperm,0,$idx
	vadduwm		$out4,$out2,$two
	vxor		$out2,$out2,$rndkey0
	le?vspltisb	$tmp,0x0f
	vadduwm		$out5,$out3,$two
	vxor		$out3,$out3,$rndkey0
	le?vxor		$inpperm,$inpperm,$tmp	# transform for lvx_u/stvx_u
	vadduwm		$out6,$out4,$two
	vxor		$out4,$out4,$rndkey0
	vadduwm		$out7,$out5,$two
	vxor		$out5,$out5,$rndkey0
	vadduwm		$ivec,$out6,$two	# next counter value
	vxor		$out6,$out6,$rndkey0
	vxor		$out7,$out7,$rndkey0

	mtctr		$rounds
	b		Loop_ctr32_enc8x
.align	5
Loop_ctr32_enc8x:
	vcipher		$out0,$out0,v24
	vcipher		$out1,$out1,v24
	vcipher		$out2,$out2,v24
	vcipher		$out3,$out3,v24
	vcipher		$out4,$out4,v24
	vcipher		$out5,$out5,v24
	vcipher		$out6,$out6,v24
	vcipher		$out7,$out7,v24
Loop_ctr32_enc8x_middle:
	lvx		v24,$x20,$key_		# round[3]
	addi		$key_,$key_,0x20

	vcipher		$out0,$out0,v25
	vcipher		$out1,$out1,v25
	vcipher		$out2,$out2,v25
	vcipher		$out3,$out3,v25
	vcipher		$out4,$out4,v25
	vcipher		$out5,$out5,v25
	vcipher		$out6,$out6,v25
	vcipher		$out7,$out7,v25
	lvx		v25,$x10,$key_		# round[4]
	bdnz		Loop_ctr32_enc8x

	subic		r11,$len,256		# $len-256, borrow $key_
	vcipher		$out0,$out0,v24
	vcipher		$out1,$out1,v24
	vcipher		$out2,$out2,v24
	vcipher		$out3,$out3,v24
	vcipher		$out4,$out4,v24
	vcipher		$out5,$out5,v24
	vcipher		$out6,$out6,v24
	vcipher		$out7,$out7,v24

	subfe		r0,r0,r0		# borrow?-1:0
	vcipher		$out0,$out0,v25
	vcipher		$out1,$out1,v25
	vcipher		$out2,$out2,v25
	vcipher		$out3,$out3,v25
	vcipher		$out4,$out4,v25
	vcipher		$out5,$out5,v25
	vcipher		$out6,$out6,v25
	vcipher		$out7,$out7,v25

	and		r0,r0,r11
	addi		$key_,$sp,$FRAME+15	# rewind $key_
	vcipher		$out0,$out0,v26
	vcipher		$out1,$out1,v26
	vcipher		$out2,$out2,v26
	vcipher		$out3,$out3,v26
	vcipher		$out4,$out4,v26
	vcipher		$out5,$out5,v26
	vcipher		$out6,$out6,v26
	vcipher		$out7,$out7,v26
	lvx		v24,$x00,$key_		# re-pre-load round[1]

	subic		$len,$len,129		# $len-=129
	vcipher		$out0,$out0,v27
	addi		$len,$len,1		# $len-=128 really
	vcipher		$out1,$out1,v27
	vcipher		$out2,$out2,v27
	vcipher		$out3,$out3,v27
	vcipher		$out4,$out4,v27
	vcipher		$out5,$out5,v27
	vcipher		$out6,$out6,v27
	vcipher		$out7,$out7,v27
	lvx		v25,$x10,$key_		# re-pre-load round[2]

	vcipher		$out0,$out0,v28
	lvx_u		$in0,$x00,$inp		# load input
	vcipher		$out1,$out1,v28
	lvx_u		$in1,$x10,$inp
	vcipher		$out2,$out2,v28
	lvx_u		$in2,$x20,$inp
	vcipher		$out3,$out3,v28
	lvx_u		$in3,$x30,$inp
	vcipher		$out4,$out4,v28
	lvx_u		$in4,$x40,$inp
	vcipher		$out5,$out5,v28
	lvx_u		$in5,$x50,$inp
	vcipher		$out6,$out6,v28
	lvx_u		$in6,$x60,$inp
	vcipher		$out7,$out7,v28
	lvx_u		$in7,$x70,$inp
	addi		$inp,$inp,0x80

	vcipher		$out0,$out0,v29
	le?vperm	$in0,$in0,$in0,$inpperm
	vcipher		$out1,$out1,v29
	le?vperm	$in1,$in1,$in1,$inpperm
	vcipher		$out2,$out2,v29
	le?vperm	$in2,$in2,$in2,$inpperm
	vcipher		$out3,$out3,v29
	le?vperm	$in3,$in3,$in3,$inpperm
	vcipher		$out4,$out4,v29
	le?vperm	$in4,$in4,$in4,$inpperm
	vcipher		$out5,$out5,v29
	le?vperm	$in5,$in5,$in5,$inpperm
	vcipher		$out6,$out6,v29
	le?vperm	$in6,$in6,$in6,$inpperm
	vcipher		$out7,$out7,v29
	le?vperm	$in7,$in7,$in7,$inpperm

	add		$inp,$inp,r0		# $inp is adjusted in such
						# way that at exit from the
						# loop inX-in7 are loaded
						# with last "words"
	subfe.		r0,r0,r0		# borrow?-1:0
	vcipher		$out0,$out0,v30
	vxor		$in0,$in0,v31		# xor with last round key
	vcipher		$out1,$out1,v30
	vxor		$in1,$in1,v31
	vcipher		$out2,$out2,v30
	vxor		$in2,$in2,v31
	vcipher		$out3,$out3,v30
	vxor		$in3,$in3,v31
	vcipher		$out4,$out4,v30
	vxor		$in4,$in4,v31
	vcipher		$out5,$out5,v30
	vxor		$in5,$in5,v31
	vcipher		$out6,$out6,v30
	vxor		$in6,$in6,v31
	vcipher		$out7,$out7,v30
	vxor		$in7,$in7,v31

	bne		Lctr32_enc8x_break	# did $len-129 borrow?

	vcipherlast	$in0,$out0,$in0
	vcipherlast	$in1,$out1,$in1
	vadduwm		$out1,$ivec,$one	# counter values ...
	vcipherlast	$in2,$out2,$in2
	vadduwm		$out2,$ivec,$two
	vxor		$out0,$ivec,$rndkey0	# ... xored with rndkey[0]
	vcipherlast	$in3,$out3,$in3
	vadduwm		$out3,$out1,$two
	vxor		$out1,$out1,$rndkey0
	vcipherlast	$in4,$out4,$in4
	vadduwm		$out4,$out2,$two
	vxor		$out2,$out2,$rndkey0
	vcipherlast	$in5,$out5,$in5
	vadduwm		$out5,$out3,$two
	vxor		$out3,$out3,$rndkey0
	vcipherlast	$in6,$out6,$in6
	vadduwm		$out6,$out4,$two
	vxor		$out4,$out4,$rndkey0
	vcipherlast	$in7,$out7,$in7
	vadduwm		$out7,$out5,$two
	vxor		$out5,$out5,$rndkey0
	le?vperm	$in0,$in0,$in0,$inpperm
	vadduwm		$ivec,$out6,$two	# next counter value
	vxor		$out6,$out6,$rndkey0
	le?vperm	$in1,$in1,$in1,$inpperm
	vxor		$out7,$out7,$rndkey0
	mtctr		$rounds

	vcipher		$out0,$out0,v24
	stvx_u		$in0,$x00,$out
	le?vperm	$in2,$in2,$in2,$inpperm
	vcipher		$out1,$out1,v24
	stvx_u		$in1,$x10,$out
	le?vperm	$in3,$in3,$in3,$inpperm
	vcipher		$out2,$out2,v24
	stvx_u		$in2,$x20,$out
	le?vperm	$in4,$in4,$in4,$inpperm
	vcipher		$out3,$out3,v24
	stvx_u		$in3,$x30,$out
	le?vperm	$in5,$in5,$in5,$inpperm
	vcipher		$out4,$out4,v24
	stvx_u		$in4,$x40,$out
	le?vperm	$in6,$in6,$in6,$inpperm
	vcipher		$out5,$out5,v24
	stvx_u		$in5,$x50,$out
	le?vperm	$in7,$in7,$in7,$inpperm
	vcipher		$out6,$out6,v24
	stvx_u		$in6,$x60,$out
	vcipher		$out7,$out7,v24
	stvx_u		$in7,$x70,$out
	addi		$out,$out,0x80

	b		Loop_ctr32_enc8x_middle

.align	5
Lctr32_enc8x_break:
	cmpwi		$len,-0x60
	blt		Lctr32_enc8x_one
	nop
	beq		Lctr32_enc8x_two
	cmpwi		$len,-0x40
	blt		Lctr32_enc8x_three
	nop
	beq		Lctr32_enc8x_four
	cmpwi		$len,-0x20
	blt		Lctr32_enc8x_five
	nop
	beq		Lctr32_enc8x_six
	cmpwi		$len,0x00
	blt		Lctr32_enc8x_seven

Lctr32_enc8x_eight:
	vcipherlast	$out0,$out0,$in0
	vcipherlast	$out1,$out1,$in1
	vcipherlast	$out2,$out2,$in2
	vcipherlast	$out3,$out3,$in3
	vcipherlast	$out4,$out4,$in4
	vcipherlast	$out5,$out5,$in5
	vcipherlast	$out6,$out6,$in6
	vcipherlast	$out7,$out7,$in7

	le?vperm	$out0,$out0,$out0,$inpperm
	le?vperm	$out1,$out1,$out1,$inpperm
	stvx_u		$out0,$x00,$out
	le?vperm	$out2,$out2,$out2,$inpperm
	stvx_u		$out1,$x10,$out
	le?vperm	$out3,$out3,$out3,$inpperm
	stvx_u		$out2,$x20,$out
	le?vperm	$out4,$out4,$out4,$inpperm
	stvx_u		$out3,$x30,$out
	le?vperm	$out5,$out5,$out5,$inpperm
	stvx_u		$out4,$x40,$out
	le?vperm	$out6,$out6,$out6,$inpperm
	stvx_u		$out5,$x50,$out
	le?vperm	$out7,$out7,$out7,$inpperm
	stvx_u		$out6,$x60,$out
	stvx_u		$out7,$x70,$out
	addi		$out,$out,0x80
	b		Lctr32_enc8x_done

.align	5
Lctr32_enc8x_seven:
	vcipherlast	$out0,$out0,$in1
	vcipherlast	$out1,$out1,$in2
	vcipherlast	$out2,$out2,$in3
	vcipherlast	$out3,$out3,$in4
	vcipherlast	$out4,$out4,$in5
	vcipherlast	$out5,$out5,$in6
	vcipherlast	$out6,$out6,$in7

	le?vperm	$out0,$out0,$out0,$inpperm
	le?vperm	$out1,$out1,$out1,$inpperm
	stvx_u		$out0,$x00,$out
	le?vperm	$out2,$out2,$out2,$inpperm
	stvx_u		$out1,$x10,$out
	le?vperm	$out3,$out3,$out3,$inpperm
	stvx_u		$out2,$x20,$out
	le?vperm	$out4,$out4,$out4,$inpperm
	stvx_u		$out3,$x30,$out
	le?vperm	$out5,$out5,$out5,$inpperm
	stvx_u		$out4,$x40,$out
	le?vperm	$out6,$out6,$out6,$inpperm
	stvx_u		$out5,$x50,$out
	stvx_u		$out6,$x60,$out
	addi		$out,$out,0x70
	b		Lctr32_enc8x_done

.align	5
Lctr32_enc8x_six:
	vcipherlast	$out0,$out0,$in2
	vcipherlast	$out1,$out1,$in3
	vcipherlast	$out2,$out2,$in4
	vcipherlast	$out3,$out3,$in5
	vcipherlast	$out4,$out4,$in6
	vcipherlast	$out5,$out5,$in7

	le?vperm	$out0,$out0,$out0,$inpperm
	le?vperm	$out1,$out1,$out1,$inpperm
	stvx_u		$out0,$x00,$out
	le?vperm	$out2,$out2,$out2,$inpperm
	stvx_u		$out1,$x10,$out
	le?vperm	$out3,$out3,$out3,$inpperm
	stvx_u		$out2,$x20,$out
	le?vperm	$out4,$out4,$out4,$inpperm
	stvx_u		$out3,$x30,$out
	le?vperm	$out5,$out5,$out5,$inpperm
	stvx_u		$out4,$x40,$out
	stvx_u		$out5,$x50,$out
	addi		$out,$out,0x60
	b		Lctr32_enc8x_done

.align	5
Lctr32_enc8x_five:
	vcipherlast	$out0,$out0,$in3
	vcipherlast	$out1,$out1,$in4
	vcipherlast	$out2,$out2,$in5
	vcipherlast	$out3,$out3,$in6
	vcipherlast	$out4,$out4,$in7

	le?vperm	$out0,$out0,$out0,$inpperm
	le?vperm	$out1,$out1,$out1,$inpperm
	stvx_u		$out0,$x00,$out
	le?vperm	$out2,$out2,$out2,$inpperm
	stvx_u		$out1,$x10,$out
	le?vperm	$out3,$out3,$out3,$inpperm
	stvx_u		$out2,$x20,$out
	le?vperm	$out4,$out4,$out4,$inpperm
	stvx_u		$out3,$x30,$out
	stvx_u		$out4,$x40,$out
	addi		$out,$out,0x50
	b		Lctr32_enc8x_done

.align	5
Lctr32_enc8x_four:
	vcipherlast	$out0,$out0,$in4
	vcipherlast	$out1,$out1,$in5
	vcipherlast	$out2,$out2,$in6
	vcipherlast	$out3,$out3,$in7

	le?vperm	$out0,$out0,$out0,$inpperm
	le?vperm	$out1,$out1,$out1,$inpperm
	stvx_u		$out0,$x00,$out
	le?vperm	$out2,$out2,$out2,$inpperm
	stvx_u		$out1,$x10,$out
	le?vperm	$out3,$out3,$out3,$inpperm
	stvx_u		$out2,$x20,$out
	stvx_u		$out3,$x30,$out
	addi		$out,$out,0x40
	b		Lctr32_enc8x_done

.align	5
Lctr32_enc8x_three:
	vcipherlast	$out0,$out0,$in5
	vcipherlast	$out1,$out1,$in6
	vcipherlast	$out2,$out2,$in7

	le?vperm	$out0,$out0,$out0,$inpperm
	le?vperm	$out1,$out1,$out1,$inpperm
	stvx_u		$out0,$x00,$out
	le?vperm	$out2,$out2,$out2,$inpperm
	stvx_u		$out1,$x10,$out
	stvx_u		$out2,$x20,$out
	addi		$out,$out,0x30
	b		Lcbc_dec8x_done

.align	5
Lctr32_enc8x_two:
	vcipherlast	$out0,$out0,$in6
	vcipherlast	$out1,$out1,$in7

	le?vperm	$out0,$out0,$out0,$inpperm
	le?vperm	$out1,$out1,$out1,$inpperm
	stvx_u		$out0,$x00,$out
	stvx_u		$out1,$x10,$out
	addi		$out,$out,0x20
	b		Lcbc_dec8x_done

.align	5
Lctr32_enc8x_one:
	vcipherlast	$out0,$out0,$in7

	le?vperm	$out0,$out0,$out0,$inpperm
	stvx_u		$out0,0,$out
	addi		$out,$out,0x10

Lctr32_enc8x_done:
	li		r10,`$FRAME+15`
	li		r11,`$FRAME+31`
	stvx		$inpperm,r10,$sp	# wipe copies of round keys
	addi		r10,r10,32
	stvx		$inpperm,r11,$sp
	addi		r11,r11,32
	stvx		$inpperm,r10,$sp
	addi		r10,r10,32
	stvx		$inpperm,r11,$sp
	addi		r11,r11,32
	stvx		$inpperm,r10,$sp
	addi		r10,r10,32
	stvx		$inpperm,r11,$sp
	addi		r11,r11,32
	stvx		$inpperm,r10,$sp
	addi		r10,r10,32
	stvx		$inpperm,r11,$sp
	addi		r11,r11,32

	mtspr		256,$vrsave
	lvx		v20,r10,$sp		# ABI says so
	addi		r10,r10,32
	lvx		v21,r11,$sp
	addi		r11,r11,32
	lvx		v22,r10,$sp
	addi		r10,r10,32
	lvx		v23,r11,$sp
	addi		r11,r11,32
	lvx		v24,r10,$sp
	addi		r10,r10,32
	lvx		v25,r11,$sp
	addi		r11,r11,32
	lvx		v26,r10,$sp
	addi		r10,r10,32
	lvx		v27,r11,$sp
	addi		r11,r11,32
	lvx		v28,r10,$sp
	addi		r10,r10,32
	lvx		v29,r11,$sp
	addi		r11,r11,32
	lvx		v30,r10,$sp
	lvx		v31,r11,$sp
	$POP		r26,`$FRAME+21*16+0*$SIZE_T`($sp)
	$POP		r27,`$FRAME+21*16+1*$SIZE_T`($sp)
	$POP		r28,`$FRAME+21*16+2*$SIZE_T`($sp)
	$POP		r29,`$FRAME+21*16+3*$SIZE_T`($sp)
	$POP		r30,`$FRAME+21*16+4*$SIZE_T`($sp)
	$POP		r31,`$FRAME+21*16+5*$SIZE_T`($sp)
	addi		$sp,$sp,`$FRAME+21*16+6*$SIZE_T`
	blr
	.long		0
	.byte		0,12,0x04,0,0x80,6,6,0
	.long		0
.size	.${prefix}_ctr32_encrypt_blocks,.-.${prefix}_ctr32_encrypt_blocks
___
}}	}}}

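# The filter below post-processes $code: instructions prefixed with '?'
# are endian-sensitive and get rewritten for little-endian flavours.
# For example (illustrative, derived from the regexes that follow):
#
#	?lvsr  a,b,c          becomes  lvsl   a,b,c
#	?vperm vD,vA,vB,vC    becomes  vperm  vD,vB,vA,vC
#	?vsldoi vD,vA,vB,N    becomes  vsldoi vD,vB,vA,16-N
#
# while "le?" / "be?" prefixes simply enable a line for the matching
# endianness and comment it out for the other.
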
my $consts=1;
foreach(split("\n",$code)) {
	s/\`([^\`]*)\`/eval($1)/geo;

	# constants table endian-specific conversion
	if ($consts && m/\.(long|byte)\s+(.+)\s+(\?[a-z]*)$/o) {
	    my $conv=$3;
	    my @bytes=();

	    # convert to endian-agnostic format
	    if ($1 eq "long") {
		foreach (split(/,\s*/,$2)) {
		    my $l = /^0/?oct:int;
		    push @bytes,($l>>24)&0xff,($l>>16)&0xff,($l>>8)&0xff,$l&0xff;
		}
	    } else {
		@bytes = map(/^0/?oct:int,split(/,\s*/,$2));
	    }

	    # little-endian conversion
	    if ($flavour =~ /le$/o) {
		SWITCH: for($conv)  {
		    /\?inv/ && do   { @bytes=map($_^0xf,@bytes); last; };
		    /\?rev/ && do   { @bytes=reverse(@bytes);    last; };
		}
	    }

	    # emit
	    print ".byte\t",join(',',map (sprintf("0x%02x",$_),@bytes)),"\n";
	    next;
	}
	$consts=0 if (m/Lconsts:/o);	# end of table

	# instructions prefixed with '?' are endian-specific and need
	# to be adjusted accordingly...
	if ($flavour =~ /le$/o) {	# little-endian
	    s/le\?//o		or
	    s/be\?/#be#/o	or
	    s/\?lvsr/lvsl/o	or
	    s/\?lvsl/lvsr/o	or
	    s/\?(vperm\s+v[0-9]+,\s*)(v[0-9]+,\s*)(v[0-9]+,\s*)(v[0-9]+)/$1$3$2$4/o or
	    s/\?(vsldoi\s+v[0-9]+,\s*)(v[0-9]+,)\s*(v[0-9]+,\s*)([0-9]+)/$1$3$2 16-$4/o or
	    s/\?(vspltw\s+v[0-9]+,\s*)(v[0-9]+,)\s*([0-9])/$1$2 3-$3/o;
	} else {			# big-endian
	    s/le\?/#le#/o	or
	    s/be\?//o		or
	    s/\?([a-z]+)/$1/o;
	}

	print $_,"\n";
}

close STDOUT;