#!/usr/bin/env perl
#
# ====================================================================
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
# project. The module is, however, dual licensed under OpenSSL and
# CRYPTOGAMS licenses depending on where you obtain it. For further
# details see http://www.openssl.org/~appro/cryptogams/.
# ====================================================================
#
# This module implements support for AES instructions as per PowerISA
# specification version 2.07, first implemented by the POWER8 processor.
# The module is endian-agnostic in the sense that it supports both big-
# and little-endian cases. Data alignment in parallelizable modes is
# handled with VSX loads and stores, which implies the MSR.VSX flag being
# set. It should also be noted that the ISA specification doesn't prohibit
# alignment exceptions for these instructions on page boundaries.
# Initially alignment was handled in a pure AltiVec/VMX way [with data
# aligned programmatically, which in turn guarantees exception-free
# execution], but that turned out to hamper performance when vcipher
# instructions are interleaved. It's reckoned that eventual misalignment
# penalties at page boundaries are on average lower than the additional
# overhead of the pure AltiVec approach.
#
# May 2016
#
# Added XTS subroutine: a 9x improvement on little-endian and a 12x
# improvement on big-endian systems was measured.
#
######################################################################
# Current large-block performance in cycles per byte processed with
# 128-bit key (less is better).
#
#               CBC en-/decrypt    CTR     XTS
# POWER8[le]    3.96/0.72          0.74    1.1
# POWER8[be]    3.75/0.65          0.66    1.0

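# A minimal usage sketch (editor's note, assuming the usual perlasm
# convention; flavour names are defined by ppc-xlate.pl and the output
# file name here is illustrative):
#
#	perl aesp8-ppc.pl linux64le aesp8-ppc.s
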
$flavour = shift;

if ($flavour =~ /64/) {
	$SIZE_T	=8;
	$LRSAVE	=2*$SIZE_T;
	$STU	="stdu";
	$POP	="ld";
	$PUSH	="std";
	$UCMP	="cmpld";
	$SHL	="sldi";
} elsif ($flavour =~ /32/) {
	$SIZE_T	=4;
	$LRSAVE	=$SIZE_T;
	$STU	="stwu";
	$POP	="lwz";
	$PUSH	="stw";
	$UCMP	="cmplw";
	$SHL	="slwi";
} else { die "nonsense $flavour"; }

$LITTLE_ENDIAN = ($flavour=~/le$/) ? $SIZE_T : 0;

$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or
( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or
die "can't locate ppc-xlate.pl";

open STDOUT,"| $^X $xlate $flavour ".shift or die "can't call $xlate: $!";

$FRAME=8*$SIZE_T;
$prefix="aes_p8";

$sp="r1";
$vrsave="r12";

#########################################################################
{{{ # Key setup procedures #
my ($inp,$bits,$out,$ptr,$cnt,$rounds)=map("r$_",(3..8));
my ($zero,$in0,$in1,$key,$rcon,$mask,$tmp)=map("v$_",(0..6));
my ($stage,$outperm,$outmask,$outhead,$outtail)=map("v$_",(7..11));

$code.=<<___;
.machine "any"

.text

.align 7
rcon:
.long 0x01000000, 0x01000000, 0x01000000, 0x01000000 ?rev
.long 0x1b000000, 0x1b000000, 0x1b000000, 0x1b000000 ?rev
.long 0x0d0e0f0c, 0x0d0e0f0c, 0x0d0e0f0c, 0x0d0e0f0c ?rev
.long 0,0,0,0 ?asis
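# Editor's note on the table above (a reading of the code, not upstream
# commentary): the first two vectors are AES round constants -- 0x01,
# doubled in-register with vadduwm each iteration, and 0x1b, the value
# the round constant takes after 0x80 is doubled in GF(2^8). The third
# vector (bytes 0x0d,0x0e,0x0f,0x0c per word) is a vperm index pattern
# that rotates the last word of the previous round key left by one byte
# and splats it to all four words, i.e. FIPS-197 RotWord().
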
Lconsts:
mflr r0
bcl 20,31,\$+4
mflr $ptr # distance between . and rcon
addi $ptr,$ptr,-0x48
mtlr r0
blr
.long 0
.byte 0,12,0x14,0,0,0,0,0
.asciz "AES for PowerISA 2.07, CRYPTOGAMS by <appro\@openssl.org>"

.globl .${prefix}_set_encrypt_key
.align 5
.${prefix}_set_encrypt_key:
Lset_encrypt_key:
mflr r11
$PUSH r11,$LRSAVE($sp)

li $ptr,-1
${UCMP}i $inp,0
beq- Lenc_key_abort # if ($inp==0) return -1;
${UCMP}i $out,0
beq- Lenc_key_abort # if ($out==0) return -1;
li $ptr,-2
cmpwi $bits,128
blt- Lenc_key_abort
cmpwi $bits,256
bgt- Lenc_key_abort
andi. r0,$bits,0x3f
bne- Lenc_key_abort

lis r0,0xfff0
mfspr $vrsave,256
mtspr 256,r0

bl Lconsts
mtlr r11

neg r9,$inp
lvx $in0,0,$inp
addi $inp,$inp,15 # 15 is not a typo
lvsr $key,0,r9 # borrow $key
li r8,0x20
cmpwi $bits,192
lvx $in1,0,$inp
le?vspltisb $mask,0x0f # borrow $mask
lvx $rcon,0,$ptr
le?vxor $key,$key,$mask # adjust for byte swap
lvx $mask,r8,$ptr
addi $ptr,$ptr,0x10
vperm $in0,$in0,$in1,$key # align [and byte swap in LE]
li $cnt,8
vxor $zero,$zero,$zero
mtctr $cnt

?lvsr $outperm,0,$out
vspltisb $outmask,-1
lvx $outhead,0,$out
?vperm $outmask,$zero,$outmask,$outperm

blt Loop128
addi $inp,$inp,8
beq L192
addi $inp,$inp,8
b L256

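# Editor's sketch of what Loop128 computes (the FIPS-197 AES-128 key
# expansion, one 128-bit round key per iteration):
#
#   w[i] = w[i-4] ^ SubWord(RotWord(w[i-1])) ^ rcon   (i % 4 == 0)
#   w[i] = w[i-4] ^ w[i-1]                            (otherwise)
#
# vcipherlast on the rotated-and-splatted word performs SubWord: with
# all four words identical, ShiftRows is a no-op and the trailing
# AddRoundKey folds in rcon. The vsldoi/vxor chain then accumulates
# the w[i-4] terms across the whole vector.
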
.align 4
Loop128:
vperm $key,$in0,$in0,$mask # rotate-n-splat
vsldoi $tmp,$zero,$in0,12 # >>32
vperm $outtail,$in0,$in0,$outperm # rotate
vsel $stage,$outhead,$outtail,$outmask
vmr $outhead,$outtail
vcipherlast $key,$key,$rcon
stvx $stage,0,$out
addi $out,$out,16

vxor $in0,$in0,$tmp
vsldoi $tmp,$zero,$tmp,12 # >>32
vxor $in0,$in0,$tmp
vsldoi $tmp,$zero,$tmp,12 # >>32
vxor $in0,$in0,$tmp
vadduwm $rcon,$rcon,$rcon
vxor $in0,$in0,$key
bdnz Loop128

lvx $rcon,0,$ptr # last two round keys

vperm $key,$in0,$in0,$mask # rotate-n-splat
vsldoi $tmp,$zero,$in0,12 # >>32
vperm $outtail,$in0,$in0,$outperm # rotate
vsel $stage,$outhead,$outtail,$outmask
vmr $outhead,$outtail
vcipherlast $key,$key,$rcon
stvx $stage,0,$out
addi $out,$out,16

vxor $in0,$in0,$tmp
vsldoi $tmp,$zero,$tmp,12 # >>32
vxor $in0,$in0,$tmp
vsldoi $tmp,$zero,$tmp,12 # >>32
vxor $in0,$in0,$tmp
vadduwm $rcon,$rcon,$rcon
vxor $in0,$in0,$key

vperm $key,$in0,$in0,$mask # rotate-n-splat
vsldoi $tmp,$zero,$in0,12 # >>32
vperm $outtail,$in0,$in0,$outperm # rotate
vsel $stage,$outhead,$outtail,$outmask
vmr $outhead,$outtail
vcipherlast $key,$key,$rcon
stvx $stage,0,$out
addi $out,$out,16

vxor $in0,$in0,$tmp
vsldoi $tmp,$zero,$tmp,12 # >>32
vxor $in0,$in0,$tmp
vsldoi $tmp,$zero,$tmp,12 # >>32
vxor $in0,$in0,$tmp
vxor $in0,$in0,$key
vperm $outtail,$in0,$in0,$outperm # rotate
vsel $stage,$outhead,$outtail,$outmask
vmr $outhead,$outtail
stvx $stage,0,$out

addi $inp,$out,15 # 15 is not a typo
addi $out,$out,0x50

li $rounds,10
b Ldone

.align 4
L192:
lvx $tmp,0,$inp
li $cnt,4
vperm $outtail,$in0,$in0,$outperm # rotate
vsel $stage,$outhead,$outtail,$outmask
vmr $outhead,$outtail
stvx $stage,0,$out
addi $out,$out,16
vperm $in1,$in1,$tmp,$key # align [and byte swap in LE]
vspltisb $key,8 # borrow $key
mtctr $cnt
vsububm $mask,$mask,$key # adjust the mask

Loop192:
vperm $key,$in1,$in1,$mask # rotate-n-splat
vsldoi $tmp,$zero,$in0,12 # >>32
vcipherlast $key,$key,$rcon

vxor $in0,$in0,$tmp
vsldoi $tmp,$zero,$tmp,12 # >>32
vxor $in0,$in0,$tmp
vsldoi $tmp,$zero,$tmp,12 # >>32
vxor $in0,$in0,$tmp

vsldoi $stage,$zero,$in1,8
vspltw $tmp,$in0,3
vxor $tmp,$tmp,$in1
vsldoi $in1,$zero,$in1,12 # >>32
vadduwm $rcon,$rcon,$rcon
vxor $in1,$in1,$tmp
vxor $in0,$in0,$key
vxor $in1,$in1,$key
vsldoi $stage,$stage,$in0,8

vperm $key,$in1,$in1,$mask # rotate-n-splat
vsldoi $tmp,$zero,$in0,12 # >>32
vperm $outtail,$stage,$stage,$outperm # rotate
vsel $stage,$outhead,$outtail,$outmask
vmr $outhead,$outtail
vcipherlast $key,$key,$rcon
stvx $stage,0,$out
addi $out,$out,16

vsldoi $stage,$in0,$in1,8
vxor $in0,$in0,$tmp
vsldoi $tmp,$zero,$tmp,12 # >>32
vperm $outtail,$stage,$stage,$outperm # rotate
vsel $stage,$outhead,$outtail,$outmask
vmr $outhead,$outtail
vxor $in0,$in0,$tmp
vsldoi $tmp,$zero,$tmp,12 # >>32
vxor $in0,$in0,$tmp
stvx $stage,0,$out
addi $out,$out,16

vspltw $tmp,$in0,3
vxor $tmp,$tmp,$in1
vsldoi $in1,$zero,$in1,12 # >>32
vadduwm $rcon,$rcon,$rcon
vxor $in1,$in1,$tmp
vxor $in0,$in0,$key
vxor $in1,$in1,$key
vperm $outtail,$in0,$in0,$outperm # rotate
vsel $stage,$outhead,$outtail,$outmask
vmr $outhead,$outtail
stvx $stage,0,$out
addi $inp,$out,15 # 15 is not a typo
addi $out,$out,16
bdnz Loop192

li $rounds,12
addi $out,$out,0x20
b Ldone

.align 4
L256:
lvx $tmp,0,$inp
li $cnt,7
li $rounds,14
vperm $outtail,$in0,$in0,$outperm # rotate
vsel $stage,$outhead,$outtail,$outmask
vmr $outhead,$outtail
stvx $stage,0,$out
addi $out,$out,16
vperm $in1,$in1,$tmp,$key # align [and byte swap in LE]
mtctr $cnt

Loop256:
vperm $key,$in1,$in1,$mask # rotate-n-splat
vsldoi $tmp,$zero,$in0,12 # >>32
vperm $outtail,$in1,$in1,$outperm # rotate
vsel $stage,$outhead,$outtail,$outmask
vmr $outhead,$outtail
vcipherlast $key,$key,$rcon
stvx $stage,0,$out
addi $out,$out,16

vxor $in0,$in0,$tmp
vsldoi $tmp,$zero,$tmp,12 # >>32
vxor $in0,$in0,$tmp
vsldoi $tmp,$zero,$tmp,12 # >>32
vxor $in0,$in0,$tmp
vadduwm $rcon,$rcon,$rcon
vxor $in0,$in0,$key
vperm $outtail,$in0,$in0,$outperm # rotate
vsel $stage,$outhead,$outtail,$outmask
vmr $outhead,$outtail
stvx $stage,0,$out
addi $inp,$out,15 # 15 is not a typo
addi $out,$out,16
bdz Ldone

vspltw $key,$in0,3 # just splat
vsldoi $tmp,$zero,$in1,12 # >>32
vsbox $key,$key

vxor $in1,$in1,$tmp
vsldoi $tmp,$zero,$tmp,12 # >>32
vxor $in1,$in1,$tmp
vsldoi $tmp,$zero,$tmp,12 # >>32
vxor $in1,$in1,$tmp

vxor $in1,$in1,$key
b Loop256

.align 4
Ldone:
lvx $in1,0,$inp # redundant in aligned case
vsel $in1,$outhead,$in1,$outmask
stvx $in1,0,$inp
li $ptr,0
mtspr 256,$vrsave
stw $rounds,0($out)

Lenc_key_abort:
mr r3,$ptr
blr
.long 0
.byte 0,12,0x14,1,0,0,3,0
.long 0
.size .${prefix}_set_encrypt_key,.-.${prefix}_set_encrypt_key

.globl .${prefix}_set_decrypt_key
.align 5
.${prefix}_set_decrypt_key:
$STU $sp,-$FRAME($sp)
mflr r10
$PUSH r10,$FRAME+$LRSAVE($sp)
bl Lset_encrypt_key
mtlr r10

cmpwi r3,0
bne- Ldec_key_abort

slwi $cnt,$rounds,4
subi $inp,$out,240 # first round key
srwi $rounds,$rounds,1
add $out,$inp,$cnt # last round key
mtctr $rounds

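# Editor's note: the loop below swaps 16-byte round keys end-for-end
# (first with last and so on, rounds/2 pairs), so the decrypt path can
# walk the schedule in the same forward direction as encrypt while
# vncipher consumes the keys in reverse order.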
Ldeckey:
lwz r0, 0($inp)
lwz r6, 4($inp)
lwz r7, 8($inp)
lwz r8, 12($inp)
addi $inp,$inp,16
lwz r9, 0($out)
lwz r10,4($out)
lwz r11,8($out)
lwz r12,12($out)
stw r0, 0($out)
stw r6, 4($out)
stw r7, 8($out)
stw r8, 12($out)
subi $out,$out,16
stw r9, -16($inp)
stw r10,-12($inp)
stw r11,-8($inp)
stw r12,-4($inp)
bdnz Ldeckey

xor r3,r3,r3 # return value
Ldec_key_abort:
addi $sp,$sp,$FRAME
blr
.long 0
.byte 0,12,4,1,0x80,0,3,0
.long 0
.size .${prefix}_set_decrypt_key,.-.${prefix}_set_decrypt_key
___
}}}
#########################################################################
{{{ # Single block en- and decrypt procedures #
sub gen_block () {
my $dir = shift;
my $n = $dir eq "de" ? "n" : "";
my ($inp,$out,$key,$rounds,$idx)=map("r$_",(3..7));

$code.=<<___;
.globl .${prefix}_${dir}crypt
.align 5
.${prefix}_${dir}crypt:
lwz $rounds,240($key)
lis r0,0xfc00
mfspr $vrsave,256
li $idx,15 # 15 is not a typo
mtspr 256,r0

lvx v0,0,$inp
neg r11,$out
lvx v1,$idx,$inp
lvsl v2,0,$inp # inpperm
le?vspltisb v4,0x0f
?lvsl v3,0,r11 # outperm
le?vxor v2,v2,v4
li $idx,16
vperm v0,v0,v1,v2 # align [and byte swap in LE]
lvx v1,0,$key
?lvsl v5,0,$key # keyperm
srwi $rounds,$rounds,1
lvx v2,$idx,$key
addi $idx,$idx,16
subi $rounds,$rounds,1
?vperm v1,v1,v2,v5 # align round key

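# Editor's note: lvx ignores the low four address bits, so a key
# schedule that is not 16-byte aligned is fetched as two quadwords and
# realigned with vperm through v5 (the "?"-prefixed instructions are
# endian-sensitive forms that ppc-xlate.pl rewrites for little-endian).
# The loop below runs rounds/2-1 times applying two rounds per
# iteration; the final round pair is peeled off after the loop.
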
vxor v0,v0,v1
lvx v1,$idx,$key
addi $idx,$idx,16
mtctr $rounds

Loop_${dir}c:
?vperm v2,v2,v1,v5
v${n}cipher v0,v0,v2
lvx v2,$idx,$key
addi $idx,$idx,16
?vperm v1,v1,v2,v5
v${n}cipher v0,v0,v1
lvx v1,$idx,$key
addi $idx,$idx,16
bdnz Loop_${dir}c

?vperm v2,v2,v1,v5
v${n}cipher v0,v0,v2
lvx v2,$idx,$key
?vperm v1,v1,v2,v5
v${n}cipherlast v0,v0,v1

vspltisb v2,-1
vxor v1,v1,v1
li $idx,15 # 15 is not a typo
?vperm v2,v1,v2,v3 # outmask
le?vxor v3,v3,v4
lvx v1,0,$out # outhead
vperm v0,v0,v0,v3 # rotate [and byte swap in LE]
vsel v1,v1,v0,v2
lvx v4,$idx,$out
stvx v1,0,$out
vsel v0,v0,v4,v2
stvx v0,$idx,$out

mtspr 256,$vrsave
blr
.long 0
.byte 0,12,0x14,0,0,0,3,0
.long 0
.size .${prefix}_${dir}crypt,.-.${prefix}_${dir}crypt
___
}
&gen_block("en");
&gen_block("de");
}}}
#########################################################################
{{{ # CBC en- and decrypt procedures #
my ($inp,$out,$len,$key,$ivp,$enc,$rounds,$idx)=map("r$_",(3..10));
my ($rndkey0,$rndkey1,$inout,$tmp)= map("v$_",(0..3));
my ($ivec,$inptail,$inpperm,$outhead,$outperm,$outmask,$keyperm)=
map("v$_",(4..10));
$code.=<<___;
.globl .${prefix}_cbc_encrypt
.align 5
.${prefix}_cbc_encrypt:
${UCMP}i $len,16
bltlr-

cmpwi $enc,0 # test direction
lis r0,0xffe0
mfspr $vrsave,256
mtspr 256,r0

li $idx,15
vxor $rndkey0,$rndkey0,$rndkey0
le?vspltisb $tmp,0x0f

lvx $ivec,0,$ivp # load [unaligned] iv
lvsl $inpperm,0,$ivp
lvx $inptail,$idx,$ivp
le?vxor $inpperm,$inpperm,$tmp
vperm $ivec,$ivec,$inptail,$inpperm

neg r11,$inp
?lvsl $keyperm,0,$key # prepare for unaligned key
lwz $rounds,240($key)

lvsr $inpperm,0,r11 # prepare for unaligned load
lvx $inptail,0,$inp
addi $inp,$inp,15 # 15 is not a typo
le?vxor $inpperm,$inpperm,$tmp

?lvsr $outperm,0,$out # prepare for unaligned store
vspltisb $outmask,-1
lvx $outhead,0,$out
?vperm $outmask,$rndkey0,$outmask,$outperm
le?vxor $outperm,$outperm,$tmp

srwi $rounds,$rounds,1
li $idx,16
subi $rounds,$rounds,1
beq Lcbc_dec

Lcbc_enc:
vmr $inout,$inptail
lvx $inptail,0,$inp
addi $inp,$inp,16
mtctr $rounds
subi $len,$len,16 # len-=16

lvx $rndkey0,0,$key
vperm $inout,$inout,$inptail,$inpperm
lvx $rndkey1,$idx,$key
addi $idx,$idx,16
?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
vxor $inout,$inout,$rndkey0
lvx $rndkey0,$idx,$key
addi $idx,$idx,16
vxor $inout,$inout,$ivec

Loop_cbc_enc:
?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
vcipher $inout,$inout,$rndkey1
lvx $rndkey1,$idx,$key
addi $idx,$idx,16
?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
vcipher $inout,$inout,$rndkey0
lvx $rndkey0,$idx,$key
addi $idx,$idx,16
bdnz Loop_cbc_enc

?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
vcipher $inout,$inout,$rndkey1
lvx $rndkey1,$idx,$key
li $idx,16
?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
vcipherlast $ivec,$inout,$rndkey0
${UCMP}i $len,16

vperm $tmp,$ivec,$ivec,$outperm
vsel $inout,$outhead,$tmp,$outmask
vmr $outhead,$tmp
stvx $inout,0,$out
addi $out,$out,16
bge Lcbc_enc

b Lcbc_done

.align 4
Lcbc_dec:
${UCMP}i $len,128
bge _aesp8_cbc_decrypt8x
vmr $tmp,$inptail
lvx $inptail,0,$inp
addi $inp,$inp,16
mtctr $rounds
subi $len,$len,16 # len-=16

lvx $rndkey0,0,$key
vperm $tmp,$tmp,$inptail,$inpperm
lvx $rndkey1,$idx,$key
addi $idx,$idx,16
?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
vxor $inout,$tmp,$rndkey0
lvx $rndkey0,$idx,$key
addi $idx,$idx,16

Loop_cbc_dec:
?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
vncipher $inout,$inout,$rndkey1
lvx $rndkey1,$idx,$key
addi $idx,$idx,16
?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
vncipher $inout,$inout,$rndkey0
lvx $rndkey0,$idx,$key
addi $idx,$idx,16
bdnz Loop_cbc_dec

?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
vncipher $inout,$inout,$rndkey1
lvx $rndkey1,$idx,$key
li $idx,16
?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
vncipherlast $inout,$inout,$rndkey0
${UCMP}i $len,16

vxor $inout,$inout,$ivec
vmr $ivec,$tmp
vperm $tmp,$inout,$inout,$outperm
vsel $inout,$outhead,$tmp,$outmask
vmr $outhead,$tmp
stvx $inout,0,$out
addi $out,$out,16
bge Lcbc_dec

Lcbc_done:
addi $out,$out,-1
lvx $inout,0,$out # redundant in aligned case
vsel $inout,$outhead,$inout,$outmask
stvx $inout,0,$out

neg $enc,$ivp # write [unaligned] iv
li $idx,15 # 15 is not a typo
vxor $rndkey0,$rndkey0,$rndkey0
vspltisb $outmask,-1
le?vspltisb $tmp,0x0f
?lvsl $outperm,0,$enc
?vperm $outmask,$rndkey0,$outmask,$outperm
le?vxor $outperm,$outperm,$tmp
lvx $outhead,0,$ivp
vperm $ivec,$ivec,$ivec,$outperm
vsel $inout,$outhead,$ivec,$outmask
lvx $inptail,$idx,$ivp
stvx $inout,0,$ivp
vsel $inout,$ivec,$inptail,$outmask
stvx $inout,$idx,$ivp

mtspr 256,$vrsave
blr
.long 0
.byte 0,12,0x14,0,0,0,6,0
.long 0
___
#########################################################################
{{ # Optimized CBC decrypt procedure #
my $key_="r11";
my ($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,8,26..31));
$x00=0 if ($flavour =~ /osx/);
my ($in0, $in1, $in2, $in3, $in4, $in5, $in6, $in7 )=map("v$_",(0..3,10..13));
my ($out0,$out1,$out2,$out3,$out4,$out5,$out6,$out7)=map("v$_",(14..21));
my $rndkey0="v23"; # v24-v25 rotating buffer for first round keys
# v26-v31 last 6 round keys
my ($tmp,$keyperm)=($in3,$in4); # aliases with "caller", redundant assignment

$code.=<<___;
.align 5
_aesp8_cbc_decrypt8x:
$STU $sp,-`($FRAME+21*16+6*$SIZE_T)`($sp)
li r10,`$FRAME+8*16+15`
li r11,`$FRAME+8*16+31`
stvx v20,r10,$sp # ABI says so
addi r10,r10,32
stvx v21,r11,$sp
addi r11,r11,32
stvx v22,r10,$sp
addi r10,r10,32
stvx v23,r11,$sp
addi r11,r11,32
stvx v24,r10,$sp
addi r10,r10,32
stvx v25,r11,$sp
addi r11,r11,32
stvx v26,r10,$sp
addi r10,r10,32
stvx v27,r11,$sp
addi r11,r11,32
stvx v28,r10,$sp
addi r10,r10,32
stvx v29,r11,$sp
addi r11,r11,32
stvx v30,r10,$sp
stvx v31,r11,$sp
li r0,-1
stw $vrsave,`$FRAME+21*16-4`($sp) # save vrsave
li $x10,0x10
$PUSH r26,`$FRAME+21*16+0*$SIZE_T`($sp)
li $x20,0x20
$PUSH r27,`$FRAME+21*16+1*$SIZE_T`($sp)
li $x30,0x30
$PUSH r28,`$FRAME+21*16+2*$SIZE_T`($sp)
li $x40,0x40
$PUSH r29,`$FRAME+21*16+3*$SIZE_T`($sp)
li $x50,0x50
$PUSH r30,`$FRAME+21*16+4*$SIZE_T`($sp)
li $x60,0x60
$PUSH r31,`$FRAME+21*16+5*$SIZE_T`($sp)
li $x70,0x70
mtspr 256,r0

subi $rounds,$rounds,3 # -4 in total
subi $len,$len,128 # bias

lvx $rndkey0,$x00,$key # load key schedule
lvx v30,$x10,$key
addi $key,$key,0x20
lvx v31,$x00,$key
?vperm $rndkey0,$rndkey0,v30,$keyperm
addi $key_,$sp,$FRAME+15
mtctr $rounds

Load_cbc_dec_key:
?vperm v24,v30,v31,$keyperm
lvx v30,$x10,$key
addi $key,$key,0x20
stvx v24,$x00,$key_ # off-load round[1]
?vperm v25,v31,v30,$keyperm
lvx v31,$x00,$key
stvx v25,$x10,$key_ # off-load round[2]
addi $key_,$key_,0x20
bdnz Load_cbc_dec_key

lvx v26,$x10,$key
?vperm v24,v30,v31,$keyperm
lvx v27,$x20,$key
stvx v24,$x00,$key_ # off-load round[3]
?vperm v25,v31,v26,$keyperm
lvx v28,$x30,$key
stvx v25,$x10,$key_ # off-load round[4]
addi $key_,$sp,$FRAME+15 # rewind $key_
?vperm v26,v26,v27,$keyperm
lvx v29,$x40,$key
?vperm v27,v27,v28,$keyperm
lvx v30,$x50,$key
?vperm v28,v28,v29,$keyperm
lvx v31,$x60,$key
?vperm v29,v29,v30,$keyperm
lvx $out0,$x70,$key # borrow $out0
?vperm v30,v30,v31,$keyperm
lvx v24,$x00,$key_ # pre-load round[1]
?vperm v31,v31,$out0,$keyperm
lvx v25,$x10,$key_ # pre-load round[2]

#lvx $inptail,0,$inp # "caller" already did this
#addi $inp,$inp,15 # 15 is not a typo
subi $inp,$inp,15 # undo "caller"

le?li $idx,8
lvx_u $in0,$x00,$inp # load first 8 "words"
le?lvsl $inpperm,0,$idx
le?vspltisb $tmp,0x0f
lvx_u $in1,$x10,$inp
le?vxor $inpperm,$inpperm,$tmp # transform for lvx_u/stvx_u
lvx_u $in2,$x20,$inp
le?vperm $in0,$in0,$in0,$inpperm
lvx_u $in3,$x30,$inp
le?vperm $in1,$in1,$in1,$inpperm
lvx_u $in4,$x40,$inp
le?vperm $in2,$in2,$in2,$inpperm
vxor $out0,$in0,$rndkey0
lvx_u $in5,$x50,$inp
le?vperm $in3,$in3,$in3,$inpperm
vxor $out1,$in1,$rndkey0
lvx_u $in6,$x60,$inp
le?vperm $in4,$in4,$in4,$inpperm
vxor $out2,$in2,$rndkey0
lvx_u $in7,$x70,$inp
addi $inp,$inp,0x80
le?vperm $in5,$in5,$in5,$inpperm
vxor $out3,$in3,$rndkey0
le?vperm $in6,$in6,$in6,$inpperm
vxor $out4,$in4,$rndkey0
le?vperm $in7,$in7,$in7,$inpperm
vxor $out5,$in5,$rndkey0
vxor $out6,$in6,$rndkey0
vxor $out7,$in7,$rndkey0

mtctr $rounds
b Loop_cbc_dec8x
.align 5
Loop_cbc_dec8x:
vncipher $out0,$out0,v24
vncipher $out1,$out1,v24
vncipher $out2,$out2,v24
vncipher $out3,$out3,v24
vncipher $out4,$out4,v24
vncipher $out5,$out5,v24
vncipher $out6,$out6,v24
vncipher $out7,$out7,v24
lvx v24,$x20,$key_ # round[3]
addi $key_,$key_,0x20

vncipher $out0,$out0,v25
vncipher $out1,$out1,v25
vncipher $out2,$out2,v25
vncipher $out3,$out3,v25
vncipher $out4,$out4,v25
vncipher $out5,$out5,v25
vncipher $out6,$out6,v25
vncipher $out7,$out7,v25
lvx v25,$x10,$key_ # round[4]
bdnz Loop_cbc_dec8x

subic $len,$len,128 # $len-=128
vncipher $out0,$out0,v24
vncipher $out1,$out1,v24
vncipher $out2,$out2,v24
vncipher $out3,$out3,v24
vncipher $out4,$out4,v24
vncipher $out5,$out5,v24
vncipher $out6,$out6,v24
vncipher $out7,$out7,v24

subfe. r0,r0,r0 # borrow?-1:0
vncipher $out0,$out0,v25
vncipher $out1,$out1,v25
vncipher $out2,$out2,v25
vncipher $out3,$out3,v25
vncipher $out4,$out4,v25
vncipher $out5,$out5,v25
vncipher $out6,$out6,v25
vncipher $out7,$out7,v25

and r0,r0,$len
vncipher $out0,$out0,v26
vncipher $out1,$out1,v26
vncipher $out2,$out2,v26
vncipher $out3,$out3,v26
vncipher $out4,$out4,v26
vncipher $out5,$out5,v26
vncipher $out6,$out6,v26
vncipher $out7,$out7,v26

add $inp,$inp,r0 # $inp is adjusted in such a
# way that at exit from the
# loop inX-in7 are loaded
# with last "words"
vncipher $out0,$out0,v27
vncipher $out1,$out1,v27
vncipher $out2,$out2,v27
vncipher $out3,$out3,v27
vncipher $out4,$out4,v27
vncipher $out5,$out5,v27
vncipher $out6,$out6,v27
vncipher $out7,$out7,v27

addi $key_,$sp,$FRAME+15 # rewind $key_
vncipher $out0,$out0,v28
vncipher $out1,$out1,v28
vncipher $out2,$out2,v28
vncipher $out3,$out3,v28
vncipher $out4,$out4,v28
vncipher $out5,$out5,v28
vncipher $out6,$out6,v28
vncipher $out7,$out7,v28
lvx v24,$x00,$key_ # re-pre-load round[1]

vncipher $out0,$out0,v29
vncipher $out1,$out1,v29
vncipher $out2,$out2,v29
vncipher $out3,$out3,v29
vncipher $out4,$out4,v29
vncipher $out5,$out5,v29
vncipher $out6,$out6,v29
vncipher $out7,$out7,v29
lvx v25,$x10,$key_ # re-pre-load round[2]

vncipher $out0,$out0,v30
vxor $ivec,$ivec,v31 # xor with last round key
vncipher $out1,$out1,v30
vxor $in0,$in0,v31
vncipher $out2,$out2,v30
vxor $in1,$in1,v31
vncipher $out3,$out3,v30
vxor $in2,$in2,v31
vncipher $out4,$out4,v30
vxor $in3,$in3,v31
vncipher $out5,$out5,v30
vxor $in4,$in4,v31
vncipher $out6,$out6,v30
vxor $in5,$in5,v31
vncipher $out7,$out7,v30
vxor $in6,$in6,v31

vncipherlast $out0,$out0,$ivec
vncipherlast $out1,$out1,$in0
lvx_u $in0,$x00,$inp # load next input block
vncipherlast $out2,$out2,$in1
lvx_u $in1,$x10,$inp
vncipherlast $out3,$out3,$in2
le?vperm $in0,$in0,$in0,$inpperm
lvx_u $in2,$x20,$inp
vncipherlast $out4,$out4,$in3
le?vperm $in1,$in1,$in1,$inpperm
lvx_u $in3,$x30,$inp
vncipherlast $out5,$out5,$in4
le?vperm $in2,$in2,$in2,$inpperm
lvx_u $in4,$x40,$inp
vncipherlast $out6,$out6,$in5
le?vperm $in3,$in3,$in3,$inpperm
lvx_u $in5,$x50,$inp
vncipherlast $out7,$out7,$in6
le?vperm $in4,$in4,$in4,$inpperm
lvx_u $in6,$x60,$inp
vmr $ivec,$in7
le?vperm $in5,$in5,$in5,$inpperm
lvx_u $in7,$x70,$inp
addi $inp,$inp,0x80

le?vperm $out0,$out0,$out0,$inpperm
le?vperm $out1,$out1,$out1,$inpperm
stvx_u $out0,$x00,$out
le?vperm $in6,$in6,$in6,$inpperm
vxor $out0,$in0,$rndkey0
le?vperm $out2,$out2,$out2,$inpperm
stvx_u $out1,$x10,$out
le?vperm $in7,$in7,$in7,$inpperm
vxor $out1,$in1,$rndkey0
le?vperm $out3,$out3,$out3,$inpperm
stvx_u $out2,$x20,$out
vxor $out2,$in2,$rndkey0
le?vperm $out4,$out4,$out4,$inpperm
stvx_u $out3,$x30,$out
vxor $out3,$in3,$rndkey0
le?vperm $out5,$out5,$out5,$inpperm
stvx_u $out4,$x40,$out
vxor $out4,$in4,$rndkey0
le?vperm $out6,$out6,$out6,$inpperm
stvx_u $out5,$x50,$out
vxor $out5,$in5,$rndkey0
le?vperm $out7,$out7,$out7,$inpperm
stvx_u $out6,$x60,$out
vxor $out6,$in6,$rndkey0
stvx_u $out7,$x70,$out
addi $out,$out,0x80
vxor $out7,$in7,$rndkey0

mtctr $rounds
beq Loop_cbc_dec8x # did $len-=128 borrow?

addic. $len,$len,128
beq Lcbc_dec8x_done
nop
nop

Loop_cbc_dec8x_tail: # up to 7 "words" tail...
vncipher $out1,$out1,v24
vncipher $out2,$out2,v24
vncipher $out3,$out3,v24
vncipher $out4,$out4,v24
vncipher $out5,$out5,v24
vncipher $out6,$out6,v24
vncipher $out7,$out7,v24
lvx v24,$x20,$key_ # round[3]
addi $key_,$key_,0x20

vncipher $out1,$out1,v25
vncipher $out2,$out2,v25
vncipher $out3,$out3,v25
vncipher $out4,$out4,v25
vncipher $out5,$out5,v25
vncipher $out6,$out6,v25
vncipher $out7,$out7,v25
lvx v25,$x10,$key_ # round[4]
bdnz Loop_cbc_dec8x_tail

vncipher $out1,$out1,v24
vncipher $out2,$out2,v24
vncipher $out3,$out3,v24
vncipher $out4,$out4,v24
vncipher $out5,$out5,v24
vncipher $out6,$out6,v24
vncipher $out7,$out7,v24

vncipher $out1,$out1,v25
vncipher $out2,$out2,v25
vncipher $out3,$out3,v25
vncipher $out4,$out4,v25
vncipher $out5,$out5,v25
vncipher $out6,$out6,v25
vncipher $out7,$out7,v25

vncipher $out1,$out1,v26
vncipher $out2,$out2,v26
vncipher $out3,$out3,v26
vncipher $out4,$out4,v26
vncipher $out5,$out5,v26
vncipher $out6,$out6,v26
vncipher $out7,$out7,v26

vncipher $out1,$out1,v27
vncipher $out2,$out2,v27
vncipher $out3,$out3,v27
vncipher $out4,$out4,v27
vncipher $out5,$out5,v27
vncipher $out6,$out6,v27
vncipher $out7,$out7,v27

vncipher $out1,$out1,v28
vncipher $out2,$out2,v28
vncipher $out3,$out3,v28
vncipher $out4,$out4,v28
vncipher $out5,$out5,v28
vncipher $out6,$out6,v28
vncipher $out7,$out7,v28

vncipher $out1,$out1,v29
vncipher $out2,$out2,v29
vncipher $out3,$out3,v29
vncipher $out4,$out4,v29
vncipher $out5,$out5,v29
vncipher $out6,$out6,v29
vncipher $out7,$out7,v29

vncipher $out1,$out1,v30
vxor $ivec,$ivec,v31 # last round key
vncipher $out2,$out2,v30
vxor $in1,$in1,v31
vncipher $out3,$out3,v30
vxor $in2,$in2,v31
vncipher $out4,$out4,v30
vxor $in3,$in3,v31
vncipher $out5,$out5,v30
vxor $in4,$in4,v31
vncipher $out6,$out6,v30
vxor $in5,$in5,v31
vncipher $out7,$out7,v30
vxor $in6,$in6,v31

cmplwi $len,32 # switch($len)
blt Lcbc_dec8x_one
nop
beq Lcbc_dec8x_two
cmplwi $len,64
blt Lcbc_dec8x_three
nop
beq Lcbc_dec8x_four
cmplwi $len,96
blt Lcbc_dec8x_five
nop
beq Lcbc_dec8x_six

Lcbc_dec8x_seven:
vncipherlast $out1,$out1,$ivec
vncipherlast $out2,$out2,$in1
vncipherlast $out3,$out3,$in2
vncipherlast $out4,$out4,$in3
vncipherlast $out5,$out5,$in4
vncipherlast $out6,$out6,$in5
vncipherlast $out7,$out7,$in6
vmr $ivec,$in7

le?vperm $out1,$out1,$out1,$inpperm
le?vperm $out2,$out2,$out2,$inpperm
stvx_u $out1,$x00,$out
le?vperm $out3,$out3,$out3,$inpperm
stvx_u $out2,$x10,$out
le?vperm $out4,$out4,$out4,$inpperm
stvx_u $out3,$x20,$out
le?vperm $out5,$out5,$out5,$inpperm
stvx_u $out4,$x30,$out
le?vperm $out6,$out6,$out6,$inpperm
stvx_u $out5,$x40,$out
le?vperm $out7,$out7,$out7,$inpperm
stvx_u $out6,$x50,$out
stvx_u $out7,$x60,$out
addi $out,$out,0x70
b Lcbc_dec8x_done

.align 5
Lcbc_dec8x_six:
vncipherlast $out2,$out2,$ivec
vncipherlast $out3,$out3,$in2
vncipherlast $out4,$out4,$in3
vncipherlast $out5,$out5,$in4
vncipherlast $out6,$out6,$in5
vncipherlast $out7,$out7,$in6
vmr $ivec,$in7

le?vperm $out2,$out2,$out2,$inpperm
le?vperm $out3,$out3,$out3,$inpperm
stvx_u $out2,$x00,$out
le?vperm $out4,$out4,$out4,$inpperm
stvx_u $out3,$x10,$out
le?vperm $out5,$out5,$out5,$inpperm
stvx_u $out4,$x20,$out
le?vperm $out6,$out6,$out6,$inpperm
stvx_u $out5,$x30,$out
le?vperm $out7,$out7,$out7,$inpperm
stvx_u $out6,$x40,$out
stvx_u $out7,$x50,$out
addi $out,$out,0x60
b Lcbc_dec8x_done

.align 5
Lcbc_dec8x_five:
vncipherlast $out3,$out3,$ivec
vncipherlast $out4,$out4,$in3
vncipherlast $out5,$out5,$in4
vncipherlast $out6,$out6,$in5
vncipherlast $out7,$out7,$in6
vmr $ivec,$in7

le?vperm $out3,$out3,$out3,$inpperm
le?vperm $out4,$out4,$out4,$inpperm
stvx_u $out3,$x00,$out
le?vperm $out5,$out5,$out5,$inpperm
stvx_u $out4,$x10,$out
le?vperm $out6,$out6,$out6,$inpperm
stvx_u $out5,$x20,$out
le?vperm $out7,$out7,$out7,$inpperm
stvx_u $out6,$x30,$out
stvx_u $out7,$x40,$out
addi $out,$out,0x50
b Lcbc_dec8x_done

.align 5
Lcbc_dec8x_four:
vncipherlast $out4,$out4,$ivec
vncipherlast $out5,$out5,$in4
vncipherlast $out6,$out6,$in5
vncipherlast $out7,$out7,$in6
vmr $ivec,$in7

le?vperm $out4,$out4,$out4,$inpperm
le?vperm $out5,$out5,$out5,$inpperm
stvx_u $out4,$x00,$out
le?vperm $out6,$out6,$out6,$inpperm
stvx_u $out5,$x10,$out
le?vperm $out7,$out7,$out7,$inpperm
stvx_u $out6,$x20,$out
stvx_u $out7,$x30,$out
addi $out,$out,0x40
b Lcbc_dec8x_done

.align 5
Lcbc_dec8x_three:
vncipherlast $out5,$out5,$ivec
vncipherlast $out6,$out6,$in5
vncipherlast $out7,$out7,$in6
vmr $ivec,$in7

le?vperm $out5,$out5,$out5,$inpperm
le?vperm $out6,$out6,$out6,$inpperm
stvx_u $out5,$x00,$out
le?vperm $out7,$out7,$out7,$inpperm
stvx_u $out6,$x10,$out
stvx_u $out7,$x20,$out
addi $out,$out,0x30
b Lcbc_dec8x_done

.align 5
Lcbc_dec8x_two:
vncipherlast $out6,$out6,$ivec
vncipherlast $out7,$out7,$in6
vmr $ivec,$in7

le?vperm $out6,$out6,$out6,$inpperm
le?vperm $out7,$out7,$out7,$inpperm
stvx_u $out6,$x00,$out
stvx_u $out7,$x10,$out
addi $out,$out,0x20
b Lcbc_dec8x_done

.align 5
Lcbc_dec8x_one:
vncipherlast $out7,$out7,$ivec
vmr $ivec,$in7

le?vperm $out7,$out7,$out7,$inpperm
stvx_u $out7,0,$out
addi $out,$out,0x10

Lcbc_dec8x_done:
le?vperm $ivec,$ivec,$ivec,$inpperm
stvx_u $ivec,0,$ivp # write [unaligned] iv

li r10,`$FRAME+15`
li r11,`$FRAME+31`
stvx $inpperm,r10,$sp # wipe copies of round keys
addi r10,r10,32
stvx $inpperm,r11,$sp
addi r11,r11,32
stvx $inpperm,r10,$sp
addi r10,r10,32
stvx $inpperm,r11,$sp
addi r11,r11,32
stvx $inpperm,r10,$sp
addi r10,r10,32
stvx $inpperm,r11,$sp
addi r11,r11,32
stvx $inpperm,r10,$sp
addi r10,r10,32
stvx $inpperm,r11,$sp
addi r11,r11,32

mtspr 256,$vrsave
lvx v20,r10,$sp # ABI says so
addi r10,r10,32
lvx v21,r11,$sp
addi r11,r11,32
lvx v22,r10,$sp
addi r10,r10,32
lvx v23,r11,$sp
addi r11,r11,32
lvx v24,r10,$sp
addi r10,r10,32
lvx v25,r11,$sp
addi r11,r11,32
lvx v26,r10,$sp
addi r10,r10,32
lvx v27,r11,$sp
addi r11,r11,32
lvx v28,r10,$sp
addi r10,r10,32
lvx v29,r11,$sp
addi r11,r11,32
lvx v30,r10,$sp
lvx v31,r11,$sp
$POP r26,`$FRAME+21*16+0*$SIZE_T`($sp)
$POP r27,`$FRAME+21*16+1*$SIZE_T`($sp)
$POP r28,`$FRAME+21*16+2*$SIZE_T`($sp)
$POP r29,`$FRAME+21*16+3*$SIZE_T`($sp)
$POP r30,`$FRAME+21*16+4*$SIZE_T`($sp)
$POP r31,`$FRAME+21*16+5*$SIZE_T`($sp)
addi $sp,$sp,`$FRAME+21*16+6*$SIZE_T`
blr
.long 0
.byte 0,12,0x04,0,0x80,6,6,0
.long 0
.size .${prefix}_cbc_encrypt,.-.${prefix}_cbc_encrypt
___
}} }}}

#########################################################################
{{{ # CTR procedure[s] #
my ($inp,$out,$len,$key,$ivp,$x10,$rounds,$idx)=map("r$_",(3..10));
my ($rndkey0,$rndkey1,$inout,$tmp)= map("v$_",(0..3));
my ($ivec,$inptail,$inpperm,$outhead,$outperm,$outmask,$keyperm,$one)=
map("v$_",(4..11));
my $dat=$tmp;

$code.=<<___;
.globl .${prefix}_ctr32_encrypt_blocks
.align 5
.${prefix}_ctr32_encrypt_blocks:
${UCMP}i $len,1
bltlr-

lis r0,0xfff0
mfspr $vrsave,256
mtspr 256,r0

li $idx,15
vxor $rndkey0,$rndkey0,$rndkey0
le?vspltisb $tmp,0x0f

lvx $ivec,0,$ivp # load [unaligned] iv
lvsl $inpperm,0,$ivp
lvx $inptail,$idx,$ivp
vspltisb $one,1
le?vxor $inpperm,$inpperm,$tmp
vperm $ivec,$ivec,$inptail,$inpperm
vsldoi $one,$rndkey0,$one,1

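# Editor's note: \$one now holds sixteen bytes {0,...,0,1} (vsldoi
# shifts the 0x01 splat against the zeroed \$rndkey0), so the vadduwm
# in the loop below increments only the rightmost 32-bit word of the
# counter block -- a big-endian 32-bit counter, as the "ctr32" name
# implies.
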
neg r11,$inp
?lvsl $keyperm,0,$key # prepare for unaligned key
lwz $rounds,240($key)

lvsr $inpperm,0,r11 # prepare for unaligned load
lvx $inptail,0,$inp
addi $inp,$inp,15 # 15 is not a typo
le?vxor $inpperm,$inpperm,$tmp

srwi $rounds,$rounds,1
li $idx,16
subi $rounds,$rounds,1

${UCMP}i $len,8
bge _aesp8_ctr32_encrypt8x

?lvsr $outperm,0,$out # prepare for unaligned store
vspltisb $outmask,-1
lvx $outhead,0,$out
?vperm $outmask,$rndkey0,$outmask,$outperm
le?vxor $outperm,$outperm,$tmp

lvx $rndkey0,0,$key
mtctr $rounds
lvx $rndkey1,$idx,$key
addi $idx,$idx,16
?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
vxor $inout,$ivec,$rndkey0
lvx $rndkey0,$idx,$key
addi $idx,$idx,16
b Loop_ctr32_enc

.align 5
Loop_ctr32_enc:
?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
vcipher $inout,$inout,$rndkey1
lvx $rndkey1,$idx,$key
addi $idx,$idx,16
?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
vcipher $inout,$inout,$rndkey0
lvx $rndkey0,$idx,$key
addi $idx,$idx,16
bdnz Loop_ctr32_enc

vadduwm $ivec,$ivec,$one
vmr $dat,$inptail
lvx $inptail,0,$inp
addi $inp,$inp,16
subic. $len,$len,1 # blocks--

?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
vcipher $inout,$inout,$rndkey1
lvx $rndkey1,$idx,$key
vperm $dat,$dat,$inptail,$inpperm
li $idx,16
?vperm $rndkey1,$rndkey0,$rndkey1,$keyperm
lvx $rndkey0,0,$key
vxor $dat,$dat,$rndkey1 # last round key
vcipherlast $inout,$inout,$dat

lvx $rndkey1,$idx,$key
addi $idx,$idx,16
vperm $inout,$inout,$inout,$outperm
vsel $dat,$outhead,$inout,$outmask
mtctr $rounds
?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
vmr $outhead,$inout
vxor $inout,$ivec,$rndkey0
lvx $rndkey0,$idx,$key
addi $idx,$idx,16
stvx $dat,0,$out
addi $out,$out,16
bne Loop_ctr32_enc

addi $out,$out,-1
lvx $inout,0,$out # redundant in aligned case
vsel $inout,$outhead,$inout,$outmask
stvx $inout,0,$out

mtspr 256,$vrsave
blr
.long 0
.byte 0,12,0x14,0,0,0,6,0
.long 0
___
#########################################################################
{{ # Optimized CTR procedure #
my $key_="r11";
my ($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,8,26..31));
$x00=0 if ($flavour =~ /osx/);
my ($in0, $in1, $in2, $in3, $in4, $in5, $in6, $in7 )=map("v$_",(0..3,10,12..14));
my ($out0,$out1,$out2,$out3,$out4,$out5,$out6,$out7)=map("v$_",(15..22));
my $rndkey0="v23"; # v24-v25 rotating buffer for first round keys
# v26-v31 last 6 round keys
my ($tmp,$keyperm)=($in3,$in4); # aliases with "caller", redundant assignment
my ($two,$three,$four)=($outhead,$outperm,$outmask);

$code.=<<___;
.align 5
_aesp8_ctr32_encrypt8x:
$STU $sp,-`($FRAME+21*16+6*$SIZE_T)`($sp)
li r10,`$FRAME+8*16+15`
li r11,`$FRAME+8*16+31`
stvx v20,r10,$sp # ABI says so
addi r10,r10,32
stvx v21,r11,$sp
addi r11,r11,32
stvx v22,r10,$sp
addi r10,r10,32
stvx v23,r11,$sp
addi r11,r11,32
stvx v24,r10,$sp
addi r10,r10,32
stvx v25,r11,$sp
addi r11,r11,32
stvx v26,r10,$sp
addi r10,r10,32
stvx v27,r11,$sp
addi r11,r11,32
stvx v28,r10,$sp
addi r10,r10,32
stvx v29,r11,$sp
addi r11,r11,32
stvx v30,r10,$sp
stvx v31,r11,$sp
li r0,-1
stw $vrsave,`$FRAME+21*16-4`($sp) # save vrsave
li $x10,0x10
$PUSH r26,`$FRAME+21*16+0*$SIZE_T`($sp)
li $x20,0x20
$PUSH r27,`$FRAME+21*16+1*$SIZE_T`($sp)
li $x30,0x30
$PUSH r28,`$FRAME+21*16+2*$SIZE_T`($sp)
li $x40,0x40
$PUSH r29,`$FRAME+21*16+3*$SIZE_T`($sp)
li $x50,0x50
$PUSH r30,`$FRAME+21*16+4*$SIZE_T`($sp)
li $x60,0x60
$PUSH r31,`$FRAME+21*16+5*$SIZE_T`($sp)
li $x70,0x70
mtspr 256,r0

subi $rounds,$rounds,3 # -4 in total

lvx $rndkey0,$x00,$key # load key schedule
lvx v30,$x10,$key
addi $key,$key,0x20
lvx v31,$x00,$key
?vperm $rndkey0,$rndkey0,v30,$keyperm
addi $key_,$sp,$FRAME+15
mtctr $rounds

Load_ctr32_enc_key:
?vperm v24,v30,v31,$keyperm
lvx v30,$x10,$key
addi $key,$key,0x20
stvx v24,$x00,$key_ # off-load round[1]
?vperm v25,v31,v30,$keyperm
lvx v31,$x00,$key
stvx v25,$x10,$key_ # off-load round[2]
addi $key_,$key_,0x20
bdnz Load_ctr32_enc_key

lvx v26,$x10,$key
?vperm v24,v30,v31,$keyperm
lvx v27,$x20,$key
stvx v24,$x00,$key_ # off-load round[3]
?vperm v25,v31,v26,$keyperm
lvx v28,$x30,$key
stvx v25,$x10,$key_ # off-load round[4]
addi $key_,$sp,$FRAME+15 # rewind $key_
?vperm v26,v26,v27,$keyperm
lvx v29,$x40,$key
?vperm v27,v27,v28,$keyperm
lvx v30,$x50,$key
?vperm v28,v28,v29,$keyperm
lvx v31,$x60,$key
?vperm v29,v29,v30,$keyperm
lvx $out0,$x70,$key # borrow $out0
?vperm v30,v30,v31,$keyperm
lvx v24,$x00,$key_ # pre-load round[1]
?vperm v31,v31,$out0,$keyperm
lvx v25,$x10,$key_ # pre-load round[2]

vadduwm $two,$one,$one
subi $inp,$inp,15 # undo "caller"
$SHL $len,$len,4

vadduwm $out1,$ivec,$one # counter values ...
vadduwm $out2,$ivec,$two
vxor $out0,$ivec,$rndkey0 # ... xored with rndkey[0]
le?li $idx,8
vadduwm $out3,$out1,$two
vxor $out1,$out1,$rndkey0
le?lvsl $inpperm,0,$idx
vadduwm $out4,$out2,$two
vxor $out2,$out2,$rndkey0
le?vspltisb $tmp,0x0f
vadduwm $out5,$out3,$two
vxor $out3,$out3,$rndkey0
le?vxor $inpperm,$inpperm,$tmp # transform for lvx_u/stvx_u
vadduwm $out6,$out4,$two
vxor $out4,$out4,$rndkey0
vadduwm $out7,$out5,$two
vxor $out5,$out5,$rndkey0
vadduwm $ivec,$out6,$two # next counter value
vxor $out6,$out6,$rndkey0
vxor $out7,$out7,$rndkey0

mtctr $rounds
b Loop_ctr32_enc8x
.align 5
Loop_ctr32_enc8x:
vcipher $out0,$out0,v24
vcipher $out1,$out1,v24
vcipher $out2,$out2,v24
vcipher $out3,$out3,v24
vcipher $out4,$out4,v24
vcipher $out5,$out5,v24
vcipher $out6,$out6,v24
vcipher $out7,$out7,v24
Loop_ctr32_enc8x_middle:
lvx v24,$x20,$key_ # round[3]
addi $key_,$key_,0x20

vcipher $out0,$out0,v25
vcipher $out1,$out1,v25
vcipher $out2,$out2,v25
vcipher $out3,$out3,v25
vcipher $out4,$out4,v25
vcipher $out5,$out5,v25
vcipher $out6,$out6,v25
vcipher $out7,$out7,v25
lvx v25,$x10,$key_ # round[4]
bdnz Loop_ctr32_enc8x

subic r11,$len,256 # $len-256, borrow $key_
vcipher $out0,$out0,v24
vcipher $out1,$out1,v24
vcipher $out2,$out2,v24
vcipher $out3,$out3,v24
vcipher $out4,$out4,v24
vcipher $out5,$out5,v24
vcipher $out6,$out6,v24
vcipher $out7,$out7,v24

subfe r0,r0,r0 # borrow?-1:0
vcipher $out0,$out0,v25
vcipher $out1,$out1,v25
vcipher $out2,$out2,v25
vcipher $out3,$out3,v25
vcipher $out4,$out4,v25
vcipher $out5,$out5,v25
vcipher $out6,$out6,v25
vcipher $out7,$out7,v25

and r0,r0,r11
addi $key_,$sp,$FRAME+15 # rewind $key_
vcipher $out0,$out0,v26
vcipher $out1,$out1,v26
vcipher $out2,$out2,v26
vcipher $out3,$out3,v26
vcipher $out4,$out4,v26
vcipher $out5,$out5,v26
vcipher $out6,$out6,v26
vcipher $out7,$out7,v26
lvx v24,$x00,$key_ # re-pre-load round[1]

subic $len,$len,129 # $len-=129
vcipher $out0,$out0,v27
addi $len,$len,1 # $len-=128 really
vcipher $out1,$out1,v27
vcipher $out2,$out2,v27
vcipher $out3,$out3,v27
vcipher $out4,$out4,v27
vcipher $out5,$out5,v27
vcipher $out6,$out6,v27
vcipher $out7,$out7,v27
lvx v25,$x10,$key_ # re-pre-load round[2]

vcipher $out0,$out0,v28
lvx_u $in0,$x00,$inp # load input
vcipher $out1,$out1,v28
lvx_u $in1,$x10,$inp
vcipher $out2,$out2,v28
lvx_u $in2,$x20,$inp
vcipher $out3,$out3,v28
lvx_u $in3,$x30,$inp
vcipher $out4,$out4,v28
lvx_u $in4,$x40,$inp
vcipher $out5,$out5,v28
lvx_u $in5,$x50,$inp
vcipher $out6,$out6,v28
lvx_u $in6,$x60,$inp
vcipher $out7,$out7,v28
lvx_u $in7,$x70,$inp
addi $inp,$inp,0x80

vcipher $out0,$out0,v29
le?vperm $in0,$in0,$in0,$inpperm
vcipher $out1,$out1,v29
le?vperm $in1,$in1,$in1,$inpperm
vcipher $out2,$out2,v29
le?vperm $in2,$in2,$in2,$inpperm
vcipher $out3,$out3,v29
le?vperm $in3,$in3,$in3,$inpperm
vcipher $out4,$out4,v29
le?vperm $in4,$in4,$in4,$inpperm
vcipher $out5,$out5,v29
le?vperm $in5,$in5,$in5,$inpperm
vcipher $out6,$out6,v29
le?vperm $in6,$in6,$in6,$inpperm
vcipher $out7,$out7,v29
le?vperm $in7,$in7,$in7,$inpperm

add $inp,$inp,r0 # $inp is adjusted in such a
# way that at exit from the
# loop inX-in7 are loaded
# with last "words"
subfe. r0,r0,r0 # borrow?-1:0
vcipher $out0,$out0,v30
vxor $in0,$in0,v31 # xor with last round key
vcipher $out1,$out1,v30
vxor $in1,$in1,v31
vcipher $out2,$out2,v30
vxor $in2,$in2,v31
vcipher $out3,$out3,v30
vxor $in3,$in3,v31
vcipher $out4,$out4,v30
vxor $in4,$in4,v31
vcipher $out5,$out5,v30
vxor $in5,$in5,v31
vcipher $out6,$out6,v30
vxor $in6,$in6,v31
vcipher $out7,$out7,v30
vxor $in7,$in7,v31

bne Lctr32_enc8x_break # did $len-129 borrow?

vcipherlast $in0,$out0,$in0
vcipherlast $in1,$out1,$in1
vadduwm $out1,$ivec,$one # counter values ...
vcipherlast $in2,$out2,$in2
vadduwm $out2,$ivec,$two
vxor $out0,$ivec,$rndkey0 # ... xored with rndkey[0]
vcipherlast $in3,$out3,$in3
vadduwm $out3,$out1,$two
vxor $out1,$out1,$rndkey0
vcipherlast $in4,$out4,$in4
vadduwm $out4,$out2,$two
vxor $out2,$out2,$rndkey0
vcipherlast $in5,$out5,$in5
vadduwm $out5,$out3,$two
vxor $out3,$out3,$rndkey0
vcipherlast $in6,$out6,$in6
vadduwm $out6,$out4,$two
vxor $out4,$out4,$rndkey0
vcipherlast $in7,$out7,$in7
vadduwm $out7,$out5,$two
vxor $out5,$out5,$rndkey0
le?vperm $in0,$in0,$in0,$inpperm
vadduwm $ivec,$out6,$two # next counter value
vxor $out6,$out6,$rndkey0
le?vperm $in1,$in1,$in1,$inpperm
vxor $out7,$out7,$rndkey0
mtctr $rounds

vcipher $out0,$out0,v24
stvx_u $in0,$x00,$out
le?vperm $in2,$in2,$in2,$inpperm
vcipher $out1,$out1,v24
stvx_u $in1,$x10,$out
le?vperm $in3,$in3,$in3,$inpperm
vcipher $out2,$out2,v24
stvx_u $in2,$x20,$out
le?vperm $in4,$in4,$in4,$inpperm
vcipher $out3,$out3,v24
stvx_u $in3,$x30,$out
le?vperm $in5,$in5,$in5,$inpperm
vcipher $out4,$out4,v24
stvx_u $in4,$x40,$out
le?vperm $in6,$in6,$in6,$inpperm
vcipher $out5,$out5,v24
stvx_u $in5,$x50,$out
le?vperm $in7,$in7,$in7,$inpperm
vcipher $out6,$out6,v24
stvx_u $in6,$x60,$out
vcipher $out7,$out7,v24
stvx_u $in7,$x70,$out
addi $out,$out,0x80

b Loop_ctr32_enc8x_middle

.align 5
Lctr32_enc8x_break:
cmpwi $len,-0x60
blt Lctr32_enc8x_one
nop
beq Lctr32_enc8x_two
cmpwi $len,-0x40
blt Lctr32_enc8x_three
nop
beq Lctr32_enc8x_four
cmpwi $len,-0x20
blt Lctr32_enc8x_five
nop
beq Lctr32_enc8x_six
cmpwi $len,0x00
blt Lctr32_enc8x_seven

Lctr32_enc8x_eight:
vcipherlast $out0,$out0,$in0
vcipherlast $out1,$out1,$in1
vcipherlast $out2,$out2,$in2
vcipherlast $out3,$out3,$in3
vcipherlast $out4,$out4,$in4
vcipherlast $out5,$out5,$in5
vcipherlast $out6,$out6,$in6
vcipherlast $out7,$out7,$in7

le?vperm $out0,$out0,$out0,$inpperm
le?vperm $out1,$out1,$out1,$inpperm
stvx_u $out0,$x00,$out
le?vperm $out2,$out2,$out2,$inpperm
stvx_u $out1,$x10,$out
le?vperm $out3,$out3,$out3,$inpperm
stvx_u $out2,$x20,$out
le?vperm $out4,$out4,$out4,$inpperm
stvx_u $out3,$x30,$out
le?vperm $out5,$out5,$out5,$inpperm
stvx_u $out4,$x40,$out
le?vperm $out6,$out6,$out6,$inpperm
stvx_u $out5,$x50,$out
le?vperm $out7,$out7,$out7,$inpperm
stvx_u $out6,$x60,$out
stvx_u $out7,$x70,$out
addi $out,$out,0x80
b Lctr32_enc8x_done

.align 5
Lctr32_enc8x_seven:
vcipherlast $out0,$out0,$in1
vcipherlast $out1,$out1,$in2
vcipherlast $out2,$out2,$in3
vcipherlast $out3,$out3,$in4
vcipherlast $out4,$out4,$in5
vcipherlast $out5,$out5,$in6
vcipherlast $out6,$out6,$in7

le?vperm $out0,$out0,$out0,$inpperm
le?vperm $out1,$out1,$out1,$inpperm
stvx_u $out0,$x00,$out
le?vperm $out2,$out2,$out2,$inpperm
stvx_u $out1,$x10,$out
le?vperm $out3,$out3,$out3,$inpperm
stvx_u $out2,$x20,$out
le?vperm $out4,$out4,$out4,$inpperm
stvx_u $out3,$x30,$out
le?vperm $out5,$out5,$out5,$inpperm
stvx_u $out4,$x40,$out
le?vperm $out6,$out6,$out6,$inpperm
stvx_u $out5,$x50,$out
stvx_u $out6,$x60,$out
addi $out,$out,0x70
b Lctr32_enc8x_done

.align 5
Lctr32_enc8x_six:
vcipherlast $out0,$out0,$in2
vcipherlast $out1,$out1,$in3
vcipherlast $out2,$out2,$in4
vcipherlast $out3,$out3,$in5
vcipherlast $out4,$out4,$in6
vcipherlast $out5,$out5,$in7

le?vperm $out0,$out0,$out0,$inpperm
le?vperm $out1,$out1,$out1,$inpperm
stvx_u $out0,$x00,$out
le?vperm $out2,$out2,$out2,$inpperm
stvx_u $out1,$x10,$out
le?vperm $out3,$out3,$out3,$inpperm
stvx_u $out2,$x20,$out
le?vperm $out4,$out4,$out4,$inpperm
stvx_u $out3,$x30,$out
le?vperm $out5,$out5,$out5,$inpperm
stvx_u $out4,$x40,$out
stvx_u $out5,$x50,$out
addi $out,$out,0x60
b Lctr32_enc8x_done

.align 5
Lctr32_enc8x_five:
vcipherlast $out0,$out0,$in3
vcipherlast $out1,$out1,$in4
vcipherlast $out2,$out2,$in5
vcipherlast $out3,$out3,$in6
vcipherlast $out4,$out4,$in7

le?vperm $out0,$out0,$out0,$inpperm
le?vperm $out1,$out1,$out1,$inpperm
stvx_u $out0,$x00,$out
le?vperm $out2,$out2,$out2,$inpperm
stvx_u $out1,$x10,$out
le?vperm $out3,$out3,$out3,$inpperm
stvx_u $out2,$x20,$out
le?vperm $out4,$out4,$out4,$inpperm
stvx_u $out3,$x30,$out
stvx_u $out4,$x40,$out
addi $out,$out,0x50
b Lctr32_enc8x_done

.align 5
Lctr32_enc8x_four:
vcipherlast $out0,$out0,$in4
vcipherlast $out1,$out1,$in5
vcipherlast $out2,$out2,$in6
vcipherlast $out3,$out3,$in7

le?vperm $out0,$out0,$out0,$inpperm
le?vperm $out1,$out1,$out1,$inpperm
stvx_u $out0,$x00,$out
le?vperm $out2,$out2,$out2,$inpperm
stvx_u $out1,$x10,$out
le?vperm $out3,$out3,$out3,$inpperm
stvx_u $out2,$x20,$out
stvx_u $out3,$x30,$out
addi $out,$out,0x40
b Lctr32_enc8x_done

.align 5
Lctr32_enc8x_three:
vcipherlast $out0,$out0,$in5
vcipherlast $out1,$out1,$in6
vcipherlast $out2,$out2,$in7

le?vperm $out0,$out0,$out0,$inpperm
le?vperm $out1,$out1,$out1,$inpperm
stvx_u $out0,$x00,$out
le?vperm $out2,$out2,$out2,$inpperm
stvx_u $out1,$x10,$out
stvx_u $out2,$x20,$out
addi $out,$out,0x30
b Lctr32_enc8x_done

.align 5
Lctr32_enc8x_two:
vcipherlast $out0,$out0,$in6
vcipherlast $out1,$out1,$in7

le?vperm $out0,$out0,$out0,$inpperm
le?vperm $out1,$out1,$out1,$inpperm
stvx_u $out0,$x00,$out
stvx_u $out1,$x10,$out
addi $out,$out,0x20
b Lctr32_enc8x_done

.align 5
Lctr32_enc8x_one:
vcipherlast $out0,$out0,$in7

le?vperm $out0,$out0,$out0,$inpperm
stvx_u $out0,0,$out
addi $out,$out,0x10

Lctr32_enc8x_done:
li r10,`$FRAME+15`
li r11,`$FRAME+31`
stvx $inpperm,r10,$sp # wipe copies of round keys
addi r10,r10,32
stvx $inpperm,r11,$sp
addi r11,r11,32
stvx $inpperm,r10,$sp
addi r10,r10,32
stvx $inpperm,r11,$sp
addi r11,r11,32
stvx $inpperm,r10,$sp
addi r10,r10,32
stvx $inpperm,r11,$sp
addi r11,r11,32
stvx $inpperm,r10,$sp
addi r10,r10,32
stvx $inpperm,r11,$sp
addi r11,r11,32

mtspr 256,$vrsave
lvx v20,r10,$sp # ABI says so
addi r10,r10,32
lvx v21,r11,$sp
addi r11,r11,32
lvx v22,r10,$sp
addi r10,r10,32
lvx v23,r11,$sp
addi r11,r11,32
lvx v24,r10,$sp
addi r10,r10,32
lvx v25,r11,$sp
addi r11,r11,32
lvx v26,r10,$sp
addi r10,r10,32
lvx v27,r11,$sp
addi r11,r11,32
lvx v28,r10,$sp
addi r10,r10,32
lvx v29,r11,$sp
addi r11,r11,32
lvx v30,r10,$sp
lvx v31,r11,$sp
$POP r26,`$FRAME+21*16+0*$SIZE_T`($sp)
$POP r27,`$FRAME+21*16+1*$SIZE_T`($sp)
$POP r28,`$FRAME+21*16+2*$SIZE_T`($sp)
$POP r29,`$FRAME+21*16+3*$SIZE_T`($sp)
$POP r30,`$FRAME+21*16+4*$SIZE_T`($sp)
$POP r31,`$FRAME+21*16+5*$SIZE_T`($sp)
addi $sp,$sp,`$FRAME+21*16+6*$SIZE_T`
blr
.long 0
.byte 0,12,0x04,0,0x80,6,6,0
.long 0
.size .${prefix}_ctr32_encrypt_blocks,.-.${prefix}_ctr32_encrypt_blocks
___
}} }}}

#########################################################################
{{{ # XTS procedures #
my ($inp,$out,$len,$key1,$key2,$ivp,$rounds,$idx) = map("r$_",(3..10));
my ($rndkey0,$rndkey1,$inout) = map("v$_",(0..2));
my ($output,$inptail,$inpperm,$leperm,$keyperm) = map("v$_",(3..7));
my ($tweak,$seven,$eighty7,$tmp,$tweak1) = map("v$_",(8..12));
my $taillen = $key2;

($inp,$idx) = ($idx,$inp); # reassign

$code.=<<___;
.globl .${prefix}_xts_encrypt
.align 5
.${prefix}_xts_encrypt:
mr $inp,r3 # reassign
li r3,-1
${UCMP}i $len,16
bltlr-

lis r0,0xfff0
mfspr r12,256 # save vrsave
li r11,0
mtspr 256,r0

vspltisb $seven,0x07 # 0x070707..07
le?lvsl $leperm,r11,r11
le?vspltisb $tmp,0x0f
le?vxor $leperm,$leperm,$seven

li $idx,15
lvx $tweak,0,$ivp # load [unaligned] iv
lvsl $inpperm,0,$ivp
lvx $inptail,$idx,$ivp
le?vxor $inpperm,$inpperm,$tmp
vperm $tweak,$tweak,$inptail,$inpperm

?lvsl $keyperm,0,$key2 # prepare for unaligned key
lwz $rounds,240($key2)
srwi $rounds,$rounds,1
subi $rounds,$rounds,1
li $idx,16

neg r11,$inp
lvsr $inpperm,0,r11 # prepare for unaligned load
lvx $inout,0,$inp
addi $inp,$inp,15 # 15 is not a typo
le?vxor $inpperm,$inpperm,$tmp

lvx $rndkey0,0,$key2
lvx $rndkey1,$idx,$key2
addi $idx,$idx,16
?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
vxor $tweak,$tweak,$rndkey0
lvx $rndkey0,$idx,$key2
addi $idx,$idx,16
mtctr $rounds

Ltweak_xts_enc:
?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
vcipher $tweak,$tweak,$rndkey1
lvx $rndkey1,$idx,$key2
addi $idx,$idx,16
?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
vcipher $tweak,$tweak,$rndkey0
lvx $rndkey0,$idx,$key2
addi $idx,$idx,16
bdnz Ltweak_xts_enc

?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
vcipher $tweak,$tweak,$rndkey1
lvx $rndkey1,$idx,$key2
li $idx,16
?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
vcipherlast $tweak,$tweak,$rndkey0

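# Editor's note: at this point \$tweak = Enc(key2, iv) -- the initial
# XTS tweak per IEEE P1619; key1 below is used for the data blocks
# themselves.
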
lvx $inptail,0,$inp
addi $inp,$inp,16

?lvsl $keyperm,0,$key1 # prepare for unaligned key
lwz $rounds,240($key1)
srwi $rounds,$rounds,1
subi $rounds,$rounds,1
li $idx,16

vslb $eighty7,$seven,$seven # 0x808080..80
vor $eighty7,$eighty7,$seven # 0x878787..87
vspltisb $tmp,1 # 0x010101..01
vsldoi $eighty7,$eighty7,$tmp,15 # 0x870101..01

${UCMP}i $len,96
bge _aesp8_xts_encrypt6x

andi. $taillen,$len,15
subic r0,$len,32
subi $taillen,$taillen,16
subfe r0,r0,r0
and r0,r0,$taillen
add $inp,$inp,r0

lvx $rndkey0,0,$key1
lvx $rndkey1,$idx,$key1
addi $idx,$idx,16
vperm $inout,$inout,$inptail,$inpperm
?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
vxor $inout,$inout,$tweak
vxor $inout,$inout,$rndkey0
lvx $rndkey0,$idx,$key1
addi $idx,$idx,16
mtctr $rounds
b Loop_xts_enc

.align 5
Loop_xts_enc:
?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
vcipher $inout,$inout,$rndkey1
lvx $rndkey1,$idx,$key1
addi $idx,$idx,16
?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
vcipher $inout,$inout,$rndkey0
lvx $rndkey0,$idx,$key1
addi $idx,$idx,16
bdnz Loop_xts_enc

?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
vcipher $inout,$inout,$rndkey1
lvx $rndkey1,$idx,$key1
li $idx,16
?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
vxor $rndkey0,$rndkey0,$tweak
vcipherlast $output,$inout,$rndkey0

le?vperm $tmp,$output,$output,$leperm
be?nop
le?stvx_u $tmp,0,$out
be?stvx_u $output,0,$out
addi $out,$out,16

subic. $len,$len,16
beq Lxts_enc_done

vmr $inout,$inptail
lvx $inptail,0,$inp
addi $inp,$inp,16
lvx $rndkey0,0,$key1
lvx $rndkey1,$idx,$key1
addi $idx,$idx,16

subic r0,$len,32
subfe r0,r0,r0
and r0,r0,$taillen
add $inp,$inp,r0

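# Editor's note on the five instructions below: they compute tweak*x
# in GF(2^128) with the XTS reduction polynomial (0x87 feedback),
# entirely bytewise. vaddubm doubles every byte but drops inter-byte
# carries; vsrab by 7 turns each byte's MSB into a 0x00/0xff mask,
# vsldoi rotates that mask to the neighbouring byte, and vand with
# \$eighty7 (0x87,0x01,...,0x01) selects 0x01 for ordinary carries and
# 0x87 for the carry out of the most significant byte, which the
# final vxor folds back in.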
2055 vsrab $tmp,$tweak,$seven # next tweak value
2056 vaddubm $tweak,$tweak,$tweak
2057 vsldoi $tmp,$tmp,$tmp,15
2058 vand $tmp,$tmp,$eighty7
2059 vxor $tweak,$tweak,$tmp
2060
2061 vperm $inout,$inout,$inptail,$inpperm
2062 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
2063 vxor $inout,$inout,$tweak
2064 vxor $output,$output,$rndkey0 # just in case $len<16
2065 vxor $inout,$inout,$rndkey0
2066 lvx $rndkey0,$idx,$key1
2067 addi $idx,$idx,16
2068
2069 mtctr $rounds
2070 ${UCMP}i $len,16
2071 bge Loop_xts_enc
2072
2073 vxor $output,$output,$tweak
2074 lvsr $inpperm,0,$len # $inpperm is no longer needed
2075 vxor $inptail,$inptail,$inptail # $inptail is no longer needed
2076 vspltisb $tmp,-1
2077 vperm $inptail,$inptail,$tmp,$inpperm
2078 vsel $inout,$inout,$output,$inptail
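# lvsr keyed by $len turns the {zeros, all-ones} pair above into a
# byte mask whose first $len bytes are clear, so vsel keeps the first
# $len bytes of the partial plaintext block and takes the remaining
# bytes from the previous ciphertext block, which the earlier XORs
# pre-whitened with the tweak and round-0 key just as the loop expects.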
2079
2080 subi r11,$out,17
2081 subi $out,$out,16
2082 mtctr $len
2083 li $len,16
2084 Loop_xts_enc_steal:
2085 lbzu r0,1(r11)
2086 stb r0,16(r11)
2087 bdnz Loop_xts_enc_steal
2088
2089 mtctr $rounds
2090 b Loop_xts_enc # one more time...
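# Ciphertext stealing: the byte loop above copies the leading $taillen
# bytes of the last full ciphertext block into the final, partial
# output slot, and the branch re-encrypts the spliced block (partial
# plaintext plus stolen ciphertext tail) under the advanced tweak,
# overwriting the last full slot.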
2091
2092 Lxts_enc_done:
2093 mtspr 256,r12 # restore vrsave
2094 li r3,0
2095 blr
2096 .long 0
2097 .byte 0,12,0x04,0,0x80,6,6,0
2098 .long 0
2099 .size .${prefix}_xts_encrypt,.-.${prefix}_xts_encrypt
2100
2101 .globl .${prefix}_xts_decrypt
2102 .align 5
2103 .${prefix}_xts_decrypt:
2104 mr $inp,r3 # reassign
2105 li r3,-1
2106 ${UCMP}i $len,16
2107 bltlr-
2108
2109 lis r0,0xfff8
2110 mfspr r12,256 # save vrsave
2111 li r11,0
2112 mtspr 256,r0
2113
2114 andi. r0,$len,15
2115 neg r0,r0
2116 andi. r0,r0,16
2117 sub $len,$len,r0
2118
2119 vspltisb $seven,0x07 # 0x070707..07
2120 le?lvsl $leperm,r11,r11
2121 le?vspltisb $tmp,0x0f
2122 le?vxor $leperm,$leperm,$seven
2123
2124 li $idx,15
2125 lvx $tweak,0,$ivp # load [unaligned] iv
2126 lvsl $inpperm,0,$ivp
2127 lvx $inptail,$idx,$ivp
2128 le?vxor $inpperm,$inpperm,$tmp
2129 vperm $tweak,$tweak,$inptail,$inpperm
2130
2131 ?lvsl $keyperm,0,$key2 # prepare for unaligned key
2132 lwz $rounds,240($key2)
2133 srwi $rounds,$rounds,1
2134 subi $rounds,$rounds,1
2135 li $idx,16
2136
2137 neg r11,$inp
2138 lvsr $inpperm,0,r11 # prepare for unaligned load
2139 lvx $inout,0,$inp
2140 	addi		$inp,$inp,15		# 15 is not a typo
2141 le?vxor $inpperm,$inpperm,$tmp
2142
2143 lvx $rndkey0,0,$key2
2144 lvx $rndkey1,$idx,$key2
2145 addi $idx,$idx,16
2146 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
2147 vxor $tweak,$tweak,$rndkey0
2148 lvx $rndkey0,$idx,$key2
2149 addi $idx,$idx,16
2150 mtctr $rounds
2151
2152 Ltweak_xts_dec:
2153 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
2154 vcipher $tweak,$tweak,$rndkey1
2155 lvx $rndkey1,$idx,$key2
2156 addi $idx,$idx,16
2157 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
2158 vcipher $tweak,$tweak,$rndkey0
2159 lvx $rndkey0,$idx,$key2
2160 addi $idx,$idx,16
2161 bdnz Ltweak_xts_dec
2162
2163 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
2164 vcipher $tweak,$tweak,$rndkey1
2165 lvx $rndkey1,$idx,$key2
2166 li $idx,16
2167 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
2168 vcipherlast $tweak,$tweak,$rndkey0
2169
2170 lvx $inptail,0,$inp
2171 addi $inp,$inp,16
2172
2173 ?lvsl $keyperm,0,$key1 # prepare for unaligned key
2174 lwz $rounds,240($key1)
2175 srwi $rounds,$rounds,1
2176 subi $rounds,$rounds,1
2177 li $idx,16
2178
2179 vslb $eighty7,$seven,$seven # 0x808080..80
2180 vor $eighty7,$eighty7,$seven # 0x878787..87
2181 vspltisb $tmp,1 # 0x010101..01
2182 vsldoi $eighty7,$eighty7,$tmp,15 # 0x870101..01
2183
2184 ${UCMP}i $len,96
2185 bge _aesp8_xts_decrypt6x
2186
2187 lvx $rndkey0,0,$key1
2188 lvx $rndkey1,$idx,$key1
2189 addi $idx,$idx,16
2190 vperm $inout,$inout,$inptail,$inpperm
2191 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
2192 vxor $inout,$inout,$tweak
2193 vxor $inout,$inout,$rndkey0
2194 lvx $rndkey0,$idx,$key1
2195 addi $idx,$idx,16
2196 mtctr $rounds
2197
2198 ${UCMP}i $len,16
2199 blt Ltail_xts_dec
2200 be?b Loop_xts_dec
2201
2202 .align 5
2203 Loop_xts_dec:
2204 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
2205 vncipher $inout,$inout,$rndkey1
2206 lvx $rndkey1,$idx,$key1
2207 addi $idx,$idx,16
2208 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
2209 vncipher $inout,$inout,$rndkey0
2210 lvx $rndkey0,$idx,$key1
2211 addi $idx,$idx,16
2212 bdnz Loop_xts_dec
2213
2214 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
2215 vncipher $inout,$inout,$rndkey1
2216 lvx $rndkey1,$idx,$key1
2217 li $idx,16
2218 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
2219 vxor $rndkey0,$rndkey0,$tweak
2220 vncipherlast $output,$inout,$rndkey0
2221
2222 le?vperm $tmp,$output,$output,$leperm
2223 be?nop
2224 le?stvx_u $tmp,0,$out
2225 be?stvx_u $output,0,$out
2226 addi $out,$out,16
2227
2228 subic. $len,$len,16
2229 beq Lxts_dec_done
2230
2231 vmr $inout,$inptail
2232 lvx $inptail,0,$inp
2233 addi $inp,$inp,16
2234 lvx $rndkey0,0,$key1
2235 lvx $rndkey1,$idx,$key1
2236 addi $idx,$idx,16
2237
2238 vsrab $tmp,$tweak,$seven # next tweak value
2239 vaddubm $tweak,$tweak,$tweak
2240 vsldoi $tmp,$tmp,$tmp,15
2241 vand $tmp,$tmp,$eighty7
2242 vxor $tweak,$tweak,$tmp
2243
2244 vperm $inout,$inout,$inptail,$inpperm
2245 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
2246 vxor $inout,$inout,$tweak
2247 vxor $inout,$inout,$rndkey0
2248 lvx $rndkey0,$idx,$key1
2249 addi $idx,$idx,16
2250
2251 mtctr $rounds
2252 ${UCMP}i $len,16
2253 bge Loop_xts_dec
2254
2255 Ltail_xts_dec:
2256 vsrab $tmp,$tweak,$seven # next tweak value
2257 vaddubm $tweak1,$tweak,$tweak
2258 vsldoi $tmp,$tmp,$tmp,15
2259 vand $tmp,$tmp,$eighty7
2260 vxor $tweak1,$tweak1,$tmp
2261
2262 subi $inp,$inp,16
2263 add $inp,$inp,$len
2264
2265 	vxor		$inout,$inout,$tweak		# :-( undo the $tweak applied earlier
2266 	vxor		$inout,$inout,$tweak1		# :-) ...and apply $tweak1 instead
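# Stealing on the decrypt side swaps the tweak order: the last
# complete ciphertext block must be decrypted with the *next* tweak
# value ($tweak1), while the trailing partial block still uses the
# current one, hence the substitution above.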
2267
2268 Loop_xts_dec_short:
2269 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
2270 vncipher $inout,$inout,$rndkey1
2271 lvx $rndkey1,$idx,$key1
2272 addi $idx,$idx,16
2273 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
2274 vncipher $inout,$inout,$rndkey0
2275 lvx $rndkey0,$idx,$key1
2276 addi $idx,$idx,16
2277 bdnz Loop_xts_dec_short
2278
2279 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
2280 vncipher $inout,$inout,$rndkey1
2281 lvx $rndkey1,$idx,$key1
2282 li $idx,16
2283 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
2284 vxor $rndkey0,$rndkey0,$tweak1
2285 vncipherlast $output,$inout,$rndkey0
2286
2287 le?vperm $tmp,$output,$output,$leperm
2288 be?nop
2289 le?stvx_u $tmp,0,$out
2290 be?stvx_u $output,0,$out
2291
2292 vmr $inout,$inptail
2293 lvx $inptail,0,$inp
2294 #addi $inp,$inp,16
2295 lvx $rndkey0,0,$key1
2296 lvx $rndkey1,$idx,$key1
2297 addi $idx,$idx,16
2298 vperm $inout,$inout,$inptail,$inpperm
2299 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
2300
2301 lvsr $inpperm,0,$len # $inpperm is no longer needed
2302 vxor $inptail,$inptail,$inptail # $inptail is no longer needed
2303 vspltisb $tmp,-1
2304 vperm $inptail,$inptail,$tmp,$inpperm
2305 vsel $inout,$inout,$output,$inptail
2306
2307 vxor $rndkey0,$rndkey0,$tweak
2308 vxor $inout,$inout,$rndkey0
2309 lvx $rndkey0,$idx,$key1
2310 addi $idx,$idx,16
2311
2312 subi r11,$out,1
2313 mtctr $len
2314 li $len,16
2315 Loop_xts_dec_steal:
2316 lbzu r0,1(r11)
2317 stb r0,16(r11)
2318 bdnz Loop_xts_dec_steal
2319
2320 mtctr $rounds
2321 b Loop_xts_dec # one more time...
2322
2323 Lxts_dec_done:
2324 mtspr 256,r12 # restore vrsave
2325 li r3,0
2326 blr
2327 .long 0
2328 .byte 0,12,0x04,0,0x80,6,6,0
2329 .long 0
2330 .size .${prefix}_xts_decrypt,.-.${prefix}_xts_decrypt
2331 ___
2332 #########################################################################
2333 {{ # Optimized XTS procedures #
2334 my $key_="r11";
2335 my ($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,8,26..31));
2336 $x00=0 if ($flavour =~ /osx/);
2337 my ($in0, $in1, $in2, $in3, $in4, $in5 )=map("v$_",(0..5));
2338 my ($out0, $out1, $out2, $out3, $out4, $out5)=map("v$_",(7,12..16));
2339 my ($twk0, $twk1, $twk2, $twk3, $twk4, $twk5)=map("v$_",(17..22));
2340 my $rndkey0="v23";	# v24-v25 rotating buffer for the first round keys
2341 # v26-v31 last 6 round keys
2342 my ($keyperm)=($out0);	# aliases with the caller's $keyperm, so the assignment is redundant
2343 my $taillen=$x70;
2344
2345 $code.=<<___;
2346 .align 5
2347 _aesp8_xts_encrypt6x:
2348 $STU $sp,-`($FRAME+21*16+6*$SIZE_T)`($sp)
2349 mflr r0
2350 li r7,`$FRAME+8*16+15`
2351 li r8,`$FRAME+8*16+31`
2352 $PUSH r0,`$FRAME+21*16+6*$SIZE_T+$LRSAVE`($sp)
2353 stvx v20,r7,$sp # ABI says so
2354 addi r7,r7,32
2355 stvx v21,r8,$sp
2356 addi r8,r8,32
2357 stvx v22,r7,$sp
2358 addi r7,r7,32
2359 stvx v23,r8,$sp
2360 addi r8,r8,32
2361 stvx v24,r7,$sp
2362 addi r7,r7,32
2363 stvx v25,r8,$sp
2364 addi r8,r8,32
2365 stvx v26,r7,$sp
2366 addi r7,r7,32
2367 stvx v27,r8,$sp
2368 addi r8,r8,32
2369 stvx v28,r7,$sp
2370 addi r7,r7,32
2371 stvx v29,r8,$sp
2372 addi r8,r8,32
2373 stvx v30,r7,$sp
2374 stvx v31,r8,$sp
2375 mr r7,r0
2376 li r0,-1
2377 stw $vrsave,`$FRAME+21*16-4`($sp) # save vrsave
2378 li $x10,0x10
2379 $PUSH r26,`$FRAME+21*16+0*$SIZE_T`($sp)
2380 li $x20,0x20
2381 $PUSH r27,`$FRAME+21*16+1*$SIZE_T`($sp)
2382 li $x30,0x30
2383 $PUSH r28,`$FRAME+21*16+2*$SIZE_T`($sp)
2384 li $x40,0x40
2385 $PUSH r29,`$FRAME+21*16+3*$SIZE_T`($sp)
2386 li $x50,0x50
2387 $PUSH r30,`$FRAME+21*16+4*$SIZE_T`($sp)
2388 li $x60,0x60
2389 $PUSH r31,`$FRAME+21*16+5*$SIZE_T`($sp)
2390 li $x70,0x70
2391 mtspr 256,r0
2392
2393 subi $rounds,$rounds,3 # -4 in total
2394
2395 lvx $rndkey0,$x00,$key1 # load key schedule
2396 lvx v30,$x10,$key1
2397 addi $key1,$key1,0x20
2398 lvx v31,$x00,$key1
2399 ?vperm $rndkey0,$rndkey0,v30,$keyperm
2400 addi $key_,$sp,$FRAME+15
2401 mtctr $rounds
2402
2403 Load_xts_enc_key:
2404 ?vperm v24,v30,v31,$keyperm
2405 lvx v30,$x10,$key1
2406 addi $key1,$key1,0x20
2407 stvx v24,$x00,$key_ # off-load round[1]
2408 ?vperm v25,v31,v30,$keyperm
2409 lvx v31,$x00,$key1
2410 stvx v25,$x10,$key_ # off-load round[2]
2411 addi $key_,$key_,0x20
2412 bdnz Load_xts_enc_key
2413
2414 lvx v26,$x10,$key1
2415 ?vperm v24,v30,v31,$keyperm
2416 lvx v27,$x20,$key1
2417 stvx v24,$x00,$key_ # off-load round[3]
2418 ?vperm v25,v31,v26,$keyperm
2419 lvx v28,$x30,$key1
2420 stvx v25,$x10,$key_ # off-load round[4]
2421 addi $key_,$sp,$FRAME+15 # rewind $key_
2422 ?vperm v26,v26,v27,$keyperm
2423 lvx v29,$x40,$key1
2424 ?vperm v27,v27,v28,$keyperm
2425 lvx v30,$x50,$key1
2426 ?vperm v28,v28,v29,$keyperm
2427 lvx v31,$x60,$key1
2428 ?vperm v29,v29,v30,$keyperm
2429 lvx $twk5,$x70,$key1 # borrow $twk5
2430 ?vperm v30,v30,v31,$keyperm
2431 lvx v24,$x00,$key_ # pre-load round[1]
2432 ?vperm v31,v31,$twk5,$keyperm
2433 lvx v25,$x10,$key_ # pre-load round[2]
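# The key schedule now lives in two places: the middle round keys sit
# in the stack scratch area at $key_ and rotate through v24/v25 two
# rounds at a time, while the last six keys stay resident in v26-v31
# for the whole loop.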
2434
2435 vperm $in0,$inout,$inptail,$inpperm
2436 	subi		$inp,$inp,31		# undo the +31 applied by "caller"
2437 vxor $twk0,$tweak,$rndkey0
2438 vsrab $tmp,$tweak,$seven # next tweak value
2439 vaddubm $tweak,$tweak,$tweak
2440 vsldoi $tmp,$tmp,$tmp,15
2441 vand $tmp,$tmp,$eighty7
2442 vxor $out0,$in0,$twk0
2443 vxor $tweak,$tweak,$tmp
2444
2445 lvx_u $in1,$x10,$inp
2446 vxor $twk1,$tweak,$rndkey0
2447 vsrab $tmp,$tweak,$seven # next tweak value
2448 vaddubm $tweak,$tweak,$tweak
2449 vsldoi $tmp,$tmp,$tmp,15
2450 le?vperm $in1,$in1,$in1,$leperm
2451 vand $tmp,$tmp,$eighty7
2452 vxor $out1,$in1,$twk1
2453 vxor $tweak,$tweak,$tmp
2454
2455 lvx_u $in2,$x20,$inp
2456 andi. $taillen,$len,15
2457 vxor $twk2,$tweak,$rndkey0
2458 vsrab $tmp,$tweak,$seven # next tweak value
2459 vaddubm $tweak,$tweak,$tweak
2460 vsldoi $tmp,$tmp,$tmp,15
2461 le?vperm $in2,$in2,$in2,$leperm
2462 vand $tmp,$tmp,$eighty7
2463 vxor $out2,$in2,$twk2
2464 vxor $tweak,$tweak,$tmp
2465
2466 lvx_u $in3,$x30,$inp
2467 sub $len,$len,$taillen
2468 vxor $twk3,$tweak,$rndkey0
2469 vsrab $tmp,$tweak,$seven # next tweak value
2470 vaddubm $tweak,$tweak,$tweak
2471 vsldoi $tmp,$tmp,$tmp,15
2472 le?vperm $in3,$in3,$in3,$leperm
2473 vand $tmp,$tmp,$eighty7
2474 vxor $out3,$in3,$twk3
2475 vxor $tweak,$tweak,$tmp
2476
2477 lvx_u $in4,$x40,$inp
2478 subi $len,$len,0x60
2479 vxor $twk4,$tweak,$rndkey0
2480 vsrab $tmp,$tweak,$seven # next tweak value
2481 vaddubm $tweak,$tweak,$tweak
2482 vsldoi $tmp,$tmp,$tmp,15
2483 le?vperm $in4,$in4,$in4,$leperm
2484 vand $tmp,$tmp,$eighty7
2485 vxor $out4,$in4,$twk4
2486 vxor $tweak,$tweak,$tmp
2487
2488 lvx_u $in5,$x50,$inp
2489 addi $inp,$inp,0x60
2490 vxor $twk5,$tweak,$rndkey0
2491 vsrab $tmp,$tweak,$seven # next tweak value
2492 vaddubm $tweak,$tweak,$tweak
2493 vsldoi $tmp,$tmp,$tmp,15
2494 le?vperm $in5,$in5,$in5,$leperm
2495 vand $tmp,$tmp,$eighty7
2496 vxor $out5,$in5,$twk5
2497 vxor $tweak,$tweak,$tmp
2498
2499 vxor v31,v31,$rndkey0
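# Folding $rndkey0 into v31 turns "twkN ^ v31" into "last round key ^
# tweak", so vcipherlast applies the final AddRoundKey and the output
# tweak XOR in a single instruction.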
2500 mtctr $rounds
2501 b Loop_xts_enc6x
2502
2503 .align 5
2504 Loop_xts_enc6x:
2505 vcipher $out0,$out0,v24
2506 vcipher $out1,$out1,v24
2507 vcipher $out2,$out2,v24
2508 vcipher $out3,$out3,v24
2509 vcipher $out4,$out4,v24
2510 vcipher $out5,$out5,v24
2511 lvx v24,$x20,$key_ # round[3]
2512 addi $key_,$key_,0x20
2513
2514 vcipher $out0,$out0,v25
2515 vcipher $out1,$out1,v25
2516 vcipher $out2,$out2,v25
2517 vcipher $out3,$out3,v25
2518 vcipher $out4,$out4,v25
2519 vcipher $out5,$out5,v25
2520 lvx v25,$x10,$key_ # round[4]
2521 bdnz Loop_xts_enc6x
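# Six blocks are kept in flight so the latency of each vcipher is
# hidden behind the other five streams; every iteration covers two
# rounds, refetching v24/v25 from the stack copy of the schedule.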
2522
2523 subic $len,$len,96 # $len-=96
2524 vxor $in0,$twk0,v31 # xor with last round key
2525 vcipher $out0,$out0,v24
2526 vcipher $out1,$out1,v24
2527 vsrab $tmp,$tweak,$seven # next tweak value
2528 vxor $twk0,$tweak,$rndkey0
2529 vaddubm $tweak,$tweak,$tweak
2530 vcipher $out2,$out2,v24
2531 vcipher $out3,$out3,v24
2532 vsldoi $tmp,$tmp,$tmp,15
2533 vcipher $out4,$out4,v24
2534 vcipher $out5,$out5,v24
2535
2536 	subfe.		r0,r0,r0		# borrow ? -1 : 0
2537 vand $tmp,$tmp,$eighty7
2538 vcipher $out0,$out0,v25
2539 vcipher $out1,$out1,v25
2540 vxor $tweak,$tweak,$tmp
2541 vcipher $out2,$out2,v25
2542 vcipher $out3,$out3,v25
2543 vxor $in1,$twk1,v31
2544 vsrab $tmp,$tweak,$seven # next tweak value
2545 vxor $twk1,$tweak,$rndkey0
2546 vcipher $out4,$out4,v25
2547 vcipher $out5,$out5,v25
2548
2549 and r0,r0,$len
2550 vaddubm $tweak,$tweak,$tweak
2551 vsldoi $tmp,$tmp,$tmp,15
2552 vcipher $out0,$out0,v26
2553 vcipher $out1,$out1,v26
2554 vand $tmp,$tmp,$eighty7
2555 vcipher $out2,$out2,v26
2556 vcipher $out3,$out3,v26
2557 vxor $tweak,$tweak,$tmp
2558 vcipher $out4,$out4,v26
2559 vcipher $out5,$out5,v26
2560
2561 	add		$inp,$inp,r0		# $inp is adjusted in such
2562 						# a way that at exit from the
2563 						# loop inX-in5 are loaded
2564 						# with the last "words"
2565 vxor $in2,$twk2,v31
2566 vsrab $tmp,$tweak,$seven # next tweak value
2567 vxor $twk2,$tweak,$rndkey0
2568 vaddubm $tweak,$tweak,$tweak
2569 vcipher $out0,$out0,v27
2570 vcipher $out1,$out1,v27
2571 vsldoi $tmp,$tmp,$tmp,15
2572 vcipher $out2,$out2,v27
2573 vcipher $out3,$out3,v27
2574 vand $tmp,$tmp,$eighty7
2575 vcipher $out4,$out4,v27
2576 vcipher $out5,$out5,v27
2577
2578 addi $key_,$sp,$FRAME+15 # rewind $key_
2579 vxor $tweak,$tweak,$tmp
2580 vcipher $out0,$out0,v28
2581 vcipher $out1,$out1,v28
2582 vxor $in3,$twk3,v31
2583 vsrab $tmp,$tweak,$seven # next tweak value
2584 vxor $twk3,$tweak,$rndkey0
2585 vcipher $out2,$out2,v28
2586 vcipher $out3,$out3,v28
2587 vaddubm $tweak,$tweak,$tweak
2588 vsldoi $tmp,$tmp,$tmp,15
2589 vcipher $out4,$out4,v28
2590 vcipher $out5,$out5,v28
2591 lvx v24,$x00,$key_ # re-pre-load round[1]
2592 vand $tmp,$tmp,$eighty7
2593
2594 vcipher $out0,$out0,v29
2595 vcipher $out1,$out1,v29
2596 vxor $tweak,$tweak,$tmp
2597 vcipher $out2,$out2,v29
2598 vcipher $out3,$out3,v29
2599 vxor $in4,$twk4,v31
2600 vsrab $tmp,$tweak,$seven # next tweak value
2601 vxor $twk4,$tweak,$rndkey0
2602 vcipher $out4,$out4,v29
2603 vcipher $out5,$out5,v29
2604 lvx v25,$x10,$key_ # re-pre-load round[2]
2605 vaddubm $tweak,$tweak,$tweak
2606 vsldoi $tmp,$tmp,$tmp,15
2607
2608 vcipher $out0,$out0,v30
2609 vcipher $out1,$out1,v30
2610 vand $tmp,$tmp,$eighty7
2611 vcipher $out2,$out2,v30
2612 vcipher $out3,$out3,v30
2613 vxor $tweak,$tweak,$tmp
2614 vcipher $out4,$out4,v30
2615 vcipher $out5,$out5,v30
2616 vxor $in5,$twk5,v31
2617 vsrab $tmp,$tweak,$seven # next tweak value
2618 vxor $twk5,$tweak,$rndkey0
2619
2620 vcipherlast $out0,$out0,$in0
2621 lvx_u $in0,$x00,$inp # load next input block
2622 vaddubm $tweak,$tweak,$tweak
2623 vsldoi $tmp,$tmp,$tmp,15
2624 vcipherlast $out1,$out1,$in1
2625 lvx_u $in1,$x10,$inp
2626 vcipherlast $out2,$out2,$in2
2627 le?vperm $in0,$in0,$in0,$leperm
2628 lvx_u $in2,$x20,$inp
2629 vand $tmp,$tmp,$eighty7
2630 vcipherlast $out3,$out3,$in3
2631 le?vperm $in1,$in1,$in1,$leperm
2632 lvx_u $in3,$x30,$inp
2633 vcipherlast $out4,$out4,$in4
2634 le?vperm $in2,$in2,$in2,$leperm
2635 lvx_u $in4,$x40,$inp
2636 vxor $tweak,$tweak,$tmp
2637 vcipherlast $tmp,$out5,$in5 # last block might be needed
2638 # in stealing mode
2639 le?vperm $in3,$in3,$in3,$leperm
2640 lvx_u $in5,$x50,$inp
2641 addi $inp,$inp,0x60
2642 le?vperm $in4,$in4,$in4,$leperm
2643 le?vperm $in5,$in5,$in5,$leperm
2644
2645 le?vperm $out0,$out0,$out0,$leperm
2646 le?vperm $out1,$out1,$out1,$leperm
2647 stvx_u $out0,$x00,$out # store output
2648 vxor $out0,$in0,$twk0
2649 le?vperm $out2,$out2,$out2,$leperm
2650 stvx_u $out1,$x10,$out
2651 vxor $out1,$in1,$twk1
2652 le?vperm $out3,$out3,$out3,$leperm
2653 stvx_u $out2,$x20,$out
2654 vxor $out2,$in2,$twk2
2655 le?vperm $out4,$out4,$out4,$leperm
2656 stvx_u $out3,$x30,$out
2657 vxor $out3,$in3,$twk3
2658 le?vperm $out5,$tmp,$tmp,$leperm
2659 stvx_u $out4,$x40,$out
2660 vxor $out4,$in4,$twk4
2661 le?stvx_u $out5,$x50,$out
2662 be?stvx_u $tmp, $x50,$out
2663 vxor $out5,$in5,$twk5
2664 addi $out,$out,0x60
2665
2666 mtctr $rounds
2667 beq Loop_xts_enc6x # did $len-=96 borrow?
2668
2669 addic. $len,$len,0x60
2670 beq Lxts_enc6x_zero
2671 cmpwi $len,0x20
2672 blt Lxts_enc6x_one
2673 nop
2674 beq Lxts_enc6x_two
2675 cmpwi $len,0x40
2676 blt Lxts_enc6x_three
2677 nop
2678 beq Lxts_enc6x_four
2679
2680 Lxts_enc6x_five:
2681 vxor $out0,$in1,$twk0
2682 vxor $out1,$in2,$twk1
2683 vxor $out2,$in3,$twk2
2684 vxor $out3,$in4,$twk3
2685 vxor $out4,$in5,$twk4
2686
2687 bl _aesp8_xts_enc5x
2688
2689 le?vperm $out0,$out0,$out0,$leperm
2690 vmr $twk0,$twk5 # unused tweak
2691 le?vperm $out1,$out1,$out1,$leperm
2692 stvx_u $out0,$x00,$out # store output
2693 le?vperm $out2,$out2,$out2,$leperm
2694 stvx_u $out1,$x10,$out
2695 le?vperm $out3,$out3,$out3,$leperm
2696 stvx_u $out2,$x20,$out
2697 vxor $tmp,$out4,$twk5 # last block prep for stealing
2698 le?vperm $out4,$out4,$out4,$leperm
2699 stvx_u $out3,$x30,$out
2700 stvx_u $out4,$x40,$out
2701 addi $out,$out,0x50
2702 bne Lxts_enc6x_steal
2703 b Lxts_enc6x_done
2704
2705 .align 4
2706 Lxts_enc6x_four:
2707 vxor $out0,$in2,$twk0
2708 vxor $out1,$in3,$twk1
2709 vxor $out2,$in4,$twk2
2710 vxor $out3,$in5,$twk3
2711 vxor $out4,$out4,$out4
2712
2713 bl _aesp8_xts_enc5x
2714
2715 le?vperm $out0,$out0,$out0,$leperm
2716 vmr $twk0,$twk4 # unused tweak
2717 le?vperm $out1,$out1,$out1,$leperm
2718 stvx_u $out0,$x00,$out # store output
2719 le?vperm $out2,$out2,$out2,$leperm
2720 stvx_u $out1,$x10,$out
2721 vxor $tmp,$out3,$twk4 # last block prep for stealing
2722 le?vperm $out3,$out3,$out3,$leperm
2723 stvx_u $out2,$x20,$out
2724 stvx_u $out3,$x30,$out
2725 addi $out,$out,0x40
2726 bne Lxts_enc6x_steal
2727 b Lxts_enc6x_done
2728
2729 .align 4
2730 Lxts_enc6x_three:
2731 vxor $out0,$in3,$twk0
2732 vxor $out1,$in4,$twk1
2733 vxor $out2,$in5,$twk2
2734 vxor $out3,$out3,$out3
2735 vxor $out4,$out4,$out4
2736
2737 bl _aesp8_xts_enc5x
2738
2739 le?vperm $out0,$out0,$out0,$leperm
2740 vmr $twk0,$twk3 # unused tweak
2741 le?vperm $out1,$out1,$out1,$leperm
2742 stvx_u $out0,$x00,$out # store output
2743 vxor $tmp,$out2,$twk3 # last block prep for stealing
2744 le?vperm $out2,$out2,$out2,$leperm
2745 stvx_u $out1,$x10,$out
2746 stvx_u $out2,$x20,$out
2747 addi $out,$out,0x30
2748 bne Lxts_enc6x_steal
2749 b Lxts_enc6x_done
2750
2751 .align 4
2752 Lxts_enc6x_two:
2753 vxor $out0,$in4,$twk0
2754 vxor $out1,$in5,$twk1
2755 vxor $out2,$out2,$out2
2756 vxor $out3,$out3,$out3
2757 vxor $out4,$out4,$out4
2758
2759 bl _aesp8_xts_enc5x
2760
2761 le?vperm $out0,$out0,$out0,$leperm
2762 vmr $twk0,$twk2 # unused tweak
2763 vxor $tmp,$out1,$twk2 # last block prep for stealing
2764 le?vperm $out1,$out1,$out1,$leperm
2765 stvx_u $out0,$x00,$out # store output
2766 stvx_u $out1,$x10,$out
2767 addi $out,$out,0x20
2768 bne Lxts_enc6x_steal
2769 b Lxts_enc6x_done
2770
2771 .align 4
2772 Lxts_enc6x_one:
2773 vxor $out0,$in5,$twk0
2774 nop
2775 Loop_xts_enc1x:
2776 vcipher $out0,$out0,v24
2777 lvx v24,$x20,$key_ # round[3]
2778 addi $key_,$key_,0x20
2779
2780 vcipher $out0,$out0,v25
2781 lvx v25,$x10,$key_ # round[4]
2782 bdnz Loop_xts_enc1x
2783
2784 add $inp,$inp,$taillen
2785 cmpwi $taillen,0
2786 vcipher $out0,$out0,v24
2787
2788 subi $inp,$inp,16
2789 vcipher $out0,$out0,v25
2790
2791 lvsr $inpperm,0,$taillen
2792 vcipher $out0,$out0,v26
2793
2794 lvx_u $in0,0,$inp
2795 vcipher $out0,$out0,v27
2796
2797 addi $key_,$sp,$FRAME+15 # rewind $key_
2798 vcipher $out0,$out0,v28
2799 lvx v24,$x00,$key_ # re-pre-load round[1]
2800
2801 vcipher $out0,$out0,v29
2802 lvx v25,$x10,$key_ # re-pre-load round[2]
2803 vxor $twk0,$twk0,v31
2804
2805 le?vperm $in0,$in0,$in0,$leperm
2806 vcipher $out0,$out0,v30
2807
2808 vperm $in0,$in0,$in0,$inpperm
2809 vcipherlast $out0,$out0,$twk0
2810
2811 vmr $twk0,$twk1 # unused tweak
2812 vxor $tmp,$out0,$twk1 # last block prep for stealing
2813 le?vperm $out0,$out0,$out0,$leperm
2814 stvx_u $out0,$x00,$out # store output
2815 addi $out,$out,0x10
2816 bne Lxts_enc6x_steal
2817 b Lxts_enc6x_done
2818
2819 .align 4
2820 Lxts_enc6x_zero:
2821 cmpwi $taillen,0
2822 beq Lxts_enc6x_done
2823
2824 add $inp,$inp,$taillen
2825 subi $inp,$inp,16
2826 lvx_u $in0,0,$inp
2827 	lvsr		$inpperm,0,$taillen	# $in5 is no longer needed
2828 le?vperm $in0,$in0,$in0,$leperm
2829 vperm $in0,$in0,$in0,$inpperm
2830 vxor $tmp,$tmp,$twk0
2831 Lxts_enc6x_steal:
2832 vxor $in0,$in0,$twk0
2833 vxor $out0,$out0,$out0
2834 vspltisb $out1,-1
2835 vperm $out0,$out0,$out1,$inpperm
2836 vsel $out0,$in0,$tmp,$out0 # $tmp is last block, remember?
2837
2838 subi r3,$out,17
2839 subi $out,$out,16
2840 mtctr $taillen
2841 Loop_xts_enc6x_steal:
2842 lbzu r0,1(r3)
2843 stb r0,16(r3)
2844 bdnz Loop_xts_enc6x_steal
2845
2846 li $taillen,0
2847 mtctr $rounds
2848 b Loop_xts_enc1x # one more time...
2849
2850 .align 4
2851 Lxts_enc6x_done:
2852 mtlr r7
2853 li r10,`$FRAME+15`
2854 li r11,`$FRAME+31`
2855 stvx $seven,r10,$sp # wipe copies of round keys
2856 addi r10,r10,32
2857 stvx $seven,r11,$sp
2858 addi r11,r11,32
2859 stvx $seven,r10,$sp
2860 addi r10,r10,32
2861 stvx $seven,r11,$sp
2862 addi r11,r11,32
2863 stvx $seven,r10,$sp
2864 addi r10,r10,32
2865 stvx $seven,r11,$sp
2866 addi r11,r11,32
2867 stvx $seven,r10,$sp
2868 addi r10,r10,32
2869 stvx $seven,r11,$sp
2870 addi r11,r11,32
2871
2872 mtspr 256,$vrsave
2873 lvx v20,r10,$sp # ABI says so
2874 addi r10,r10,32
2875 lvx v21,r11,$sp
2876 addi r11,r11,32
2877 lvx v22,r10,$sp
2878 addi r10,r10,32
2879 lvx v23,r11,$sp
2880 addi r11,r11,32
2881 lvx v24,r10,$sp
2882 addi r10,r10,32
2883 lvx v25,r11,$sp
2884 addi r11,r11,32
2885 lvx v26,r10,$sp
2886 addi r10,r10,32
2887 lvx v27,r11,$sp
2888 addi r11,r11,32
2889 lvx v28,r10,$sp
2890 addi r10,r10,32
2891 lvx v29,r11,$sp
2892 addi r11,r11,32
2893 lvx v30,r10,$sp
2894 lvx v31,r11,$sp
2895 $POP r26,`$FRAME+21*16+0*$SIZE_T`($sp)
2896 $POP r27,`$FRAME+21*16+1*$SIZE_T`($sp)
2897 $POP r28,`$FRAME+21*16+2*$SIZE_T`($sp)
2898 $POP r29,`$FRAME+21*16+3*$SIZE_T`($sp)
2899 $POP r30,`$FRAME+21*16+4*$SIZE_T`($sp)
2900 $POP r31,`$FRAME+21*16+5*$SIZE_T`($sp)
2901 addi $sp,$sp,`$FRAME+21*16+6*$SIZE_T`
2902 blr
2903 .long 0
2904 .byte 0,12,0x04,1,0x80,6,6,0
2905 .long 0
2906
2907 .align 5
2908 _aesp8_xts_enc5x:
2909 vcipher $out0,$out0,v24
2910 vcipher $out1,$out1,v24
2911 vcipher $out2,$out2,v24
2912 vcipher $out3,$out3,v24
2913 vcipher $out4,$out4,v24
2914 lvx v24,$x20,$key_ # round[3]
2915 addi $key_,$key_,0x20
2916
2917 vcipher $out0,$out0,v25
2918 vcipher $out1,$out1,v25
2919 vcipher $out2,$out2,v25
2920 vcipher $out3,$out3,v25
2921 vcipher $out4,$out4,v25
2922 lvx v25,$x10,$key_ # round[4]
2923 bdnz _aesp8_xts_enc5x
2924
2925 add $inp,$inp,$taillen
2926 cmpwi $taillen,0
2927 vcipher $out0,$out0,v24
2928 vcipher $out1,$out1,v24
2929 vcipher $out2,$out2,v24
2930 vcipher $out3,$out3,v24
2931 vcipher $out4,$out4,v24
2932
2933 subi $inp,$inp,16
2934 vcipher $out0,$out0,v25
2935 vcipher $out1,$out1,v25
2936 vcipher $out2,$out2,v25
2937 vcipher $out3,$out3,v25
2938 vcipher $out4,$out4,v25
2939 vxor $twk0,$twk0,v31
2940
2941 vcipher $out0,$out0,v26
2942 	lvsr		$inpperm,r0,$taillen	# $in5 is no longer needed
2943 vcipher $out1,$out1,v26
2944 vcipher $out2,$out2,v26
2945 vcipher $out3,$out3,v26
2946 vcipher $out4,$out4,v26
2947 vxor $in1,$twk1,v31
2948
2949 vcipher $out0,$out0,v27
2950 lvx_u $in0,0,$inp
2951 vcipher $out1,$out1,v27
2952 vcipher $out2,$out2,v27
2953 vcipher $out3,$out3,v27
2954 vcipher $out4,$out4,v27
2955 vxor $in2,$twk2,v31
2956
2957 addi $key_,$sp,$FRAME+15 # rewind $key_
2958 vcipher $out0,$out0,v28
2959 vcipher $out1,$out1,v28
2960 vcipher $out2,$out2,v28
2961 vcipher $out3,$out3,v28
2962 vcipher $out4,$out4,v28
2963 lvx v24,$x00,$key_ # re-pre-load round[1]
2964 vxor $in3,$twk3,v31
2965
2966 vcipher $out0,$out0,v29
2967 le?vperm $in0,$in0,$in0,$leperm
2968 vcipher $out1,$out1,v29
2969 vcipher $out2,$out2,v29
2970 vcipher $out3,$out3,v29
2971 vcipher $out4,$out4,v29
2972 lvx v25,$x10,$key_ # re-pre-load round[2]
2973 vxor $in4,$twk4,v31
2974
2975 vcipher $out0,$out0,v30
2976 vperm $in0,$in0,$in0,$inpperm
2977 vcipher $out1,$out1,v30
2978 vcipher $out2,$out2,v30
2979 vcipher $out3,$out3,v30
2980 vcipher $out4,$out4,v30
2981
2982 vcipherlast $out0,$out0,$twk0
2983 vcipherlast $out1,$out1,$in1
2984 vcipherlast $out2,$out2,$in2
2985 vcipherlast $out3,$out3,$in3
2986 vcipherlast $out4,$out4,$in4
2987 blr
2988 .long 0
2989 .byte 0,12,0x14,0,0,0,0,0
2990
2991 .align 5
2992 _aesp8_xts_decrypt6x:
2993 $STU $sp,-`($FRAME+21*16+6*$SIZE_T)`($sp)
2994 mflr r0
2995 li r7,`$FRAME+8*16+15`
2996 li r8,`$FRAME+8*16+31`
2997 $PUSH r0,`$FRAME+21*16+6*$SIZE_T+$LRSAVE`($sp)
2998 stvx v20,r7,$sp # ABI says so
2999 addi r7,r7,32
3000 stvx v21,r8,$sp
3001 addi r8,r8,32
3002 stvx v22,r7,$sp
3003 addi r7,r7,32
3004 stvx v23,r8,$sp
3005 addi r8,r8,32
3006 stvx v24,r7,$sp
3007 addi r7,r7,32
3008 stvx v25,r8,$sp
3009 addi r8,r8,32
3010 stvx v26,r7,$sp
3011 addi r7,r7,32
3012 stvx v27,r8,$sp
3013 addi r8,r8,32
3014 stvx v28,r7,$sp
3015 addi r7,r7,32
3016 stvx v29,r8,$sp
3017 addi r8,r8,32
3018 stvx v30,r7,$sp
3019 stvx v31,r8,$sp
3020 mr r7,r0
3021 li r0,-1
3022 stw $vrsave,`$FRAME+21*16-4`($sp) # save vrsave
3023 li $x10,0x10
3024 $PUSH r26,`$FRAME+21*16+0*$SIZE_T`($sp)
3025 li $x20,0x20
3026 $PUSH r27,`$FRAME+21*16+1*$SIZE_T`($sp)
3027 li $x30,0x30
3028 $PUSH r28,`$FRAME+21*16+2*$SIZE_T`($sp)
3029 li $x40,0x40
3030 $PUSH r29,`$FRAME+21*16+3*$SIZE_T`($sp)
3031 li $x50,0x50
3032 $PUSH r30,`$FRAME+21*16+4*$SIZE_T`($sp)
3033 li $x60,0x60
3034 $PUSH r31,`$FRAME+21*16+5*$SIZE_T`($sp)
3035 li $x70,0x70
3036 mtspr 256,r0
3037
3038 subi $rounds,$rounds,3 # -4 in total
3039
3040 lvx $rndkey0,$x00,$key1 # load key schedule
3041 lvx v30,$x10,$key1
3042 addi $key1,$key1,0x20
3043 lvx v31,$x00,$key1
3044 ?vperm $rndkey0,$rndkey0,v30,$keyperm
3045 addi $key_,$sp,$FRAME+15
3046 mtctr $rounds
3047
3048 Load_xts_dec_key:
3049 ?vperm v24,v30,v31,$keyperm
3050 lvx v30,$x10,$key1
3051 addi $key1,$key1,0x20
3052 stvx v24,$x00,$key_ # off-load round[1]
3053 ?vperm v25,v31,v30,$keyperm
3054 lvx v31,$x00,$key1
3055 stvx v25,$x10,$key_ # off-load round[2]
3056 addi $key_,$key_,0x20
3057 bdnz Load_xts_dec_key
3058
3059 lvx v26,$x10,$key1
3060 ?vperm v24,v30,v31,$keyperm
3061 lvx v27,$x20,$key1
3062 stvx v24,$x00,$key_ # off-load round[3]
3063 ?vperm v25,v31,v26,$keyperm
3064 lvx v28,$x30,$key1
3065 stvx v25,$x10,$key_ # off-load round[4]
3066 addi $key_,$sp,$FRAME+15 # rewind $key_
3067 ?vperm v26,v26,v27,$keyperm
3068 lvx v29,$x40,$key1
3069 ?vperm v27,v27,v28,$keyperm
3070 lvx v30,$x50,$key1
3071 ?vperm v28,v28,v29,$keyperm
3072 lvx v31,$x60,$key1
3073 ?vperm v29,v29,v30,$keyperm
3074 lvx $twk5,$x70,$key1 # borrow $twk5
3075 ?vperm v30,v30,v31,$keyperm
3076 lvx v24,$x00,$key_ # pre-load round[1]
3077 ?vperm v31,v31,$twk5,$keyperm
3078 lvx v25,$x10,$key_ # pre-load round[2]
3079
3080 vperm $in0,$inout,$inptail,$inpperm
3081 	subi		$inp,$inp,31		# undo the +31 applied by "caller"
3082 vxor $twk0,$tweak,$rndkey0
3083 vsrab $tmp,$tweak,$seven # next tweak value
3084 vaddubm $tweak,$tweak,$tweak
3085 vsldoi $tmp,$tmp,$tmp,15
3086 vand $tmp,$tmp,$eighty7
3087 vxor $out0,$in0,$twk0
3088 vxor $tweak,$tweak,$tmp
3089
3090 lvx_u $in1,$x10,$inp
3091 vxor $twk1,$tweak,$rndkey0
3092 vsrab $tmp,$tweak,$seven # next tweak value
3093 vaddubm $tweak,$tweak,$tweak
3094 vsldoi $tmp,$tmp,$tmp,15
3095 le?vperm $in1,$in1,$in1,$leperm
3096 vand $tmp,$tmp,$eighty7
3097 vxor $out1,$in1,$twk1
3098 vxor $tweak,$tweak,$tmp
3099
3100 lvx_u $in2,$x20,$inp
3101 andi. $taillen,$len,15
3102 vxor $twk2,$tweak,$rndkey0
3103 vsrab $tmp,$tweak,$seven # next tweak value
3104 vaddubm $tweak,$tweak,$tweak
3105 vsldoi $tmp,$tmp,$tmp,15
3106 le?vperm $in2,$in2,$in2,$leperm
3107 vand $tmp,$tmp,$eighty7
3108 vxor $out2,$in2,$twk2
3109 vxor $tweak,$tweak,$tmp
3110
3111 lvx_u $in3,$x30,$inp
3112 sub $len,$len,$taillen
3113 vxor $twk3,$tweak,$rndkey0
3114 vsrab $tmp,$tweak,$seven # next tweak value
3115 vaddubm $tweak,$tweak,$tweak
3116 vsldoi $tmp,$tmp,$tmp,15
3117 le?vperm $in3,$in3,$in3,$leperm
3118 vand $tmp,$tmp,$eighty7
3119 vxor $out3,$in3,$twk3
3120 vxor $tweak,$tweak,$tmp
3121
3122 lvx_u $in4,$x40,$inp
3123 subi $len,$len,0x60
3124 vxor $twk4,$tweak,$rndkey0
3125 vsrab $tmp,$tweak,$seven # next tweak value
3126 vaddubm $tweak,$tweak,$tweak
3127 vsldoi $tmp,$tmp,$tmp,15
3128 le?vperm $in4,$in4,$in4,$leperm
3129 vand $tmp,$tmp,$eighty7
3130 vxor $out4,$in4,$twk4
3131 vxor $tweak,$tweak,$tmp
3132
3133 lvx_u $in5,$x50,$inp
3134 addi $inp,$inp,0x60
3135 vxor $twk5,$tweak,$rndkey0
3136 vsrab $tmp,$tweak,$seven # next tweak value
3137 vaddubm $tweak,$tweak,$tweak
3138 vsldoi $tmp,$tmp,$tmp,15
3139 le?vperm $in5,$in5,$in5,$leperm
3140 vand $tmp,$tmp,$eighty7
3141 vxor $out5,$in5,$twk5
3142 vxor $tweak,$tweak,$tmp
3143
3144 vxor v31,v31,$rndkey0
3145 mtctr $rounds
3146 b Loop_xts_dec6x
3147
3148 .align 5
3149 Loop_xts_dec6x:
3150 vncipher $out0,$out0,v24
3151 vncipher $out1,$out1,v24
3152 vncipher $out2,$out2,v24
3153 vncipher $out3,$out3,v24
3154 vncipher $out4,$out4,v24
3155 vncipher $out5,$out5,v24
3156 lvx v24,$x20,$key_ # round[3]
3157 addi $key_,$key_,0x20
3158
3159 vncipher $out0,$out0,v25
3160 vncipher $out1,$out1,v25
3161 vncipher $out2,$out2,v25
3162 vncipher $out3,$out3,v25
3163 vncipher $out4,$out4,v25
3164 vncipher $out5,$out5,v25
3165 lvx v25,$x10,$key_ # round[4]
3166 bdnz Loop_xts_dec6x
3167
3168 subic $len,$len,96 # $len-=96
3169 vxor $in0,$twk0,v31 # xor with last round key
3170 vncipher $out0,$out0,v24
3171 vncipher $out1,$out1,v24
3172 vsrab $tmp,$tweak,$seven # next tweak value
3173 vxor $twk0,$tweak,$rndkey0
3174 vaddubm $tweak,$tweak,$tweak
3175 vncipher $out2,$out2,v24
3176 vncipher $out3,$out3,v24
3177 vsldoi $tmp,$tmp,$tmp,15
3178 vncipher $out4,$out4,v24
3179 vncipher $out5,$out5,v24
3180
3181 	subfe.		r0,r0,r0		# borrow ? -1 : 0
3182 vand $tmp,$tmp,$eighty7
3183 vncipher $out0,$out0,v25
3184 vncipher $out1,$out1,v25
3185 vxor $tweak,$tweak,$tmp
3186 vncipher $out2,$out2,v25
3187 vncipher $out3,$out3,v25
3188 vxor $in1,$twk1,v31
3189 vsrab $tmp,$tweak,$seven # next tweak value
3190 vxor $twk1,$tweak,$rndkey0
3191 vncipher $out4,$out4,v25
3192 vncipher $out5,$out5,v25
3193
3194 and r0,r0,$len
3195 vaddubm $tweak,$tweak,$tweak
3196 vsldoi $tmp,$tmp,$tmp,15
3197 vncipher $out0,$out0,v26
3198 vncipher $out1,$out1,v26
3199 vand $tmp,$tmp,$eighty7
3200 vncipher $out2,$out2,v26
3201 vncipher $out3,$out3,v26
3202 vxor $tweak,$tweak,$tmp
3203 vncipher $out4,$out4,v26
3204 vncipher $out5,$out5,v26
3205
3206 	add		$inp,$inp,r0		# $inp is adjusted in such
3207 						# a way that at exit from the
3208 						# loop inX-in5 are loaded
3209 						# with the last "words"
3210 vxor $in2,$twk2,v31
3211 vsrab $tmp,$tweak,$seven # next tweak value
3212 vxor $twk2,$tweak,$rndkey0
3213 vaddubm $tweak,$tweak,$tweak
3214 vncipher $out0,$out0,v27
3215 vncipher $out1,$out1,v27
3216 vsldoi $tmp,$tmp,$tmp,15
3217 vncipher $out2,$out2,v27
3218 vncipher $out3,$out3,v27
3219 vand $tmp,$tmp,$eighty7
3220 vncipher $out4,$out4,v27
3221 vncipher $out5,$out5,v27
3222
3223 addi $key_,$sp,$FRAME+15 # rewind $key_
3224 vxor $tweak,$tweak,$tmp
3225 vncipher $out0,$out0,v28
3226 vncipher $out1,$out1,v28
3227 vxor $in3,$twk3,v31
3228 vsrab $tmp,$tweak,$seven # next tweak value
3229 vxor $twk3,$tweak,$rndkey0
3230 vncipher $out2,$out2,v28
3231 vncipher $out3,$out3,v28
3232 vaddubm $tweak,$tweak,$tweak
3233 vsldoi $tmp,$tmp,$tmp,15
3234 vncipher $out4,$out4,v28
3235 vncipher $out5,$out5,v28
3236 lvx v24,$x00,$key_ # re-pre-load round[1]
3237 vand $tmp,$tmp,$eighty7
3238
3239 vncipher $out0,$out0,v29
3240 vncipher $out1,$out1,v29
3241 vxor $tweak,$tweak,$tmp
3242 vncipher $out2,$out2,v29
3243 vncipher $out3,$out3,v29
3244 vxor $in4,$twk4,v31
3245 vsrab $tmp,$tweak,$seven # next tweak value
3246 vxor $twk4,$tweak,$rndkey0
3247 vncipher $out4,$out4,v29
3248 vncipher $out5,$out5,v29
3249 lvx v25,$x10,$key_ # re-pre-load round[2]
3250 vaddubm $tweak,$tweak,$tweak
3251 vsldoi $tmp,$tmp,$tmp,15
3252
3253 vncipher $out0,$out0,v30
3254 vncipher $out1,$out1,v30
3255 vand $tmp,$tmp,$eighty7
3256 vncipher $out2,$out2,v30
3257 vncipher $out3,$out3,v30
3258 vxor $tweak,$tweak,$tmp
3259 vncipher $out4,$out4,v30
3260 vncipher $out5,$out5,v30
3261 vxor $in5,$twk5,v31
3262 vsrab $tmp,$tweak,$seven # next tweak value
3263 vxor $twk5,$tweak,$rndkey0
3264
3265 vncipherlast $out0,$out0,$in0
3266 lvx_u $in0,$x00,$inp # load next input block
3267 vaddubm $tweak,$tweak,$tweak
3268 vsldoi $tmp,$tmp,$tmp,15
3269 vncipherlast $out1,$out1,$in1
3270 lvx_u $in1,$x10,$inp
3271 vncipherlast $out2,$out2,$in2
3272 le?vperm $in0,$in0,$in0,$leperm
3273 lvx_u $in2,$x20,$inp
3274 vand $tmp,$tmp,$eighty7
3275 vncipherlast $out3,$out3,$in3
3276 le?vperm $in1,$in1,$in1,$leperm
3277 lvx_u $in3,$x30,$inp
3278 vncipherlast $out4,$out4,$in4
3279 le?vperm $in2,$in2,$in2,$leperm
3280 lvx_u $in4,$x40,$inp
3281 vxor $tweak,$tweak,$tmp
3282 vncipherlast $out5,$out5,$in5
3283 le?vperm $in3,$in3,$in3,$leperm
3284 lvx_u $in5,$x50,$inp
3285 addi $inp,$inp,0x60
3286 le?vperm $in4,$in4,$in4,$leperm
3287 le?vperm $in5,$in5,$in5,$leperm
3288
3289 le?vperm $out0,$out0,$out0,$leperm
3290 le?vperm $out1,$out1,$out1,$leperm
3291 stvx_u $out0,$x00,$out # store output
3292 vxor $out0,$in0,$twk0
3293 le?vperm $out2,$out2,$out2,$leperm
3294 stvx_u $out1,$x10,$out
3295 vxor $out1,$in1,$twk1
3296 le?vperm $out3,$out3,$out3,$leperm
3297 stvx_u $out2,$x20,$out
3298 vxor $out2,$in2,$twk2
3299 le?vperm $out4,$out4,$out4,$leperm
3300 stvx_u $out3,$x30,$out
3301 vxor $out3,$in3,$twk3
3302 le?vperm $out5,$out5,$out5,$leperm
3303 stvx_u $out4,$x40,$out
3304 vxor $out4,$in4,$twk4
3305 stvx_u $out5,$x50,$out
3306 vxor $out5,$in5,$twk5
3307 addi $out,$out,0x60
3308
3309 mtctr $rounds
3310 beq Loop_xts_dec6x # did $len-=96 borrow?
3311
3312 addic. $len,$len,0x60
3313 beq Lxts_dec6x_zero
3314 cmpwi $len,0x20
3315 blt Lxts_dec6x_one
3316 nop
3317 beq Lxts_dec6x_two
3318 cmpwi $len,0x40
3319 blt Lxts_dec6x_three
3320 nop
3321 beq Lxts_dec6x_four
3322
3323 Lxts_dec6x_five:
3324 vxor $out0,$in1,$twk0
3325 vxor $out1,$in2,$twk1
3326 vxor $out2,$in3,$twk2
3327 vxor $out3,$in4,$twk3
3328 vxor $out4,$in5,$twk4
3329
3330 bl _aesp8_xts_dec5x
3331
3332 le?vperm $out0,$out0,$out0,$leperm
3333 vmr $twk0,$twk5 # unused tweak
3334 vxor $twk1,$tweak,$rndkey0
3335 le?vperm $out1,$out1,$out1,$leperm
3336 stvx_u $out0,$x00,$out # store output
3337 vxor $out0,$in0,$twk1
3338 le?vperm $out2,$out2,$out2,$leperm
3339 stvx_u $out1,$x10,$out
3340 le?vperm $out3,$out3,$out3,$leperm
3341 stvx_u $out2,$x20,$out
3342 le?vperm $out4,$out4,$out4,$leperm
3343 stvx_u $out3,$x30,$out
3344 stvx_u $out4,$x40,$out
3345 addi $out,$out,0x50
3346 bne Lxts_dec6x_steal
3347 b Lxts_dec6x_done
3348
3349 .align 4
3350 Lxts_dec6x_four:
3351 vxor $out0,$in2,$twk0
3352 vxor $out1,$in3,$twk1
3353 vxor $out2,$in4,$twk2
3354 vxor $out3,$in5,$twk3
3355 vxor $out4,$out4,$out4
3356
3357 bl _aesp8_xts_dec5x
3358
3359 le?vperm $out0,$out0,$out0,$leperm
3360 vmr $twk0,$twk4 # unused tweak
3361 vmr $twk1,$twk5
3362 le?vperm $out1,$out1,$out1,$leperm
3363 stvx_u $out0,$x00,$out # store output
3364 vxor $out0,$in0,$twk5
3365 le?vperm $out2,$out2,$out2,$leperm
3366 stvx_u $out1,$x10,$out
3367 le?vperm $out3,$out3,$out3,$leperm
3368 stvx_u $out2,$x20,$out
3369 stvx_u $out3,$x30,$out
3370 addi $out,$out,0x40
3371 bne Lxts_dec6x_steal
3372 b Lxts_dec6x_done
3373
3374 .align 4
3375 Lxts_dec6x_three:
3376 vxor $out0,$in3,$twk0
3377 vxor $out1,$in4,$twk1
3378 vxor $out2,$in5,$twk2
3379 vxor $out3,$out3,$out3
3380 vxor $out4,$out4,$out4
3381
3382 bl _aesp8_xts_dec5x
3383
3384 le?vperm $out0,$out0,$out0,$leperm
3385 vmr $twk0,$twk3 # unused tweak
3386 vmr $twk1,$twk4
3387 le?vperm $out1,$out1,$out1,$leperm
3388 stvx_u $out0,$x00,$out # store output
3389 vxor $out0,$in0,$twk4
3390 le?vperm $out2,$out2,$out2,$leperm
3391 stvx_u $out1,$x10,$out
3392 stvx_u $out2,$x20,$out
3393 addi $out,$out,0x30
3394 bne Lxts_dec6x_steal
3395 b Lxts_dec6x_done
3396
3397 .align 4
3398 Lxts_dec6x_two:
3399 vxor $out0,$in4,$twk0
3400 vxor $out1,$in5,$twk1
3401 vxor $out2,$out2,$out2
3402 vxor $out3,$out3,$out3
3403 vxor $out4,$out4,$out4
3404
3405 bl _aesp8_xts_dec5x
3406
3407 le?vperm $out0,$out0,$out0,$leperm
3408 vmr $twk0,$twk2 # unused tweak
3409 vmr $twk1,$twk3
3410 le?vperm $out1,$out1,$out1,$leperm
3411 stvx_u $out0,$x00,$out # store output
3412 vxor $out0,$in0,$twk3
3413 stvx_u $out1,$x10,$out
3414 addi $out,$out,0x20
3415 bne Lxts_dec6x_steal
3416 b Lxts_dec6x_done
3417
3418 .align 4
3419 Lxts_dec6x_one:
3420 vxor $out0,$in5,$twk0
3421 nop
3422 Loop_xts_dec1x:
3423 vncipher $out0,$out0,v24
3424 lvx v24,$x20,$key_ # round[3]
3425 addi $key_,$key_,0x20
3426
3427 vncipher $out0,$out0,v25
3428 lvx v25,$x10,$key_ # round[4]
3429 bdnz Loop_xts_dec1x
3430
3431 subi r0,$taillen,1
3432 vncipher $out0,$out0,v24
3433
3434 andi. r0,r0,16
3435 cmpwi $taillen,0
3436 vncipher $out0,$out0,v25
3437
3438 sub $inp,$inp,r0
3439 vncipher $out0,$out0,v26
3440
3441 lvx_u $in0,0,$inp
3442 vncipher $out0,$out0,v27
3443
3444 addi $key_,$sp,$FRAME+15 # rewind $key_
3445 vncipher $out0,$out0,v28
3446 lvx v24,$x00,$key_ # re-pre-load round[1]
3447
3448 vncipher $out0,$out0,v29
3449 lvx v25,$x10,$key_ # re-pre-load round[2]
3450 vxor $twk0,$twk0,v31
3451
3452 le?vperm $in0,$in0,$in0,$leperm
3453 vncipher $out0,$out0,v30
3454
3455 mtctr $rounds
3456 vncipherlast $out0,$out0,$twk0
3457
3458 vmr $twk0,$twk1 # unused tweak
3459 vmr $twk1,$twk2
3460 le?vperm $out0,$out0,$out0,$leperm
3461 stvx_u $out0,$x00,$out # store output
3462 addi $out,$out,0x10
3463 vxor $out0,$in0,$twk2
3464 bne Lxts_dec6x_steal
3465 b Lxts_dec6x_done
3466
3467 .align 4
3468 Lxts_dec6x_zero:
3469 cmpwi $taillen,0
3470 beq Lxts_dec6x_done
3471
3472 lvx_u $in0,0,$inp
3473 le?vperm $in0,$in0,$in0,$leperm
3474 vxor $out0,$in0,$twk1
3475 Lxts_dec6x_steal:
3476 vncipher $out0,$out0,v24
3477 lvx v24,$x20,$key_ # round[3]
3478 addi $key_,$key_,0x20
3479
3480 vncipher $out0,$out0,v25
3481 lvx v25,$x10,$key_ # round[4]
3482 bdnz Lxts_dec6x_steal
3483
3484 add $inp,$inp,$taillen
3485 vncipher $out0,$out0,v24
3486
3487 cmpwi $taillen,0
3488 vncipher $out0,$out0,v25
3489
3490 lvx_u $in0,0,$inp
3491 vncipher $out0,$out0,v26
3492
3493 	lvsr		$inpperm,0,$taillen	# $in5 is no longer needed
3494 vncipher $out0,$out0,v27
3495
3496 addi $key_,$sp,$FRAME+15 # rewind $key_
3497 vncipher $out0,$out0,v28
3498 lvx v24,$x00,$key_ # re-pre-load round[1]
3499
3500 vncipher $out0,$out0,v29
3501 lvx v25,$x10,$key_ # re-pre-load round[2]
3502 vxor $twk1,$twk1,v31
3503
3504 le?vperm $in0,$in0,$in0,$leperm
3505 vncipher $out0,$out0,v30
3506
3507 vperm $in0,$in0,$in0,$inpperm
3508 vncipherlast $tmp,$out0,$twk1
3509
3510 le?vperm $out0,$tmp,$tmp,$leperm
3511 le?stvx_u $out0,0,$out
3512 be?stvx_u $tmp,0,$out
3513
3514 vxor $out0,$out0,$out0
3515 vspltisb $out1,-1
3516 vperm $out0,$out0,$out1,$inpperm
3517 vsel $out0,$in0,$tmp,$out0
3518 vxor $out0,$out0,$twk0
3519
3520 subi r3,$out,1
3521 mtctr $taillen
3522 Loop_xts_dec6x_steal:
3523 lbzu r0,1(r3)
3524 stb r0,16(r3)
3525 bdnz Loop_xts_dec6x_steal
3526
3527 li $taillen,0
3528 mtctr $rounds
3529 b Loop_xts_dec1x # one more time...
3530
3531 .align 4
3532 Lxts_dec6x_done:
3533 mtlr r7
3534 li r10,`$FRAME+15`
3535 li r11,`$FRAME+31`
3536 stvx $seven,r10,$sp # wipe copies of round keys
3537 addi r10,r10,32
3538 stvx $seven,r11,$sp
3539 addi r11,r11,32
3540 stvx $seven,r10,$sp
3541 addi r10,r10,32
3542 stvx $seven,r11,$sp
3543 addi r11,r11,32
3544 stvx $seven,r10,$sp
3545 addi r10,r10,32
3546 stvx $seven,r11,$sp
3547 addi r11,r11,32
3548 stvx $seven,r10,$sp
3549 addi r10,r10,32
3550 stvx $seven,r11,$sp
3551 addi r11,r11,32
3552
3553 mtspr 256,$vrsave
3554 lvx v20,r10,$sp # ABI says so
3555 addi r10,r10,32
3556 lvx v21,r11,$sp
3557 addi r11,r11,32
3558 lvx v22,r10,$sp
3559 addi r10,r10,32
3560 lvx v23,r11,$sp
3561 addi r11,r11,32
3562 lvx v24,r10,$sp
3563 addi r10,r10,32
3564 lvx v25,r11,$sp
3565 addi r11,r11,32
3566 lvx v26,r10,$sp
3567 addi r10,r10,32
3568 lvx v27,r11,$sp
3569 addi r11,r11,32
3570 lvx v28,r10,$sp
3571 addi r10,r10,32
3572 lvx v29,r11,$sp
3573 addi r11,r11,32
3574 lvx v30,r10,$sp
3575 lvx v31,r11,$sp
3576 $POP r26,`$FRAME+21*16+0*$SIZE_T`($sp)
3577 $POP r27,`$FRAME+21*16+1*$SIZE_T`($sp)
3578 $POP r28,`$FRAME+21*16+2*$SIZE_T`($sp)
3579 $POP r29,`$FRAME+21*16+3*$SIZE_T`($sp)
3580 $POP r30,`$FRAME+21*16+4*$SIZE_T`($sp)
3581 $POP r31,`$FRAME+21*16+5*$SIZE_T`($sp)
3582 addi $sp,$sp,`$FRAME+21*16+6*$SIZE_T`
3583 blr
3584 .long 0
3585 .byte 0,12,0x04,1,0x80,6,6,0
3586 .long 0
3587
3588 .align 5
3589 _aesp8_xts_dec5x:
3590 vncipher $out0,$out0,v24
3591 vncipher $out1,$out1,v24
3592 vncipher $out2,$out2,v24
3593 vncipher $out3,$out3,v24
3594 vncipher $out4,$out4,v24
3595 lvx v24,$x20,$key_ # round[3]
3596 addi $key_,$key_,0x20
3597
3598 vncipher $out0,$out0,v25
3599 vncipher $out1,$out1,v25
3600 vncipher $out2,$out2,v25
3601 vncipher $out3,$out3,v25
3602 vncipher $out4,$out4,v25
3603 lvx v25,$x10,$key_ # round[4]
3604 bdnz _aesp8_xts_dec5x
3605
3606 subi r0,$taillen,1
3607 vncipher $out0,$out0,v24
3608 vncipher $out1,$out1,v24
3609 vncipher $out2,$out2,v24
3610 vncipher $out3,$out3,v24
3611 vncipher $out4,$out4,v24
3612
3613 andi. r0,r0,16
3614 cmpwi $taillen,0
3615 vncipher $out0,$out0,v25
3616 vncipher $out1,$out1,v25
3617 vncipher $out2,$out2,v25
3618 vncipher $out3,$out3,v25
3619 vncipher $out4,$out4,v25
3620 vxor $twk0,$twk0,v31
3621
3622 sub $inp,$inp,r0
3623 vncipher $out0,$out0,v26
3624 vncipher $out1,$out1,v26
3625 vncipher $out2,$out2,v26
3626 vncipher $out3,$out3,v26
3627 vncipher $out4,$out4,v26
3628 vxor $in1,$twk1,v31
3629
3630 vncipher $out0,$out0,v27
3631 lvx_u $in0,0,$inp
3632 vncipher $out1,$out1,v27
3633 vncipher $out2,$out2,v27
3634 vncipher $out3,$out3,v27
3635 vncipher $out4,$out4,v27
3636 vxor $in2,$twk2,v31
3637
3638 addi $key_,$sp,$FRAME+15 # rewind $key_
3639 vncipher $out0,$out0,v28
3640 vncipher $out1,$out1,v28
3641 vncipher $out2,$out2,v28
3642 vncipher $out3,$out3,v28
3643 vncipher $out4,$out4,v28
3644 lvx v24,$x00,$key_ # re-pre-load round[1]
3645 vxor $in3,$twk3,v31
3646
3647 vncipher $out0,$out0,v29
3648 le?vperm $in0,$in0,$in0,$leperm
3649 vncipher $out1,$out1,v29
3650 vncipher $out2,$out2,v29
3651 vncipher $out3,$out3,v29
3652 vncipher $out4,$out4,v29
3653 lvx v25,$x10,$key_ # re-pre-load round[2]
3654 vxor $in4,$twk4,v31
3655
3656 vncipher $out0,$out0,v30
3657 vncipher $out1,$out1,v30
3658 vncipher $out2,$out2,v30
3659 vncipher $out3,$out3,v30
3660 vncipher $out4,$out4,v30
3661
3662 vncipherlast $out0,$out0,$twk0
3663 vncipherlast $out1,$out1,$in1
3664 vncipherlast $out2,$out2,$in2
3665 vncipherlast $out3,$out3,$in3
3666 vncipherlast $out4,$out4,$in4
3667 mtctr $rounds
3668 blr
3669 .long 0
3670 .byte 0,12,0x14,0,0,0,0,0
3671 ___
3672 }} }}}
3673
3674 my $consts=1;
3675 foreach(split("\n",$code)) {
3676 s/\`([^\`]*)\`/eval($1)/geo;
3677
3678 # constants table endian-specific conversion
3679 if ($consts && m/\.(long|byte)\s+(.+)\s+(\?[a-z]*)$/o) {
3680 my $conv=$3;
3681 my @bytes=();
3682
3683 # convert to endian-agnostic format
3684 if ($1 eq "long") {
3685 foreach (split(/,\s*/,$2)) {
3686 my $l = /^0/?oct:int;
3687 push @bytes,($l>>24)&0xff,($l>>16)&0xff,($l>>8)&0xff,$l&0xff;
3688 }
3689 } else {
3690 @bytes = map(/^0/?oct:int,split(/,\s*/,$2));
3691 }
3692
3693 # little-endian conversion
3694 if ($flavour =~ /le$/o) {
3695 SWITCH: for($conv) {
3696 /\?inv/ && do { @bytes=map($_^0xf,@bytes); last; };
3697 /\?rev/ && do { @bytes=reverse(@bytes); last; };
3698 }
3699 }
3700
3701 	# emit
3702 print ".byte\t",join(',',map (sprintf("0x%02x",$_),@bytes)),"\n";
3703 next;
3704 }
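	# E.g. with a little-endian flavour the "?rev"-tagged rcon words
	# are emitted byte-reversed, so that a vector load of the table
	# yields the same lane values as on big-endian hardware.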
3705 $consts=0 if (m/Lconsts:/o); # end of table
3706
3707 # instructions prefixed with '?' are endian-specific and need
3708 # to be adjusted accordingly...
3709 if ($flavour =~ /le$/o) { # little-endian
3710 s/le\?//o or
3711 s/be\?/#be#/o or
3712 s/\?lvsr/lvsl/o or
3713 s/\?lvsl/lvsr/o or
3714 s/\?(vperm\s+v[0-9]+,\s*)(v[0-9]+,\s*)(v[0-9]+,\s*)(v[0-9]+)/$1$3$2$4/o or
3715 s/\?(vsldoi\s+v[0-9]+,\s*)(v[0-9]+,)\s*(v[0-9]+,\s*)([0-9]+)/$1$3$2 16-$4/o or
3716 s/\?(vspltw\s+v[0-9]+,\s*)(v[0-9]+,)\s*([0-9])/$1$2 3-$3/o;
3717 } else { # big-endian
3718 s/le\?/#le#/o or
3719 s/be\?//o or
3720 s/\?([a-z]+)/$1/o;
3721 }
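	# By this point the Perl variables have been interpolated, so on
	# little-endian flavours a key-schedule line such as
	#	?vperm	v24,v30,v31,v7
	# is rewritten with its two middle operands swapped,
	#	vperm	v24,v31,v30,v7
	# while big-endian flavours simply drop the '?' marker.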
3722
3723 print $_,"\n";
3724 }
3725
3726 close STDOUT;