3 # ====================================================================
4 # Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
5 # project. The module is, however, dual licensed under OpenSSL and
6 # CRYPTOGAMS licenses depending on where you obtain it. For further
7 # details see http://www.openssl.org/~appro/cryptogams/.
8 # ====================================================================
10 # This module implements support for AES instructions as per PowerISA
11 # specification version 2.07, first implemented by POWER8 processor.
12 # The module is endian-agnostic in the sense that it supports both big-
13 # and little-endian cases. Data alignment in parallelizable modes is
14 # handled with VSX loads and stores, which implies MSR.VSX flag being
15 # set. It should also be noted that ISA specification doesn't prohibit
16 # alignment exceptions for these instructions on page boundaries.
17 # Initially alignment was handled in a pure AltiVec/VMX way [when data
18 # is aligned programmatically, which in turn guarantees exception-
19 # free execution], but it turned out to hamper performance when vcipher
20 # instructions are interleaved. It's reckoned that eventual
21 # misalignment penalties at page boundaries are on average lower
22 # than the additional overhead of the pure AltiVec approach.
26 if ($flavour =~ /64/) {
33 } elsif ($flavour =~ /32/) {
40 } else { die "nonsense $flavour"; }
42 $LITTLE_ENDIAN = ($flavour=~/le$/) ?
$SIZE_T : 0;
44 $0 =~ m/(.*[\/\\])[^\
/\\]+$/; $dir=$1;
45 ( $xlate="${dir}ppc-xlate.pl" and -f
$xlate ) or
46 ( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f
$xlate) or
47 die "can't locate ppc-xlate.pl";
49 open STDOUT
,"| $^X $xlate $flavour ".shift || die "can't call $xlate: $!";
57 #########################################################################
58 {{{ Key setup procedures
#
59 my ($inp,$bits,$out,$ptr,$cnt,$rounds)=map("r$_",(3..8));
60 my ($zero,$in0,$in1,$key,$rcon,$mask,$tmp)=map("v$_",(0..6));
61 my ($stage,$outperm,$outmask,$outhead,$outtail)=map("v$_",(7..11));
70 .long
0x01000000, 0x01000000, 0x01000000, 0x01000000 ?rev
71 .long
0x1b000000, 0x1b000000, 0x1b000000, 0x1b000000 ?rev
72 .long
0x0d0e0f0c, 0x0d0e0f0c, 0x0d0e0f0c, 0x0d0e0f0c ?rev
77 mflr
$ptr #vvvvv "distance between . and rcon
82 .byte
0,12,0x14,0,0,0,0,0
83 .asciz
"AES for PowerISA 2.07, CRYPTOGAMS by <appro\@openssl.org>"
85 .globl
.${prefix
}_set_encrypt_key
87 .${prefix
}_set_encrypt_key
:
91 $PUSH r11
,$LRSAVE($sp)
100 addi
$inp,$inp,15 # 15 is not typo
101 lvsr
$key,0,r9
# borrow $key
105 le?vspltisb
$mask,0x0f # borrow $mask
107 le?vxor
$key,$key,$mask # adjust for byte swap
110 vperm
$in0,$in0,$in1,$key # align [and byte swap in LE]
112 vxor
$zero,$zero,$zero
115 ?lvsr
$outperm,0,$out
118 ?vperm
$outmask,$zero,$outmask,$outperm
128 vperm
$key,$in0,$in0,$mask # rotate-n-splat
129 vsldoi
$tmp,$zero,$in0,12 # >>32
130 vperm
$outtail,$in0,$in0,$outperm # rotate
131 vsel
$stage,$outhead,$outtail,$outmask
132 vmr
$outhead,$outtail
133 vcipherlast
$key,$key,$rcon
138 vsldoi
$tmp,$zero,$tmp,12 # >>32
140 vsldoi
$tmp,$zero,$tmp,12 # >>32
142 vadduwm
$rcon,$rcon,$rcon
146 lvx
$rcon,0,$ptr # last two round keys
148 vperm
$key,$in0,$in0,$mask # rotate-n-splat
149 vsldoi
$tmp,$zero,$in0,12 # >>32
150 vperm
$outtail,$in0,$in0,$outperm # rotate
151 vsel
$stage,$outhead,$outtail,$outmask
152 vmr
$outhead,$outtail
153 vcipherlast
$key,$key,$rcon
158 vsldoi
$tmp,$zero,$tmp,12 # >>32
160 vsldoi
$tmp,$zero,$tmp,12 # >>32
162 vadduwm
$rcon,$rcon,$rcon
165 vperm
$key,$in0,$in0,$mask # rotate-n-splat
166 vsldoi
$tmp,$zero,$in0,12 # >>32
167 vperm
$outtail,$in0,$in0,$outperm # rotate
168 vsel
$stage,$outhead,$outtail,$outmask
169 vmr
$outhead,$outtail
170 vcipherlast
$key,$key,$rcon
175 vsldoi
$tmp,$zero,$tmp,12 # >>32
177 vsldoi
$tmp,$zero,$tmp,12 # >>32
180 vperm
$outtail,$in0,$in0,$outperm # rotate
181 vsel
$stage,$outhead,$outtail,$outmask
182 vmr
$outhead,$outtail
185 addi
$inp,$out,15 # 15 is not typo
195 vperm
$outtail,$in0,$in0,$outperm # rotate
196 vsel
$stage,$outhead,$outtail,$outmask
197 vmr
$outhead,$outtail
200 vperm
$in1,$in1,$tmp,$key # align [and byte swap in LE]
201 vspltisb
$key,8 # borrow $key
203 vsububm
$mask,$mask,$key # adjust the mask
206 vperm
$key,$in1,$in1,$mask # rotate-n-splat
207 vsldoi
$tmp,$zero,$in0,12 # >>32
208 vcipherlast
$key,$key,$rcon
211 vsldoi
$tmp,$zero,$tmp,12 # >>32
213 vsldoi
$tmp,$zero,$tmp,12 # >>32
216 vsldoi
$stage,$zero,$in1,8
219 vsldoi
$in1,$zero,$in1,12 # >>32
220 vadduwm
$rcon,$rcon,$rcon
224 vsldoi
$stage,$stage,$in0,8
226 vperm
$key,$in1,$in1,$mask # rotate-n-splat
227 vsldoi
$tmp,$zero,$in0,12 # >>32
228 vperm
$outtail,$stage,$stage,$outperm # rotate
229 vsel
$stage,$outhead,$outtail,$outmask
230 vmr
$outhead,$outtail
231 vcipherlast
$key,$key,$rcon
235 vsldoi
$stage,$in0,$in1,8
237 vsldoi
$tmp,$zero,$tmp,12 # >>32
238 vperm
$outtail,$stage,$stage,$outperm # rotate
239 vsel
$stage,$outhead,$outtail,$outmask
240 vmr
$outhead,$outtail
242 vsldoi
$tmp,$zero,$tmp,12 # >>32
249 vsldoi
$in1,$zero,$in1,12 # >>32
250 vadduwm
$rcon,$rcon,$rcon
254 vperm
$outtail,$in0,$in0,$outperm # rotate
255 vsel
$stage,$outhead,$outtail,$outmask
256 vmr
$outhead,$outtail
258 addi
$inp,$out,15 # 15 is not typo
271 vperm
$outtail,$in0,$in0,$outperm # rotate
272 vsel
$stage,$outhead,$outtail,$outmask
273 vmr
$outhead,$outtail
276 vperm
$in1,$in1,$tmp,$key # align [and byte swap in LE]
280 vperm
$key,$in1,$in1,$mask # rotate-n-splat
281 vsldoi
$tmp,$zero,$in0,12 # >>32
282 vperm
$outtail,$in1,$in1,$outperm # rotate
283 vsel
$stage,$outhead,$outtail,$outmask
284 vmr
$outhead,$outtail
285 vcipherlast
$key,$key,$rcon
290 vsldoi
$tmp,$zero,$tmp,12 # >>32
292 vsldoi
$tmp,$zero,$tmp,12 # >>32
294 vadduwm
$rcon,$rcon,$rcon
296 vperm
$outtail,$in0,$in0,$outperm # rotate
297 vsel
$stage,$outhead,$outtail,$outmask
298 vmr
$outhead,$outtail
300 addi
$inp,$out,15 # 15 is not typo
304 vspltw
$key,$in0,3 # just splat
305 vsldoi
$tmp,$zero,$in1,12 # >>32
309 vsldoi
$tmp,$zero,$tmp,12 # >>32
311 vsldoi
$tmp,$zero,$tmp,12 # >>32
319 lvx
$in1,0,$inp # redundant in aligned case
320 vsel
$in1,$outhead,$in1,$outmask
322 xor r3
,r3
,r3
# return value
328 .byte
0,12,0x14,1,0,0,3,0
330 .size
.${prefix
}_set_encrypt_key
,.-.${prefix
}_set_encrypt_key
332 .globl
.${prefix
}_set_decrypt_key
334 .${prefix
}_set_decrypt_key
:
335 $STU $sp,-$FRAME($sp)
337 $PUSH r10
,$FRAME+$LRSAVE($sp)
342 subi
$inp,$out,240 # first round key
343 srwi
$rounds,$rounds,1
344 add
$out,$inp,$cnt # last round key
368 xor r3
,r3
,r3
# return value
372 .byte
0,12,4,1,0x80,0,3,0
374 .size
.${prefix
}_set_decrypt_key
,.-.${prefix
}_set_decrypt_key
377 #########################################################################
378 {{{ Single block en
- and decrypt procedures
#
381 my $n = $dir eq "de" ?
"n" : "";
382 my ($inp,$out,$key,$rounds,$idx)=map("r$_",(3..7));
385 .globl
.${prefix
}_
${dir
}crypt
387 .${prefix
}_
${dir
}crypt:
388 lwz
$rounds,240($key)
391 li
$idx,15 # 15 is not typo
397 lvsl v2
,0,$inp # inpperm
399 ?lvsl v3
,0,r11
# outperm
402 vperm v0
,v0
,v1
,v2
# align [and byte swap in LE]
404 ?lvsl v5
,0,$key # keyperm
405 srwi
$rounds,$rounds,1
408 subi
$rounds,$rounds,1
409 ?vperm v1
,v1
,v2
,v5
# align round key
431 v
${n
}cipherlast v0
,v0
,v1
435 li
$idx,15 # 15 is not typo
436 ?vperm v2
,v1
,v2
,v3
# outmask
438 lvx v1
,0,$out # outhead
439 vperm v0
,v0
,v0
,v3
# rotate [and byte swap in LE]
449 .byte
0,12,0x14,0,0,0,3,0
451 .size
.${prefix
}_
${dir
}crypt,.-.${prefix
}_
${dir
}crypt
457 #########################################################################
458 {{{ CBC en
- and decrypt procedures
#
459 my ($inp,$out,$len,$key,$ivp,$enc,$rounds,$idx)=map("r$_",(3..10));
460 my ($rndkey0,$rndkey1,$inout,$tmp)= map("v$_",(0..3));
461 my ($ivec,$inptail,$inpperm,$outhead,$outperm,$outmask,$keyperm)=
464 .globl
.${prefix
}_cbc_encrypt
466 .${prefix
}_cbc_encrypt
:
470 cmpwi
$enc,0 # test direction
476 vxor
$rndkey0,$rndkey0,$rndkey0
477 le?vspltisb
$tmp,0x0f
479 lvx
$ivec,0,$ivp # load [unaligned] iv
481 lvx
$inptail,$idx,$ivp
482 le?vxor
$inpperm,$inpperm,$tmp
483 vperm
$ivec,$ivec,$inptail,$inpperm
486 ?lvsl
$keyperm,0,$key # prepare for unaligned key
487 lwz
$rounds,240($key)
489 lvsr
$inpperm,0,r11
# prepare for unaligned load
491 addi
$inp,$inp,15 # 15 is not typo
492 le?vxor
$inpperm,$inpperm,$tmp
494 ?lvsr
$outperm,0,$out # prepare for unaligned store
497 ?vperm
$outmask,$rndkey0,$outmask,$outperm
498 le?vxor
$outperm,$outperm,$tmp
500 srwi
$rounds,$rounds,1
502 subi
$rounds,$rounds,1
510 subi
$len,$len,16 # len-=16
513 vperm
$inout,$inout,$inptail,$inpperm
514 lvx
$rndkey1,$idx,$key
516 ?vperm
$rndkey0,$rndkey0,$rndkey1,$keyperm
517 vxor
$inout,$inout,$rndkey0
518 lvx
$rndkey0,$idx,$key
520 vxor
$inout,$inout,$ivec
523 ?vperm
$rndkey1,$rndkey1,$rndkey0,$keyperm
524 vcipher
$inout,$inout,$rndkey1
525 lvx
$rndkey1,$idx,$key
527 ?vperm
$rndkey0,$rndkey0,$rndkey1,$keyperm
528 vcipher
$inout,$inout,$rndkey0
529 lvx
$rndkey0,$idx,$key
533 ?vperm
$rndkey1,$rndkey1,$rndkey0,$keyperm
534 vcipher
$inout,$inout,$rndkey1
535 lvx
$rndkey1,$idx,$key
537 ?vperm
$rndkey0,$rndkey0,$rndkey1,$keyperm
538 vcipherlast
$ivec,$inout,$rndkey0
541 vperm
$tmp,$ivec,$ivec,$outperm
542 vsel
$inout,$outhead,$tmp,$outmask
553 bge _aesp8_cbc_decrypt8x
558 subi
$len,$len,16 # len-=16
561 vperm
$tmp,$tmp,$inptail,$inpperm
562 lvx
$rndkey1,$idx,$key
564 ?vperm
$rndkey0,$rndkey0,$rndkey1,$keyperm
565 vxor
$inout,$tmp,$rndkey0
566 lvx
$rndkey0,$idx,$key
570 ?vperm
$rndkey1,$rndkey1,$rndkey0,$keyperm
571 vncipher
$inout,$inout,$rndkey1
572 lvx
$rndkey1,$idx,$key
574 ?vperm
$rndkey0,$rndkey0,$rndkey1,$keyperm
575 vncipher
$inout,$inout,$rndkey0
576 lvx
$rndkey0,$idx,$key
580 ?vperm
$rndkey1,$rndkey1,$rndkey0,$keyperm
581 vncipher
$inout,$inout,$rndkey1
582 lvx
$rndkey1,$idx,$key
584 ?vperm
$rndkey0,$rndkey0,$rndkey1,$keyperm
585 vncipherlast
$inout,$inout,$rndkey0
588 vxor
$inout,$inout,$ivec
590 vperm
$tmp,$inout,$inout,$outperm
591 vsel
$inout,$outhead,$tmp,$outmask
599 lvx
$inout,0,$out # redundant in aligned case
600 vsel
$inout,$outhead,$inout,$outmask
603 neg
$enc,$ivp # write [unaligned] iv
604 li
$idx,15 # 15 is not typo
605 vxor
$rndkey0,$rndkey0,$rndkey0
607 le?vspltisb
$tmp,0x0f
608 ?lvsl
$outperm,0,$enc
609 ?vperm
$outmask,$rndkey0,$outmask,$outperm
610 le?vxor
$outperm,$outperm,$tmp
612 vperm
$ivec,$ivec,$ivec,$outperm
613 vsel
$inout,$outhead,$ivec,$outmask
614 lvx
$inptail,$idx,$ivp
616 vsel
$inout,$ivec,$inptail,$outmask
617 stvx
$inout,$idx,$ivp
622 .byte
0,12,0x14,0,0,0,6,0
625 #########################################################################
626 {{ Optimized CBC decrypt procedure
#
628 my ($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,8,26..31));
629 my ($in0, $in1, $in2, $in3, $in4, $in5, $in6, $in7 )=map("v$_",(0..3,10..13));
630 my ($out0,$out1,$out2,$out3,$out4,$out5,$out6,$out7)=map("v$_",(14..21));
631 my $rndkey0="v23"; # v24-v25 rotating buffer for first found keys
632 # v26-v31 last 6 round keys
633 my ($tmp,$keyperm)=($in3,$in4); # aliases with "caller", redundant assignment
637 _aesp8_cbc_decrypt8x
:
638 $STU $sp,-`($FRAME+21*16+6*$SIZE_T)`($sp)
639 li r10
,`$FRAME+8*16+15`
640 li r11
,`$FRAME+8*16+31`
641 stvx v20
,r10
,$sp # ABI says so
664 stw
$vrsave,`$FRAME+21*16-4`($sp) # save vrsave
666 $PUSH r26
,`$FRAME+21*16+0*$SIZE_T`($sp)
668 $PUSH r27
,`$FRAME+21*16+1*$SIZE_T`($sp)
670 $PUSH r28
,`$FRAME+21*16+2*$SIZE_T`($sp)
672 $PUSH r29
,`$FRAME+21*16+3*$SIZE_T`($sp)
674 $PUSH r30
,`$FRAME+21*16+4*$SIZE_T`($sp)
676 $PUSH r31
,`$FRAME+21*16+5*$SIZE_T`($sp)
680 subi
$rounds,$rounds,3 # -4 in total
681 subi
$len,$len,128 # bias
683 lvx
$rndkey0,$x00,$key # load key schedule
687 ?vperm
$rndkey0,$rndkey0,v30
,$keyperm
688 addi
$key_,$sp,$FRAME+15
692 ?vperm v24
,v30
,v31
,$keyperm
695 stvx v24
,$x00,$key_ # off-load round[1]
696 ?vperm v25
,v31
,v30
,$keyperm
698 stvx v25
,$x10,$key_ # off-load round[2]
699 addi
$key_,$key_,0x20
700 bdnz Load_cbc_dec_key
703 ?vperm v24
,v30
,v31
,$keyperm
705 stvx v24
,$x00,$key_ # off-load round[3]
706 ?vperm v25
,v31
,v26
,$keyperm
708 stvx v25
,$x10,$key_ # off-load round[4]
709 addi
$key_,$sp,$FRAME+15 # rewind $key_
710 ?vperm v26
,v26
,v27
,$keyperm
712 ?vperm v27
,v27
,v28
,$keyperm
714 ?vperm v28
,v28
,v29
,$keyperm
716 ?vperm v29
,v29
,v30
,$keyperm
717 lvx
$out0,$x70,$key # borrow $out0
718 ?vperm v30
,v30
,v31
,$keyperm
719 lvx v24
,$x00,$key_ # pre-load round[1]
720 ?vperm v31
,v31
,$out0,$keyperm
721 lvx v25
,$x10,$key_ # pre-load round[2]
723 #lvx $inptail,0,$inp # "caller" already did this
724 #addi $inp,$inp,15 # 15 is not typo
725 subi
$inp,$inp,15 # undo "caller"
728 lvx_u
$in0,$x00,$inp # load first 8 "words"
729 le?lvsl
$inpperm,0,$idx
730 le?vspltisb
$tmp,0x0f
732 le?vxor
$inpperm,$inpperm,$tmp # transform for lvx_u/stvx_u
734 le?vperm
$in0,$in0,$in0,$inpperm
736 le?vperm
$in1,$in1,$in1,$inpperm
738 le?vperm
$in2,$in2,$in2,$inpperm
739 vxor
$out0,$in0,$rndkey0
741 le?vperm
$in3,$in3,$in3,$inpperm
742 vxor
$out1,$in1,$rndkey0
744 le?vperm
$in4,$in4,$in4,$inpperm
745 vxor
$out2,$in2,$rndkey0
748 le?vperm
$in5,$in5,$in5,$inpperm
749 vxor
$out3,$in3,$rndkey0
750 le?vperm
$in6,$in6,$in6,$inpperm
751 vxor
$out4,$in4,$rndkey0
752 le?vperm
$in7,$in7,$in7,$inpperm
753 vxor
$out5,$in5,$rndkey0
754 vxor
$out6,$in6,$rndkey0
755 vxor
$out7,$in7,$rndkey0
761 vncipher
$out0,$out0,v24
762 vncipher
$out1,$out1,v24
763 vncipher
$out2,$out2,v24
764 vncipher
$out3,$out3,v24
765 vncipher
$out4,$out4,v24
766 vncipher
$out5,$out5,v24
767 vncipher
$out6,$out6,v24
768 vncipher
$out7,$out7,v24
769 lvx v24
,$x20,$key_ # round[3]
770 addi
$key_,$key_,0x20
772 vncipher
$out0,$out0,v25
773 vncipher
$out1,$out1,v25
774 vncipher
$out2,$out2,v25
775 vncipher
$out3,$out3,v25
776 vncipher
$out4,$out4,v25
777 vncipher
$out5,$out5,v25
778 vncipher
$out6,$out6,v25
779 vncipher
$out7,$out7,v25
780 lvx v25
,$x10,$key_ # round[4]
783 subic
$len,$len,128 # $len-=128
784 vncipher
$out0,$out0,v24
785 vncipher
$out1,$out1,v24
786 vncipher
$out2,$out2,v24
787 vncipher
$out3,$out3,v24
788 vncipher
$out4,$out4,v24
789 vncipher
$out5,$out5,v24
790 vncipher
$out6,$out6,v24
791 vncipher
$out7,$out7,v24
793 subfe
. r0
,r0
,r0
# borrow?-1:0
794 vncipher
$out0,$out0,v25
795 vncipher
$out1,$out1,v25
796 vncipher
$out2,$out2,v25
797 vncipher
$out3,$out3,v25
798 vncipher
$out4,$out4,v25
799 vncipher
$out5,$out5,v25
800 vncipher
$out6,$out6,v25
801 vncipher
$out7,$out7,v25
804 vncipher
$out0,$out0,v26
805 vncipher
$out1,$out1,v26
806 vncipher
$out2,$out2,v26
807 vncipher
$out3,$out3,v26
808 vncipher
$out4,$out4,v26
809 vncipher
$out5,$out5,v26
810 vncipher
$out6,$out6,v26
811 vncipher
$out7,$out7,v26
813 add
$inp,$inp,r0
# $inp is adjusted in such
814 # way that at exit from the
815 # loop inX-in7 are loaded
817 vncipher
$out0,$out0,v27
818 vncipher
$out1,$out1,v27
819 vncipher
$out2,$out2,v27
820 vncipher
$out3,$out3,v27
821 vncipher
$out4,$out4,v27
822 vncipher
$out5,$out5,v27
823 vncipher
$out6,$out6,v27
824 vncipher
$out7,$out7,v27
826 addi
$key_,$sp,$FRAME+15 # rewind $key_
827 vncipher
$out0,$out0,v28
828 vncipher
$out1,$out1,v28
829 vncipher
$out2,$out2,v28
830 vncipher
$out3,$out3,v28
831 vncipher
$out4,$out4,v28
832 vncipher
$out5,$out5,v28
833 vncipher
$out6,$out6,v28
834 vncipher
$out7,$out7,v28
835 lvx v24
,$x00,$key_ # re-pre-load round[1]
837 vncipher
$out0,$out0,v29
838 vncipher
$out1,$out1,v29
839 vncipher
$out2,$out2,v29
840 vncipher
$out3,$out3,v29
841 vncipher
$out4,$out4,v29
842 vncipher
$out5,$out5,v29
843 vncipher
$out6,$out6,v29
844 vncipher
$out7,$out7,v29
845 lvx v25
,$x10,$key_ # re-pre-load round[2]
847 vncipher
$out0,$out0,v30
848 vxor
$ivec,$ivec,v31
# xor with last round key
849 vncipher
$out1,$out1,v30
851 vncipher
$out2,$out2,v30
853 vncipher
$out3,$out3,v30
855 vncipher
$out4,$out4,v30
857 vncipher
$out5,$out5,v30
859 vncipher
$out6,$out6,v30
861 vncipher
$out7,$out7,v30
864 vncipherlast
$out0,$out0,$ivec
865 vncipherlast
$out1,$out1,$in0
866 lvx_u
$in0,$x00,$inp # load next input block
867 vncipherlast
$out2,$out2,$in1
869 vncipherlast
$out3,$out3,$in2
870 le?vperm
$in0,$in0,$in0,$inpperm
872 vncipherlast
$out4,$out4,$in3
873 le?vperm
$in1,$in1,$in1,$inpperm
875 vncipherlast
$out5,$out5,$in4
876 le?vperm
$in2,$in2,$in2,$inpperm
878 vncipherlast
$out6,$out6,$in5
879 le?vperm
$in3,$in3,$in3,$inpperm
881 vncipherlast
$out7,$out7,$in6
882 le?vperm
$in4,$in4,$in4,$inpperm
885 le?vperm
$in5,$in5,$in5,$inpperm
889 le?vperm
$out0,$out0,$out0,$inpperm
890 le?vperm
$out1,$out1,$out1,$inpperm
891 stvx_u
$out0,$x00,$out
892 le?vperm
$in6,$in6,$in6,$inpperm
893 vxor
$out0,$in0,$rndkey0
894 le?vperm
$out2,$out2,$out2,$inpperm
895 stvx_u
$out1,$x10,$out
896 le?vperm
$in7,$in7,$in7,$inpperm
897 vxor
$out1,$in1,$rndkey0
898 le?vperm
$out3,$out3,$out3,$inpperm
899 stvx_u
$out2,$x20,$out
900 vxor
$out2,$in2,$rndkey0
901 le?vperm
$out4,$out4,$out4,$inpperm
902 stvx_u
$out3,$x30,$out
903 vxor
$out3,$in3,$rndkey0
904 le?vperm
$out5,$out5,$out5,$inpperm
905 stvx_u
$out4,$x40,$out
906 vxor
$out4,$in4,$rndkey0
907 le?vperm
$out6,$out6,$out6,$inpperm
908 stvx_u
$out5,$x50,$out
909 vxor
$out5,$in5,$rndkey0
910 le?vperm
$out7,$out7,$out7,$inpperm
911 stvx_u
$out6,$x60,$out
912 vxor
$out6,$in6,$rndkey0
913 stvx_u
$out7,$x70,$out
915 vxor
$out7,$in7,$rndkey0
918 beq Loop_cbc_dec8x
# did $len-=128 borrow?
925 Loop_cbc_dec8x_tail
: # up to 7 "words" tail...
926 vncipher
$out1,$out1,v24
927 vncipher
$out2,$out2,v24
928 vncipher
$out3,$out3,v24
929 vncipher
$out4,$out4,v24
930 vncipher
$out5,$out5,v24
931 vncipher
$out6,$out6,v24
932 vncipher
$out7,$out7,v24
933 lvx v24
,$x20,$key_ # round[3]
934 addi
$key_,$key_,0x20
936 vncipher
$out1,$out1,v25
937 vncipher
$out2,$out2,v25
938 vncipher
$out3,$out3,v25
939 vncipher
$out4,$out4,v25
940 vncipher
$out5,$out5,v25
941 vncipher
$out6,$out6,v25
942 vncipher
$out7,$out7,v25
943 lvx v25
,$x10,$key_ # round[4]
944 bdnz Loop_cbc_dec8x_tail
946 vncipher
$out1,$out1,v24
947 vncipher
$out2,$out2,v24
948 vncipher
$out3,$out3,v24
949 vncipher
$out4,$out4,v24
950 vncipher
$out5,$out5,v24
951 vncipher
$out6,$out6,v24
952 vncipher
$out7,$out7,v24
954 vncipher
$out1,$out1,v25
955 vncipher
$out2,$out2,v25
956 vncipher
$out3,$out3,v25
957 vncipher
$out4,$out4,v25
958 vncipher
$out5,$out5,v25
959 vncipher
$out6,$out6,v25
960 vncipher
$out7,$out7,v25
962 vncipher
$out1,$out1,v26
963 vncipher
$out2,$out2,v26
964 vncipher
$out3,$out3,v26
965 vncipher
$out4,$out4,v26
966 vncipher
$out5,$out5,v26
967 vncipher
$out6,$out6,v26
968 vncipher
$out7,$out7,v26
970 vncipher
$out1,$out1,v27
971 vncipher
$out2,$out2,v27
972 vncipher
$out3,$out3,v27
973 vncipher
$out4,$out4,v27
974 vncipher
$out5,$out5,v27
975 vncipher
$out6,$out6,v27
976 vncipher
$out7,$out7,v27
978 vncipher
$out1,$out1,v28
979 vncipher
$out2,$out2,v28
980 vncipher
$out3,$out3,v28
981 vncipher
$out4,$out4,v28
982 vncipher
$out5,$out5,v28
983 vncipher
$out6,$out6,v28
984 vncipher
$out7,$out7,v28
986 vncipher
$out1,$out1,v29
987 vncipher
$out2,$out2,v29
988 vncipher
$out3,$out3,v29
989 vncipher
$out4,$out4,v29
990 vncipher
$out5,$out5,v29
991 vncipher
$out6,$out6,v29
992 vncipher
$out7,$out7,v29
994 vncipher
$out1,$out1,v30
995 vxor
$ivec,$ivec,v31
# last round key
996 vncipher
$out2,$out2,v30
998 vncipher
$out3,$out3,v30
1000 vncipher
$out4,$out4,v30
1002 vncipher
$out5,$out5,v30
1004 vncipher
$out6,$out6,v30
1006 vncipher
$out7,$out7,v30
1009 cmplwi
$len,32 # switch($len)
1014 blt Lcbc_dec8x_three
1023 vncipherlast
$out1,$out1,$ivec
1024 vncipherlast
$out2,$out2,$in1
1025 vncipherlast
$out3,$out3,$in2
1026 vncipherlast
$out4,$out4,$in3
1027 vncipherlast
$out5,$out5,$in4
1028 vncipherlast
$out6,$out6,$in5
1029 vncipherlast
$out7,$out7,$in6
1032 le?vperm
$out1,$out1,$out1,$inpperm
1033 le?vperm
$out2,$out2,$out2,$inpperm
1034 stvx_u
$out1,$x00,$out
1035 le?vperm
$out3,$out3,$out3,$inpperm
1036 stvx_u
$out2,$x10,$out
1037 le?vperm
$out4,$out4,$out4,$inpperm
1038 stvx_u
$out3,$x20,$out
1039 le?vperm
$out5,$out5,$out5,$inpperm
1040 stvx_u
$out4,$x30,$out
1041 le?vperm
$out6,$out6,$out6,$inpperm
1042 stvx_u
$out5,$x40,$out
1043 le?vperm
$out7,$out7,$out7,$inpperm
1044 stvx_u
$out6,$x50,$out
1045 stvx_u
$out7,$x60,$out
1051 vncipherlast
$out2,$out2,$ivec
1052 vncipherlast
$out3,$out3,$in2
1053 vncipherlast
$out4,$out4,$in3
1054 vncipherlast
$out5,$out5,$in4
1055 vncipherlast
$out6,$out6,$in5
1056 vncipherlast
$out7,$out7,$in6
1059 le?vperm
$out2,$out2,$out2,$inpperm
1060 le?vperm
$out3,$out3,$out3,$inpperm
1061 stvx_u
$out2,$x00,$out
1062 le?vperm
$out4,$out4,$out4,$inpperm
1063 stvx_u
$out3,$x10,$out
1064 le?vperm
$out5,$out5,$out5,$inpperm
1065 stvx_u
$out4,$x20,$out
1066 le?vperm
$out6,$out6,$out6,$inpperm
1067 stvx_u
$out5,$x30,$out
1068 le?vperm
$out7,$out7,$out7,$inpperm
1069 stvx_u
$out6,$x40,$out
1070 stvx_u
$out7,$x50,$out
1076 vncipherlast
$out3,$out3,$ivec
1077 vncipherlast
$out4,$out4,$in3
1078 vncipherlast
$out5,$out5,$in4
1079 vncipherlast
$out6,$out6,$in5
1080 vncipherlast
$out7,$out7,$in6
1083 le?vperm
$out3,$out3,$out3,$inpperm
1084 le?vperm
$out4,$out4,$out4,$inpperm
1085 stvx_u
$out3,$x00,$out
1086 le?vperm
$out5,$out5,$out5,$inpperm
1087 stvx_u
$out4,$x10,$out
1088 le?vperm
$out6,$out6,$out6,$inpperm
1089 stvx_u
$out5,$x20,$out
1090 le?vperm
$out7,$out7,$out7,$inpperm
1091 stvx_u
$out6,$x30,$out
1092 stvx_u
$out7,$x40,$out
1098 vncipherlast
$out4,$out4,$ivec
1099 vncipherlast
$out5,$out5,$in4
1100 vncipherlast
$out6,$out6,$in5
1101 vncipherlast
$out7,$out7,$in6
1104 le?vperm
$out4,$out4,$out4,$inpperm
1105 le?vperm
$out5,$out5,$out5,$inpperm
1106 stvx_u
$out4,$x00,$out
1107 le?vperm
$out6,$out6,$out6,$inpperm
1108 stvx_u
$out5,$x10,$out
1109 le?vperm
$out7,$out7,$out7,$inpperm
1110 stvx_u
$out6,$x20,$out
1111 stvx_u
$out7,$x30,$out
1117 vncipherlast
$out5,$out5,$ivec
1118 vncipherlast
$out6,$out6,$in5
1119 vncipherlast
$out7,$out7,$in6
1122 le?vperm
$out5,$out5,$out5,$inpperm
1123 le?vperm
$out6,$out6,$out6,$inpperm
1124 stvx_u
$out5,$x00,$out
1125 le?vperm
$out7,$out7,$out7,$inpperm
1126 stvx_u
$out6,$x10,$out
1127 stvx_u
$out7,$x20,$out
1133 vncipherlast
$out6,$out6,$ivec
1134 vncipherlast
$out7,$out7,$in6
1137 le?vperm
$out6,$out6,$out6,$inpperm
1138 le?vperm
$out7,$out7,$out7,$inpperm
1139 stvx_u
$out6,$x00,$out
1140 stvx_u
$out7,$x10,$out
1146 vncipherlast
$out7,$out7,$ivec
1149 le?vperm
$out7,$out7,$out7,$inpperm
1154 le?vperm
$ivec,$ivec,$ivec,$inpperm
1155 stvx_u
$ivec,0,$ivp # write [unaligned] iv
1159 stvx
$inpperm,r10
,$sp # wipe copies of round keys
1161 stvx
$inpperm,r11
,$sp
1163 stvx
$inpperm,r10
,$sp
1165 stvx
$inpperm,r11
,$sp
1167 stvx
$inpperm,r10
,$sp
1169 stvx
$inpperm,r11
,$sp
1171 stvx
$inpperm,r10
,$sp
1173 stvx
$inpperm,r11
,$sp
1177 lvx v20
,r10
,$sp # ABI says so
1199 $POP r26
,`$FRAME+21*16+0*$SIZE_T`($sp)
1200 $POP r27
,`$FRAME+21*16+1*$SIZE_T`($sp)
1201 $POP r28
,`$FRAME+21*16+2*$SIZE_T`($sp)
1202 $POP r29
,`$FRAME+21*16+3*$SIZE_T`($sp)
1203 $POP r30
,`$FRAME+21*16+4*$SIZE_T`($sp)
1204 $POP r31
,`$FRAME+21*16+5*$SIZE_T`($sp)
1205 addi
$sp,$sp,`$FRAME+21*16+6*$SIZE_T`
1208 .byte
0,12,0x14,0,0x80,6,6,0
1210 .size
.${prefix
}_cbc_encrypt
,.-.${prefix
}_cbc_encrypt
1215 foreach(split("\n",$code)) {
1216 s/\`([^\`]*)\`/eval($1)/geo;
1218 # constants table endian-specific conversion
1219 if ($consts && m/\.(long|byte)\s+(.+)\s+(\?[a-z]*)$/o) {
1223 # convert to endian-agnostic format
1225 foreach (split(/,\s*/,$2)) {
1226 my $l = /^0/?
oct:int;
1227 push @bytes,($l>>24)&0xff,($l>>16)&0xff,($l>>8)&0xff,$l&0xff;
1230 @bytes = map(/^0/?
oct:int,split(/,\s*/,$2));
1233 # little-endian conversion
1234 if ($flavour =~ /le$/o) {
1235 SWITCH
: for($conv) {
1236 /\?inv/ && do { @bytes=map($_^0xf,@bytes); last; };
1237 /\?rev/ && do { @bytes=reverse(@bytes); last; };
1242 print ".byte\t",join(',',map (sprintf("0x%02x",$_),@bytes)),"\n";
1245 $consts=0 if (m/Lconsts:/o); # end of table
1247 # instructions prefixed with '?' are endian-specific and need
1248 # to be adjusted accordingly...
1249 if ($flavour =~ /le$/o) { # little-endian
1254 s/\?(vperm\s+v[0-9]+,\s*)(v[0-9]+,\s*)(v[0-9]+,\s*)(v[0-9]+)/$1$3$2$4/o or
1255 s/\?(vsldoi\s+v[0-9]+,\s*)(v[0-9]+,)\s*(v[0-9]+,\s*)([0-9]+)/$1$3$2 16-$4/o or
1256 s/\?(vspltw\s+v[0-9]+,\s*)(v[0-9]+,)\s*([0-9])/$1$2 3-$3/o;
1257 } else { # big-endian