2 # Copyright 2014-2016 The OpenSSL Project Authors. All Rights Reserved.
4 # Licensed under the OpenSSL license (the "License"). You may not use
5 # this file except in compliance with the License. You can obtain a copy
6 # in the file LICENSE in the source distribution or at
7 # https://www.openssl.org/source/license.html
10 # ====================================================================
11 # Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
12 # project. The module is, however, dual licensed under OpenSSL and
13 # CRYPTOGAMS licenses depending on where you obtain it. For further
14 # details see http://www.openssl.org/~appro/cryptogams/.
15 # ====================================================================
17 # This module implements support for AES instructions as per PowerISA
18 # specification version 2.07, first implemented by POWER8 processor.
19 # The module is endian-agnostic in the sense that it supports both big-
20 # and little-endian cases. Data alignment in parallelizable modes is
21 # handled with VSX loads and stores, which implies MSR.VSX flag being
22 # set. It should also be noted that ISA specification doesn't prohibit
23 # alignment exceptions for these instructions on page boundaries.
24 # Initially alignment was handled in pure AltiVec/VMX way [when data
25 # is aligned programmatically, which in turn guarantees exception-
26 # free execution], but it turned to hamper performance when vcipher
27 # instructions are interleaved. It turned out to hamper performance, and
28 # it's reckoned that eventual
29 # misalignment penalties at page boundaries are on average lower
33 if ($flavour =~ /64/) {
41 } elsif ($flavour =~ /32/) {
49 } else { die "nonsense $flavour"; }
51 $LITTLE_ENDIAN = ($flavour=~/le$/) ?
$SIZE_T : 0;
53 $0 =~ m/(.*[\/\\])[^\
/\\]+$/; $dir=$1;
54 ( $xlate="${dir}ppc-xlate.pl" and -f
$xlate ) or
55 ( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f
$xlate) or
56 die "can't locate ppc-xlate.pl";
58 open STDOUT
,"| $^X $xlate $flavour ".shift || die "can't call $xlate: $!";
66 #########################################################################
67 {{{ # Key setup procedures #
68 my ($inp,$bits,$out,$ptr,$cnt,$rounds)=map("r$_",(3..8));
69 my ($zero,$in0,$in1,$key,$rcon,$mask,$tmp)=map("v$_",(0..6));
70 my ($stage,$outperm,$outmask,$outhead,$outtail)=map("v$_",(7..11));
79 .long
0x01000000, 0x01000000, 0x01000000, 0x01000000 ?rev
80 .long
0x1b000000, 0x1b000000, 0x1b000000, 0x1b000000 ?rev
81 .long
0x0d0e0f0c, 0x0d0e0f0c, 0x0d0e0f0c, 0x0d0e0f0c ?rev
86 mflr
$ptr #vvvvv "distance between . and rcon
91 .byte
0,12,0x14,0,0,0,0,0
92 .asciz
"AES for PowerISA 2.07, CRYPTOGAMS by <appro\@openssl.org>"
94 .globl
.${prefix
}_set_encrypt_key
96 .${prefix
}_set_encrypt_key
:
99 $PUSH r11
,$LRSAVE($sp)
103 beq
- Lenc_key_abort
# if ($inp==0) return -1;
105 beq
- Lenc_key_abort
# if ($out==0) return -1;
123 addi
$inp,$inp,15 # 15 is not typo
124 lvsr
$key,0,r9
# borrow $key
128 le?vspltisb
$mask,0x0f # borrow $mask
130 le?vxor
$key,$key,$mask # adjust for byte swap
133 vperm
$in0,$in0,$in1,$key # align [and byte swap in LE]
135 vxor
$zero,$zero,$zero
138 ?lvsr
$outperm,0,$out
141 ?vperm
$outmask,$zero,$outmask,$outperm
151 vperm
$key,$in0,$in0,$mask # rotate-n-splat
152 vsldoi
$tmp,$zero,$in0,12 # >>32
153 vperm
$outtail,$in0,$in0,$outperm # rotate
154 vsel
$stage,$outhead,$outtail,$outmask
155 vmr
$outhead,$outtail
156 vcipherlast
$key,$key,$rcon
161 vsldoi
$tmp,$zero,$tmp,12 # >>32
163 vsldoi
$tmp,$zero,$tmp,12 # >>32
165 vadduwm
$rcon,$rcon,$rcon
169 lvx
$rcon,0,$ptr # last two round keys
171 vperm
$key,$in0,$in0,$mask # rotate-n-splat
172 vsldoi
$tmp,$zero,$in0,12 # >>32
173 vperm
$outtail,$in0,$in0,$outperm # rotate
174 vsel
$stage,$outhead,$outtail,$outmask
175 vmr
$outhead,$outtail
176 vcipherlast
$key,$key,$rcon
181 vsldoi
$tmp,$zero,$tmp,12 # >>32
183 vsldoi
$tmp,$zero,$tmp,12 # >>32
185 vadduwm
$rcon,$rcon,$rcon
188 vperm
$key,$in0,$in0,$mask # rotate-n-splat
189 vsldoi
$tmp,$zero,$in0,12 # >>32
190 vperm
$outtail,$in0,$in0,$outperm # rotate
191 vsel
$stage,$outhead,$outtail,$outmask
192 vmr
$outhead,$outtail
193 vcipherlast
$key,$key,$rcon
198 vsldoi
$tmp,$zero,$tmp,12 # >>32
200 vsldoi
$tmp,$zero,$tmp,12 # >>32
203 vperm
$outtail,$in0,$in0,$outperm # rotate
204 vsel
$stage,$outhead,$outtail,$outmask
205 vmr
$outhead,$outtail
208 addi
$inp,$out,15 # 15 is not typo
218 vperm
$outtail,$in0,$in0,$outperm # rotate
219 vsel
$stage,$outhead,$outtail,$outmask
220 vmr
$outhead,$outtail
223 vperm
$in1,$in1,$tmp,$key # align [and byte swap in LE]
224 vspltisb
$key,8 # borrow $key
226 vsububm
$mask,$mask,$key # adjust the mask
229 vperm
$key,$in1,$in1,$mask # roate-n-splat
230 vsldoi
$tmp,$zero,$in0,12 # >>32
231 vcipherlast
$key,$key,$rcon
234 vsldoi
$tmp,$zero,$tmp,12 # >>32
236 vsldoi
$tmp,$zero,$tmp,12 # >>32
239 vsldoi
$stage,$zero,$in1,8
242 vsldoi
$in1,$zero,$in1,12 # >>32
243 vadduwm
$rcon,$rcon,$rcon
247 vsldoi
$stage,$stage,$in0,8
249 vperm
$key,$in1,$in1,$mask # rotate-n-splat
250 vsldoi
$tmp,$zero,$in0,12 # >>32
251 vperm
$outtail,$stage,$stage,$outperm # rotate
252 vsel
$stage,$outhead,$outtail,$outmask
253 vmr
$outhead,$outtail
254 vcipherlast
$key,$key,$rcon
258 vsldoi
$stage,$in0,$in1,8
260 vsldoi
$tmp,$zero,$tmp,12 # >>32
261 vperm
$outtail,$stage,$stage,$outperm # rotate
262 vsel
$stage,$outhead,$outtail,$outmask
263 vmr
$outhead,$outtail
265 vsldoi
$tmp,$zero,$tmp,12 # >>32
272 vsldoi
$in1,$zero,$in1,12 # >>32
273 vadduwm
$rcon,$rcon,$rcon
277 vperm
$outtail,$in0,$in0,$outperm # rotate
278 vsel
$stage,$outhead,$outtail,$outmask
279 vmr
$outhead,$outtail
281 addi
$inp,$out,15 # 15 is not typo
294 vperm
$outtail,$in0,$in0,$outperm # rotate
295 vsel
$stage,$outhead,$outtail,$outmask
296 vmr
$outhead,$outtail
299 vperm
$in1,$in1,$tmp,$key # align [and byte swap in LE]
303 vperm
$key,$in1,$in1,$mask # rotate-n-splat
304 vsldoi
$tmp,$zero,$in0,12 # >>32
305 vperm
$outtail,$in1,$in1,$outperm # rotate
306 vsel
$stage,$outhead,$outtail,$outmask
307 vmr
$outhead,$outtail
308 vcipherlast
$key,$key,$rcon
313 vsldoi
$tmp,$zero,$tmp,12 # >>32
315 vsldoi
$tmp,$zero,$tmp,12 # >>32
317 vadduwm
$rcon,$rcon,$rcon
319 vperm
$outtail,$in0,$in0,$outperm # rotate
320 vsel
$stage,$outhead,$outtail,$outmask
321 vmr
$outhead,$outtail
323 addi
$inp,$out,15 # 15 is not typo
327 vspltw
$key,$in0,3 # just splat
328 vsldoi
$tmp,$zero,$in1,12 # >>32
332 vsldoi
$tmp,$zero,$tmp,12 # >>32
334 vsldoi
$tmp,$zero,$tmp,12 # >>32
342 lvx
$in1,0,$inp # redundant in aligned case
343 vsel
$in1,$outhead,$in1,$outmask
353 .byte
0,12,0x14,1,0,0,3,0
355 .size
.${prefix
}_set_encrypt_key
,.-.${prefix
}_set_encrypt_key
357 .globl
.${prefix
}_set_decrypt_key
359 .${prefix
}_set_decrypt_key
:
360 $STU $sp,-$FRAME($sp)
362 $PUSH r10
,$FRAME+$LRSAVE($sp)
370 subi
$inp,$out,240 # first round key
371 srwi
$rounds,$rounds,1
372 add
$out,$inp,$cnt # last round key
396 xor r3
,r3
,r3
# return value
401 .byte
0,12,4,1,0x80,0,3,0
403 .size
.${prefix
}_set_decrypt_key
,.-.${prefix
}_set_decrypt_key
406 #########################################################################
407 {{{ # Single block en- and decrypt procedures #
410 my $n = $dir eq "de" ?
"n" : "";
411 my ($inp,$out,$key,$rounds,$idx)=map("r$_",(3..7));
414 .globl
.${prefix
}_
${dir
}crypt
416 .${prefix
}_
${dir
}crypt:
417 lwz
$rounds,240($key)
420 li
$idx,15 # 15 is not typo
426 lvsl v2
,0,$inp # inpperm
428 ?lvsl v3
,0,r11
# outperm
431 vperm v0
,v0
,v1
,v2
# align [and byte swap in LE]
433 ?lvsl v5
,0,$key # keyperm
434 srwi
$rounds,$rounds,1
437 subi
$rounds,$rounds,1
438 ?vperm v1
,v1
,v2
,v5
# align round key
460 v
${n
}cipherlast v0
,v0
,v1
464 li
$idx,15 # 15 is not typo
465 ?vperm v2
,v1
,v2
,v3
# outmask
467 lvx v1
,0,$out # outhead
468 vperm v0
,v0
,v0
,v3
# rotate [and byte swap in LE]
478 .byte
0,12,0x14,0,0,0,3,0
480 .size
.${prefix
}_
${dir
}crypt,.-.${prefix
}_
${dir
}crypt
486 #########################################################################
487 {{{ # CBC en- and decrypt procedures #
488 my ($inp,$out,$len,$key,$ivp,$enc,$rounds,$idx)=map("r$_",(3..10));
489 my ($rndkey0,$rndkey1,$inout,$tmp)= map("v$_",(0..3));
490 my ($ivec,$inptail,$inpperm,$outhead,$outperm,$outmask,$keyperm)=
493 .globl
.${prefix
}_cbc_encrypt
495 .${prefix
}_cbc_encrypt
:
499 cmpwi
$enc,0 # test direction
505 vxor
$rndkey0,$rndkey0,$rndkey0
506 le?vspltisb
$tmp,0x0f
508 lvx
$ivec,0,$ivp # load [unaligned] iv
510 lvx
$inptail,$idx,$ivp
511 le?vxor
$inpperm,$inpperm,$tmp
512 vperm
$ivec,$ivec,$inptail,$inpperm
515 ?lvsl
$keyperm,0,$key # prepare for unaligned key
516 lwz
$rounds,240($key)
518 lvsr
$inpperm,0,r11
# prepare for unaligned load
520 addi
$inp,$inp,15 # 15 is not typo
521 le?vxor
$inpperm,$inpperm,$tmp
523 ?lvsr
$outperm,0,$out # prepare for unaligned store
526 ?vperm
$outmask,$rndkey0,$outmask,$outperm
527 le?vxor
$outperm,$outperm,$tmp
529 srwi
$rounds,$rounds,1
531 subi
$rounds,$rounds,1
539 subi
$len,$len,16 # len-=16
542 vperm
$inout,$inout,$inptail,$inpperm
543 lvx
$rndkey1,$idx,$key
545 ?vperm
$rndkey0,$rndkey0,$rndkey1,$keyperm
546 vxor
$inout,$inout,$rndkey0
547 lvx
$rndkey0,$idx,$key
549 vxor
$inout,$inout,$ivec
552 ?vperm
$rndkey1,$rndkey1,$rndkey0,$keyperm
553 vcipher
$inout,$inout,$rndkey1
554 lvx
$rndkey1,$idx,$key
556 ?vperm
$rndkey0,$rndkey0,$rndkey1,$keyperm
557 vcipher
$inout,$inout,$rndkey0
558 lvx
$rndkey0,$idx,$key
562 ?vperm
$rndkey1,$rndkey1,$rndkey0,$keyperm
563 vcipher
$inout,$inout,$rndkey1
564 lvx
$rndkey1,$idx,$key
566 ?vperm
$rndkey0,$rndkey0,$rndkey1,$keyperm
567 vcipherlast
$ivec,$inout,$rndkey0
570 vperm
$tmp,$ivec,$ivec,$outperm
571 vsel
$inout,$outhead,$tmp,$outmask
582 bge _aesp8_cbc_decrypt8x
587 subi
$len,$len,16 # len-=16
590 vperm
$tmp,$tmp,$inptail,$inpperm
591 lvx
$rndkey1,$idx,$key
593 ?vperm
$rndkey0,$rndkey0,$rndkey1,$keyperm
594 vxor
$inout,$tmp,$rndkey0
595 lvx
$rndkey0,$idx,$key
599 ?vperm
$rndkey1,$rndkey1,$rndkey0,$keyperm
600 vncipher
$inout,$inout,$rndkey1
601 lvx
$rndkey1,$idx,$key
603 ?vperm
$rndkey0,$rndkey0,$rndkey1,$keyperm
604 vncipher
$inout,$inout,$rndkey0
605 lvx
$rndkey0,$idx,$key
609 ?vperm
$rndkey1,$rndkey1,$rndkey0,$keyperm
610 vncipher
$inout,$inout,$rndkey1
611 lvx
$rndkey1,$idx,$key
613 ?vperm
$rndkey0,$rndkey0,$rndkey1,$keyperm
614 vncipherlast
$inout,$inout,$rndkey0
617 vxor
$inout,$inout,$ivec
619 vperm
$tmp,$inout,$inout,$outperm
620 vsel
$inout,$outhead,$tmp,$outmask
628 lvx
$inout,0,$out # redundant in aligned case
629 vsel
$inout,$outhead,$inout,$outmask
632 neg
$enc,$ivp # write [unaligned] iv
633 li
$idx,15 # 15 is not typo
634 vxor
$rndkey0,$rndkey0,$rndkey0
636 le?vspltisb
$tmp,0x0f
637 ?lvsl
$outperm,0,$enc
638 ?vperm
$outmask,$rndkey0,$outmask,$outperm
639 le?vxor
$outperm,$outperm,$tmp
641 vperm
$ivec,$ivec,$ivec,$outperm
642 vsel
$inout,$outhead,$ivec,$outmask
643 lvx
$inptail,$idx,$ivp
645 vsel
$inout,$ivec,$inptail,$outmask
646 stvx
$inout,$idx,$ivp
651 .byte
0,12,0x14,0,0,0,6,0
654 #########################################################################
655 {{ # Optimized CBC decrypt procedure #
657 my ($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,8,26..31));
658 $x00=0 if ($flavour =~ /osx/);
659 my ($in0, $in1, $in2, $in3, $in4, $in5, $in6, $in7 )=map("v$_",(0..3,10..13));
660 my ($out0,$out1,$out2,$out3,$out4,$out5,$out6,$out7)=map("v$_",(14..21));
661 my $rndkey0="v23"; # v24-v25 rotating buffer for first found keys
662 # v26-v31 last 6 round keys
663 my ($tmp,$keyperm)=($in3,$in4); # aliases with "caller", redundant assignment
667 _aesp8_cbc_decrypt8x
:
668 $STU $sp,-`($FRAME+21*16+6*$SIZE_T)`($sp)
669 li r10
,`$FRAME+8*16+15`
670 li r11
,`$FRAME+8*16+31`
671 stvx v20
,r10
,$sp # ABI says so
694 stw
$vrsave,`$FRAME+21*16-4`($sp) # save vrsave
696 $PUSH r26
,`$FRAME+21*16+0*$SIZE_T`($sp)
698 $PUSH r27
,`$FRAME+21*16+1*$SIZE_T`($sp)
700 $PUSH r28
,`$FRAME+21*16+2*$SIZE_T`($sp)
702 $PUSH r29
,`$FRAME+21*16+3*$SIZE_T`($sp)
704 $PUSH r30
,`$FRAME+21*16+4*$SIZE_T`($sp)
706 $PUSH r31
,`$FRAME+21*16+5*$SIZE_T`($sp)
710 subi
$rounds,$rounds,3 # -4 in total
711 subi
$len,$len,128 # bias
713 lvx
$rndkey0,$x00,$key # load key schedule
717 ?vperm
$rndkey0,$rndkey0,v30
,$keyperm
718 addi
$key_,$sp,$FRAME+15
722 ?vperm v24
,v30
,v31
,$keyperm
725 stvx v24
,$x00,$key_ # off-load round[1]
726 ?vperm v25
,v31
,v30
,$keyperm
728 stvx v25
,$x10,$key_ # off-load round[2]
729 addi
$key_,$key_,0x20
730 bdnz Load_cbc_dec_key
733 ?vperm v24
,v30
,v31
,$keyperm
735 stvx v24
,$x00,$key_ # off-load round[3]
736 ?vperm v25
,v31
,v26
,$keyperm
738 stvx v25
,$x10,$key_ # off-load round[4]
739 addi
$key_,$sp,$FRAME+15 # rewind $key_
740 ?vperm v26
,v26
,v27
,$keyperm
742 ?vperm v27
,v27
,v28
,$keyperm
744 ?vperm v28
,v28
,v29
,$keyperm
746 ?vperm v29
,v29
,v30
,$keyperm
747 lvx
$out0,$x70,$key # borrow $out0
748 ?vperm v30
,v30
,v31
,$keyperm
749 lvx v24
,$x00,$key_ # pre-load round[1]
750 ?vperm v31
,v31
,$out0,$keyperm
751 lvx v25
,$x10,$key_ # pre-load round[2]
753 #lvx $inptail,0,$inp # "caller" already did this
754 #addi $inp,$inp,15 # 15 is not typo
755 subi
$inp,$inp,15 # undo "caller"
758 lvx_u
$in0,$x00,$inp # load first 8 "words"
759 le?lvsl
$inpperm,0,$idx
760 le?vspltisb
$tmp,0x0f
762 le?vxor
$inpperm,$inpperm,$tmp # transform for lvx_u/stvx_u
764 le?vperm
$in0,$in0,$in0,$inpperm
766 le?vperm
$in1,$in1,$in1,$inpperm
768 le?vperm
$in2,$in2,$in2,$inpperm
769 vxor
$out0,$in0,$rndkey0
771 le?vperm
$in3,$in3,$in3,$inpperm
772 vxor
$out1,$in1,$rndkey0
774 le?vperm
$in4,$in4,$in4,$inpperm
775 vxor
$out2,$in2,$rndkey0
778 le?vperm
$in5,$in5,$in5,$inpperm
779 vxor
$out3,$in3,$rndkey0
780 le?vperm
$in6,$in6,$in6,$inpperm
781 vxor
$out4,$in4,$rndkey0
782 le?vperm
$in7,$in7,$in7,$inpperm
783 vxor
$out5,$in5,$rndkey0
784 vxor
$out6,$in6,$rndkey0
785 vxor
$out7,$in7,$rndkey0
791 vncipher
$out0,$out0,v24
792 vncipher
$out1,$out1,v24
793 vncipher
$out2,$out2,v24
794 vncipher
$out3,$out3,v24
795 vncipher
$out4,$out4,v24
796 vncipher
$out5,$out5,v24
797 vncipher
$out6,$out6,v24
798 vncipher
$out7,$out7,v24
799 lvx v24
,$x20,$key_ # round[3]
800 addi
$key_,$key_,0x20
802 vncipher
$out0,$out0,v25
803 vncipher
$out1,$out1,v25
804 vncipher
$out2,$out2,v25
805 vncipher
$out3,$out3,v25
806 vncipher
$out4,$out4,v25
807 vncipher
$out5,$out5,v25
808 vncipher
$out6,$out6,v25
809 vncipher
$out7,$out7,v25
810 lvx v25
,$x10,$key_ # round[4]
813 subic
$len,$len,128 # $len-=128
814 vncipher
$out0,$out0,v24
815 vncipher
$out1,$out1,v24
816 vncipher
$out2,$out2,v24
817 vncipher
$out3,$out3,v24
818 vncipher
$out4,$out4,v24
819 vncipher
$out5,$out5,v24
820 vncipher
$out6,$out6,v24
821 vncipher
$out7,$out7,v24
823 subfe
. r0
,r0
,r0
# borrow?-1:0
824 vncipher
$out0,$out0,v25
825 vncipher
$out1,$out1,v25
826 vncipher
$out2,$out2,v25
827 vncipher
$out3,$out3,v25
828 vncipher
$out4,$out4,v25
829 vncipher
$out5,$out5,v25
830 vncipher
$out6,$out6,v25
831 vncipher
$out7,$out7,v25
834 vncipher
$out0,$out0,v26
835 vncipher
$out1,$out1,v26
836 vncipher
$out2,$out2,v26
837 vncipher
$out3,$out3,v26
838 vncipher
$out4,$out4,v26
839 vncipher
$out5,$out5,v26
840 vncipher
$out6,$out6,v26
841 vncipher
$out7,$out7,v26
843 add
$inp,$inp,r0
# $inp is adjusted in such
844 # way that at exit from the
845 # loop inX-in7 are loaded
847 vncipher
$out0,$out0,v27
848 vncipher
$out1,$out1,v27
849 vncipher
$out2,$out2,v27
850 vncipher
$out3,$out3,v27
851 vncipher
$out4,$out4,v27
852 vncipher
$out5,$out5,v27
853 vncipher
$out6,$out6,v27
854 vncipher
$out7,$out7,v27
856 addi
$key_,$sp,$FRAME+15 # rewind $key_
857 vncipher
$out0,$out0,v28
858 vncipher
$out1,$out1,v28
859 vncipher
$out2,$out2,v28
860 vncipher
$out3,$out3,v28
861 vncipher
$out4,$out4,v28
862 vncipher
$out5,$out5,v28
863 vncipher
$out6,$out6,v28
864 vncipher
$out7,$out7,v28
865 lvx v24
,$x00,$key_ # re-pre-load round[1]
867 vncipher
$out0,$out0,v29
868 vncipher
$out1,$out1,v29
869 vncipher
$out2,$out2,v29
870 vncipher
$out3,$out3,v29
871 vncipher
$out4,$out4,v29
872 vncipher
$out5,$out5,v29
873 vncipher
$out6,$out6,v29
874 vncipher
$out7,$out7,v29
875 lvx v25
,$x10,$key_ # re-pre-load round[2]
877 vncipher
$out0,$out0,v30
878 vxor
$ivec,$ivec,v31
# xor with last round key
879 vncipher
$out1,$out1,v30
881 vncipher
$out2,$out2,v30
883 vncipher
$out3,$out3,v30
885 vncipher
$out4,$out4,v30
887 vncipher
$out5,$out5,v30
889 vncipher
$out6,$out6,v30
891 vncipher
$out7,$out7,v30
894 vncipherlast
$out0,$out0,$ivec
895 vncipherlast
$out1,$out1,$in0
896 lvx_u
$in0,$x00,$inp # load next input block
897 vncipherlast
$out2,$out2,$in1
899 vncipherlast
$out3,$out3,$in2
900 le?vperm
$in0,$in0,$in0,$inpperm
902 vncipherlast
$out4,$out4,$in3
903 le?vperm
$in1,$in1,$in1,$inpperm
905 vncipherlast
$out5,$out5,$in4
906 le?vperm
$in2,$in2,$in2,$inpperm
908 vncipherlast
$out6,$out6,$in5
909 le?vperm
$in3,$in3,$in3,$inpperm
911 vncipherlast
$out7,$out7,$in6
912 le?vperm
$in4,$in4,$in4,$inpperm
915 le?vperm
$in5,$in5,$in5,$inpperm
919 le?vperm
$out0,$out0,$out0,$inpperm
920 le?vperm
$out1,$out1,$out1,$inpperm
921 stvx_u
$out0,$x00,$out
922 le?vperm
$in6,$in6,$in6,$inpperm
923 vxor
$out0,$in0,$rndkey0
924 le?vperm
$out2,$out2,$out2,$inpperm
925 stvx_u
$out1,$x10,$out
926 le?vperm
$in7,$in7,$in7,$inpperm
927 vxor
$out1,$in1,$rndkey0
928 le?vperm
$out3,$out3,$out3,$inpperm
929 stvx_u
$out2,$x20,$out
930 vxor
$out2,$in2,$rndkey0
931 le?vperm
$out4,$out4,$out4,$inpperm
932 stvx_u
$out3,$x30,$out
933 vxor
$out3,$in3,$rndkey0
934 le?vperm
$out5,$out5,$out5,$inpperm
935 stvx_u
$out4,$x40,$out
936 vxor
$out4,$in4,$rndkey0
937 le?vperm
$out6,$out6,$out6,$inpperm
938 stvx_u
$out5,$x50,$out
939 vxor
$out5,$in5,$rndkey0
940 le?vperm
$out7,$out7,$out7,$inpperm
941 stvx_u
$out6,$x60,$out
942 vxor
$out6,$in6,$rndkey0
943 stvx_u
$out7,$x70,$out
945 vxor
$out7,$in7,$rndkey0
948 beq Loop_cbc_dec8x
# did $len-=128 borrow?
955 Loop_cbc_dec8x_tail
: # up to 7 "words" tail...
956 vncipher
$out1,$out1,v24
957 vncipher
$out2,$out2,v24
958 vncipher
$out3,$out3,v24
959 vncipher
$out4,$out4,v24
960 vncipher
$out5,$out5,v24
961 vncipher
$out6,$out6,v24
962 vncipher
$out7,$out7,v24
963 lvx v24
,$x20,$key_ # round[3]
964 addi
$key_,$key_,0x20
966 vncipher
$out1,$out1,v25
967 vncipher
$out2,$out2,v25
968 vncipher
$out3,$out3,v25
969 vncipher
$out4,$out4,v25
970 vncipher
$out5,$out5,v25
971 vncipher
$out6,$out6,v25
972 vncipher
$out7,$out7,v25
973 lvx v25
,$x10,$key_ # round[4]
974 bdnz Loop_cbc_dec8x_tail
976 vncipher
$out1,$out1,v24
977 vncipher
$out2,$out2,v24
978 vncipher
$out3,$out3,v24
979 vncipher
$out4,$out4,v24
980 vncipher
$out5,$out5,v24
981 vncipher
$out6,$out6,v24
982 vncipher
$out7,$out7,v24
984 vncipher
$out1,$out1,v25
985 vncipher
$out2,$out2,v25
986 vncipher
$out3,$out3,v25
987 vncipher
$out4,$out4,v25
988 vncipher
$out5,$out5,v25
989 vncipher
$out6,$out6,v25
990 vncipher
$out7,$out7,v25
992 vncipher
$out1,$out1,v26
993 vncipher
$out2,$out2,v26
994 vncipher
$out3,$out3,v26
995 vncipher
$out4,$out4,v26
996 vncipher
$out5,$out5,v26
997 vncipher
$out6,$out6,v26
998 vncipher
$out7,$out7,v26
1000 vncipher
$out1,$out1,v27
1001 vncipher
$out2,$out2,v27
1002 vncipher
$out3,$out3,v27
1003 vncipher
$out4,$out4,v27
1004 vncipher
$out5,$out5,v27
1005 vncipher
$out6,$out6,v27
1006 vncipher
$out7,$out7,v27
1008 vncipher
$out1,$out1,v28
1009 vncipher
$out2,$out2,v28
1010 vncipher
$out3,$out3,v28
1011 vncipher
$out4,$out4,v28
1012 vncipher
$out5,$out5,v28
1013 vncipher
$out6,$out6,v28
1014 vncipher
$out7,$out7,v28
1016 vncipher
$out1,$out1,v29
1017 vncipher
$out2,$out2,v29
1018 vncipher
$out3,$out3,v29
1019 vncipher
$out4,$out4,v29
1020 vncipher
$out5,$out5,v29
1021 vncipher
$out6,$out6,v29
1022 vncipher
$out7,$out7,v29
1024 vncipher
$out1,$out1,v30
1025 vxor
$ivec,$ivec,v31
# last round key
1026 vncipher
$out2,$out2,v30
1028 vncipher
$out3,$out3,v30
1030 vncipher
$out4,$out4,v30
1032 vncipher
$out5,$out5,v30
1034 vncipher
$out6,$out6,v30
1036 vncipher
$out7,$out7,v30
1039 cmplwi
$len,32 # switch($len)
1044 blt Lcbc_dec8x_three
1053 vncipherlast
$out1,$out1,$ivec
1054 vncipherlast
$out2,$out2,$in1
1055 vncipherlast
$out3,$out3,$in2
1056 vncipherlast
$out4,$out4,$in3
1057 vncipherlast
$out5,$out5,$in4
1058 vncipherlast
$out6,$out6,$in5
1059 vncipherlast
$out7,$out7,$in6
1062 le?vperm
$out1,$out1,$out1,$inpperm
1063 le?vperm
$out2,$out2,$out2,$inpperm
1064 stvx_u
$out1,$x00,$out
1065 le?vperm
$out3,$out3,$out3,$inpperm
1066 stvx_u
$out2,$x10,$out
1067 le?vperm
$out4,$out4,$out4,$inpperm
1068 stvx_u
$out3,$x20,$out
1069 le?vperm
$out5,$out5,$out5,$inpperm
1070 stvx_u
$out4,$x30,$out
1071 le?vperm
$out6,$out6,$out6,$inpperm
1072 stvx_u
$out5,$x40,$out
1073 le?vperm
$out7,$out7,$out7,$inpperm
1074 stvx_u
$out6,$x50,$out
1075 stvx_u
$out7,$x60,$out
1081 vncipherlast
$out2,$out2,$ivec
1082 vncipherlast
$out3,$out3,$in2
1083 vncipherlast
$out4,$out4,$in3
1084 vncipherlast
$out5,$out5,$in4
1085 vncipherlast
$out6,$out6,$in5
1086 vncipherlast
$out7,$out7,$in6
1089 le?vperm
$out2,$out2,$out2,$inpperm
1090 le?vperm
$out3,$out3,$out3,$inpperm
1091 stvx_u
$out2,$x00,$out
1092 le?vperm
$out4,$out4,$out4,$inpperm
1093 stvx_u
$out3,$x10,$out
1094 le?vperm
$out5,$out5,$out5,$inpperm
1095 stvx_u
$out4,$x20,$out
1096 le?vperm
$out6,$out6,$out6,$inpperm
1097 stvx_u
$out5,$x30,$out
1098 le?vperm
$out7,$out7,$out7,$inpperm
1099 stvx_u
$out6,$x40,$out
1100 stvx_u
$out7,$x50,$out
1106 vncipherlast
$out3,$out3,$ivec
1107 vncipherlast
$out4,$out4,$in3
1108 vncipherlast
$out5,$out5,$in4
1109 vncipherlast
$out6,$out6,$in5
1110 vncipherlast
$out7,$out7,$in6
1113 le?vperm
$out3,$out3,$out3,$inpperm
1114 le?vperm
$out4,$out4,$out4,$inpperm
1115 stvx_u
$out3,$x00,$out
1116 le?vperm
$out5,$out5,$out5,$inpperm
1117 stvx_u
$out4,$x10,$out
1118 le?vperm
$out6,$out6,$out6,$inpperm
1119 stvx_u
$out5,$x20,$out
1120 le?vperm
$out7,$out7,$out7,$inpperm
1121 stvx_u
$out6,$x30,$out
1122 stvx_u
$out7,$x40,$out
1128 vncipherlast
$out4,$out4,$ivec
1129 vncipherlast
$out5,$out5,$in4
1130 vncipherlast
$out6,$out6,$in5
1131 vncipherlast
$out7,$out7,$in6
1134 le?vperm
$out4,$out4,$out4,$inpperm
1135 le?vperm
$out5,$out5,$out5,$inpperm
1136 stvx_u
$out4,$x00,$out
1137 le?vperm
$out6,$out6,$out6,$inpperm
1138 stvx_u
$out5,$x10,$out
1139 le?vperm
$out7,$out7,$out7,$inpperm
1140 stvx_u
$out6,$x20,$out
1141 stvx_u
$out7,$x30,$out
1147 vncipherlast
$out5,$out5,$ivec
1148 vncipherlast
$out6,$out6,$in5
1149 vncipherlast
$out7,$out7,$in6
1152 le?vperm
$out5,$out5,$out5,$inpperm
1153 le?vperm
$out6,$out6,$out6,$inpperm
1154 stvx_u
$out5,$x00,$out
1155 le?vperm
$out7,$out7,$out7,$inpperm
1156 stvx_u
$out6,$x10,$out
1157 stvx_u
$out7,$x20,$out
1163 vncipherlast
$out6,$out6,$ivec
1164 vncipherlast
$out7,$out7,$in6
1167 le?vperm
$out6,$out6,$out6,$inpperm
1168 le?vperm
$out7,$out7,$out7,$inpperm
1169 stvx_u
$out6,$x00,$out
1170 stvx_u
$out7,$x10,$out
1176 vncipherlast
$out7,$out7,$ivec
1179 le?vperm
$out7,$out7,$out7,$inpperm
1184 le?vperm
$ivec,$ivec,$ivec,$inpperm
1185 stvx_u
$ivec,0,$ivp # write [unaligned] iv
1189 stvx
$inpperm,r10
,$sp # wipe copies of round keys
1191 stvx
$inpperm,r11
,$sp
1193 stvx
$inpperm,r10
,$sp
1195 stvx
$inpperm,r11
,$sp
1197 stvx
$inpperm,r10
,$sp
1199 stvx
$inpperm,r11
,$sp
1201 stvx
$inpperm,r10
,$sp
1203 stvx
$inpperm,r11
,$sp
1207 lvx v20
,r10
,$sp # ABI says so
1229 $POP r26
,`$FRAME+21*16+0*$SIZE_T`($sp)
1230 $POP r27
,`$FRAME+21*16+1*$SIZE_T`($sp)
1231 $POP r28
,`$FRAME+21*16+2*$SIZE_T`($sp)
1232 $POP r29
,`$FRAME+21*16+3*$SIZE_T`($sp)
1233 $POP r30
,`$FRAME+21*16+4*$SIZE_T`($sp)
1234 $POP r31
,`$FRAME+21*16+5*$SIZE_T`($sp)
1235 addi
$sp,$sp,`$FRAME+21*16+6*$SIZE_T`
1238 .byte
0,12,0x04,0,0x80,6,6,0
1240 .size
.${prefix
}_cbc_encrypt
,.-.${prefix
}_cbc_encrypt
1244 #########################################################################
1245 {{{ # CTR procedure[s] #
1246 my ($inp,$out,$len,$key,$ivp,$x10,$rounds,$idx)=map("r$_",(3..10));
1247 my ($rndkey0,$rndkey1,$inout,$tmp)= map("v$_",(0..3));
1248 my ($ivec,$inptail,$inpperm,$outhead,$outperm,$outmask,$keyperm,$one)=
1253 .globl
.${prefix
}_ctr32_encrypt_blocks
1255 .${prefix
}_ctr32_encrypt_blocks
:
1264 vxor
$rndkey0,$rndkey0,$rndkey0
1265 le?vspltisb
$tmp,0x0f
1267 lvx
$ivec,0,$ivp # load [unaligned] iv
1268 lvsl
$inpperm,0,$ivp
1269 lvx
$inptail,$idx,$ivp
1271 le?vxor
$inpperm,$inpperm,$tmp
1272 vperm
$ivec,$ivec,$inptail,$inpperm
1273 vsldoi
$one,$rndkey0,$one,1
1276 ?lvsl
$keyperm,0,$key # prepare for unaligned key
1277 lwz
$rounds,240($key)
1279 lvsr
$inpperm,0,r11
# prepare for unaligned load
1281 addi
$inp,$inp,15 # 15 is not typo
1282 le?vxor
$inpperm,$inpperm,$tmp
1284 srwi
$rounds,$rounds,1
1286 subi
$rounds,$rounds,1
1289 bge _aesp8_ctr32_encrypt8x
1291 ?lvsr
$outperm,0,$out # prepare for unaligned store
1292 vspltisb
$outmask,-1
1294 ?vperm
$outmask,$rndkey0,$outmask,$outperm
1295 le?vxor
$outperm,$outperm,$tmp
1299 lvx
$rndkey1,$idx,$key
1301 ?vperm
$rndkey0,$rndkey0,$rndkey1,$keyperm
1302 vxor
$inout,$ivec,$rndkey0
1303 lvx
$rndkey0,$idx,$key
1309 ?vperm
$rndkey1,$rndkey1,$rndkey0,$keyperm
1310 vcipher
$inout,$inout,$rndkey1
1311 lvx
$rndkey1,$idx,$key
1313 ?vperm
$rndkey0,$rndkey0,$rndkey1,$keyperm
1314 vcipher
$inout,$inout,$rndkey0
1315 lvx
$rndkey0,$idx,$key
1319 vadduwm
$ivec,$ivec,$one
1323 subic
. $len,$len,1 # blocks--
1325 ?vperm
$rndkey1,$rndkey1,$rndkey0,$keyperm
1326 vcipher
$inout,$inout,$rndkey1
1327 lvx
$rndkey1,$idx,$key
1328 vperm
$dat,$dat,$inptail,$inpperm
1330 ?vperm
$rndkey1,$rndkey0,$rndkey1,$keyperm
1332 vxor
$dat,$dat,$rndkey1 # last round key
1333 vcipherlast
$inout,$inout,$dat
1335 lvx
$rndkey1,$idx,$key
1337 vperm
$inout,$inout,$inout,$outperm
1338 vsel
$dat,$outhead,$inout,$outmask
1340 ?vperm
$rndkey0,$rndkey0,$rndkey1,$keyperm
1342 vxor
$inout,$ivec,$rndkey0
1343 lvx
$rndkey0,$idx,$key
1350 lvx
$inout,0,$out # redundant in aligned case
1351 vsel
$inout,$outhead,$inout,$outmask
1357 .byte
0,12,0x14,0,0,0,6,0
1360 #########################################################################
1361 {{ # Optimized CTR procedure #
1363 my ($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,8,26..31));
1364 $x00=0 if ($flavour =~ /osx/);
1365 my ($in0, $in1, $in2, $in3, $in4, $in5, $in6, $in7 )=map("v$_",(0..3,10,12..14));
1366 my ($out0,$out1,$out2,$out3,$out4,$out5,$out6,$out7)=map("v$_",(15..22));
1367 my $rndkey0="v23"; # v24-v25 rotating buffer for first found keys
1368 # v26-v31 last 6 round keys
1369 my ($tmp,$keyperm)=($in3,$in4); # aliases with "caller", redundant assignment
1370 my ($two,$three,$four)=($outhead,$outperm,$outmask);
1374 _aesp8_ctr32_encrypt8x
:
1375 $STU $sp,-`($FRAME+21*16+6*$SIZE_T)`($sp)
1376 li r10
,`$FRAME+8*16+15`
1377 li r11
,`$FRAME+8*16+31`
1378 stvx v20
,r10
,$sp # ABI says so
1401 stw
$vrsave,`$FRAME+21*16-4`($sp) # save vrsave
1403 $PUSH r26
,`$FRAME+21*16+0*$SIZE_T`($sp)
1405 $PUSH r27
,`$FRAME+21*16+1*$SIZE_T`($sp)
1407 $PUSH r28
,`$FRAME+21*16+2*$SIZE_T`($sp)
1409 $PUSH r29
,`$FRAME+21*16+3*$SIZE_T`($sp)
1411 $PUSH r30
,`$FRAME+21*16+4*$SIZE_T`($sp)
1413 $PUSH r31
,`$FRAME+21*16+5*$SIZE_T`($sp)
1417 subi
$rounds,$rounds,3 # -4 in total
1419 lvx
$rndkey0,$x00,$key # load key schedule
1423 ?vperm
$rndkey0,$rndkey0,v30
,$keyperm
1424 addi
$key_,$sp,$FRAME+15
1428 ?vperm v24
,v30
,v31
,$keyperm
1431 stvx v24
,$x00,$key_ # off-load round[1]
1432 ?vperm v25
,v31
,v30
,$keyperm
1434 stvx v25
,$x10,$key_ # off-load round[2]
1435 addi
$key_,$key_,0x20
1436 bdnz Load_ctr32_enc_key
1439 ?vperm v24
,v30
,v31
,$keyperm
1441 stvx v24
,$x00,$key_ # off-load round[3]
1442 ?vperm v25
,v31
,v26
,$keyperm
1444 stvx v25
,$x10,$key_ # off-load round[4]
1445 addi
$key_,$sp,$FRAME+15 # rewind $key_
1446 ?vperm v26
,v26
,v27
,$keyperm
1448 ?vperm v27
,v27
,v28
,$keyperm
1450 ?vperm v28
,v28
,v29
,$keyperm
1452 ?vperm v29
,v29
,v30
,$keyperm
1453 lvx
$out0,$x70,$key # borrow $out0
1454 ?vperm v30
,v30
,v31
,$keyperm
1455 lvx v24
,$x00,$key_ # pre-load round[1]
1456 ?vperm v31
,v31
,$out0,$keyperm
1457 lvx v25
,$x10,$key_ # pre-load round[2]
1459 vadduwm
$two,$one,$one
1460 subi
$inp,$inp,15 # undo "caller"
1463 vadduwm
$out1,$ivec,$one # counter values ...
1464 vadduwm
$out2,$ivec,$two
1465 vxor
$out0,$ivec,$rndkey0 # ... xored with rndkey[0]
1467 vadduwm
$out3,$out1,$two
1468 vxor
$out1,$out1,$rndkey0
1469 le?lvsl
$inpperm,0,$idx
1470 vadduwm
$out4,$out2,$two
1471 vxor
$out2,$out2,$rndkey0
1472 le?vspltisb
$tmp,0x0f
1473 vadduwm
$out5,$out3,$two
1474 vxor
$out3,$out3,$rndkey0
1475 le?vxor
$inpperm,$inpperm,$tmp # transform for lvx_u/stvx_u
1476 vadduwm
$out6,$out4,$two
1477 vxor
$out4,$out4,$rndkey0
1478 vadduwm
$out7,$out5,$two
1479 vxor
$out5,$out5,$rndkey0
1480 vadduwm
$ivec,$out6,$two # next counter value
1481 vxor
$out6,$out6,$rndkey0
1482 vxor
$out7,$out7,$rndkey0
1488 vcipher
$out0,$out0,v24
1489 vcipher
$out1,$out1,v24
1490 vcipher
$out2,$out2,v24
1491 vcipher
$out3,$out3,v24
1492 vcipher
$out4,$out4,v24
1493 vcipher
$out5,$out5,v24
1494 vcipher
$out6,$out6,v24
1495 vcipher
$out7,$out7,v24
1496 Loop_ctr32_enc8x_middle
:
1497 lvx v24
,$x20,$key_ # round[3]
1498 addi
$key_,$key_,0x20
1500 vcipher
$out0,$out0,v25
1501 vcipher
$out1,$out1,v25
1502 vcipher
$out2,$out2,v25
1503 vcipher
$out3,$out3,v25
1504 vcipher
$out4,$out4,v25
1505 vcipher
$out5,$out5,v25
1506 vcipher
$out6,$out6,v25
1507 vcipher
$out7,$out7,v25
1508 lvx v25
,$x10,$key_ # round[4]
1509 bdnz Loop_ctr32_enc8x
1511 subic r11
,$len,256 # $len-256, borrow $key_
1512 vcipher
$out0,$out0,v24
1513 vcipher
$out1,$out1,v24
1514 vcipher
$out2,$out2,v24
1515 vcipher
$out3,$out3,v24
1516 vcipher
$out4,$out4,v24
1517 vcipher
$out5,$out5,v24
1518 vcipher
$out6,$out6,v24
1519 vcipher
$out7,$out7,v24
1521 subfe r0
,r0
,r0
# borrow?-1:0
1522 vcipher
$out0,$out0,v25
1523 vcipher
$out1,$out1,v25
1524 vcipher
$out2,$out2,v25
1525 vcipher
$out3,$out3,v25
1526 vcipher
$out4,$out4,v25
1527 vcipher
$out5,$out5,v25
1528 vcipher
$out6,$out6,v25
1529 vcipher
$out7,$out7,v25
1532 addi
$key_,$sp,$FRAME+15 # rewind $key_
1533 vcipher
$out0,$out0,v26
1534 vcipher
$out1,$out1,v26
1535 vcipher
$out2,$out2,v26
1536 vcipher
$out3,$out3,v26
1537 vcipher
$out4,$out4,v26
1538 vcipher
$out5,$out5,v26
1539 vcipher
$out6,$out6,v26
1540 vcipher
$out7,$out7,v26
1541 lvx v24
,$x00,$key_ # re-pre-load round[1]
1543 subic
$len,$len,129 # $len-=129
1544 vcipher
$out0,$out0,v27
1545 addi
$len,$len,1 # $len-=128 really
1546 vcipher
$out1,$out1,v27
1547 vcipher
$out2,$out2,v27
1548 vcipher
$out3,$out3,v27
1549 vcipher
$out4,$out4,v27
1550 vcipher
$out5,$out5,v27
1551 vcipher
$out6,$out6,v27
1552 vcipher
$out7,$out7,v27
1553 lvx v25
,$x10,$key_ # re-pre-load round[2]
1555 vcipher
$out0,$out0,v28
1556 lvx_u
$in0,$x00,$inp # load input
1557 vcipher
$out1,$out1,v28
1558 lvx_u
$in1,$x10,$inp
1559 vcipher
$out2,$out2,v28
1560 lvx_u
$in2,$x20,$inp
1561 vcipher
$out3,$out3,v28
1562 lvx_u
$in3,$x30,$inp
1563 vcipher
$out4,$out4,v28
1564 lvx_u
$in4,$x40,$inp
1565 vcipher
$out5,$out5,v28
1566 lvx_u
$in5,$x50,$inp
1567 vcipher
$out6,$out6,v28
1568 lvx_u
$in6,$x60,$inp
1569 vcipher
$out7,$out7,v28
1570 lvx_u
$in7,$x70,$inp
1573 vcipher
$out0,$out0,v29
1574 le?vperm
$in0,$in0,$in0,$inpperm
1575 vcipher
$out1,$out1,v29
1576 le?vperm
$in1,$in1,$in1,$inpperm
1577 vcipher
$out2,$out2,v29
1578 le?vperm
$in2,$in2,$in2,$inpperm
1579 vcipher
$out3,$out3,v29
1580 le?vperm
$in3,$in3,$in3,$inpperm
1581 vcipher
$out4,$out4,v29
1582 le?vperm
$in4,$in4,$in4,$inpperm
1583 vcipher
$out5,$out5,v29
1584 le?vperm
$in5,$in5,$in5,$inpperm
1585 vcipher
$out6,$out6,v29
1586 le?vperm
$in6,$in6,$in6,$inpperm
1587 vcipher
$out7,$out7,v29
1588 le?vperm
$in7,$in7,$in7,$inpperm
1590 add
$inp,$inp,r0
# $inp is adjusted in such
1591 # way that at exit from the
1592 # loop inX-in7 are loaded
1594 subfe
. r0
,r0
,r0
# borrow?-1:0
1595 vcipher
$out0,$out0,v30
1596 vxor
$in0,$in0,v31
# xor with last round key
1597 vcipher
$out1,$out1,v30
1599 vcipher
$out2,$out2,v30
1601 vcipher
$out3,$out3,v30
1603 vcipher
$out4,$out4,v30
1605 vcipher
$out5,$out5,v30
1607 vcipher
$out6,$out6,v30
1609 vcipher
$out7,$out7,v30
1612 bne Lctr32_enc8x_break
# did $len-129 borrow?
1614 vcipherlast
$in0,$out0,$in0
1615 vcipherlast
$in1,$out1,$in1
1616 vadduwm
$out1,$ivec,$one # counter values ...
1617 vcipherlast
$in2,$out2,$in2
1618 vadduwm
$out2,$ivec,$two
1619 vxor
$out0,$ivec,$rndkey0 # ... xored with rndkey[0]
1620 vcipherlast
$in3,$out3,$in3
1621 vadduwm
$out3,$out1,$two
1622 vxor
$out1,$out1,$rndkey0
1623 vcipherlast
$in4,$out4,$in4
1624 vadduwm
$out4,$out2,$two
1625 vxor
$out2,$out2,$rndkey0
1626 vcipherlast
$in5,$out5,$in5
1627 vadduwm
$out5,$out3,$two
1628 vxor
$out3,$out3,$rndkey0
1629 vcipherlast
$in6,$out6,$in6
1630 vadduwm
$out6,$out4,$two
1631 vxor
$out4,$out4,$rndkey0
1632 vcipherlast
$in7,$out7,$in7
1633 vadduwm
$out7,$out5,$two
1634 vxor
$out5,$out5,$rndkey0
1635 le?vperm
$in0,$in0,$in0,$inpperm
1636 vadduwm
$ivec,$out6,$two # next counter value
1637 vxor
$out6,$out6,$rndkey0
1638 le?vperm
$in1,$in1,$in1,$inpperm
1639 vxor
$out7,$out7,$rndkey0
1642 vcipher
$out0,$out0,v24
1643 stvx_u
$in0,$x00,$out
1644 le?vperm
$in2,$in2,$in2,$inpperm
1645 vcipher
$out1,$out1,v24
1646 stvx_u
$in1,$x10,$out
1647 le?vperm
$in3,$in3,$in3,$inpperm
1648 vcipher
$out2,$out2,v24
1649 stvx_u
$in2,$x20,$out
1650 le?vperm
$in4,$in4,$in4,$inpperm
1651 vcipher
$out3,$out3,v24
1652 stvx_u
$in3,$x30,$out
1653 le?vperm
$in5,$in5,$in5,$inpperm
1654 vcipher
$out4,$out4,v24
1655 stvx_u
$in4,$x40,$out
1656 le?vperm
$in6,$in6,$in6,$inpperm
1657 vcipher
$out5,$out5,v24
1658 stvx_u
$in5,$x50,$out
1659 le?vperm
$in7,$in7,$in7,$inpperm
1660 vcipher
$out6,$out6,v24
1661 stvx_u
$in6,$x60,$out
1662 vcipher
$out7,$out7,v24
1663 stvx_u
$in7,$x70,$out
1666 b Loop_ctr32_enc8x_middle
1671 blt Lctr32_enc8x_one
1673 beq Lctr32_enc8x_two
1675 blt Lctr32_enc8x_three
1677 beq Lctr32_enc8x_four
1679 blt Lctr32_enc8x_five
1681 beq Lctr32_enc8x_six
1683 blt Lctr32_enc8x_seven
1686 vcipherlast
$out0,$out0,$in0
1687 vcipherlast
$out1,$out1,$in1
1688 vcipherlast
$out2,$out2,$in2
1689 vcipherlast
$out3,$out3,$in3
1690 vcipherlast
$out4,$out4,$in4
1691 vcipherlast
$out5,$out5,$in5
1692 vcipherlast
$out6,$out6,$in6
1693 vcipherlast
$out7,$out7,$in7
1695 le?vperm
$out0,$out0,$out0,$inpperm
1696 le?vperm
$out1,$out1,$out1,$inpperm
1697 stvx_u
$out0,$x00,$out
1698 le?vperm
$out2,$out2,$out2,$inpperm
1699 stvx_u
$out1,$x10,$out
1700 le?vperm
$out3,$out3,$out3,$inpperm
1701 stvx_u
$out2,$x20,$out
1702 le?vperm
$out4,$out4,$out4,$inpperm
1703 stvx_u
$out3,$x30,$out
1704 le?vperm
$out5,$out5,$out5,$inpperm
1705 stvx_u
$out4,$x40,$out
1706 le?vperm
$out6,$out6,$out6,$inpperm
1707 stvx_u
$out5,$x50,$out
1708 le?vperm
$out7,$out7,$out7,$inpperm
1709 stvx_u
$out6,$x60,$out
1710 stvx_u
$out7,$x70,$out
1716 vcipherlast
$out0,$out0,$in1
1717 vcipherlast
$out1,$out1,$in2
1718 vcipherlast
$out2,$out2,$in3
1719 vcipherlast
$out3,$out3,$in4
1720 vcipherlast
$out4,$out4,$in5
1721 vcipherlast
$out5,$out5,$in6
1722 vcipherlast
$out6,$out6,$in7
1724 le?vperm
$out0,$out0,$out0,$inpperm
1725 le?vperm
$out1,$out1,$out1,$inpperm
1726 stvx_u
$out0,$x00,$out
1727 le?vperm
$out2,$out2,$out2,$inpperm
1728 stvx_u
$out1,$x10,$out
1729 le?vperm
$out3,$out3,$out3,$inpperm
1730 stvx_u
$out2,$x20,$out
1731 le?vperm
$out4,$out4,$out4,$inpperm
1732 stvx_u
$out3,$x30,$out
1733 le?vperm
$out5,$out5,$out5,$inpperm
1734 stvx_u
$out4,$x40,$out
1735 le?vperm
$out6,$out6,$out6,$inpperm
1736 stvx_u
$out5,$x50,$out
1737 stvx_u
$out6,$x60,$out
1743 vcipherlast
$out0,$out0,$in2
1744 vcipherlast
$out1,$out1,$in3
1745 vcipherlast
$out2,$out2,$in4
1746 vcipherlast
$out3,$out3,$in5
1747 vcipherlast
$out4,$out4,$in6
1748 vcipherlast
$out5,$out5,$in7
1750 le?vperm
$out0,$out0,$out0,$inpperm
1751 le?vperm
$out1,$out1,$out1,$inpperm
1752 stvx_u
$out0,$x00,$out
1753 le?vperm
$out2,$out2,$out2,$inpperm
1754 stvx_u
$out1,$x10,$out
1755 le?vperm
$out3,$out3,$out3,$inpperm
1756 stvx_u
$out2,$x20,$out
1757 le?vperm
$out4,$out4,$out4,$inpperm
1758 stvx_u
$out3,$x30,$out
1759 le?vperm
$out5,$out5,$out5,$inpperm
1760 stvx_u
$out4,$x40,$out
1761 stvx_u
$out5,$x50,$out
1767 vcipherlast
$out0,$out0,$in3
1768 vcipherlast
$out1,$out1,$in4
1769 vcipherlast
$out2,$out2,$in5
1770 vcipherlast
$out3,$out3,$in6
1771 vcipherlast
$out4,$out4,$in7
1773 le?vperm
$out0,$out0,$out0,$inpperm
1774 le?vperm
$out1,$out1,$out1,$inpperm
1775 stvx_u
$out0,$x00,$out
1776 le?vperm
$out2,$out2,$out2,$inpperm
1777 stvx_u
$out1,$x10,$out
1778 le?vperm
$out3,$out3,$out3,$inpperm
1779 stvx_u
$out2,$x20,$out
1780 le?vperm
$out4,$out4,$out4,$inpperm
1781 stvx_u
$out3,$x30,$out
1782 stvx_u
$out4,$x40,$out
1788 vcipherlast
$out0,$out0,$in4
1789 vcipherlast
$out1,$out1,$in5
1790 vcipherlast
$out2,$out2,$in6
1791 vcipherlast
$out3,$out3,$in7
1793 le?vperm
$out0,$out0,$out0,$inpperm
1794 le?vperm
$out1,$out1,$out1,$inpperm
1795 stvx_u
$out0,$x00,$out
1796 le?vperm
$out2,$out2,$out2,$inpperm
1797 stvx_u
$out1,$x10,$out
1798 le?vperm
$out3,$out3,$out3,$inpperm
1799 stvx_u
$out2,$x20,$out
1800 stvx_u
$out3,$x30,$out
1806 vcipherlast
$out0,$out0,$in5
1807 vcipherlast
$out1,$out1,$in6
1808 vcipherlast
$out2,$out2,$in7
1810 le?vperm
$out0,$out0,$out0,$inpperm
1811 le?vperm
$out1,$out1,$out1,$inpperm
1812 stvx_u
$out0,$x00,$out
1813 le?vperm
$out2,$out2,$out2,$inpperm
1814 stvx_u
$out1,$x10,$out
1815 stvx_u
$out2,$x20,$out
1821 vcipherlast
$out0,$out0,$in6
1822 vcipherlast
$out1,$out1,$in7
1824 le?vperm
$out0,$out0,$out0,$inpperm
1825 le?vperm
$out1,$out1,$out1,$inpperm
1826 stvx_u
$out0,$x00,$out
1827 stvx_u
$out1,$x10,$out
1833 vcipherlast
$out0,$out0,$in7
1835 le?vperm
$out0,$out0,$out0,$inpperm
1842 stvx
$inpperm,r10
,$sp # wipe copies of round keys
1844 stvx
$inpperm,r11
,$sp
1846 stvx
$inpperm,r10
,$sp
1848 stvx
$inpperm,r11
,$sp
1850 stvx
$inpperm,r10
,$sp
1852 stvx
$inpperm,r11
,$sp
1854 stvx
$inpperm,r10
,$sp
1856 stvx
$inpperm,r11
,$sp
1860 lvx v20
,r10
,$sp # ABI says so
1882 $POP r26
,`$FRAME+21*16+0*$SIZE_T`($sp)
1883 $POP r27
,`$FRAME+21*16+1*$SIZE_T`($sp)
1884 $POP r28
,`$FRAME+21*16+2*$SIZE_T`($sp)
1885 $POP r29
,`$FRAME+21*16+3*$SIZE_T`($sp)
1886 $POP r30
,`$FRAME+21*16+4*$SIZE_T`($sp)
1887 $POP r31
,`$FRAME+21*16+5*$SIZE_T`($sp)
1888 addi
$sp,$sp,`$FRAME+21*16+6*$SIZE_T`
1891 .byte
0,12,0x04,0,0x80,6,6,0
1893 .size
.${prefix
}_ctr32_encrypt_blocks
,.-.${prefix
}_ctr32_encrypt_blocks
1898 foreach(split("\n",$code)) {
1899 s/\`([^\`]*)\`/eval($1)/geo;
1901 # constants table endian-specific conversion
1902 if ($consts && m/\.(long|byte)\s+(.+)\s+(\?[a-z]*)$/o) {
1906 # convert to endian-agnostic format
1908 foreach (split(/,\s*/,$2)) {
1909 my $l = /^0/?
oct:int;
1910 push @bytes,($l>>24)&0xff,($l>>16)&0xff,($l>>8)&0xff,$l&0xff;
1913 @bytes = map(/^0/?
oct:int,split(/,\s*/,$2));
1916 # little-endian conversion
1917 if ($flavour =~ /le$/o) {
1918 SWITCH
: for($conv) {
1919 /\?inv/ && do { @bytes=map($_^0xf,@bytes); last; };
1920 /\?rev/ && do { @bytes=reverse(@bytes); last; };
1925 print ".byte\t",join(',',map (sprintf("0x%02x",$_),@bytes)),"\n";
1928 $consts=0 if (m/Lconsts:/o); # end of table
1930 # instructions prefixed with '?' are endian-specific and need
1931 # to be adjusted accordingly...
1932 if ($flavour =~ /le$/o) { # little-endian
1937 s/\?(vperm\s+v[0-9]+,\s*)(v[0-9]+,\s*)(v[0-9]+,\s*)(v[0-9]+)/$1$3$2$4/o or
1938 s/\?(vsldoi\s+v[0-9]+,\s*)(v[0-9]+,)\s*(v[0-9]+,\s*)([0-9]+)/$1$3$2 16-$4/o or
1939 s/\?(vspltw\s+v[0-9]+,\s*)(v[0-9]+,)\s*([0-9])/$1$2 3-$3/o;
1940 } else { # big-endian