3 # ====================================================================
4 # Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
5 # project. The module is, however, dual licensed under OpenSSL and
6 # CRYPTOGAMS licenses depending on where you obtain it. For further
7 # details see http://www.openssl.org/~appro/cryptogams/.
8 # ====================================================================
10 # This module implements support for AES instructions as per PowerISA
11 # specification version 2.07, first implemented by POWER8 processor.
12 # The module is endian-agnostic in the sense that it supports both big-
13 # and little-endian cases. Data alignment in parallelizable modes is
14 # handled with VSX loads and stores, which implies MSR.VSX flag being
15 # set. It should also be noted that ISA specification doesn't prohibit
16 # alignment exceptions for these instructions on page boundaries.
17 # Initially alignment was handled in pure AltiVec/VMX way [when data
18 # is aligned programmatically, which in turn guarantees exception-
19 # free execution], but it turned out to hamper performance when vcipher
20 # instructions are interleaved. It's reckoned that eventual
21 # misalignment penalties at page boundaries are on average lower
22 # than additional overhead in pure AltiVec approach.
26 # Added XTS subroutine; 9x improvement on little-endian and 12x on
27 # big-endian systems was measured.
29 ######################################################################
30 # Current large-block performance in cycles per byte processed with
31 # 128-bit key (less is better).
33 # CBC en-/decrypt CTR XTS
34 # POWER8[le] 3.96/0.72 0.74 1.1
35 # POWER8[be] 3.75/0.65 0.66 1.0
39 if ($flavour =~ /64/) {
47 } elsif ($flavour =~ /32/) {
55 } else { die "nonsense $flavour"; }
57 $LITTLE_ENDIAN = ($flavour=~/le$/) ?
$SIZE_T : 0;
59 $0 =~ m/(.*[\/\\])[^\
/\\]+$/; $dir=$1;
60 ( $xlate="${dir}ppc-xlate.pl" and -f
$xlate ) or
61 ( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f
$xlate) or
62 die "can't locate ppc-xlate.pl";
64 open STDOUT
,"| $^X $xlate $flavour ".shift || die "can't call $xlate: $!";
72 #########################################################################
73 {{{ # Key setup procedures #
74 my ($inp,$bits,$out,$ptr,$cnt,$rounds)=map("r$_",(3..8));
75 my ($zero,$in0,$in1,$key,$rcon,$mask,$tmp)=map("v$_",(0..6));
76 my ($stage,$outperm,$outmask,$outhead,$outtail)=map("v$_",(7..11));
85 .long
0x01000000, 0x01000000, 0x01000000, 0x01000000 ?rev
86 .long
0x1b000000, 0x1b000000, 0x1b000000, 0x1b000000 ?rev
87 .long
0x0d0e0f0c, 0x0d0e0f0c, 0x0d0e0f0c, 0x0d0e0f0c ?rev
92 mflr
$ptr #vvvvv "distance between . and rcon
97 .byte
0,12,0x14,0,0,0,0,0
98 .asciz
"AES for PowerISA 2.07, CRYPTOGAMS by <appro\@openssl.org>"
100 .globl
.${prefix
}_set_encrypt_key
102 .${prefix
}_set_encrypt_key
:
105 $PUSH r11
,$LRSAVE($sp)
109 beq
- Lenc_key_abort
# if ($inp==0) return -1;
111 beq
- Lenc_key_abort
# if ($out==0) return -1;
129 addi
$inp,$inp,15 # 15 is not typo
130 lvsr
$key,0,r9
# borrow $key
134 le?vspltisb
$mask,0x0f # borrow $mask
136 le?vxor
$key,$key,$mask # adjust for byte swap
139 vperm
$in0,$in0,$in1,$key # align [and byte swap in LE]
141 vxor
$zero,$zero,$zero
144 ?lvsr
$outperm,0,$out
147 ?vperm
$outmask,$zero,$outmask,$outperm
157 vperm
$key,$in0,$in0,$mask # rotate-n-splat
158 vsldoi
$tmp,$zero,$in0,12 # >>32
159 vperm
$outtail,$in0,$in0,$outperm # rotate
160 vsel
$stage,$outhead,$outtail,$outmask
161 vmr
$outhead,$outtail
162 vcipherlast
$key,$key,$rcon
167 vsldoi
$tmp,$zero,$tmp,12 # >>32
169 vsldoi
$tmp,$zero,$tmp,12 # >>32
171 vadduwm
$rcon,$rcon,$rcon
175 lvx
$rcon,0,$ptr # last two round keys
177 vperm
$key,$in0,$in0,$mask # rotate-n-splat
178 vsldoi
$tmp,$zero,$in0,12 # >>32
179 vperm
$outtail,$in0,$in0,$outperm # rotate
180 vsel
$stage,$outhead,$outtail,$outmask
181 vmr
$outhead,$outtail
182 vcipherlast
$key,$key,$rcon
187 vsldoi
$tmp,$zero,$tmp,12 # >>32
189 vsldoi
$tmp,$zero,$tmp,12 # >>32
191 vadduwm
$rcon,$rcon,$rcon
194 vperm
$key,$in0,$in0,$mask # rotate-n-splat
195 vsldoi
$tmp,$zero,$in0,12 # >>32
196 vperm
$outtail,$in0,$in0,$outperm # rotate
197 vsel
$stage,$outhead,$outtail,$outmask
198 vmr
$outhead,$outtail
199 vcipherlast
$key,$key,$rcon
204 vsldoi
$tmp,$zero,$tmp,12 # >>32
206 vsldoi
$tmp,$zero,$tmp,12 # >>32
209 vperm
$outtail,$in0,$in0,$outperm # rotate
210 vsel
$stage,$outhead,$outtail,$outmask
211 vmr
$outhead,$outtail
214 addi
$inp,$out,15 # 15 is not typo
224 vperm
$outtail,$in0,$in0,$outperm # rotate
225 vsel
$stage,$outhead,$outtail,$outmask
226 vmr
$outhead,$outtail
229 vperm
$in1,$in1,$tmp,$key # align [and byte swap in LE]
230 vspltisb
$key,8 # borrow $key
232 vsububm
$mask,$mask,$key # adjust the mask
235 vperm
$key,$in1,$in1,$mask # rotate-n-splat
236 vsldoi
$tmp,$zero,$in0,12 # >>32
237 vcipherlast
$key,$key,$rcon
240 vsldoi
$tmp,$zero,$tmp,12 # >>32
242 vsldoi
$tmp,$zero,$tmp,12 # >>32
245 vsldoi
$stage,$zero,$in1,8
248 vsldoi
$in1,$zero,$in1,12 # >>32
249 vadduwm
$rcon,$rcon,$rcon
253 vsldoi
$stage,$stage,$in0,8
255 vperm
$key,$in1,$in1,$mask # rotate-n-splat
256 vsldoi
$tmp,$zero,$in0,12 # >>32
257 vperm
$outtail,$stage,$stage,$outperm # rotate
258 vsel
$stage,$outhead,$outtail,$outmask
259 vmr
$outhead,$outtail
260 vcipherlast
$key,$key,$rcon
264 vsldoi
$stage,$in0,$in1,8
266 vsldoi
$tmp,$zero,$tmp,12 # >>32
267 vperm
$outtail,$stage,$stage,$outperm # rotate
268 vsel
$stage,$outhead,$outtail,$outmask
269 vmr
$outhead,$outtail
271 vsldoi
$tmp,$zero,$tmp,12 # >>32
278 vsldoi
$in1,$zero,$in1,12 # >>32
279 vadduwm
$rcon,$rcon,$rcon
283 vperm
$outtail,$in0,$in0,$outperm # rotate
284 vsel
$stage,$outhead,$outtail,$outmask
285 vmr
$outhead,$outtail
287 addi
$inp,$out,15 # 15 is not typo
300 vperm
$outtail,$in0,$in0,$outperm # rotate
301 vsel
$stage,$outhead,$outtail,$outmask
302 vmr
$outhead,$outtail
305 vperm
$in1,$in1,$tmp,$key # align [and byte swap in LE]
309 vperm
$key,$in1,$in1,$mask # rotate-n-splat
310 vsldoi
$tmp,$zero,$in0,12 # >>32
311 vperm
$outtail,$in1,$in1,$outperm # rotate
312 vsel
$stage,$outhead,$outtail,$outmask
313 vmr
$outhead,$outtail
314 vcipherlast
$key,$key,$rcon
319 vsldoi
$tmp,$zero,$tmp,12 # >>32
321 vsldoi
$tmp,$zero,$tmp,12 # >>32
323 vadduwm
$rcon,$rcon,$rcon
325 vperm
$outtail,$in0,$in0,$outperm # rotate
326 vsel
$stage,$outhead,$outtail,$outmask
327 vmr
$outhead,$outtail
329 addi
$inp,$out,15 # 15 is not typo
333 vspltw
$key,$in0,3 # just splat
334 vsldoi
$tmp,$zero,$in1,12 # >>32
338 vsldoi
$tmp,$zero,$tmp,12 # >>32
340 vsldoi
$tmp,$zero,$tmp,12 # >>32
348 lvx
$in1,0,$inp # redundant in aligned case
349 vsel
$in1,$outhead,$in1,$outmask
359 .byte
0,12,0x14,1,0,0,3,0
361 .size
.${prefix
}_set_encrypt_key
,.-.${prefix
}_set_encrypt_key
363 .globl
.${prefix
}_set_decrypt_key
365 .${prefix
}_set_decrypt_key
:
366 $STU $sp,-$FRAME($sp)
368 $PUSH r10
,$FRAME+$LRSAVE($sp)
376 subi
$inp,$out,240 # first round key
377 srwi
$rounds,$rounds,1
378 add
$out,$inp,$cnt # last round key
402 xor r3
,r3
,r3
# return value
407 .byte
0,12,4,1,0x80,0,3,0
409 .size
.${prefix
}_set_decrypt_key
,.-.${prefix
}_set_decrypt_key
412 #########################################################################
413 {{{ # Single block en- and decrypt procedures #
416 my $n = $dir eq "de" ?
"n" : "";
417 my ($inp,$out,$key,$rounds,$idx)=map("r$_",(3..7));
420 .globl
.${prefix
}_
${dir
}crypt
422 .${prefix
}_
${dir
}crypt:
423 lwz
$rounds,240($key)
426 li
$idx,15 # 15 is not typo
432 lvsl v2
,0,$inp # inpperm
434 ?lvsl v3
,0,r11
# outperm
437 vperm v0
,v0
,v1
,v2
# align [and byte swap in LE]
439 ?lvsl v5
,0,$key # keyperm
440 srwi
$rounds,$rounds,1
443 subi
$rounds,$rounds,1
444 ?vperm v1
,v1
,v2
,v5
# align round key
466 v
${n
}cipherlast v0
,v0
,v1
470 li
$idx,15 # 15 is not typo
471 ?vperm v2
,v1
,v2
,v3
# outmask
473 lvx v1
,0,$out # outhead
474 vperm v0
,v0
,v0
,v3
# rotate [and byte swap in LE]
484 .byte
0,12,0x14,0,0,0,3,0
486 .size
.${prefix
}_
${dir
}crypt,.-.${prefix
}_
${dir
}crypt
492 #########################################################################
493 {{{ # CBC en- and decrypt procedures #
494 my ($inp,$out,$len,$key,$ivp,$enc,$rounds,$idx)=map("r$_",(3..10));
495 my ($rndkey0,$rndkey1,$inout,$tmp)= map("v$_",(0..3));
496 my ($ivec,$inptail,$inpperm,$outhead,$outperm,$outmask,$keyperm)=
499 .globl
.${prefix
}_cbc_encrypt
501 .${prefix
}_cbc_encrypt
:
505 cmpwi
$enc,0 # test direction
511 vxor
$rndkey0,$rndkey0,$rndkey0
512 le?vspltisb
$tmp,0x0f
514 lvx
$ivec,0,$ivp # load [unaligned] iv
516 lvx
$inptail,$idx,$ivp
517 le?vxor
$inpperm,$inpperm,$tmp
518 vperm
$ivec,$ivec,$inptail,$inpperm
521 ?lvsl
$keyperm,0,$key # prepare for unaligned key
522 lwz
$rounds,240($key)
524 lvsr
$inpperm,0,r11
# prepare for unaligned load
526 addi
$inp,$inp,15 # 15 is not typo
527 le?vxor
$inpperm,$inpperm,$tmp
529 ?lvsr
$outperm,0,$out # prepare for unaligned store
532 ?vperm
$outmask,$rndkey0,$outmask,$outperm
533 le?vxor
$outperm,$outperm,$tmp
535 srwi
$rounds,$rounds,1
537 subi
$rounds,$rounds,1
545 subi
$len,$len,16 # len-=16
548 vperm
$inout,$inout,$inptail,$inpperm
549 lvx
$rndkey1,$idx,$key
551 ?vperm
$rndkey0,$rndkey0,$rndkey1,$keyperm
552 vxor
$inout,$inout,$rndkey0
553 lvx
$rndkey0,$idx,$key
555 vxor
$inout,$inout,$ivec
558 ?vperm
$rndkey1,$rndkey1,$rndkey0,$keyperm
559 vcipher
$inout,$inout,$rndkey1
560 lvx
$rndkey1,$idx,$key
562 ?vperm
$rndkey0,$rndkey0,$rndkey1,$keyperm
563 vcipher
$inout,$inout,$rndkey0
564 lvx
$rndkey0,$idx,$key
568 ?vperm
$rndkey1,$rndkey1,$rndkey0,$keyperm
569 vcipher
$inout,$inout,$rndkey1
570 lvx
$rndkey1,$idx,$key
572 ?vperm
$rndkey0,$rndkey0,$rndkey1,$keyperm
573 vcipherlast
$ivec,$inout,$rndkey0
576 vperm
$tmp,$ivec,$ivec,$outperm
577 vsel
$inout,$outhead,$tmp,$outmask
588 bge _aesp8_cbc_decrypt8x
593 subi
$len,$len,16 # len-=16
596 vperm
$tmp,$tmp,$inptail,$inpperm
597 lvx
$rndkey1,$idx,$key
599 ?vperm
$rndkey0,$rndkey0,$rndkey1,$keyperm
600 vxor
$inout,$tmp,$rndkey0
601 lvx
$rndkey0,$idx,$key
605 ?vperm
$rndkey1,$rndkey1,$rndkey0,$keyperm
606 vncipher
$inout,$inout,$rndkey1
607 lvx
$rndkey1,$idx,$key
609 ?vperm
$rndkey0,$rndkey0,$rndkey1,$keyperm
610 vncipher
$inout,$inout,$rndkey0
611 lvx
$rndkey0,$idx,$key
615 ?vperm
$rndkey1,$rndkey1,$rndkey0,$keyperm
616 vncipher
$inout,$inout,$rndkey1
617 lvx
$rndkey1,$idx,$key
619 ?vperm
$rndkey0,$rndkey0,$rndkey1,$keyperm
620 vncipherlast
$inout,$inout,$rndkey0
623 vxor
$inout,$inout,$ivec
625 vperm
$tmp,$inout,$inout,$outperm
626 vsel
$inout,$outhead,$tmp,$outmask
634 lvx
$inout,0,$out # redundant in aligned case
635 vsel
$inout,$outhead,$inout,$outmask
638 neg
$enc,$ivp # write [unaligned] iv
639 li
$idx,15 # 15 is not typo
640 vxor
$rndkey0,$rndkey0,$rndkey0
642 le?vspltisb
$tmp,0x0f
643 ?lvsl
$outperm,0,$enc
644 ?vperm
$outmask,$rndkey0,$outmask,$outperm
645 le?vxor
$outperm,$outperm,$tmp
647 vperm
$ivec,$ivec,$ivec,$outperm
648 vsel
$inout,$outhead,$ivec,$outmask
649 lvx
$inptail,$idx,$ivp
651 vsel
$inout,$ivec,$inptail,$outmask
652 stvx
$inout,$idx,$ivp
657 .byte
0,12,0x14,0,0,0,6,0
660 #########################################################################
661 {{ # Optimized CBC decrypt procedure #
663 my ($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,8,26..31));
664 $x00=0 if ($flavour =~ /osx/);
665 my ($in0, $in1, $in2, $in3, $in4, $in5, $in6, $in7 )=map("v$_",(0..3,10..13));
666 my ($out0,$out1,$out2,$out3,$out4,$out5,$out6,$out7)=map("v$_",(14..21));
667 my $rndkey0="v23"; # v24-v25 rotating buffer for first round keys
668 # v26-v31 last 6 round keys
669 my ($tmp,$keyperm)=($in3,$in4); # aliases with "caller", redundant assignment
673 _aesp8_cbc_decrypt8x
:
674 $STU $sp,-`($FRAME+21*16+6*$SIZE_T)`($sp)
675 li r10
,`$FRAME+8*16+15`
676 li r11
,`$FRAME+8*16+31`
677 stvx v20
,r10
,$sp # ABI says so
700 stw
$vrsave,`$FRAME+21*16-4`($sp) # save vrsave
702 $PUSH r26
,`$FRAME+21*16+0*$SIZE_T`($sp)
704 $PUSH r27
,`$FRAME+21*16+1*$SIZE_T`($sp)
706 $PUSH r28
,`$FRAME+21*16+2*$SIZE_T`($sp)
708 $PUSH r29
,`$FRAME+21*16+3*$SIZE_T`($sp)
710 $PUSH r30
,`$FRAME+21*16+4*$SIZE_T`($sp)
712 $PUSH r31
,`$FRAME+21*16+5*$SIZE_T`($sp)
716 subi
$rounds,$rounds,3 # -4 in total
717 subi
$len,$len,128 # bias
719 lvx
$rndkey0,$x00,$key # load key schedule
723 ?vperm
$rndkey0,$rndkey0,v30
,$keyperm
724 addi
$key_,$sp,$FRAME+15
728 ?vperm v24
,v30
,v31
,$keyperm
731 stvx v24
,$x00,$key_ # off-load round[1]
732 ?vperm v25
,v31
,v30
,$keyperm
734 stvx v25
,$x10,$key_ # off-load round[2]
735 addi
$key_,$key_,0x20
736 bdnz Load_cbc_dec_key
739 ?vperm v24
,v30
,v31
,$keyperm
741 stvx v24
,$x00,$key_ # off-load round[3]
742 ?vperm v25
,v31
,v26
,$keyperm
744 stvx v25
,$x10,$key_ # off-load round[4]
745 addi
$key_,$sp,$FRAME+15 # rewind $key_
746 ?vperm v26
,v26
,v27
,$keyperm
748 ?vperm v27
,v27
,v28
,$keyperm
750 ?vperm v28
,v28
,v29
,$keyperm
752 ?vperm v29
,v29
,v30
,$keyperm
753 lvx
$out0,$x70,$key # borrow $out0
754 ?vperm v30
,v30
,v31
,$keyperm
755 lvx v24
,$x00,$key_ # pre-load round[1]
756 ?vperm v31
,v31
,$out0,$keyperm
757 lvx v25
,$x10,$key_ # pre-load round[2]
759 #lvx $inptail,0,$inp # "caller" already did this
760 #addi $inp,$inp,15 # 15 is not typo
761 subi
$inp,$inp,15 # undo "caller"
764 lvx_u
$in0,$x00,$inp # load first 8 "words"
765 le?lvsl
$inpperm,0,$idx
766 le?vspltisb
$tmp,0x0f
768 le?vxor
$inpperm,$inpperm,$tmp # transform for lvx_u/stvx_u
770 le?vperm
$in0,$in0,$in0,$inpperm
772 le?vperm
$in1,$in1,$in1,$inpperm
774 le?vperm
$in2,$in2,$in2,$inpperm
775 vxor
$out0,$in0,$rndkey0
777 le?vperm
$in3,$in3,$in3,$inpperm
778 vxor
$out1,$in1,$rndkey0
780 le?vperm
$in4,$in4,$in4,$inpperm
781 vxor
$out2,$in2,$rndkey0
784 le?vperm
$in5,$in5,$in5,$inpperm
785 vxor
$out3,$in3,$rndkey0
786 le?vperm
$in6,$in6,$in6,$inpperm
787 vxor
$out4,$in4,$rndkey0
788 le?vperm
$in7,$in7,$in7,$inpperm
789 vxor
$out5,$in5,$rndkey0
790 vxor
$out6,$in6,$rndkey0
791 vxor
$out7,$in7,$rndkey0
797 vncipher
$out0,$out0,v24
798 vncipher
$out1,$out1,v24
799 vncipher
$out2,$out2,v24
800 vncipher
$out3,$out3,v24
801 vncipher
$out4,$out4,v24
802 vncipher
$out5,$out5,v24
803 vncipher
$out6,$out6,v24
804 vncipher
$out7,$out7,v24
805 lvx v24
,$x20,$key_ # round[3]
806 addi
$key_,$key_,0x20
808 vncipher
$out0,$out0,v25
809 vncipher
$out1,$out1,v25
810 vncipher
$out2,$out2,v25
811 vncipher
$out3,$out3,v25
812 vncipher
$out4,$out4,v25
813 vncipher
$out5,$out5,v25
814 vncipher
$out6,$out6,v25
815 vncipher
$out7,$out7,v25
816 lvx v25
,$x10,$key_ # round[4]
819 subic
$len,$len,128 # $len-=128
820 vncipher
$out0,$out0,v24
821 vncipher
$out1,$out1,v24
822 vncipher
$out2,$out2,v24
823 vncipher
$out3,$out3,v24
824 vncipher
$out4,$out4,v24
825 vncipher
$out5,$out5,v24
826 vncipher
$out6,$out6,v24
827 vncipher
$out7,$out7,v24
829 subfe
. r0
,r0
,r0
# borrow?-1:0
830 vncipher
$out0,$out0,v25
831 vncipher
$out1,$out1,v25
832 vncipher
$out2,$out2,v25
833 vncipher
$out3,$out3,v25
834 vncipher
$out4,$out4,v25
835 vncipher
$out5,$out5,v25
836 vncipher
$out6,$out6,v25
837 vncipher
$out7,$out7,v25
840 vncipher
$out0,$out0,v26
841 vncipher
$out1,$out1,v26
842 vncipher
$out2,$out2,v26
843 vncipher
$out3,$out3,v26
844 vncipher
$out4,$out4,v26
845 vncipher
$out5,$out5,v26
846 vncipher
$out6,$out6,v26
847 vncipher
$out7,$out7,v26
849 add
$inp,$inp,r0
# $inp is adjusted in such
850 # way that at exit from the
851 # loop inX-in7 are loaded
853 vncipher
$out0,$out0,v27
854 vncipher
$out1,$out1,v27
855 vncipher
$out2,$out2,v27
856 vncipher
$out3,$out3,v27
857 vncipher
$out4,$out4,v27
858 vncipher
$out5,$out5,v27
859 vncipher
$out6,$out6,v27
860 vncipher
$out7,$out7,v27
862 addi
$key_,$sp,$FRAME+15 # rewind $key_
863 vncipher
$out0,$out0,v28
864 vncipher
$out1,$out1,v28
865 vncipher
$out2,$out2,v28
866 vncipher
$out3,$out3,v28
867 vncipher
$out4,$out4,v28
868 vncipher
$out5,$out5,v28
869 vncipher
$out6,$out6,v28
870 vncipher
$out7,$out7,v28
871 lvx v24
,$x00,$key_ # re-pre-load round[1]
873 vncipher
$out0,$out0,v29
874 vncipher
$out1,$out1,v29
875 vncipher
$out2,$out2,v29
876 vncipher
$out3,$out3,v29
877 vncipher
$out4,$out4,v29
878 vncipher
$out5,$out5,v29
879 vncipher
$out6,$out6,v29
880 vncipher
$out7,$out7,v29
881 lvx v25
,$x10,$key_ # re-pre-load round[2]
883 vncipher
$out0,$out0,v30
884 vxor
$ivec,$ivec,v31
# xor with last round key
885 vncipher
$out1,$out1,v30
887 vncipher
$out2,$out2,v30
889 vncipher
$out3,$out3,v30
891 vncipher
$out4,$out4,v30
893 vncipher
$out5,$out5,v30
895 vncipher
$out6,$out6,v30
897 vncipher
$out7,$out7,v30
900 vncipherlast
$out0,$out0,$ivec
901 vncipherlast
$out1,$out1,$in0
902 lvx_u
$in0,$x00,$inp # load next input block
903 vncipherlast
$out2,$out2,$in1
905 vncipherlast
$out3,$out3,$in2
906 le?vperm
$in0,$in0,$in0,$inpperm
908 vncipherlast
$out4,$out4,$in3
909 le?vperm
$in1,$in1,$in1,$inpperm
911 vncipherlast
$out5,$out5,$in4
912 le?vperm
$in2,$in2,$in2,$inpperm
914 vncipherlast
$out6,$out6,$in5
915 le?vperm
$in3,$in3,$in3,$inpperm
917 vncipherlast
$out7,$out7,$in6
918 le?vperm
$in4,$in4,$in4,$inpperm
921 le?vperm
$in5,$in5,$in5,$inpperm
925 le?vperm
$out0,$out0,$out0,$inpperm
926 le?vperm
$out1,$out1,$out1,$inpperm
927 stvx_u
$out0,$x00,$out
928 le?vperm
$in6,$in6,$in6,$inpperm
929 vxor
$out0,$in0,$rndkey0
930 le?vperm
$out2,$out2,$out2,$inpperm
931 stvx_u
$out1,$x10,$out
932 le?vperm
$in7,$in7,$in7,$inpperm
933 vxor
$out1,$in1,$rndkey0
934 le?vperm
$out3,$out3,$out3,$inpperm
935 stvx_u
$out2,$x20,$out
936 vxor
$out2,$in2,$rndkey0
937 le?vperm
$out4,$out4,$out4,$inpperm
938 stvx_u
$out3,$x30,$out
939 vxor
$out3,$in3,$rndkey0
940 le?vperm
$out5,$out5,$out5,$inpperm
941 stvx_u
$out4,$x40,$out
942 vxor
$out4,$in4,$rndkey0
943 le?vperm
$out6,$out6,$out6,$inpperm
944 stvx_u
$out5,$x50,$out
945 vxor
$out5,$in5,$rndkey0
946 le?vperm
$out7,$out7,$out7,$inpperm
947 stvx_u
$out6,$x60,$out
948 vxor
$out6,$in6,$rndkey0
949 stvx_u
$out7,$x70,$out
951 vxor
$out7,$in7,$rndkey0
954 beq Loop_cbc_dec8x
# did $len-=128 borrow?
961 Loop_cbc_dec8x_tail
: # up to 7 "words" tail...
962 vncipher
$out1,$out1,v24
963 vncipher
$out2,$out2,v24
964 vncipher
$out3,$out3,v24
965 vncipher
$out4,$out4,v24
966 vncipher
$out5,$out5,v24
967 vncipher
$out6,$out6,v24
968 vncipher
$out7,$out7,v24
969 lvx v24
,$x20,$key_ # round[3]
970 addi
$key_,$key_,0x20
972 vncipher
$out1,$out1,v25
973 vncipher
$out2,$out2,v25
974 vncipher
$out3,$out3,v25
975 vncipher
$out4,$out4,v25
976 vncipher
$out5,$out5,v25
977 vncipher
$out6,$out6,v25
978 vncipher
$out7,$out7,v25
979 lvx v25
,$x10,$key_ # round[4]
980 bdnz Loop_cbc_dec8x_tail
982 vncipher
$out1,$out1,v24
983 vncipher
$out2,$out2,v24
984 vncipher
$out3,$out3,v24
985 vncipher
$out4,$out4,v24
986 vncipher
$out5,$out5,v24
987 vncipher
$out6,$out6,v24
988 vncipher
$out7,$out7,v24
990 vncipher
$out1,$out1,v25
991 vncipher
$out2,$out2,v25
992 vncipher
$out3,$out3,v25
993 vncipher
$out4,$out4,v25
994 vncipher
$out5,$out5,v25
995 vncipher
$out6,$out6,v25
996 vncipher
$out7,$out7,v25
998 vncipher
$out1,$out1,v26
999 vncipher
$out2,$out2,v26
1000 vncipher
$out3,$out3,v26
1001 vncipher
$out4,$out4,v26
1002 vncipher
$out5,$out5,v26
1003 vncipher
$out6,$out6,v26
1004 vncipher
$out7,$out7,v26
1006 vncipher
$out1,$out1,v27
1007 vncipher
$out2,$out2,v27
1008 vncipher
$out3,$out3,v27
1009 vncipher
$out4,$out4,v27
1010 vncipher
$out5,$out5,v27
1011 vncipher
$out6,$out6,v27
1012 vncipher
$out7,$out7,v27
1014 vncipher
$out1,$out1,v28
1015 vncipher
$out2,$out2,v28
1016 vncipher
$out3,$out3,v28
1017 vncipher
$out4,$out4,v28
1018 vncipher
$out5,$out5,v28
1019 vncipher
$out6,$out6,v28
1020 vncipher
$out7,$out7,v28
1022 vncipher
$out1,$out1,v29
1023 vncipher
$out2,$out2,v29
1024 vncipher
$out3,$out3,v29
1025 vncipher
$out4,$out4,v29
1026 vncipher
$out5,$out5,v29
1027 vncipher
$out6,$out6,v29
1028 vncipher
$out7,$out7,v29
1030 vncipher
$out1,$out1,v30
1031 vxor
$ivec,$ivec,v31
# last round key
1032 vncipher
$out2,$out2,v30
1034 vncipher
$out3,$out3,v30
1036 vncipher
$out4,$out4,v30
1038 vncipher
$out5,$out5,v30
1040 vncipher
$out6,$out6,v30
1042 vncipher
$out7,$out7,v30
1045 cmplwi
$len,32 # switch($len)
1050 blt Lcbc_dec8x_three
1059 vncipherlast
$out1,$out1,$ivec
1060 vncipherlast
$out2,$out2,$in1
1061 vncipherlast
$out3,$out3,$in2
1062 vncipherlast
$out4,$out4,$in3
1063 vncipherlast
$out5,$out5,$in4
1064 vncipherlast
$out6,$out6,$in5
1065 vncipherlast
$out7,$out7,$in6
1068 le?vperm
$out1,$out1,$out1,$inpperm
1069 le?vperm
$out2,$out2,$out2,$inpperm
1070 stvx_u
$out1,$x00,$out
1071 le?vperm
$out3,$out3,$out3,$inpperm
1072 stvx_u
$out2,$x10,$out
1073 le?vperm
$out4,$out4,$out4,$inpperm
1074 stvx_u
$out3,$x20,$out
1075 le?vperm
$out5,$out5,$out5,$inpperm
1076 stvx_u
$out4,$x30,$out
1077 le?vperm
$out6,$out6,$out6,$inpperm
1078 stvx_u
$out5,$x40,$out
1079 le?vperm
$out7,$out7,$out7,$inpperm
1080 stvx_u
$out6,$x50,$out
1081 stvx_u
$out7,$x60,$out
1087 vncipherlast
$out2,$out2,$ivec
1088 vncipherlast
$out3,$out3,$in2
1089 vncipherlast
$out4,$out4,$in3
1090 vncipherlast
$out5,$out5,$in4
1091 vncipherlast
$out6,$out6,$in5
1092 vncipherlast
$out7,$out7,$in6
1095 le?vperm
$out2,$out2,$out2,$inpperm
1096 le?vperm
$out3,$out3,$out3,$inpperm
1097 stvx_u
$out2,$x00,$out
1098 le?vperm
$out4,$out4,$out4,$inpperm
1099 stvx_u
$out3,$x10,$out
1100 le?vperm
$out5,$out5,$out5,$inpperm
1101 stvx_u
$out4,$x20,$out
1102 le?vperm
$out6,$out6,$out6,$inpperm
1103 stvx_u
$out5,$x30,$out
1104 le?vperm
$out7,$out7,$out7,$inpperm
1105 stvx_u
$out6,$x40,$out
1106 stvx_u
$out7,$x50,$out
1112 vncipherlast
$out3,$out3,$ivec
1113 vncipherlast
$out4,$out4,$in3
1114 vncipherlast
$out5,$out5,$in4
1115 vncipherlast
$out6,$out6,$in5
1116 vncipherlast
$out7,$out7,$in6
1119 le?vperm
$out3,$out3,$out3,$inpperm
1120 le?vperm
$out4,$out4,$out4,$inpperm
1121 stvx_u
$out3,$x00,$out
1122 le?vperm
$out5,$out5,$out5,$inpperm
1123 stvx_u
$out4,$x10,$out
1124 le?vperm
$out6,$out6,$out6,$inpperm
1125 stvx_u
$out5,$x20,$out
1126 le?vperm
$out7,$out7,$out7,$inpperm
1127 stvx_u
$out6,$x30,$out
1128 stvx_u
$out7,$x40,$out
1134 vncipherlast
$out4,$out4,$ivec
1135 vncipherlast
$out5,$out5,$in4
1136 vncipherlast
$out6,$out6,$in5
1137 vncipherlast
$out7,$out7,$in6
1140 le?vperm
$out4,$out4,$out4,$inpperm
1141 le?vperm
$out5,$out5,$out5,$inpperm
1142 stvx_u
$out4,$x00,$out
1143 le?vperm
$out6,$out6,$out6,$inpperm
1144 stvx_u
$out5,$x10,$out
1145 le?vperm
$out7,$out7,$out7,$inpperm
1146 stvx_u
$out6,$x20,$out
1147 stvx_u
$out7,$x30,$out
1153 vncipherlast
$out5,$out5,$ivec
1154 vncipherlast
$out6,$out6,$in5
1155 vncipherlast
$out7,$out7,$in6
1158 le?vperm
$out5,$out5,$out5,$inpperm
1159 le?vperm
$out6,$out6,$out6,$inpperm
1160 stvx_u
$out5,$x00,$out
1161 le?vperm
$out7,$out7,$out7,$inpperm
1162 stvx_u
$out6,$x10,$out
1163 stvx_u
$out7,$x20,$out
1169 vncipherlast
$out6,$out6,$ivec
1170 vncipherlast
$out7,$out7,$in6
1173 le?vperm
$out6,$out6,$out6,$inpperm
1174 le?vperm
$out7,$out7,$out7,$inpperm
1175 stvx_u
$out6,$x00,$out
1176 stvx_u
$out7,$x10,$out
1182 vncipherlast
$out7,$out7,$ivec
1185 le?vperm
$out7,$out7,$out7,$inpperm
1190 le?vperm
$ivec,$ivec,$ivec,$inpperm
1191 stvx_u
$ivec,0,$ivp # write [unaligned] iv
1195 stvx
$inpperm,r10
,$sp # wipe copies of round keys
1197 stvx
$inpperm,r11
,$sp
1199 stvx
$inpperm,r10
,$sp
1201 stvx
$inpperm,r11
,$sp
1203 stvx
$inpperm,r10
,$sp
1205 stvx
$inpperm,r11
,$sp
1207 stvx
$inpperm,r10
,$sp
1209 stvx
$inpperm,r11
,$sp
1213 lvx v20
,r10
,$sp # ABI says so
1235 $POP r26
,`$FRAME+21*16+0*$SIZE_T`($sp)
1236 $POP r27
,`$FRAME+21*16+1*$SIZE_T`($sp)
1237 $POP r28
,`$FRAME+21*16+2*$SIZE_T`($sp)
1238 $POP r29
,`$FRAME+21*16+3*$SIZE_T`($sp)
1239 $POP r30
,`$FRAME+21*16+4*$SIZE_T`($sp)
1240 $POP r31
,`$FRAME+21*16+5*$SIZE_T`($sp)
1241 addi
$sp,$sp,`$FRAME+21*16+6*$SIZE_T`
1244 .byte
0,12,0x04,0,0x80,6,6,0
1246 .size
.${prefix
}_cbc_encrypt
,.-.${prefix
}_cbc_encrypt
1250 #########################################################################
1251 {{{ # CTR procedure[s] #
1252 my ($inp,$out,$len,$key,$ivp,$x10,$rounds,$idx)=map("r$_",(3..10));
1253 my ($rndkey0,$rndkey1,$inout,$tmp)= map("v$_",(0..3));
1254 my ($ivec,$inptail,$inpperm,$outhead,$outperm,$outmask,$keyperm,$one)=
1259 .globl
.${prefix
}_ctr32_encrypt_blocks
1261 .${prefix
}_ctr32_encrypt_blocks
:
1270 vxor
$rndkey0,$rndkey0,$rndkey0
1271 le?vspltisb
$tmp,0x0f
1273 lvx
$ivec,0,$ivp # load [unaligned] iv
1274 lvsl
$inpperm,0,$ivp
1275 lvx
$inptail,$idx,$ivp
1277 le?vxor
$inpperm,$inpperm,$tmp
1278 vperm
$ivec,$ivec,$inptail,$inpperm
1279 vsldoi
$one,$rndkey0,$one,1
1282 ?lvsl
$keyperm,0,$key # prepare for unaligned key
1283 lwz
$rounds,240($key)
1285 lvsr
$inpperm,0,r11
# prepare for unaligned load
1287 addi
$inp,$inp,15 # 15 is not typo
1288 le?vxor
$inpperm,$inpperm,$tmp
1290 srwi
$rounds,$rounds,1
1292 subi
$rounds,$rounds,1
1295 bge _aesp8_ctr32_encrypt8x
1297 ?lvsr
$outperm,0,$out # prepare for unaligned store
1298 vspltisb
$outmask,-1
1300 ?vperm
$outmask,$rndkey0,$outmask,$outperm
1301 le?vxor
$outperm,$outperm,$tmp
1305 lvx
$rndkey1,$idx,$key
1307 ?vperm
$rndkey0,$rndkey0,$rndkey1,$keyperm
1308 vxor
$inout,$ivec,$rndkey0
1309 lvx
$rndkey0,$idx,$key
1315 ?vperm
$rndkey1,$rndkey1,$rndkey0,$keyperm
1316 vcipher
$inout,$inout,$rndkey1
1317 lvx
$rndkey1,$idx,$key
1319 ?vperm
$rndkey0,$rndkey0,$rndkey1,$keyperm
1320 vcipher
$inout,$inout,$rndkey0
1321 lvx
$rndkey0,$idx,$key
1325 vadduwm
$ivec,$ivec,$one
1329 subic
. $len,$len,1 # blocks--
1331 ?vperm
$rndkey1,$rndkey1,$rndkey0,$keyperm
1332 vcipher
$inout,$inout,$rndkey1
1333 lvx
$rndkey1,$idx,$key
1334 vperm
$dat,$dat,$inptail,$inpperm
1336 ?vperm
$rndkey1,$rndkey0,$rndkey1,$keyperm
1338 vxor
$dat,$dat,$rndkey1 # last round key
1339 vcipherlast
$inout,$inout,$dat
1341 lvx
$rndkey1,$idx,$key
1343 vperm
$inout,$inout,$inout,$outperm
1344 vsel
$dat,$outhead,$inout,$outmask
1346 ?vperm
$rndkey0,$rndkey0,$rndkey1,$keyperm
1348 vxor
$inout,$ivec,$rndkey0
1349 lvx
$rndkey0,$idx,$key
1356 lvx
$inout,0,$out # redundant in aligned case
1357 vsel
$inout,$outhead,$inout,$outmask
1363 .byte
0,12,0x14,0,0,0,6,0
1366 #########################################################################
1367 {{ # Optimized CTR procedure #
1369 my ($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,8,26..31));
1370 $x00=0 if ($flavour =~ /osx/);
1371 my ($in0, $in1, $in2, $in3, $in4, $in5, $in6, $in7 )=map("v$_",(0..3,10,12..14));
1372 my ($out0,$out1,$out2,$out3,$out4,$out5,$out6,$out7)=map("v$_",(15..22));
1373 my $rndkey0="v23"; # v24-v25 rotating buffer for first round keys
1374 # v26-v31 last 6 round keys
1375 my ($tmp,$keyperm)=($in3,$in4); # aliases with "caller", redundant assignment
1376 my ($two,$three,$four)=($outhead,$outperm,$outmask);
1380 _aesp8_ctr32_encrypt8x
:
1381 $STU $sp,-`($FRAME+21*16+6*$SIZE_T)`($sp)
1382 li r10
,`$FRAME+8*16+15`
1383 li r11
,`$FRAME+8*16+31`
1384 stvx v20
,r10
,$sp # ABI says so
1407 stw
$vrsave,`$FRAME+21*16-4`($sp) # save vrsave
1409 $PUSH r26
,`$FRAME+21*16+0*$SIZE_T`($sp)
1411 $PUSH r27
,`$FRAME+21*16+1*$SIZE_T`($sp)
1413 $PUSH r28
,`$FRAME+21*16+2*$SIZE_T`($sp)
1415 $PUSH r29
,`$FRAME+21*16+3*$SIZE_T`($sp)
1417 $PUSH r30
,`$FRAME+21*16+4*$SIZE_T`($sp)
1419 $PUSH r31
,`$FRAME+21*16+5*$SIZE_T`($sp)
1423 subi
$rounds,$rounds,3 # -4 in total
1425 lvx
$rndkey0,$x00,$key # load key schedule
1429 ?vperm
$rndkey0,$rndkey0,v30
,$keyperm
1430 addi
$key_,$sp,$FRAME+15
1434 ?vperm v24
,v30
,v31
,$keyperm
1437 stvx v24
,$x00,$key_ # off-load round[1]
1438 ?vperm v25
,v31
,v30
,$keyperm
1440 stvx v25
,$x10,$key_ # off-load round[2]
1441 addi
$key_,$key_,0x20
1442 bdnz Load_ctr32_enc_key
1445 ?vperm v24
,v30
,v31
,$keyperm
1447 stvx v24
,$x00,$key_ # off-load round[3]
1448 ?vperm v25
,v31
,v26
,$keyperm
1450 stvx v25
,$x10,$key_ # off-load round[4]
1451 addi
$key_,$sp,$FRAME+15 # rewind $key_
1452 ?vperm v26
,v26
,v27
,$keyperm
1454 ?vperm v27
,v27
,v28
,$keyperm
1456 ?vperm v28
,v28
,v29
,$keyperm
1458 ?vperm v29
,v29
,v30
,$keyperm
1459 lvx
$out0,$x70,$key # borrow $out0
1460 ?vperm v30
,v30
,v31
,$keyperm
1461 lvx v24
,$x00,$key_ # pre-load round[1]
1462 ?vperm v31
,v31
,$out0,$keyperm
1463 lvx v25
,$x10,$key_ # pre-load round[2]
1465 vadduwm
$two,$one,$one
1466 subi
$inp,$inp,15 # undo "caller"
1469 vadduwm
$out1,$ivec,$one # counter values ...
1470 vadduwm
$out2,$ivec,$two
1471 vxor
$out0,$ivec,$rndkey0 # ... xored with rndkey[0]
1473 vadduwm
$out3,$out1,$two
1474 vxor
$out1,$out1,$rndkey0
1475 le?lvsl
$inpperm,0,$idx
1476 vadduwm
$out4,$out2,$two
1477 vxor
$out2,$out2,$rndkey0
1478 le?vspltisb
$tmp,0x0f
1479 vadduwm
$out5,$out3,$two
1480 vxor
$out3,$out3,$rndkey0
1481 le?vxor
$inpperm,$inpperm,$tmp # transform for lvx_u/stvx_u
1482 vadduwm
$out6,$out4,$two
1483 vxor
$out4,$out4,$rndkey0
1484 vadduwm
$out7,$out5,$two
1485 vxor
$out5,$out5,$rndkey0
1486 vadduwm
$ivec,$out6,$two # next counter value
1487 vxor
$out6,$out6,$rndkey0
1488 vxor
$out7,$out7,$rndkey0
1494 vcipher
$out0,$out0,v24
1495 vcipher
$out1,$out1,v24
1496 vcipher
$out2,$out2,v24
1497 vcipher
$out3,$out3,v24
1498 vcipher
$out4,$out4,v24
1499 vcipher
$out5,$out5,v24
1500 vcipher
$out6,$out6,v24
1501 vcipher
$out7,$out7,v24
1502 Loop_ctr32_enc8x_middle
:
1503 lvx v24
,$x20,$key_ # round[3]
1504 addi
$key_,$key_,0x20
1506 vcipher
$out0,$out0,v25
1507 vcipher
$out1,$out1,v25
1508 vcipher
$out2,$out2,v25
1509 vcipher
$out3,$out3,v25
1510 vcipher
$out4,$out4,v25
1511 vcipher
$out5,$out5,v25
1512 vcipher
$out6,$out6,v25
1513 vcipher
$out7,$out7,v25
1514 lvx v25
,$x10,$key_ # round[4]
1515 bdnz Loop_ctr32_enc8x
1517 subic r11
,$len,256 # $len-256, borrow $key_
1518 vcipher
$out0,$out0,v24
1519 vcipher
$out1,$out1,v24
1520 vcipher
$out2,$out2,v24
1521 vcipher
$out3,$out3,v24
1522 vcipher
$out4,$out4,v24
1523 vcipher
$out5,$out5,v24
1524 vcipher
$out6,$out6,v24
1525 vcipher
$out7,$out7,v24
1527 subfe r0
,r0
,r0
# borrow?-1:0
1528 vcipher
$out0,$out0,v25
1529 vcipher
$out1,$out1,v25
1530 vcipher
$out2,$out2,v25
1531 vcipher
$out3,$out3,v25
1532 vcipher
$out4,$out4,v25
1533 vcipher
$out5,$out5,v25
1534 vcipher
$out6,$out6,v25
1535 vcipher
$out7,$out7,v25
1538 addi
$key_,$sp,$FRAME+15 # rewind $key_
1539 vcipher
$out0,$out0,v26
1540 vcipher
$out1,$out1,v26
1541 vcipher
$out2,$out2,v26
1542 vcipher
$out3,$out3,v26
1543 vcipher
$out4,$out4,v26
1544 vcipher
$out5,$out5,v26
1545 vcipher
$out6,$out6,v26
1546 vcipher
$out7,$out7,v26
1547 lvx v24
,$x00,$key_ # re-pre-load round[1]
1549 subic
$len,$len,129 # $len-=129
1550 vcipher
$out0,$out0,v27
1551 addi
$len,$len,1 # $len-=128 really
1552 vcipher
$out1,$out1,v27
1553 vcipher
$out2,$out2,v27
1554 vcipher
$out3,$out3,v27
1555 vcipher
$out4,$out4,v27
1556 vcipher
$out5,$out5,v27
1557 vcipher
$out6,$out6,v27
1558 vcipher
$out7,$out7,v27
1559 lvx v25
,$x10,$key_ # re-pre-load round[2]
1561 vcipher
$out0,$out0,v28
1562 lvx_u
$in0,$x00,$inp # load input
1563 vcipher
$out1,$out1,v28
1564 lvx_u
$in1,$x10,$inp
1565 vcipher
$out2,$out2,v28
1566 lvx_u
$in2,$x20,$inp
1567 vcipher
$out3,$out3,v28
1568 lvx_u
$in3,$x30,$inp
1569 vcipher
$out4,$out4,v28
1570 lvx_u
$in4,$x40,$inp
1571 vcipher
$out5,$out5,v28
1572 lvx_u
$in5,$x50,$inp
1573 vcipher
$out6,$out6,v28
1574 lvx_u
$in6,$x60,$inp
1575 vcipher
$out7,$out7,v28
1576 lvx_u
$in7,$x70,$inp
1579 vcipher
$out0,$out0,v29
1580 le?vperm
$in0,$in0,$in0,$inpperm
1581 vcipher
$out1,$out1,v29
1582 le?vperm
$in1,$in1,$in1,$inpperm
1583 vcipher
$out2,$out2,v29
1584 le?vperm
$in2,$in2,$in2,$inpperm
1585 vcipher
$out3,$out3,v29
1586 le?vperm
$in3,$in3,$in3,$inpperm
1587 vcipher
$out4,$out4,v29
1588 le?vperm
$in4,$in4,$in4,$inpperm
1589 vcipher
$out5,$out5,v29
1590 le?vperm
$in5,$in5,$in5,$inpperm
1591 vcipher
$out6,$out6,v29
1592 le?vperm
$in6,$in6,$in6,$inpperm
1593 vcipher
$out7,$out7,v29
1594 le?vperm
$in7,$in7,$in7,$inpperm
1596 add
$inp,$inp,r0
# $inp is adjusted in such
1597 # way that at exit from the
1598 # loop inX-in7 are loaded
1600 subfe
. r0
,r0
,r0
# borrow?-1:0
1601 vcipher
$out0,$out0,v30
1602 vxor
$in0,$in0,v31
# xor with last round key
1603 vcipher
$out1,$out1,v30
1605 vcipher
$out2,$out2,v30
1607 vcipher
$out3,$out3,v30
1609 vcipher
$out4,$out4,v30
1611 vcipher
$out5,$out5,v30
1613 vcipher
$out6,$out6,v30
1615 vcipher
$out7,$out7,v30
1618 bne Lctr32_enc8x_break
# did $len-129 borrow?
1620 vcipherlast
$in0,$out0,$in0
1621 vcipherlast
$in1,$out1,$in1
1622 vadduwm
$out1,$ivec,$one # counter values ...
1623 vcipherlast
$in2,$out2,$in2
1624 vadduwm
$out2,$ivec,$two
1625 vxor
$out0,$ivec,$rndkey0 # ... xored with rndkey[0]
1626 vcipherlast
$in3,$out3,$in3
1627 vadduwm
$out3,$out1,$two
1628 vxor
$out1,$out1,$rndkey0
1629 vcipherlast
$in4,$out4,$in4
1630 vadduwm
$out4,$out2,$two
1631 vxor
$out2,$out2,$rndkey0
1632 vcipherlast
$in5,$out5,$in5
1633 vadduwm
$out5,$out3,$two
1634 vxor
$out3,$out3,$rndkey0
1635 vcipherlast
$in6,$out6,$in6
1636 vadduwm
$out6,$out4,$two
1637 vxor
$out4,$out4,$rndkey0
1638 vcipherlast
$in7,$out7,$in7
1639 vadduwm
$out7,$out5,$two
1640 vxor
$out5,$out5,$rndkey0
1641 le?vperm
$in0,$in0,$in0,$inpperm
1642 vadduwm
$ivec,$out6,$two # next counter value
1643 vxor
$out6,$out6,$rndkey0
1644 le?vperm
$in1,$in1,$in1,$inpperm
1645 vxor
$out7,$out7,$rndkey0
1648 vcipher
$out0,$out0,v24
1649 stvx_u
$in0,$x00,$out
1650 le?vperm
$in2,$in2,$in2,$inpperm
1651 vcipher
$out1,$out1,v24
1652 stvx_u
$in1,$x10,$out
1653 le?vperm
$in3,$in3,$in3,$inpperm
1654 vcipher
$out2,$out2,v24
1655 stvx_u
$in2,$x20,$out
1656 le?vperm
$in4,$in4,$in4,$inpperm
1657 vcipher
$out3,$out3,v24
1658 stvx_u
$in3,$x30,$out
1659 le?vperm
$in5,$in5,$in5,$inpperm
1660 vcipher
$out4,$out4,v24
1661 stvx_u
$in4,$x40,$out
1662 le?vperm
$in6,$in6,$in6,$inpperm
1663 vcipher
$out5,$out5,v24
1664 stvx_u
$in5,$x50,$out
1665 le?vperm
$in7,$in7,$in7,$inpperm
1666 vcipher
$out6,$out6,v24
1667 stvx_u
$in6,$x60,$out
1668 vcipher
$out7,$out7,v24
1669 stvx_u
$in7,$x70,$out
1672 b Loop_ctr32_enc8x_middle
1677 blt Lctr32_enc8x_one
1679 beq Lctr32_enc8x_two
1681 blt Lctr32_enc8x_three
1683 beq Lctr32_enc8x_four
1685 blt Lctr32_enc8x_five
1687 beq Lctr32_enc8x_six
1689 blt Lctr32_enc8x_seven
1692 vcipherlast
$out0,$out0,$in0
1693 vcipherlast
$out1,$out1,$in1
1694 vcipherlast
$out2,$out2,$in2
1695 vcipherlast
$out3,$out3,$in3
1696 vcipherlast
$out4,$out4,$in4
1697 vcipherlast
$out5,$out5,$in5
1698 vcipherlast
$out6,$out6,$in6
1699 vcipherlast
$out7,$out7,$in7
1701 le?vperm
$out0,$out0,$out0,$inpperm
1702 le?vperm
$out1,$out1,$out1,$inpperm
1703 stvx_u
$out0,$x00,$out
1704 le?vperm
$out2,$out2,$out2,$inpperm
1705 stvx_u
$out1,$x10,$out
1706 le?vperm
$out3,$out3,$out3,$inpperm
1707 stvx_u
$out2,$x20,$out
1708 le?vperm
$out4,$out4,$out4,$inpperm
1709 stvx_u
$out3,$x30,$out
1710 le?vperm
$out5,$out5,$out5,$inpperm
1711 stvx_u
$out4,$x40,$out
1712 le?vperm
$out6,$out6,$out6,$inpperm
1713 stvx_u
$out5,$x50,$out
1714 le?vperm
$out7,$out7,$out7,$inpperm
1715 stvx_u
$out6,$x60,$out
1716 stvx_u
$out7,$x70,$out
1722 vcipherlast
$out0,$out0,$in1
1723 vcipherlast
$out1,$out1,$in2
1724 vcipherlast
$out2,$out2,$in3
1725 vcipherlast
$out3,$out3,$in4
1726 vcipherlast
$out4,$out4,$in5
1727 vcipherlast
$out5,$out5,$in6
1728 vcipherlast
$out6,$out6,$in7
1730 le?vperm
$out0,$out0,$out0,$inpperm
1731 le?vperm
$out1,$out1,$out1,$inpperm
1732 stvx_u
$out0,$x00,$out
1733 le?vperm
$out2,$out2,$out2,$inpperm
1734 stvx_u
$out1,$x10,$out
1735 le?vperm
$out3,$out3,$out3,$inpperm
1736 stvx_u
$out2,$x20,$out
1737 le?vperm
$out4,$out4,$out4,$inpperm
1738 stvx_u
$out3,$x30,$out
1739 le?vperm
$out5,$out5,$out5,$inpperm
1740 stvx_u
$out4,$x40,$out
1741 le?vperm
$out6,$out6,$out6,$inpperm
1742 stvx_u
$out5,$x50,$out
1743 stvx_u
$out6,$x60,$out
1749 vcipherlast
$out0,$out0,$in2
1750 vcipherlast
$out1,$out1,$in3
1751 vcipherlast
$out2,$out2,$in4
1752 vcipherlast
$out3,$out3,$in5
1753 vcipherlast
$out4,$out4,$in6
1754 vcipherlast
$out5,$out5,$in7
1756 le?vperm
$out0,$out0,$out0,$inpperm
1757 le?vperm
$out1,$out1,$out1,$inpperm
1758 stvx_u
$out0,$x00,$out
1759 le?vperm
$out2,$out2,$out2,$inpperm
1760 stvx_u
$out1,$x10,$out
1761 le?vperm
$out3,$out3,$out3,$inpperm
1762 stvx_u
$out2,$x20,$out
1763 le?vperm
$out4,$out4,$out4,$inpperm
1764 stvx_u
$out3,$x30,$out
1765 le?vperm
$out5,$out5,$out5,$inpperm
1766 stvx_u
$out4,$x40,$out
1767 stvx_u
$out5,$x50,$out
1773 vcipherlast
$out0,$out0,$in3
1774 vcipherlast
$out1,$out1,$in4
1775 vcipherlast
$out2,$out2,$in5
1776 vcipherlast
$out3,$out3,$in6
1777 vcipherlast
$out4,$out4,$in7
1779 le?vperm
$out0,$out0,$out0,$inpperm
1780 le?vperm
$out1,$out1,$out1,$inpperm
1781 stvx_u
$out0,$x00,$out
1782 le?vperm
$out2,$out2,$out2,$inpperm
1783 stvx_u
$out1,$x10,$out
1784 le?vperm
$out3,$out3,$out3,$inpperm
1785 stvx_u
$out2,$x20,$out
1786 le?vperm
$out4,$out4,$out4,$inpperm
1787 stvx_u
$out3,$x30,$out
1788 stvx_u
$out4,$x40,$out
1794 vcipherlast
$out0,$out0,$in4
1795 vcipherlast
$out1,$out1,$in5
1796 vcipherlast
$out2,$out2,$in6
1797 vcipherlast
$out3,$out3,$in7
1799 le?vperm
$out0,$out0,$out0,$inpperm
1800 le?vperm
$out1,$out1,$out1,$inpperm
1801 stvx_u
$out0,$x00,$out
1802 le?vperm
$out2,$out2,$out2,$inpperm
1803 stvx_u
$out1,$x10,$out
1804 le?vperm
$out3,$out3,$out3,$inpperm
1805 stvx_u
$out2,$x20,$out
1806 stvx_u
$out3,$x30,$out
1812 vcipherlast
$out0,$out0,$in5
1813 vcipherlast
$out1,$out1,$in6
1814 vcipherlast
$out2,$out2,$in7
1816 le?vperm
$out0,$out0,$out0,$inpperm
1817 le?vperm
$out1,$out1,$out1,$inpperm
1818 stvx_u
$out0,$x00,$out
1819 le?vperm
$out2,$out2,$out2,$inpperm
1820 stvx_u
$out1,$x10,$out
1821 stvx_u
$out2,$x20,$out
1827 vcipherlast
$out0,$out0,$in6
1828 vcipherlast
$out1,$out1,$in7
1830 le?vperm
$out0,$out0,$out0,$inpperm
1831 le?vperm
$out1,$out1,$out1,$inpperm
1832 stvx_u
$out0,$x00,$out
1833 stvx_u
$out1,$x10,$out
1839 vcipherlast
$out0,$out0,$in7
1841 le?vperm
$out0,$out0,$out0,$inpperm
1848 stvx
$inpperm,r10
,$sp # wipe copies of round keys
1850 stvx
$inpperm,r11
,$sp
1852 stvx
$inpperm,r10
,$sp
1854 stvx
$inpperm,r11
,$sp
1856 stvx
$inpperm,r10
,$sp
1858 stvx
$inpperm,r11
,$sp
1860 stvx
$inpperm,r10
,$sp
1862 stvx
$inpperm,r11
,$sp
1866 lvx v20
,r10
,$sp # ABI says so
1888 $POP r26
,`$FRAME+21*16+0*$SIZE_T`($sp)
1889 $POP r27
,`$FRAME+21*16+1*$SIZE_T`($sp)
1890 $POP r28
,`$FRAME+21*16+2*$SIZE_T`($sp)
1891 $POP r29
,`$FRAME+21*16+3*$SIZE_T`($sp)
1892 $POP r30
,`$FRAME+21*16+4*$SIZE_T`($sp)
1893 $POP r31
,`$FRAME+21*16+5*$SIZE_T`($sp)
1894 addi
$sp,$sp,`$FRAME+21*16+6*$SIZE_T`
1897 .byte
0,12,0x04,0,0x80,6,6,0
1899 .size
.${prefix
}_ctr32_encrypt_blocks
,.-.${prefix
}_ctr32_encrypt_blocks
# --------------------------------------------------------------------
# XTS procedures: register assignments shared by the scalar
# ${prefix}_xts_encrypt/_xts_decrypt entry points below.
# GPRs r3..r10 carry the C-level arguments (inp, out, len, key1, key2,
# ivp); VRs v0..v12 are working vectors.  NOTE(review): extraction has
# fused original line numbers into the text and split lines — code is
# kept byte-identical here, comments only.
# --------------------------------------------------------------------
1903 #########################################################################
1904 {{{ # XTS procedures #
1905 my ($inp,$out,$len,$key1,$key2,$ivp,$rounds,$idx) = map("r$_",(3..10));
1906 my ($rndkey0,$rndkey1,$inout) = map("v$_",(0..2));
1907 my ($output,$inptail,$inpperm,$leperm,$keyperm) = map("v$_",(3..7));
1908 my ($tweak,$seven,$eighty7,$tmp,$tweak1) = map("v$_",(8..12));
# $taillen aliases $key2: the tweak key is only needed up front, so the
# register is reused for the partial-block length afterwards.
1909 my $taillen = $key2;
1911 ($inp,$idx) = ($idx,$inp); # reassign
# --------------------------------------------------------------------
# ${prefix}_xts_encrypt: scalar (one-block-at-a-time) AES-XTS encrypt.
# The initial tweak is produced by encrypting the IV under $key2; data
# blocks are then encrypted under $key1 with the tweak xored in before
# and after the cipher rounds.  The tweak is advanced per block via the
# vsrab/vaddubm/vsldoi/vand/vxor sequence (multiply by x in GF(2^128),
# using the 0x870101..01 constant built below).  Ends with a
# ciphertext-stealing path for a trailing partial block.
# NOTE(review): code kept byte-identical; original line numbers were
# fused into the text by extraction.
# --------------------------------------------------------------------
1914 .globl
.${prefix
}_xts_encrypt
1916 .${prefix
}_xts_encrypt
:
1917 mr
$inp,r3
# reassign
1923 mfspr r12
,256 # save vrsave
1927 vspltisb
$seven,0x07 # 0x070707..07
1928 le?lvsl
$leperm,r11
,r11
1929 le?vspltisb
$tmp,0x0f
1930 le?vxor
$leperm,$leperm,$seven
# Load the (possibly unaligned) IV and align it into $tweak.
1933 lvx
$tweak,0,$ivp # load [unaligned] iv
1934 lvsl
$inpperm,0,$ivp
1935 lvx
$inptail,$idx,$ivp
1936 le?vxor
$inpperm,$inpperm,$tmp
1937 vperm
$tweak,$tweak,$inptail,$inpperm
# Encrypt the IV under $key2 to form the initial XTS tweak.
1939 ?lvsl
$keyperm,0,$key2 # prepare for unaligned key
1940 lwz
$rounds,240($key2)
1941 srwi
$rounds,$rounds,1
1942 subi
$rounds,$rounds,1
1946 lvsr
$inpperm,0,r11
# prepare for unaligned load
1948 addi
$inp,$inp,15 # 15 is not typo
1949 le?vxor
$inpperm,$inpperm,$tmp
1951 lvx
$rndkey0,0,$key2
1952 lvx
$rndkey1,$idx,$key2
1954 ?vperm
$rndkey0,$rndkey0,$rndkey1,$keyperm
1955 vxor
$tweak,$tweak,$rndkey0
1956 lvx
$rndkey0,$idx,$key2
# Tweak-encryption round loop body (two rounds per iteration).
1961 ?vperm
$rndkey1,$rndkey1,$rndkey0,$keyperm
1962 vcipher
$tweak,$tweak,$rndkey1
1963 lvx
$rndkey1,$idx,$key2
1965 ?vperm
$rndkey0,$rndkey0,$rndkey1,$keyperm
1966 vcipher
$tweak,$tweak,$rndkey0
1967 lvx
$rndkey0,$idx,$key2
1971 ?vperm
$rndkey1,$rndkey1,$rndkey0,$keyperm
1972 vcipher
$tweak,$tweak,$rndkey1
1973 lvx
$rndkey1,$idx,$key2
1975 ?vperm
$rndkey0,$rndkey0,$rndkey1,$keyperm
1976 vcipherlast
$tweak,$tweak,$rndkey0
# Switch to the data key $key1 and build the GF(2^128) reduction
# constant 0x870101..01 used by the tweak-update sequence.
1981 ?lvsl
$keyperm,0,$key1 # prepare for unaligned key
1982 lwz
$rounds,240($key1)
1983 srwi
$rounds,$rounds,1
1984 subi
$rounds,$rounds,1
1987 vslb
$eighty7,$seven,$seven # 0x808080..80
1988 vor
$eighty7,$eighty7,$seven # 0x878787..87
1989 vspltisb
$tmp,1 # 0x010101..01
1990 vsldoi
$eighty7,$eighty7,$tmp,15 # 0x870101..01
# Large inputs take the 6x interleaved fast path (comparison emitted
# just above this point — not visible in this chunk; TODO confirm).
1993 bge _aesp8_xts_encrypt6x
1995 andi
. $taillen,$len,15
1997 subi
$taillen,$taillen,16
# Scalar per-block loop: xor tweak, run AES rounds, xor tweak, store.
2002 lvx
$rndkey0,0,$key1
2003 lvx
$rndkey1,$idx,$key1
2005 vperm
$inout,$inout,$inptail,$inpperm
2006 ?vperm
$rndkey0,$rndkey0,$rndkey1,$keyperm
2007 vxor
$inout,$inout,$tweak
2008 vxor
$inout,$inout,$rndkey0
2009 lvx
$rndkey0,$idx,$key1
2016 ?vperm
$rndkey1,$rndkey1,$rndkey0,$keyperm
2017 vcipher
$inout,$inout,$rndkey1
2018 lvx
$rndkey1,$idx,$key1
2020 ?vperm
$rndkey0,$rndkey0,$rndkey1,$keyperm
2021 vcipher
$inout,$inout,$rndkey0
2022 lvx
$rndkey0,$idx,$key1
2026 ?vperm
$rndkey1,$rndkey1,$rndkey0,$keyperm
2027 vcipher
$inout,$inout,$rndkey1
2028 lvx
$rndkey1,$idx,$key1
2030 ?vperm
$rndkey0,$rndkey0,$rndkey1,$keyperm
# Fold the tweak into the last round key so vcipherlast applies both.
2031 vxor
$rndkey0,$rndkey0,$tweak
2032 vcipherlast
$output,$inout,$rndkey0
2034 le?vperm
$tmp,$output,$output,$leperm
2036 le?stvx_u
$tmp,0,$out
2037 be?stvx_u
$output,0,$out
2046 lvx
$rndkey0,0,$key1
2047 lvx
$rndkey1,$idx,$key1
# Advance the tweak: multiply by x in GF(2^128) (shift left with
# carry-byte propagation, conditional 0x87 reduction).
2055 vsrab
$tmp,$tweak,$seven # next tweak value
2056 vaddubm
$tweak,$tweak,$tweak
2057 vsldoi
$tmp,$tmp,$tmp,15
2058 vand
$tmp,$tmp,$eighty7
2059 vxor
$tweak,$tweak,$tmp
2061 vperm
$inout,$inout,$inptail,$inpperm
2062 ?vperm
$rndkey0,$rndkey0,$rndkey1,$keyperm
2063 vxor
$inout,$inout,$tweak
2064 vxor
$output,$output,$rndkey0 # just in case $len<16
2065 vxor
$inout,$inout,$rndkey0
2066 lvx
$rndkey0,$idx,$key1
# Ciphertext stealing: merge the previous ciphertext block with the
# partial final block via vperm/vsel before one more encryption pass.
2073 vxor
$output,$output,$tweak
2074 lvsr
$inpperm,0,$len # $inpperm is no longer needed
2075 vxor
$inptail,$inptail,$inptail # $inptail is no longer needed
2077 vperm
$inptail,$inptail,$tmp,$inpperm
2078 vsel
$inout,$inout,$output,$inptail
2087 bdnz Loop_xts_enc_steal
2090 b Loop_xts_enc
# one more time...
2093 mtspr
256,r12
# restore vrsave
2097 .byte
0,12,0x04,0,0x80,6,6,0
2099 .size
.${prefix
}_xts_encrypt
,.-.${prefix
}_xts_encrypt
# --------------------------------------------------------------------
# ${prefix}_xts_decrypt: scalar AES-XTS decrypt, mirror of
# ${prefix}_xts_encrypt.  The tweak is still computed by ENcrypting
# the IV under $key2; data blocks are then DEcrypted (vncipher) under
# $key1.  For the ciphertext-stealing tail a second tweak $tweak1 is
# derived, because XTS decryption applies the tweaks to the last two
# blocks in swapped order (see the ":-(" / ":-)" swap below).
# NOTE(review): code kept byte-identical; original line numbers were
# fused into the text by extraction.
# --------------------------------------------------------------------
2101 .globl
.${prefix
}_xts_decrypt
2103 .${prefix
}_xts_decrypt
:
2104 mr
$inp,r3
# reassign
2110 mfspr r12
,256 # save vrsave
2119 vspltisb
$seven,0x07 # 0x070707..07
2120 le?lvsl
$leperm,r11
,r11
2121 le?vspltisb
$tmp,0x0f
2122 le?vxor
$leperm,$leperm,$seven
# Load and align the IV into $tweak.
2125 lvx
$tweak,0,$ivp # load [unaligned] iv
2126 lvsl
$inpperm,0,$ivp
2127 lvx
$inptail,$idx,$ivp
2128 le?vxor
$inpperm,$inpperm,$tmp
2129 vperm
$tweak,$tweak,$inptail,$inpperm
# Encrypt the IV under $key2 to form the initial tweak (encryption
# even in the decrypt routine — XTS defines it this way).
2131 ?lvsl
$keyperm,0,$key2 # prepare for unaligned key
2132 lwz
$rounds,240($key2)
2133 srwi
$rounds,$rounds,1
2134 subi
$rounds,$rounds,1
2138 lvsr
$inpperm,0,r11
# prepare for unaligned load
2140 addi
$inp,$inp,15 # 15 is not typo
2141 le?vxor
$inpperm,$inpperm,$tmp
2143 lvx
$rndkey0,0,$key2
2144 lvx
$rndkey1,$idx,$key2
2146 ?vperm
$rndkey0,$rndkey0,$rndkey1,$keyperm
2147 vxor
$tweak,$tweak,$rndkey0
2148 lvx
$rndkey0,$idx,$key2
2153 ?vperm
$rndkey1,$rndkey1,$rndkey0,$keyperm
2154 vcipher
$tweak,$tweak,$rndkey1
2155 lvx
$rndkey1,$idx,$key2
2157 ?vperm
$rndkey0,$rndkey0,$rndkey1,$keyperm
2158 vcipher
$tweak,$tweak,$rndkey0
2159 lvx
$rndkey0,$idx,$key2
2163 ?vperm
$rndkey1,$rndkey1,$rndkey0,$keyperm
2164 vcipher
$tweak,$tweak,$rndkey1
2165 lvx
$rndkey1,$idx,$key2
2167 ?vperm
$rndkey0,$rndkey0,$rndkey1,$keyperm
2168 vcipherlast
$tweak,$tweak,$rndkey0
# Switch to the data key $key1; build the 0x870101..01 constant.
2173 ?lvsl
$keyperm,0,$key1 # prepare for unaligned key
2174 lwz
$rounds,240($key1)
2175 srwi
$rounds,$rounds,1
2176 subi
$rounds,$rounds,1
2179 vslb
$eighty7,$seven,$seven # 0x808080..80
2180 vor
$eighty7,$eighty7,$seven # 0x878787..87
2181 vspltisb
$tmp,1 # 0x010101..01
2182 vsldoi
$eighty7,$eighty7,$tmp,15 # 0x870101..01
# Large inputs branch to the 6x interleaved path (comparison emitted
# just above — not visible in this chunk; TODO confirm).
2185 bge _aesp8_xts_decrypt6x
# Scalar per-block decrypt loop: xor tweak, vncipher rounds, xor tweak.
2187 lvx
$rndkey0,0,$key1
2188 lvx
$rndkey1,$idx,$key1
2190 vperm
$inout,$inout,$inptail,$inpperm
2191 ?vperm
$rndkey0,$rndkey0,$rndkey1,$keyperm
2192 vxor
$inout,$inout,$tweak
2193 vxor
$inout,$inout,$rndkey0
2194 lvx
$rndkey0,$idx,$key1
2204 ?vperm
$rndkey1,$rndkey1,$rndkey0,$keyperm
2205 vncipher
$inout,$inout,$rndkey1
2206 lvx
$rndkey1,$idx,$key1
2208 ?vperm
$rndkey0,$rndkey0,$rndkey1,$keyperm
2209 vncipher
$inout,$inout,$rndkey0
2210 lvx
$rndkey0,$idx,$key1
2214 ?vperm
$rndkey1,$rndkey1,$rndkey0,$keyperm
2215 vncipher
$inout,$inout,$rndkey1
2216 lvx
$rndkey1,$idx,$key1
2218 ?vperm
$rndkey0,$rndkey0,$rndkey1,$keyperm
# Fold the tweak into the last round key for vncipherlast.
2219 vxor
$rndkey0,$rndkey0,$tweak
2220 vncipherlast
$output,$inout,$rndkey0
2222 le?vperm
$tmp,$output,$output,$leperm
2224 le?stvx_u
$tmp,0,$out
2225 be?stvx_u
$output,0,$out
2234 lvx
$rndkey0,0,$key1
2235 lvx
$rndkey1,$idx,$key1
# Advance the tweak (multiply by x in GF(2^128)).
2238 vsrab
$tmp,$tweak,$seven # next tweak value
2239 vaddubm
$tweak,$tweak,$tweak
2240 vsldoi
$tmp,$tmp,$tmp,15
2241 vand
$tmp,$tmp,$eighty7
2242 vxor
$tweak,$tweak,$tmp
2244 vperm
$inout,$inout,$inptail,$inpperm
2245 ?vperm
$rndkey0,$rndkey0,$rndkey1,$keyperm
2246 vxor
$inout,$inout,$tweak
2247 vxor
$inout,$inout,$rndkey0
2248 lvx
$rndkey0,$idx,$key1
# Derive the extra tweak $tweak1 for the penultimate (stealing) block.
2256 vsrab
$tmp,$tweak,$seven # next tweak value
2257 vaddubm
$tweak1,$tweak,$tweak
2258 vsldoi
$tmp,$tmp,$tmp,15
2259 vand
$tmp,$tmp,$eighty7
2260 vxor
$tweak1,$tweak1,$tmp
# Swap which tweak is applied: undo $tweak, apply $tweak1 instead.
2265 vxor
$inout,$inout,$tweak # :-(
2266 vxor
$inout,$inout,$tweak1 # :-)
2269 ?vperm
$rndkey1,$rndkey1,$rndkey0,$keyperm
2270 vncipher
$inout,$inout,$rndkey1
2271 lvx
$rndkey1,$idx,$key1
2273 ?vperm
$rndkey0,$rndkey0,$rndkey1,$keyperm
2274 vncipher
$inout,$inout,$rndkey0
2275 lvx
$rndkey0,$idx,$key1
2277 bdnz Loop_xts_dec_short
2279 ?vperm
$rndkey1,$rndkey1,$rndkey0,$keyperm
2280 vncipher
$inout,$inout,$rndkey1
2281 lvx
$rndkey1,$idx,$key1
2283 ?vperm
$rndkey0,$rndkey0,$rndkey1,$keyperm
2284 vxor
$rndkey0,$rndkey0,$tweak1
2285 vncipherlast
$output,$inout,$rndkey0
2287 le?vperm
$tmp,$output,$output,$leperm
2289 le?stvx_u
$tmp,0,$out
2290 be?stvx_u
$output,0,$out
# Ciphertext stealing: splice partial final block with the previous
# plaintext block via vperm/vsel, then run one more decryption pass.
2295 lvx
$rndkey0,0,$key1
2296 lvx
$rndkey1,$idx,$key1
2298 vperm
$inout,$inout,$inptail,$inpperm
2299 ?vperm
$rndkey0,$rndkey0,$rndkey1,$keyperm
2301 lvsr
$inpperm,0,$len # $inpperm is no longer needed
2302 vxor
$inptail,$inptail,$inptail # $inptail is no longer needed
2304 vperm
$inptail,$inptail,$tmp,$inpperm
2305 vsel
$inout,$inout,$output,$inptail
2307 vxor
$rndkey0,$rndkey0,$tweak
2308 vxor
$inout,$inout,$rndkey0
2309 lvx
$rndkey0,$idx,$key1
2318 bdnz Loop_xts_dec_steal
2321 b Loop_xts_dec
# one more time...
2324 mtspr
256,r12
# restore vrsave
2328 .byte
0,12,0x04,0,0x80,6,6,0
2330 .size
.${prefix
}_xts_decrypt
,.-.${prefix
}_xts_decrypt
# --------------------------------------------------------------------
# Optimized 6x-interleaved XTS procedures: register assignments.
# $x00..$x70 are GPR offsets for block addressing (x00 is literal 0 on
# OSX flavours because r0 reads as zero in address computations);
# $in0-5/$out0-5/$twk0-5 are the six parallel data/tweak vectors.
# NOTE(review): code kept byte-identical; original line numbers fused
# in by extraction.
# --------------------------------------------------------------------
2332 #########################################################################
2333 {{ # Optimized XTS procedures #
2335 my ($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,8,26..31));
2336 $x00=0 if ($flavour =~ /osx/);
2337 my ($in0, $in1, $in2, $in3, $in4, $in5 )=map("v$_",(0..5));
2338 my ($out0, $out1, $out2, $out3, $out4, $out5)=map("v$_",(7,12..16));
2339 my ($twk0, $twk1, $twk2, $twk3, $twk4, $twk5)=map("v$_",(17..22));
2340 my $rndkey0="v23"; # v24-v25 rotating buffer for first found keys
2341 # v26-v31 last 6 round keys
2342 my ($keyperm)=($out0); # aliases with "caller", redundant assignment
# --------------------------------------------------------------------
# _aesp8_xts_encrypt6x: 6x-interleaved AES-XTS encrypt fast path,
# entered from ${prefix}_xts_encrypt for large inputs.  Builds a stack
# frame, saves nonvolatile GPRs/VRs per ABI, copies the aligned key
# schedule onto the stack (v24/v25 rotate through the early rounds,
# v26-v31 hold the last six), then runs a software-pipelined loop that
# encrypts six blocks while computing the next six tweaks in parallel.
# Tail sections handle 5..1 remaining blocks, followed by ciphertext
# stealing, the epilogue, and the _aesp8_xts_enc5x helper used by the
# stealing path.  NOTE(review): code kept byte-identical; original
# line numbers fused in by extraction.
# --------------------------------------------------------------------
2347 _aesp8_xts_encrypt6x
:
# Prologue: allocate frame, save LR, nonvolatile v20+ and r26-r31.
2348 $STU $sp,-`($FRAME+21*16+6*$SIZE_T)`($sp)
2350 li r7
,`$FRAME+8*16+15`
2351 li r8
,`$FRAME+8*16+31`
2352 $PUSH r0
,`$FRAME+21*16+6*$SIZE_T+$LRSAVE`($sp)
2353 stvx v20
,r7
,$sp # ABI says so
2377 stw
$vrsave,`$FRAME+21*16-4`($sp) # save vrsave
2379 $PUSH r26
,`$FRAME+21*16+0*$SIZE_T`($sp)
2381 $PUSH r27
,`$FRAME+21*16+1*$SIZE_T`($sp)
2383 $PUSH r28
,`$FRAME+21*16+2*$SIZE_T`($sp)
2385 $PUSH r29
,`$FRAME+21*16+3*$SIZE_T`($sp)
2387 $PUSH r30
,`$FRAME+21*16+4*$SIZE_T`($sp)
2389 $PUSH r31
,`$FRAME+21*16+5*$SIZE_T`($sp)
# Align the key schedule and off-load early rounds to the stack.
2393 subi
$rounds,$rounds,3 # -4 in total
2395 lvx
$rndkey0,$x00,$key1 # load key schedule
2397 addi
$key1,$key1,0x20
2399 ?vperm
$rndkey0,$rndkey0,v30
,$keyperm
2400 addi
$key_,$sp,$FRAME+15
2404 ?vperm v24
,v30
,v31
,$keyperm
2406 addi
$key1,$key1,0x20
2407 stvx v24
,$x00,$key_ # off-load round[1]
2408 ?vperm v25
,v31
,v30
,$keyperm
2410 stvx v25
,$x10,$key_ # off-load round[2]
2411 addi
$key_,$key_,0x20
2412 bdnz Load_xts_enc_key
2415 ?vperm v24
,v30
,v31
,$keyperm
2417 stvx v24
,$x00,$key_ # off-load round[3]
2418 ?vperm v25
,v31
,v26
,$keyperm
2420 stvx v25
,$x10,$key_ # off-load round[4]
2421 addi
$key_,$sp,$FRAME+15 # rewind $key_
# Keep the last six round keys resident in v26-v31.
2422 ?vperm v26
,v26
,v27
,$keyperm
2424 ?vperm v27
,v27
,v28
,$keyperm
2426 ?vperm v28
,v28
,v29
,$keyperm
2428 ?vperm v29
,v29
,v30
,$keyperm
2429 lvx
$twk5,$x70,$key1 # borrow $twk5
2430 ?vperm v30
,v30
,v31
,$keyperm
2431 lvx v24
,$x00,$key_ # pre-load round[1]
2432 ?vperm v31
,v31
,$twk5,$keyperm
2433 lvx v25
,$x10,$key_ # pre-load round[2]
# Compute six tweaks ($twk0..$twk5, each pre-xored with round key 0)
# while loading and tweak-xoring the first six input blocks.
2435 vperm
$in0,$inout,$inptail,$inpperm
2436 subi
$inp,$inp,31 # undo "caller"
2437 vxor
$twk0,$tweak,$rndkey0
2438 vsrab
$tmp,$tweak,$seven # next tweak value
2439 vaddubm
$tweak,$tweak,$tweak
2440 vsldoi
$tmp,$tmp,$tmp,15
2441 vand
$tmp,$tmp,$eighty7
2442 vxor
$out0,$in0,$twk0
2443 vxor
$tweak,$tweak,$tmp
2445 lvx_u
$in1,$x10,$inp
2446 vxor
$twk1,$tweak,$rndkey0
2447 vsrab
$tmp,$tweak,$seven # next tweak value
2448 vaddubm
$tweak,$tweak,$tweak
2449 vsldoi
$tmp,$tmp,$tmp,15
2450 le?vperm
$in1,$in1,$in1,$leperm
2451 vand
$tmp,$tmp,$eighty7
2452 vxor
$out1,$in1,$twk1
2453 vxor
$tweak,$tweak,$tmp
2455 lvx_u
$in2,$x20,$inp
2456 andi
. $taillen,$len,15
2457 vxor
$twk2,$tweak,$rndkey0
2458 vsrab
$tmp,$tweak,$seven # next tweak value
2459 vaddubm
$tweak,$tweak,$tweak
2460 vsldoi
$tmp,$tmp,$tmp,15
2461 le?vperm
$in2,$in2,$in2,$leperm
2462 vand
$tmp,$tmp,$eighty7
2463 vxor
$out2,$in2,$twk2
2464 vxor
$tweak,$tweak,$tmp
2466 lvx_u
$in3,$x30,$inp
2467 sub $len,$len,$taillen
2468 vxor
$twk3,$tweak,$rndkey0
2469 vsrab
$tmp,$tweak,$seven # next tweak value
2470 vaddubm
$tweak,$tweak,$tweak
2471 vsldoi
$tmp,$tmp,$tmp,15
2472 le?vperm
$in3,$in3,$in3,$leperm
2473 vand
$tmp,$tmp,$eighty7
2474 vxor
$out3,$in3,$twk3
2475 vxor
$tweak,$tweak,$tmp
2477 lvx_u
$in4,$x40,$inp
2479 vxor
$twk4,$tweak,$rndkey0
2480 vsrab
$tmp,$tweak,$seven # next tweak value
2481 vaddubm
$tweak,$tweak,$tweak
2482 vsldoi
$tmp,$tmp,$tmp,15
2483 le?vperm
$in4,$in4,$in4,$leperm
2484 vand
$tmp,$tmp,$eighty7
2485 vxor
$out4,$in4,$twk4
2486 vxor
$tweak,$tweak,$tmp
2488 lvx_u
$in5,$x50,$inp
2490 vxor
$twk5,$tweak,$rndkey0
2491 vsrab
$tmp,$tweak,$seven # next tweak value
2492 vaddubm
$tweak,$tweak,$tweak
2493 vsldoi
$tmp,$tmp,$tmp,15
2494 le?vperm
$in5,$in5,$in5,$leperm
2495 vand
$tmp,$tmp,$eighty7
2496 vxor
$out5,$in5,$twk5
2497 vxor
$tweak,$tweak,$tmp
2499 vxor v31
,v31
,$rndkey0
# Main 6x loop: AES rounds on all six blocks interleaved with the
# computation of the NEXT six tweaks (software pipelining).
2505 vcipher
$out0,$out0,v24
2506 vcipher
$out1,$out1,v24
2507 vcipher
$out2,$out2,v24
2508 vcipher
$out3,$out3,v24
2509 vcipher
$out4,$out4,v24
2510 vcipher
$out5,$out5,v24
2511 lvx v24
,$x20,$key_ # round[3]
2512 addi
$key_,$key_,0x20
2514 vcipher
$out0,$out0,v25
2515 vcipher
$out1,$out1,v25
2516 vcipher
$out2,$out2,v25
2517 vcipher
$out3,$out3,v25
2518 vcipher
$out4,$out4,v25
2519 vcipher
$out5,$out5,v25
2520 lvx v25
,$x10,$key_ # round[4]
2523 subic
$len,$len,96 # $len-=96
2524 vxor
$in0,$twk0,v31
# xor with last round key
2525 vcipher
$out0,$out0,v24
2526 vcipher
$out1,$out1,v24
2527 vsrab
$tmp,$tweak,$seven # next tweak value
2528 vxor
$twk0,$tweak,$rndkey0
2529 vaddubm
$tweak,$tweak,$tweak
2530 vcipher
$out2,$out2,v24
2531 vcipher
$out3,$out3,v24
2532 vsldoi
$tmp,$tmp,$tmp,15
2533 vcipher
$out4,$out4,v24
2534 vcipher
$out5,$out5,v24
2536 subfe
. r0
,r0
,r0
# borrow?-1:0
2537 vand
$tmp,$tmp,$eighty7
2538 vcipher
$out0,$out0,v25
2539 vcipher
$out1,$out1,v25
2540 vxor
$tweak,$tweak,$tmp
2541 vcipher
$out2,$out2,v25
2542 vcipher
$out3,$out3,v25
2544 vsrab
$tmp,$tweak,$seven # next tweak value
2545 vxor
$twk1,$tweak,$rndkey0
2546 vcipher
$out4,$out4,v25
2547 vcipher
$out5,$out5,v25
2550 vaddubm
$tweak,$tweak,$tweak
2551 vsldoi
$tmp,$tmp,$tmp,15
2552 vcipher
$out0,$out0,v26
2553 vcipher
$out1,$out1,v26
2554 vand
$tmp,$tmp,$eighty7
2555 vcipher
$out2,$out2,v26
2556 vcipher
$out3,$out3,v26
2557 vxor
$tweak,$tweak,$tmp
2558 vcipher
$out4,$out4,v26
2559 vcipher
$out5,$out5,v26
# r0 is 0 or -1 from subfe: biases $inp so the final iteration's
# prefetch loads do not run past the input buffer.
2561 add
$inp,$inp,r0
# $inp is adjusted in such
2562 # way that at exit from the
2563 # loop inX-in5 are loaded
2566 vsrab
$tmp,$tweak,$seven # next tweak value
2567 vxor
$twk2,$tweak,$rndkey0
2568 vaddubm
$tweak,$tweak,$tweak
2569 vcipher
$out0,$out0,v27
2570 vcipher
$out1,$out1,v27
2571 vsldoi
$tmp,$tmp,$tmp,15
2572 vcipher
$out2,$out2,v27
2573 vcipher
$out3,$out3,v27
2574 vand
$tmp,$tmp,$eighty7
2575 vcipher
$out4,$out4,v27
2576 vcipher
$out5,$out5,v27
2578 addi
$key_,$sp,$FRAME+15 # rewind $key_
2579 vxor
$tweak,$tweak,$tmp
2580 vcipher
$out0,$out0,v28
2581 vcipher
$out1,$out1,v28
2583 vsrab
$tmp,$tweak,$seven # next tweak value
2584 vxor
$twk3,$tweak,$rndkey0
2585 vcipher
$out2,$out2,v28
2586 vcipher
$out3,$out3,v28
2587 vaddubm
$tweak,$tweak,$tweak
2588 vsldoi
$tmp,$tmp,$tmp,15
2589 vcipher
$out4,$out4,v28
2590 vcipher
$out5,$out5,v28
2591 lvx v24
,$x00,$key_ # re-pre-load round[1]
2592 vand
$tmp,$tmp,$eighty7
2594 vcipher
$out0,$out0,v29
2595 vcipher
$out1,$out1,v29
2596 vxor
$tweak,$tweak,$tmp
2597 vcipher
$out2,$out2,v29
2598 vcipher
$out3,$out3,v29
2600 vsrab
$tmp,$tweak,$seven # next tweak value
2601 vxor
$twk4,$tweak,$rndkey0
2602 vcipher
$out4,$out4,v29
2603 vcipher
$out5,$out5,v29
2604 lvx v25
,$x10,$key_ # re-pre-load round[2]
2605 vaddubm
$tweak,$tweak,$tweak
2606 vsldoi
$tmp,$tmp,$tmp,15
2608 vcipher
$out0,$out0,v30
2609 vcipher
$out1,$out1,v30
2610 vand
$tmp,$tmp,$eighty7
2611 vcipher
$out2,$out2,v30
2612 vcipher
$out3,$out3,v30
2613 vxor
$tweak,$tweak,$tmp
2614 vcipher
$out4,$out4,v30
2615 vcipher
$out5,$out5,v30
2617 vsrab
$tmp,$tweak,$seven # next tweak value
2618 vxor
$twk5,$tweak,$rndkey0
# Final rounds: vcipherlast consumes $inX (= tweak ^ last round key)
# while the next six input blocks are loaded and byte-swapped.
2620 vcipherlast
$out0,$out0,$in0
2621 lvx_u
$in0,$x00,$inp # load next input block
2622 vaddubm
$tweak,$tweak,$tweak
2623 vsldoi
$tmp,$tmp,$tmp,15
2624 vcipherlast
$out1,$out1,$in1
2625 lvx_u
$in1,$x10,$inp
2626 vcipherlast
$out2,$out2,$in2
2627 le?vperm
$in0,$in0,$in0,$leperm
2628 lvx_u
$in2,$x20,$inp
2629 vand
$tmp,$tmp,$eighty7
2630 vcipherlast
$out3,$out3,$in3
2631 le?vperm
$in1,$in1,$in1,$leperm
2632 lvx_u
$in3,$x30,$inp
2633 vcipherlast
$out4,$out4,$in4
2634 le?vperm
$in2,$in2,$in2,$leperm
2635 lvx_u
$in4,$x40,$inp
2636 vxor
$tweak,$tweak,$tmp
2637 vcipherlast
$tmp,$out5,$in5 # last block might be needed
2639 le?vperm
$in3,$in3,$in3,$leperm
2640 lvx_u
$in5,$x50,$inp
2642 le?vperm
$in4,$in4,$in4,$leperm
2643 le?vperm
$in5,$in5,$in5,$leperm
# Store six output blocks and immediately re-prime $outX with the
# next inputs xored with their tweaks.
2645 le?vperm
$out0,$out0,$out0,$leperm
2646 le?vperm
$out1,$out1,$out1,$leperm
2647 stvx_u
$out0,$x00,$out # store output
2648 vxor
$out0,$in0,$twk0
2649 le?vperm
$out2,$out2,$out2,$leperm
2650 stvx_u
$out1,$x10,$out
2651 vxor
$out1,$in1,$twk1
2652 le?vperm
$out3,$out3,$out3,$leperm
2653 stvx_u
$out2,$x20,$out
2654 vxor
$out2,$in2,$twk2
2655 le?vperm
$out4,$out4,$out4,$leperm
2656 stvx_u
$out3,$x30,$out
2657 vxor
$out3,$in3,$twk3
2658 le?vperm
$out5,$tmp,$tmp,$leperm
2659 stvx_u
$out4,$x40,$out
2660 vxor
$out4,$in4,$twk4
2661 le?stvx_u
$out5,$x50,$out
2662 be?stvx_u
$tmp, $x50,$out
2663 vxor
$out5,$in5,$twk5
2667 beq Loop_xts_enc6x
# did $len-=96 borrow?
2669 addic
. $len,$len,0x60
# Tail dispatch: 5, 4, 3, 2 or 1 block(s) remain.
2676 blt Lxts_enc6x_three
2681 vxor
$out0,$in1,$twk0
2682 vxor
$out1,$in2,$twk1
2683 vxor
$out2,$in3,$twk2
2684 vxor
$out3,$in4,$twk3
2685 vxor
$out4,$in5,$twk4
2689 le?vperm
$out0,$out0,$out0,$leperm
2690 vmr
$twk0,$twk5 # unused tweak
2691 le?vperm
$out1,$out1,$out1,$leperm
2692 stvx_u
$out0,$x00,$out # store output
2693 le?vperm
$out2,$out2,$out2,$leperm
2694 stvx_u
$out1,$x10,$out
2695 le?vperm
$out3,$out3,$out3,$leperm
2696 stvx_u
$out2,$x20,$out
2697 vxor
$tmp,$out4,$twk5 # last block prep for stealing
2698 le?vperm
$out4,$out4,$out4,$leperm
2699 stvx_u
$out3,$x30,$out
2700 stvx_u
$out4,$x40,$out
2702 bne Lxts_enc6x_steal
# Four remaining blocks.
2707 vxor
$out0,$in2,$twk0
2708 vxor
$out1,$in3,$twk1
2709 vxor
$out2,$in4,$twk2
2710 vxor
$out3,$in5,$twk3
2711 vxor
$out4,$out4,$out4
2715 le?vperm
$out0,$out0,$out0,$leperm
2716 vmr
$twk0,$twk4 # unused tweak
2717 le?vperm
$out1,$out1,$out1,$leperm
2718 stvx_u
$out0,$x00,$out # store output
2719 le?vperm
$out2,$out2,$out2,$leperm
2720 stvx_u
$out1,$x10,$out
2721 vxor
$tmp,$out3,$twk4 # last block prep for stealing
2722 le?vperm
$out3,$out3,$out3,$leperm
2723 stvx_u
$out2,$x20,$out
2724 stvx_u
$out3,$x30,$out
2726 bne Lxts_enc6x_steal
# Three remaining blocks.
2731 vxor
$out0,$in3,$twk0
2732 vxor
$out1,$in4,$twk1
2733 vxor
$out2,$in5,$twk2
2734 vxor
$out3,$out3,$out3
2735 vxor
$out4,$out4,$out4
2739 le?vperm
$out0,$out0,$out0,$leperm
2740 vmr
$twk0,$twk3 # unused tweak
2741 le?vperm
$out1,$out1,$out1,$leperm
2742 stvx_u
$out0,$x00,$out # store output
2743 vxor
$tmp,$out2,$twk3 # last block prep for stealing
2744 le?vperm
$out2,$out2,$out2,$leperm
2745 stvx_u
$out1,$x10,$out
2746 stvx_u
$out2,$x20,$out
2748 bne Lxts_enc6x_steal
# Two remaining blocks.
2753 vxor
$out0,$in4,$twk0
2754 vxor
$out1,$in5,$twk1
2755 vxor
$out2,$out2,$out2
2756 vxor
$out3,$out3,$out3
2757 vxor
$out4,$out4,$out4
2761 le?vperm
$out0,$out0,$out0,$leperm
2762 vmr
$twk0,$twk2 # unused tweak
2763 vxor
$tmp,$out1,$twk2 # last block prep for stealing
2764 le?vperm
$out1,$out1,$out1,$leperm
2765 stvx_u
$out0,$x00,$out # store output
2766 stvx_u
$out1,$x10,$out
2768 bne Lxts_enc6x_steal
# One remaining block: single-stream AES rounds using the stacked keys.
2773 vxor
$out0,$in5,$twk0
2776 vcipher
$out0,$out0,v24
2777 lvx v24
,$x20,$key_ # round[3]
2778 addi
$key_,$key_,0x20
2780 vcipher
$out0,$out0,v25
2781 lvx v25
,$x10,$key_ # round[4]
2784 add
$inp,$inp,$taillen
2786 vcipher
$out0,$out0,v24
2789 vcipher
$out0,$out0,v25
2791 lvsr
$inpperm,0,$taillen
2792 vcipher
$out0,$out0,v26
2795 vcipher
$out0,$out0,v27
2797 addi
$key_,$sp,$FRAME+15 # rewind $key_
2798 vcipher
$out0,$out0,v28
2799 lvx v24
,$x00,$key_ # re-pre-load round[1]
2801 vcipher
$out0,$out0,v29
2802 lvx v25
,$x10,$key_ # re-pre-load round[2]
2803 vxor
$twk0,$twk0,v31
2805 le?vperm
$in0,$in0,$in0,$leperm
2806 vcipher
$out0,$out0,v30
2808 vperm
$in0,$in0,$in0,$inpperm
2809 vcipherlast
$out0,$out0,$twk0
2811 vmr
$twk0,$twk1 # unused tweak
2812 vxor
$tmp,$out0,$twk1 # last block prep for stealing
2813 le?vperm
$out0,$out0,$out0,$leperm
2814 stvx_u
$out0,$x00,$out # store output
2816 bne Lxts_enc6x_steal
# Ciphertext stealing: rotate the tail bytes into place, select
# between the partial plaintext and the previous ciphertext block.
2824 add
$inp,$inp,$taillen
2827 lvsr
$inpperm,0,$taillen # $in5 is no more
2828 le?vperm
$in0,$in0,$in0,$leperm
2829 vperm
$in0,$in0,$in0,$inpperm
2830 vxor
$tmp,$tmp,$twk0
2832 vxor
$in0,$in0,$twk0
2833 vxor
$out0,$out0,$out0
2835 vperm
$out0,$out0,$out1,$inpperm
2836 vsel
$out0,$in0,$tmp,$out0 # $tmp is last block, remember?
2841 Loop_xts_enc6x_steal
:
2844 bdnz Loop_xts_enc6x_steal
2848 b Loop_xts_enc1x
# one more time...
# Epilogue: wipe stacked round-key copies, restore VRs/GPRs, return.
2855 stvx
$seven,r10
,$sp # wipe copies of round keys
2873 lvx v20
,r10
,$sp # ABI says so
2895 $POP r26
,`$FRAME+21*16+0*$SIZE_T`($sp)
2896 $POP r27
,`$FRAME+21*16+1*$SIZE_T`($sp)
2897 $POP r28
,`$FRAME+21*16+2*$SIZE_T`($sp)
2898 $POP r29
,`$FRAME+21*16+3*$SIZE_T`($sp)
2899 $POP r30
,`$FRAME+21*16+4*$SIZE_T`($sp)
2900 $POP r31
,`$FRAME+21*16+5*$SIZE_T`($sp)
2901 addi
$sp,$sp,`$FRAME+21*16+6*$SIZE_T`
2904 .byte
0,12,0x04,1,0x80,6,6,0
# _aesp8_xts_enc5x: five-block round helper used by the tail paths;
# loops early rounds from the stack, then applies v26-v31 and the
# per-block tweaks ($twk0/in1..in4) at vcipherlast.
2909 vcipher
$out0,$out0,v24
2910 vcipher
$out1,$out1,v24
2911 vcipher
$out2,$out2,v24
2912 vcipher
$out3,$out3,v24
2913 vcipher
$out4,$out4,v24
2914 lvx v24
,$x20,$key_ # round[3]
2915 addi
$key_,$key_,0x20
2917 vcipher
$out0,$out0,v25
2918 vcipher
$out1,$out1,v25
2919 vcipher
$out2,$out2,v25
2920 vcipher
$out3,$out3,v25
2921 vcipher
$out4,$out4,v25
2922 lvx v25
,$x10,$key_ # round[4]
2923 bdnz _aesp8_xts_enc5x
2925 add
$inp,$inp,$taillen
2927 vcipher
$out0,$out0,v24
2928 vcipher
$out1,$out1,v24
2929 vcipher
$out2,$out2,v24
2930 vcipher
$out3,$out3,v24
2931 vcipher
$out4,$out4,v24
2934 vcipher
$out0,$out0,v25
2935 vcipher
$out1,$out1,v25
2936 vcipher
$out2,$out2,v25
2937 vcipher
$out3,$out3,v25
2938 vcipher
$out4,$out4,v25
2939 vxor
$twk0,$twk0,v31
2941 vcipher
$out0,$out0,v26
2942 lvsr
$inpperm,r0
,$taillen # $in5 is no more
2943 vcipher
$out1,$out1,v26
2944 vcipher
$out2,$out2,v26
2945 vcipher
$out3,$out3,v26
2946 vcipher
$out4,$out4,v26
2949 vcipher
$out0,$out0,v27
2951 vcipher
$out1,$out1,v27
2952 vcipher
$out2,$out2,v27
2953 vcipher
$out3,$out3,v27
2954 vcipher
$out4,$out4,v27
2957 addi
$key_,$sp,$FRAME+15 # rewind $key_
2958 vcipher
$out0,$out0,v28
2959 vcipher
$out1,$out1,v28
2960 vcipher
$out2,$out2,v28
2961 vcipher
$out3,$out3,v28
2962 vcipher
$out4,$out4,v28
2963 lvx v24
,$x00,$key_ # re-pre-load round[1]
2966 vcipher
$out0,$out0,v29
2967 le?vperm
$in0,$in0,$in0,$leperm
2968 vcipher
$out1,$out1,v29
2969 vcipher
$out2,$out2,v29
2970 vcipher
$out3,$out3,v29
2971 vcipher
$out4,$out4,v29
2972 lvx v25
,$x10,$key_ # re-pre-load round[2]
2975 vcipher
$out0,$out0,v30
2976 vperm
$in0,$in0,$in0,$inpperm
2977 vcipher
$out1,$out1,v30
2978 vcipher
$out2,$out2,v30
2979 vcipher
$out3,$out3,v30
2980 vcipher
$out4,$out4,v30
2982 vcipherlast
$out0,$out0,$twk0
2983 vcipherlast
$out1,$out1,$in1
2984 vcipherlast
$out2,$out2,$in2
2985 vcipherlast
$out3,$out3,$in3
2986 vcipherlast
$out4,$out4,$in4
2989 .byte
0,12,0x14,0,0,0,0,0
2992 _aesp8_xts_decrypt6x
:
2993 $STU $sp,-`($FRAME+21*16+6*$SIZE_T)`($sp)
2995 li r7
,`$FRAME+8*16+15`
2996 li r8
,`$FRAME+8*16+31`
2997 $PUSH r0
,`$FRAME+21*16+6*$SIZE_T+$LRSAVE`($sp)
2998 stvx v20
,r7
,$sp # ABI says so
3022 stw
$vrsave,`$FRAME+21*16-4`($sp) # save vrsave
3024 $PUSH r26
,`$FRAME+21*16+0*$SIZE_T`($sp)
3026 $PUSH r27
,`$FRAME+21*16+1*$SIZE_T`($sp)
3028 $PUSH r28
,`$FRAME+21*16+2*$SIZE_T`($sp)
3030 $PUSH r29
,`$FRAME+21*16+3*$SIZE_T`($sp)
3032 $PUSH r30
,`$FRAME+21*16+4*$SIZE_T`($sp)
3034 $PUSH r31
,`$FRAME+21*16+5*$SIZE_T`($sp)
3038 subi
$rounds,$rounds,3 # -4 in total
3040 lvx
$rndkey0,$x00,$key1 # load key schedule
3042 addi
$key1,$key1,0x20
3044 ?vperm
$rndkey0,$rndkey0,v30
,$keyperm
3045 addi
$key_,$sp,$FRAME+15
3049 ?vperm v24
,v30
,v31
,$keyperm
3051 addi
$key1,$key1,0x20
3052 stvx v24
,$x00,$key_ # off-load round[1]
3053 ?vperm v25
,v31
,v30
,$keyperm
3055 stvx v25
,$x10,$key_ # off-load round[2]
3056 addi
$key_,$key_,0x20
3057 bdnz Load_xts_dec_key
3060 ?vperm v24
,v30
,v31
,$keyperm
3062 stvx v24
,$x00,$key_ # off-load round[3]
3063 ?vperm v25
,v31
,v26
,$keyperm
3065 stvx v25
,$x10,$key_ # off-load round[4]
3066 addi
$key_,$sp,$FRAME+15 # rewind $key_
3067 ?vperm v26
,v26
,v27
,$keyperm
3069 ?vperm v27
,v27
,v28
,$keyperm
3071 ?vperm v28
,v28
,v29
,$keyperm
3073 ?vperm v29
,v29
,v30
,$keyperm
3074 lvx
$twk5,$x70,$key1 # borrow $twk5
3075 ?vperm v30
,v30
,v31
,$keyperm
3076 lvx v24
,$x00,$key_ # pre-load round[1]
3077 ?vperm v31
,v31
,$twk5,$keyperm
3078 lvx v25
,$x10,$key_ # pre-load round[2]
3080 vperm
$in0,$inout,$inptail,$inpperm
3081 subi
$inp,$inp,31 # undo "caller"
3082 vxor
$twk0,$tweak,$rndkey0
3083 vsrab
$tmp,$tweak,$seven # next tweak value
3084 vaddubm
$tweak,$tweak,$tweak
3085 vsldoi
$tmp,$tmp,$tmp,15
3086 vand
$tmp,$tmp,$eighty7
3087 vxor
$out0,$in0,$twk0
3088 vxor
$tweak,$tweak,$tmp
3090 lvx_u
$in1,$x10,$inp
3091 vxor
$twk1,$tweak,$rndkey0
3092 vsrab
$tmp,$tweak,$seven # next tweak value
3093 vaddubm
$tweak,$tweak,$tweak
3094 vsldoi
$tmp,$tmp,$tmp,15
3095 le?vperm
$in1,$in1,$in1,$leperm
3096 vand
$tmp,$tmp,$eighty7
3097 vxor
$out1,$in1,$twk1
3098 vxor
$tweak,$tweak,$tmp
3100 lvx_u
$in2,$x20,$inp
3101 andi
. $taillen,$len,15
3102 vxor
$twk2,$tweak,$rndkey0
3103 vsrab
$tmp,$tweak,$seven # next tweak value
3104 vaddubm
$tweak,$tweak,$tweak
3105 vsldoi
$tmp,$tmp,$tmp,15
3106 le?vperm
$in2,$in2,$in2,$leperm
3107 vand
$tmp,$tmp,$eighty7
3108 vxor
$out2,$in2,$twk2
3109 vxor
$tweak,$tweak,$tmp
3111 lvx_u
$in3,$x30,$inp
3112 sub $len,$len,$taillen
3113 vxor
$twk3,$tweak,$rndkey0
3114 vsrab
$tmp,$tweak,$seven # next tweak value
3115 vaddubm
$tweak,$tweak,$tweak
3116 vsldoi
$tmp,$tmp,$tmp,15
3117 le?vperm
$in3,$in3,$in3,$leperm
3118 vand
$tmp,$tmp,$eighty7
3119 vxor
$out3,$in3,$twk3
3120 vxor
$tweak,$tweak,$tmp
3122 lvx_u
$in4,$x40,$inp
3124 vxor
$twk4,$tweak,$rndkey0
3125 vsrab
$tmp,$tweak,$seven # next tweak value
3126 vaddubm
$tweak,$tweak,$tweak
3127 vsldoi
$tmp,$tmp,$tmp,15
3128 le?vperm
$in4,$in4,$in4,$leperm
3129 vand
$tmp,$tmp,$eighty7
3130 vxor
$out4,$in4,$twk4
3131 vxor
$tweak,$tweak,$tmp
3133 lvx_u
$in5,$x50,$inp
3135 vxor
$twk5,$tweak,$rndkey0
3136 vsrab
$tmp,$tweak,$seven # next tweak value
3137 vaddubm
$tweak,$tweak,$tweak
3138 vsldoi
$tmp,$tmp,$tmp,15
3139 le?vperm
$in5,$in5,$in5,$leperm
3140 vand
$tmp,$tmp,$eighty7
3141 vxor
$out5,$in5,$twk5
3142 vxor
$tweak,$tweak,$tmp
3144 vxor v31
,v31
,$rndkey0
3150 vncipher
$out0,$out0,v24
3151 vncipher
$out1,$out1,v24
3152 vncipher
$out2,$out2,v24
3153 vncipher
$out3,$out3,v24
3154 vncipher
$out4,$out4,v24
3155 vncipher
$out5,$out5,v24
3156 lvx v24
,$x20,$key_ # round[3]
3157 addi
$key_,$key_,0x20
3159 vncipher
$out0,$out0,v25
3160 vncipher
$out1,$out1,v25
3161 vncipher
$out2,$out2,v25
3162 vncipher
$out3,$out3,v25
3163 vncipher
$out4,$out4,v25
3164 vncipher
$out5,$out5,v25
3165 lvx v25
,$x10,$key_ # round[4]
3168 subic
$len,$len,96 # $len-=96
3169 vxor
$in0,$twk0,v31
# xor with last round key
3170 vncipher
$out0,$out0,v24
3171 vncipher
$out1,$out1,v24
3172 vsrab
$tmp,$tweak,$seven # next tweak value
3173 vxor
$twk0,$tweak,$rndkey0
3174 vaddubm
$tweak,$tweak,$tweak
3175 vncipher
$out2,$out2,v24
3176 vncipher
$out3,$out3,v24
3177 vsldoi
$tmp,$tmp,$tmp,15
3178 vncipher
$out4,$out4,v24
3179 vncipher
$out5,$out5,v24
3181 subfe
. r0
,r0
,r0
# borrow?-1:0
3182 vand
$tmp,$tmp,$eighty7
3183 vncipher
$out0,$out0,v25
3184 vncipher
$out1,$out1,v25
3185 vxor
$tweak,$tweak,$tmp
3186 vncipher
$out2,$out2,v25
3187 vncipher
$out3,$out3,v25
3189 vsrab
$tmp,$tweak,$seven # next tweak value
3190 vxor
$twk1,$tweak,$rndkey0
3191 vncipher
$out4,$out4,v25
3192 vncipher
$out5,$out5,v25
3195 vaddubm
$tweak,$tweak,$tweak
3196 vsldoi
$tmp,$tmp,$tmp,15
3197 vncipher
$out0,$out0,v26
3198 vncipher
$out1,$out1,v26
3199 vand
$tmp,$tmp,$eighty7
3200 vncipher
$out2,$out2,v26
3201 vncipher
$out3,$out3,v26
3202 vxor
$tweak,$tweak,$tmp
3203 vncipher
$out4,$out4,v26
3204 vncipher
$out5,$out5,v26
3206 add
$inp,$inp,r0
# $inp is adjusted in such
3207 # way that at exit from the
3208 # loop inX-in5 are loaded
3211 vsrab
$tmp,$tweak,$seven # next tweak value
3212 vxor
$twk2,$tweak,$rndkey0
3213 vaddubm
$tweak,$tweak,$tweak
3214 vncipher
$out0,$out0,v27
3215 vncipher
$out1,$out1,v27
3216 vsldoi
$tmp,$tmp,$tmp,15
3217 vncipher
$out2,$out2,v27
3218 vncipher
$out3,$out3,v27
3219 vand
$tmp,$tmp,$eighty7
3220 vncipher
$out4,$out4,v27
3221 vncipher
$out5,$out5,v27
3223 addi
$key_,$sp,$FRAME+15 # rewind $key_
3224 vxor
$tweak,$tweak,$tmp
3225 vncipher
$out0,$out0,v28
3226 vncipher
$out1,$out1,v28
3228 vsrab
$tmp,$tweak,$seven # next tweak value
3229 vxor
$twk3,$tweak,$rndkey0
3230 vncipher
$out2,$out2,v28
3231 vncipher
$out3,$out3,v28
3232 vaddubm
$tweak,$tweak,$tweak
3233 vsldoi
$tmp,$tmp,$tmp,15
3234 vncipher
$out4,$out4,v28
3235 vncipher
$out5,$out5,v28
3236 lvx v24
,$x00,$key_ # re-pre-load round[1]
3237 vand
$tmp,$tmp,$eighty7
3239 vncipher
$out0,$out0,v29
3240 vncipher
$out1,$out1,v29
3241 vxor
$tweak,$tweak,$tmp
3242 vncipher
$out2,$out2,v29
3243 vncipher
$out3,$out3,v29
3245 vsrab
$tmp,$tweak,$seven # next tweak value
3246 vxor
$twk4,$tweak,$rndkey0
3247 vncipher
$out4,$out4,v29
3248 vncipher
$out5,$out5,v29
3249 lvx v25
,$x10,$key_ # re-pre-load round[2]
3250 vaddubm
$tweak,$tweak,$tweak
3251 vsldoi
$tmp,$tmp,$tmp,15
3253 vncipher
$out0,$out0,v30
3254 vncipher
$out1,$out1,v30
3255 vand
$tmp,$tmp,$eighty7
3256 vncipher
$out2,$out2,v30
3257 vncipher
$out3,$out3,v30
3258 vxor
$tweak,$tweak,$tmp
3259 vncipher
$out4,$out4,v30
3260 vncipher
$out5,$out5,v30
3262 vsrab
$tmp,$tweak,$seven # next tweak value
3263 vxor
$twk5,$tweak,$rndkey0
3265 vncipherlast
$out0,$out0,$in0
3266 lvx_u
$in0,$x00,$inp # load next input block
3267 vaddubm
$tweak,$tweak,$tweak
3268 vsldoi
$tmp,$tmp,$tmp,15
3269 vncipherlast
$out1,$out1,$in1
3270 lvx_u
$in1,$x10,$inp
3271 vncipherlast
$out2,$out2,$in2
3272 le?vperm
$in0,$in0,$in0,$leperm
3273 lvx_u
$in2,$x20,$inp
3274 vand
$tmp,$tmp,$eighty7
3275 vncipherlast
$out3,$out3,$in3
3276 le?vperm
$in1,$in1,$in1,$leperm
3277 lvx_u
$in3,$x30,$inp
3278 vncipherlast
$out4,$out4,$in4
3279 le?vperm
$in2,$in2,$in2,$leperm
3280 lvx_u
$in4,$x40,$inp
3281 vxor
$tweak,$tweak,$tmp
3282 vncipherlast
$out5,$out5,$in5
3283 le?vperm
$in3,$in3,$in3,$leperm
3284 lvx_u
$in5,$x50,$inp
3286 le?vperm
$in4,$in4,$in4,$leperm
3287 le?vperm
$in5,$in5,$in5,$leperm
3289 le?vperm
$out0,$out0,$out0,$leperm
3290 le?vperm
$out1,$out1,$out1,$leperm
3291 stvx_u
$out0,$x00,$out # store output
3292 vxor
$out0,$in0,$twk0
3293 le?vperm
$out2,$out2,$out2,$leperm
3294 stvx_u
$out1,$x10,$out
3295 vxor
$out1,$in1,$twk1
3296 le?vperm
$out3,$out3,$out3,$leperm
3297 stvx_u
$out2,$x20,$out
3298 vxor
$out2,$in2,$twk2
3299 le?vperm
$out4,$out4,$out4,$leperm
3300 stvx_u
$out3,$x30,$out
3301 vxor
$out3,$in3,$twk3
3302 le?vperm
$out5,$out5,$out5,$leperm
3303 stvx_u
$out4,$x40,$out
3304 vxor
$out4,$in4,$twk4
3305 stvx_u
$out5,$x50,$out
3306 vxor
$out5,$in5,$twk5
3310 beq Loop_xts_dec6x
# did $len-=96 borrow?
3312 addic
. $len,$len,0x60
3319 blt Lxts_dec6x_three
3324 vxor
$out0,$in1,$twk0
3325 vxor
$out1,$in2,$twk1
3326 vxor
$out2,$in3,$twk2
3327 vxor
$out3,$in4,$twk3
3328 vxor
$out4,$in5,$twk4
3332 le?vperm
$out0,$out0,$out0,$leperm
3333 vmr
$twk0,$twk5 # unused tweak
3334 vxor
$twk1,$tweak,$rndkey0
3335 le?vperm
$out1,$out1,$out1,$leperm
3336 stvx_u
$out0,$x00,$out # store output
3337 vxor
$out0,$in0,$twk1
3338 le?vperm
$out2,$out2,$out2,$leperm
3339 stvx_u
$out1,$x10,$out
3340 le?vperm
$out3,$out3,$out3,$leperm
3341 stvx_u
$out2,$x20,$out
3342 le?vperm
$out4,$out4,$out4,$leperm
3343 stvx_u
$out3,$x30,$out
3344 stvx_u
$out4,$x40,$out
3346 bne Lxts_dec6x_steal
3351 vxor
$out0,$in2,$twk0
3352 vxor
$out1,$in3,$twk1
3353 vxor
$out2,$in4,$twk2
3354 vxor
$out3,$in5,$twk3
3355 vxor
$out4,$out4,$out4
3359 le?vperm
$out0,$out0,$out0,$leperm
3360 vmr
$twk0,$twk4 # unused tweak
3362 le?vperm
$out1,$out1,$out1,$leperm
3363 stvx_u
$out0,$x00,$out # store output
3364 vxor
$out0,$in0,$twk5
3365 le?vperm
$out2,$out2,$out2,$leperm
3366 stvx_u
$out1,$x10,$out
3367 le?vperm
$out3,$out3,$out3,$leperm
3368 stvx_u
$out2,$x20,$out
3369 stvx_u
$out3,$x30,$out
3371 bne Lxts_dec6x_steal
3376 vxor
$out0,$in3,$twk0
3377 vxor
$out1,$in4,$twk1
3378 vxor
$out2,$in5,$twk2
3379 vxor
$out3,$out3,$out3
3380 vxor
$out4,$out4,$out4
3384 le?vperm
$out0,$out0,$out0,$leperm
3385 vmr
$twk0,$twk3 # unused tweak
3387 le?vperm
$out1,$out1,$out1,$leperm
3388 stvx_u
$out0,$x00,$out # store output
3389 vxor
$out0,$in0,$twk4
3390 le?vperm
$out2,$out2,$out2,$leperm
3391 stvx_u
$out1,$x10,$out
3392 stvx_u
$out2,$x20,$out
3394 bne Lxts_dec6x_steal
3399 vxor
$out0,$in4,$twk0
3400 vxor
$out1,$in5,$twk1
3401 vxor
$out2,$out2,$out2
3402 vxor
$out3,$out3,$out3
3403 vxor
$out4,$out4,$out4
3407 le?vperm
$out0,$out0,$out0,$leperm
3408 vmr
$twk0,$twk2 # unused tweak
3410 le?vperm
$out1,$out1,$out1,$leperm
3411 stvx_u
$out0,$x00,$out # store output
3412 vxor
$out0,$in0,$twk3
3413 stvx_u
$out1,$x10,$out
3415 bne Lxts_dec6x_steal
3420 vxor
$out0,$in5,$twk0
3423 vncipher
$out0,$out0,v24
3424 lvx v24
,$x20,$key_ # round[3]
3425 addi
$key_,$key_,0x20
3427 vncipher
$out0,$out0,v25
3428 lvx v25
,$x10,$key_ # round[4]
3432 vncipher
$out0,$out0,v24
3436 vncipher
$out0,$out0,v25
3439 vncipher
$out0,$out0,v26
3442 vncipher
$out0,$out0,v27
3444 addi
$key_,$sp,$FRAME+15 # rewind $key_
3445 vncipher
$out0,$out0,v28
3446 lvx v24
,$x00,$key_ # re-pre-load round[1]
3448 vncipher
$out0,$out0,v29
3449 lvx v25
,$x10,$key_ # re-pre-load round[2]
3450 vxor
$twk0,$twk0,v31
3452 le?vperm
$in0,$in0,$in0,$leperm
3453 vncipher
$out0,$out0,v30
3456 vncipherlast
$out0,$out0,$twk0
3458 vmr
$twk0,$twk1 # unused tweak
3460 le?vperm
$out0,$out0,$out0,$leperm
3461 stvx_u
$out0,$x00,$out # store output
3463 vxor
$out0,$in0,$twk2
3464 bne Lxts_dec6x_steal
3473 le?vperm
$in0,$in0,$in0,$leperm
3474 vxor
$out0,$in0,$twk1
3476 vncipher
$out0,$out0,v24
3477 lvx v24
,$x20,$key_ # round[3]
3478 addi
$key_,$key_,0x20
3480 vncipher
$out0,$out0,v25
3481 lvx v25
,$x10,$key_ # round[4]
3482 bdnz Lxts_dec6x_steal
3484 add
$inp,$inp,$taillen
3485 vncipher
$out0,$out0,v24
3488 vncipher
$out0,$out0,v25
3491 vncipher
$out0,$out0,v26
3493 lvsr
$inpperm,0,$taillen # $in5 is no more
3494 vncipher
$out0,$out0,v27
3496 addi
$key_,$sp,$FRAME+15 # rewind $key_
3497 vncipher
$out0,$out0,v28
3498 lvx v24
,$x00,$key_ # re-pre-load round[1]
3500 vncipher
$out0,$out0,v29
3501 lvx v25
,$x10,$key_ # re-pre-load round[2]
3502 vxor
$twk1,$twk1,v31
3504 le?vperm
$in0,$in0,$in0,$leperm
3505 vncipher
$out0,$out0,v30
3507 vperm
$in0,$in0,$in0,$inpperm
3508 vncipherlast
$tmp,$out0,$twk1
3510 le?vperm
$out0,$tmp,$tmp,$leperm
3511 le?stvx_u
$out0,0,$out
3512 be?stvx_u
$tmp,0,$out
3514 vxor
$out0,$out0,$out0
3516 vperm
$out0,$out0,$out1,$inpperm
3517 vsel
$out0,$in0,$tmp,$out0
3518 vxor
$out0,$out0,$twk0
3522 Loop_xts_dec6x_steal
:
3525 bdnz Loop_xts_dec6x_steal
3529 b Loop_xts_dec1x
# one more time...
3536 stvx
$seven,r10
,$sp # wipe copies of round keys
3554 lvx v20
,r10
,$sp # ABI says so
3576 $POP r26
,`$FRAME+21*16+0*$SIZE_T`($sp)
3577 $POP r27
,`$FRAME+21*16+1*$SIZE_T`($sp)
3578 $POP r28
,`$FRAME+21*16+2*$SIZE_T`($sp)
3579 $POP r29
,`$FRAME+21*16+3*$SIZE_T`($sp)
3580 $POP r30
,`$FRAME+21*16+4*$SIZE_T`($sp)
3581 $POP r31
,`$FRAME+21*16+5*$SIZE_T`($sp)
3582 addi
$sp,$sp,`$FRAME+21*16+6*$SIZE_T`
3585 .byte
0,12,0x04,1,0x80,6,6,0
3590 vncipher
$out0,$out0,v24
3591 vncipher
$out1,$out1,v24
3592 vncipher
$out2,$out2,v24
3593 vncipher
$out3,$out3,v24
3594 vncipher
$out4,$out4,v24
3595 lvx v24
,$x20,$key_ # round[3]
3596 addi
$key_,$key_,0x20
3598 vncipher
$out0,$out0,v25
3599 vncipher
$out1,$out1,v25
3600 vncipher
$out2,$out2,v25
3601 vncipher
$out3,$out3,v25
3602 vncipher
$out4,$out4,v25
3603 lvx v25
,$x10,$key_ # round[4]
3604 bdnz _aesp8_xts_dec5x
3607 vncipher
$out0,$out0,v24
3608 vncipher
$out1,$out1,v24
3609 vncipher
$out2,$out2,v24
3610 vncipher
$out3,$out3,v24
3611 vncipher
$out4,$out4,v24
3615 vncipher
$out0,$out0,v25
3616 vncipher
$out1,$out1,v25
3617 vncipher
$out2,$out2,v25
3618 vncipher
$out3,$out3,v25
3619 vncipher
$out4,$out4,v25
3620 vxor
$twk0,$twk0,v31
3623 vncipher
$out0,$out0,v26
3624 vncipher
$out1,$out1,v26
3625 vncipher
$out2,$out2,v26
3626 vncipher
$out3,$out3,v26
3627 vncipher
$out4,$out4,v26
3630 vncipher
$out0,$out0,v27
3632 vncipher
$out1,$out1,v27
3633 vncipher
$out2,$out2,v27
3634 vncipher
$out3,$out3,v27
3635 vncipher
$out4,$out4,v27
3638 addi
$key_,$sp,$FRAME+15 # rewind $key_
3639 vncipher
$out0,$out0,v28
3640 vncipher
$out1,$out1,v28
3641 vncipher
$out2,$out2,v28
3642 vncipher
$out3,$out3,v28
3643 vncipher
$out4,$out4,v28
3644 lvx v24
,$x00,$key_ # re-pre-load round[1]
3647 vncipher
$out0,$out0,v29
3648 le?vperm
$in0,$in0,$in0,$leperm
3649 vncipher
$out1,$out1,v29
3650 vncipher
$out2,$out2,v29
3651 vncipher
$out3,$out3,v29
3652 vncipher
$out4,$out4,v29
3653 lvx v25
,$x10,$key_ # re-pre-load round[2]
3656 vncipher
$out0,$out0,v30
3657 vncipher
$out1,$out1,v30
3658 vncipher
$out2,$out2,v30
3659 vncipher
$out3,$out3,v30
3660 vncipher
$out4,$out4,v30
3662 vncipherlast
$out0,$out0,$twk0
3663 vncipherlast
$out1,$out1,$in1
3664 vncipherlast
$out2,$out2,$in2
3665 vncipherlast
$out3,$out3,$in3
3666 vncipherlast
$out4,$out4,$in4
3670 .byte
0,12,0x14,0,0,0,0,0
3675 foreach(split("\n",$code)) {
3676 s/\`([^\`]*)\`/eval($1)/geo;
3678 # constants table endian-specific conversion
3679 if ($consts && m/\.(long|byte)\s+(.+)\s+(\?[a-z]*)$/o) {
3683 # convert to endian-agnostic format
3685 foreach (split(/,\s*/,$2)) {
3686 my $l = /^0/?
oct:int;
3687 push @bytes,($l>>24)&0xff,($l>>16)&0xff,($l>>8)&0xff,$l&0xff;
3690 @bytes = map(/^0/?
oct:int,split(/,\s*/,$2));
3693 # little-endian conversion
3694 if ($flavour =~ /le$/o) {
3695 SWITCH
: for($conv) {
3696 /\?inv/ && do { @bytes=map($_^0xf,@bytes); last; };
3697 /\?rev/ && do { @bytes=reverse(@bytes); last; };
3702 print ".byte\t",join(',',map (sprintf("0x%02x",$_),@bytes)),"\n";
3705 $consts=0 if (m/Lconsts:/o); # end of table
3707 # instructions prefixed with '?' are endian-specific and need
3708 # to be adjusted accordingly...
3709 if ($flavour =~ /le$/o) { # little-endian
3714 s/\?(vperm\s+v[0-9]+,\s*)(v[0-9]+,\s*)(v[0-9]+,\s*)(v[0-9]+)/$1$3$2$4/o or
3715 s/\?(vsldoi\s+v[0-9]+,\s*)(v[0-9]+,)\s*(v[0-9]+,\s*)([0-9]+)/$1$3$2 16-$4/o or
3716 s/\?(vspltw\s+v[0-9]+,\s*)(v[0-9]+,)\s*([0-9])/$1$2 3-$3/o;
3717 } else { # big-endian