]>
git.ipfire.org Git - thirdparty/openssl.git/blob - crypto/rc4/asm/rc4-parisc.pl
2 # Copyright 2009-2018 The OpenSSL Project Authors. All Rights Reserved.
4 # Licensed under the OpenSSL license (the "License"). You may not use
5 # this file except in compliance with the License. You can obtain a copy
6 # in the file LICENSE in the source distribution or at
7 # https://www.openssl.org/source/license.html
10 # ====================================================================
11 # Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
12 # project. The module is, however, dual licensed under OpenSSL and
13 # CRYPTOGAMS licenses depending on where you obtain it. For further
14 # details see http://www.openssl.org/~appro/cryptogams/.
15 # ====================================================================
21 # Performance is 33% better than gcc 3.2 generated code on PA-7100LC.
22 # For reference, the [4x] unrolled loop is >40% faster than the folded one.
23 # It's possible to unroll the loop 8 times on PA-RISC 2.0, but the improvement
24 # is believed to be insufficient to justify the effort...
26 # Special thanks to polarhome.com for providing an HP-UX account.
28 $0 =~ m/(.*[\/\\])[^\
/\\]+$/; $dir=$1;
32 open STDOUT
,">$output";
34 if ($flavour =~ /64/) {
54 $FRAME=4*$SIZE_T+$FRAME_MARKER; # 4 saved regs + frame marker
55 # [+ argument transfer]
56 $SZ=1; # defaults to RC4_CHAR
57 if (open CONF
,"<${dir}../../opensslconf.h") {
59 if (m/#\s*define\s+RC4_INT\s+(.*)/) {
60 $SZ = ($1=~/char$/) ?
1 : 4;
67 if ($SZ==1) { # RC4_CHAR
72 } else { # RC4_INT (~5% faster than RC4_CHAR on PA-7100LC)
97 sub unrolledloopbody
{
98 for ($i=0;$i<4;$i++) {
101 `sprintf("$LDX %$TY(%$key),%$dat1") if ($i>0)`
102 and $mask,$XX[1],$XX[1]
105 $LDX $XX[1]($key),$TX[1]
108 comclr
,<> $XX[1],$YY,%r0 ; conditional
109 copy
$TX[0],$TX[1] ; move
110 `sprintf("%sdep %$dat1,%d,8,%$acc",$i==1?"z":"",8*($i-1)+7) if ($i>0)`
117 push(@TX,shift(@TX)); push(@XX,shift(@XX)); # "rotate" registers
121 my ($label,$count)=@_;
131 ldbx
$inp($out),$dat1
133 and $mask,$XX[0],$XX[0]
135 $LDX $XX[0]($key),$TX[0]
140 addib
,<> -1,$count,$label ; $count is always small
148 .SUBSPA \
$CODE\
$,QUAD
=0,ALIGN
=8,ACCESS
=0x2C,CODE_ONLY
150 .EXPORT RC4
,ENTRY
,ARGW0
=GR
,ARGW1
=GR
,ARGW2
=GR
,ARGW3
=GR
153 .CALLINFO FRAME
=`$FRAME-4*$SIZE_T`,NO_CALLS
,SAVE_RP
,ENTRY_GR
=6
155 $PUSH %r2,-$SAVED_RP(%sp) ; standard prologue
156 $PUSHMA %r3,$FRAME(%sp)
157 $PUSH %r4,`-$FRAME+1*$SIZE_T`(%sp)
158 $PUSH %r5,`-$FRAME+2*$SIZE_T`(%sp)
159 $PUSH %r6,`-$FRAME+3*$SIZE_T`(%sp)
161 cmpib
,*= 0,$len,L\
$abort
162 sub $inp,$out,$inp ; distance between
$inp and $out
164 $LD `0*$SZ`($key),$XX[0]
165 $LD `1*$SZ`($key),$YY
166 ldo
`2*$SZ`($key),$key
171 ldo
1($XX[0]),$XX[0] ; warm up
loop
172 and $mask,$XX[0],$XX[0]
173 $LDX $XX[0]($key),$TX[0]
175 cmpib
,*>>= 6,$len,L\
$oop1 ; is
$len large enough to bother?
178 and,<> $out,$dat0,$rem ; is
$out aligned?
183 &foldedloop
("L\$alignout",$rem); # process till $out is aligned
186 L\
$alignedout ; $len is at least
4 here
187 and,<> $inp,$dat0,$acc ; is
$inp aligned?
189 sub $inp,$acc,$rem ; align
$inp
191 sh3addl
$acc,%r0,$acc
193 mtctl
$acc,%cr11 ; load
%sar with vshd align factor
194 ldwx
$rem($out),$dat0
201 ldwx
$rem($out),$dat1
203 or $ix,$acc,$acc ; last piece
, no need to dep
204 vshd
$dat0,$dat1,$iy ; align data
208 cmpib
,*<< 3,$len,L\
$oop4misalignedinp
210 cmpib
,*= 0,$len,L\
$done
221 ldwx
$inp($out),$dat0
223 or $ix,$acc,$acc ; last piece
, no need to dep
226 cmpib
,*<< 3,$len,L\
$oop4
228 cmpib
,*= 0,$len,L\
$done
231 &foldedloop
("L\$oop1",$len);
234 $POP `-$FRAME-$SAVED_RP`(%sp),%r2
235 ldo
-1($XX[0]),$XX[0] ; chill out
loop
237 and $mask,$XX[0],$XX[0]
239 $ST $XX[0],`-2*$SZ`($key)
240 $ST $YY,`-1*$SZ`($key)
241 $POP `-$FRAME+1*$SIZE_T`(%sp),%r4
242 $POP `-$FRAME+2*$SIZE_T`(%sp),%r5
243 $POP `-$FRAME+3*$SIZE_T`(%sp),%r6
247 $POPMB -$FRAME(%sp),%r3
253 .EXPORT RC4_set_key
,ENTRY
,ARGW0
=GR
,ARGW1
=GR
,ARGW2
=GR
259 $ST %r0,`0*$SZ`($key)
260 $ST %r0,`1*$SZ`($key)
261 ldo
`2*$SZ`($key),$key
266 bb
,>= @XX[0],`31-8`,L\
$1st ; @XX[0]<256
269 ldo
`-256*$SZ`($key),$key ; rewind
$key
270 addl
$len,$inp,$inp ; $inp to point at the end
271 sub %r0,$len,%r23 ; inverse
index
277 $LDX @XX[0]($key),@TX[0]
278 ldbx
%r23($inp),@TX[1]
279 addi
,nuv
1,%r23,%r23 ; increment
and conditional
280 sub %r0,$len,%r23 ; inverse
index
281 addl
@TX[0],@XX[1],@XX[1]
282 addl
@TX[1],@XX[1],@XX[1]
283 and $mask,@XX[1],@XX[1]
285 $LDX @XX[1]($key),@TX[1]
289 bb
,>= @XX[0],`31-8`,L\
$2nd ; @XX[0]<256
297 .EXPORT RC4_options
,ENTRY
309 ldo L\
$opts-L\
$pic(%r28),%r28
313 .STRINGZ
"rc4(4x,`$SZ==1?"char
":"int"`)"
314 .STRINGZ
"RC4 for PA-RISC, CRYPTOGAMS by <appro\@openssl.org>"
317 if (`$ENV{CC} -Wa,-v -c -o /dev/null -x assembler /dev/null 2>&1`
318 =~ /GNU assembler/) {
322 foreach(split("\n",$code)) {
323 s/\`([^\`]*)\`/eval $1/ge;
325 s/(\.LEVEL\s+2\.0)W/$1w/ if ($gnuas && $SIZE_T==8);
326 s/\.SPACE\s+\$TEXT\$/.text/ if ($gnuas && $SIZE_T==8);
327 s/\.SUBSPA.*// if ($gnuas && $SIZE_T==8);
328 s/cmpib,\*/comib,/ if ($SIZE_T==4);
329 s/\bbv\b/bve/ if ($SIZE_T==8);