]> git.ipfire.org Git - thirdparty/openssl.git/blob - crypto/rc4/asm/rc4-parisc.pl
b9927dad9f73834b2907e121392c9b4d94d5c58b
[thirdparty/openssl.git] / crypto / rc4 / asm / rc4-parisc.pl
1 #! /usr/bin/env perl
2 # Copyright 2009-2016 The OpenSSL Project Authors. All Rights Reserved.
3 #
4 # Licensed under the OpenSSL license (the "License"). You may not use
5 # this file except in compliance with the License. You can obtain a copy
6 # in the file LICENSE in the source distribution or at
7 # https://www.openssl.org/source/license.html
8
9
10 # ====================================================================
11 # Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
12 # project. The module is, however, dual licensed under OpenSSL and
13 # CRYPTOGAMS licenses depending on where you obtain it. For further
14 # details see http://www.openssl.org/~appro/cryptogams/.
15 # ====================================================================
16
17 # RC4 for PA-RISC.
18
19 # June 2009.
20 #
21 # Performance is 33% better than gcc 3.2 generated code on PA-7100LC.
22 # For reference, [4x] unrolled loop is >40% faster than folded one.
23 # It's possible to unroll loop 8 times on PA-RISC 2.0, but improvement
24 # is believed to be not sufficient to justify the effort...
25 #
26 # Special thanks to polarhome.com for providing HP-UX account.
27
# Directory part of this script's path, including the trailing separator.
# It is used further down to locate opensslconf.h relative to the script.
# Empty when $0 carries no directory component.
$dir = ($0 =~ m/(.*[\/\\])[^\/\\]+$/) ? $1 : "";

# Command line: <flavour> <output file>.
$flavour = shift;
$output  = shift;

# Send everything printed from here on to the output file.  With no output
# argument (or "-") STDOUT is left as inherited.  A failed open is fatal:
# otherwise the assembly would silently go to the inherited STDOUT and the
# requested file would never be written.
if (defined($output) && $output ne "" && $output ne "-") {
    open(STDOUT, ">", $output) or die "can't open $output: $!";
}
33
# ABI-dependent parameters, chosen by whether the flavour string contains
# "64":
#   $LEVEL         operand of the .LEVEL directive
#   $SIZE_T        bytes per saved-register slot in the stack frame
#   $FRAME_MARKER  bytes added to the frame on top of the saved registers
#   $SAVED_RP      offset below %sp at which the prologue stores %r2
#   $PUSH/$POP     plain store/load mnemonics for a register slot
#   $PUSHMA/$POPMB store/load mnemonics used with +/-$FRAME on %sp
($LEVEL, $SIZE_T, $FRAME_MARKER, $SAVED_RP, $PUSH, $PUSHMA, $POP, $POPMB) =
    ($flavour =~ /64/)
    ? ("2.0W", 8, 80, 16, "std", "std,ma", "ldd", "ldd,mb")
    : ("1.0",  4, 48, 20, "stw", "stwm",   "ldw", "ldwm");
53
$FRAME = 4*$SIZE_T + $FRAME_MARKER;   # 4 saved regs + frame marker
                                      #                 [+ argument transfer]

# $SZ is the size in bytes of one key-schedule element as seen by the
# generated loads and stores.  It stays 1 (RC4_CHAR) unless opensslconf.h,
# looked up two directories above this script, defines RC4_INT as something
# that does not end in "char".  A missing header is not an error.
$SZ = 1;                              # defaults to RC4_CHAR
if (open(my $conf, "<", "${dir}../../opensslconf.h")) {
    while (my $line = <$conf>) {
        if ($line =~ m/#\s*define\s+RC4_INT\s+(.*)/) {
            # Allow trailing blanks or a CR after the type name, so that
            # "unsigned char" is not mistaken for an int type.
            $SZ = ($1 =~ /char\s*$/) ? 1 : 4;
            last;
        }
    }
    close($conf);
}
66
# Mnemonics for accessing one key-schedule element of $SZ bytes:
#   $LD   load at a displacement        $LDX  indexed load
#   $MKX  form the element's address    $ST   store
($LD, $LDX, $MKX, $ST) = ($SZ == 1)
    ? ("ldb", "ldbx",   "addl",    "stb")     # RC4_CHAR
    : ("ldw", "ldwx,s", "sh2addl", "stw");    # RC4_INT (~5% faster than RC4_CHAR on PA-7100LC)
78
# Register names substituted into the assembly templates below.

# The four registers the RC4 code treats as key, length, input and output.
($key, $len, $inp, $out) = ("%r26", "%r25", "%r24", "%r23");

# Two-element sets that unrolledloopbody() rotates between rounds:
# @XX holds the x index (current, next), @TX the values loaded at those
# indices.
@XX = ("%r19", "%r20");
@TX = ("%r21", "%r22");

# The y index and the value loaded at it.
($YY, $TY) = ("%r28", "%r29");

# Scratch registers: $acc collects output bytes, $ix/$iy receive element
# addresses, $dat0/$dat1 hold input data, $rem is a small counter/offset
# and $mask is loaded with 0xff.
($acc, $ix, $iy)      = ("%r1", "%r2", "%r3");
($dat0, $dat1, $rem)  = ("%r4", "%r5", "%r6");
$mask                 = "%r31";
96
# unrolledloopbody(): append four copies of the RC4 round template to $code
# (the "[4x] unrolled loop" of the header comment).  Takes no arguments and
# returns nothing useful; its effect is the text added to $code.
#
# The template is an interpolating here-document, so the register names and
# the current value of $i are substituted when the text is generated.  The
# two backquoted sprintf() expressions stay in the text and are evaluated
# later by the output loop at the end of the script; their "if ($i>0)"
# guard makes them produce an instruction only in copies 1..3:
#   - an extra $LDX into $dat1, indexed by $TY, and
#   - "zdep" (copy 1) or "dep" (copies 2 and 3) of $dat1 into $acc at bit
#     position 8*($i-1)+7, width 8.
# "%$TY" and friends expand to "%%rNN"; sprintf() then turns "%%" into the
# single "%" the assembler expects.
#
# After each copy @TX and @XX are rotated so the next copy swaps the roles
# of the two registers in each set; four rotations of a two-element array
# leave both arrays in their original order.  $i is a package variable (no
# "my") and is left at 4 when the sub returns.
97 sub unrolledloopbody {
98 for ($i=0;$i<4;$i++) {
99 $code.=<<___;
100 ldo 1($XX[0]),$XX[1]
101 `sprintf("$LDX %$TY(%$key),%$dat1") if ($i>0)`
102 and $mask,$XX[1],$XX[1]
103 $LDX $YY($key),$TY
104 $MKX $YY,$key,$ix
105 $LDX $XX[1]($key),$TX[1]
106 $MKX $XX[0],$key,$iy
107 $ST $TX[0],0($ix)
108 comclr,<> $XX[1],$YY,%r0 ; conditional
109 copy $TX[0],$TX[1] ; move
110 `sprintf("%sdep %$dat1,%d,8,%$acc",$i==1?"z":"",8*($i-1)+7) if ($i>0)`
111 $ST $TY,0($iy)
112 addl $TX[0],$TY,$TY
113 addl $TX[1],$YY,$YY
114 and $mask,$TY,$TY
115 and $mask,$YY,$YY
116 ___
117 push(@TX,shift(@TX)); push(@XX,shift(@XX)); # "rotate" registers
118 } }
119
# foldedloop(LABEL, COUNT): append to $code a loop that handles one byte per
# pass.
#   LABEL  text emitted as the loop's label and used as the branch target
#   COUNT  register name that "addib,<> -1" counts down on every pass
#
# Each pass stores $TX[0] at the $YY slot and $TY at the $XX[0] slot of the
# key schedule, advances $XX[0] (masked with $mask), loads one input byte
# with "ldbx $inp($out)", XORs it with the table entry loaded into $acc and
# stores the result at -1($out) after $out has been incremented.  It then
# reloads $TX[0] for the new $XX[0] and adds it into $YY.  Only $XX[0] and
# $TX[0] are used, and the register arrays are not rotated.
120 sub foldedloop {
121 my ($label,$count)=@_;
122 $code.=<<___;
123 $label
124 $MKX $YY,$key,$iy
125 $LDX $YY($key),$TY
126 $MKX $XX[0],$key,$ix
127 $ST $TX[0],0($iy)
128 ldo 1($XX[0]),$XX[0]
129 $ST $TY,0($ix)
130 addl $TX[0],$TY,$TY
131 ldbx $inp($out),$dat1
132 and $mask,$TY,$TY
133 and $mask,$XX[0],$XX[0]
134 $LDX $TY($key),$acc
135 $LDX $XX[0]($key),$TX[0]
136 ldo 1($out),$out
137 xor $dat1,$acc,$acc
138 addl $TX[0],$YY,$YY
139 stb $acc,-1($out)
140 addib,<> -1,$count,$label ; $count is always small
141 and $mask,$YY,$YY
142 ___
143 }
144
# Start the generated assembly ($code is assigned here, appended to from now
# on): section directives, the exported RC4 entry point and its prologue,
# which saves %r2..%r6.  The code then
#   - branches to L$abort when $len is 0,
#   - turns $inp into the distance between $inp and $out,
#   - loads the two leading elements of the key structure into $XX[0] and
#     $YY and steps $key past them,
#   - sets $mask to 0xff and $dat0 to 3,
#   - "warms up": advances $XX[0], loads the element at it into $TX[0] and
#     adds that into $YY,
#   - sends short inputs (the cmpib against 6) straight to the byte loop
#     L$oop1.
# Otherwise, if $out & 3 is non-zero, $rem becomes 4 - ($out & 3), is taken
# off $len, and the folded loop emitted right after this text processes
# that many bytes so that $out ends up aligned.
#
# Backquoted arithmetic (frame offsets, $SZ multiples) is evaluated by the
# output loop at the end of the script, which also rewrites "cmpib,*" to
# "comib," for the 32-bit flavour.
145 $code=<<___;
146 .LEVEL $LEVEL
147 .SPACE \$TEXT\$
148 .SUBSPA \$CODE\$,QUAD=0,ALIGN=8,ACCESS=0x2C,CODE_ONLY
149
150 .EXPORT RC4,ENTRY,ARGW0=GR,ARGW1=GR,ARGW2=GR,ARGW3=GR
151 RC4
152 .PROC
153 .CALLINFO FRAME=`$FRAME-4*$SIZE_T`,NO_CALLS,SAVE_RP,ENTRY_GR=6
154 .ENTRY
155 $PUSH %r2,-$SAVED_RP(%sp) ; standard prologue
156 $PUSHMA %r3,$FRAME(%sp)
157 $PUSH %r4,`-$FRAME+1*$SIZE_T`(%sp)
158 $PUSH %r5,`-$FRAME+2*$SIZE_T`(%sp)
159 $PUSH %r6,`-$FRAME+3*$SIZE_T`(%sp)
160
161 cmpib,*= 0,$len,L\$abort
162 sub $inp,$out,$inp ; distance between $inp and $out
163
164 $LD `0*$SZ`($key),$XX[0]
165 $LD `1*$SZ`($key),$YY
166 ldo `2*$SZ`($key),$key
167
168 ldi 0xff,$mask
169 ldi 3,$dat0
170
171 ldo 1($XX[0]),$XX[0] ; warm up loop
172 and $mask,$XX[0],$XX[0]
173 $LDX $XX[0]($key),$TX[0]
174 addl $TX[0],$YY,$YY
175 cmpib,*>>= 6,$len,L\$oop1 ; is $len large enough to bother?
176 and $mask,$YY,$YY
177
178 and,<> $out,$dat0,$rem ; is $out aligned?
179 b L\$alignedout
180 subi 4,$rem,$rem
181 sub $len,$rem,$len
182 ___
183 &foldedloop("L\$alignout",$rem); # process till $out is aligned
184
# $out is aligned from here on.  Test the low two bits of $inp (which holds
# the distance between input and output): when they are zero, control goes
# to the aligned word loop L$oop4.  Otherwise $rem becomes that distance
# rounded down to a word boundary, %sar (%cr11) is loaded with
# 32 - 8*($inp & 3) for vshd, the first input word is preloaded into $dat0,
# and the misaligned-input word loop begins; its body is the four unrolled
# rounds appended right after this text.
185 $code.=<<___;
186 L\$alignedout ; $len is at least 4 here
187 and,<> $inp,$dat0,$acc ; is $inp aligned?
188 b L\$oop4
189 sub $inp,$acc,$rem ; align $inp
190
191 sh3addl $acc,%r0,$acc
192 subi 32,$acc,$acc
193 mtctl $acc,%cr11 ; load %sar with vshd align factor
194 ldwx $rem($out),$dat0
195 ldo 4($rem),$rem
196 L\$oop4misalignedinp
197 ___
198 &unrolledloopbody();
# Tail of the misaligned-input word loop: load the last table entry and OR
# it into $acc, fetch the next input word into $dat1, combine $dat0 and
# $dat1 with vshd, XOR the result with $acc and store one word at $out.
# $len drops by 4 per pass and the loop repeats on the "cmpib,*<< 3"
# comparison.  Afterwards a zero $len goes to L$done and anything else to
# the byte loop L$oop1.
# The text ends with the (8-byte aligned) label of the aligned word loop
# L$oop4, whose body is the second set of unrolled rounds appended below.
199 $code.=<<___;
200 $LDX $TY($key),$ix
201 ldwx $rem($out),$dat1
202 ldo -4($len),$len
203 or $ix,$acc,$acc ; last piece, no need to dep
204 vshd $dat0,$dat1,$iy ; align data
205 copy $dat1,$dat0
206 xor $iy,$acc,$acc
207 stw $acc,0($out)
208 cmpib,*<< 3,$len,L\$oop4misalignedinp
209 ldo 4($out),$out
210 cmpib,*= 0,$len,L\$done
211 nop
212 b L\$oop1
213 nop
214
215 .ALIGN 8
216 L\$oop4
217 ___
218 &unrolledloopbody();
# Tail of the aligned word loop L$oop4: load the last table entry and OR it
# into $acc, read one input word with "ldwx $inp($out)", XOR and store it
# at $out.  $len drops by 4 per pass and the loop repeats on the
# "cmpib,*<< 3" comparison.  A zero $len then goes to L$done; otherwise
# execution falls into the byte loop L$oop1 emitted by foldedloop(), which
# counts $len down.
219 $code.=<<___;
220 $LDX $TY($key),$ix
221 ldwx $inp($out),$dat0
222 ldo -4($len),$len
223 or $ix,$acc,$acc ; last piece, no need to dep
224 xor $dat0,$acc,$acc
225 stw $acc,0($out)
226 cmpib,*<< 3,$len,L\$oop4
227 ldo 4($out),$out
228 cmpib,*= 0,$len,L\$done
229 nop
230 ___
231 &foldedloop("L\$oop1",$len);
# Epilogue of RC4.  L$done reloads the saved %r2, reverses the warm-up
# ($XX[0] minus one, $YY minus $TX[0], both masked with $mask), stores the
# two values back into the two leading key-structure elements ($key was
# advanced past them on entry, hence the negative offsets) and restores
# %r4..%r6.  L$abort, also reached directly when $len was 0, returns
# through %r2 while $POPMB restores %r3 and releases the frame.
232 $code.=<<___;
233 L\$done
234 $POP `-$FRAME-$SAVED_RP`(%sp),%r2
235 ldo -1($XX[0]),$XX[0] ; chill out loop
236 sub $YY,$TX[0],$YY
237 and $mask,$XX[0],$XX[0]
238 and $mask,$YY,$YY
239 $ST $XX[0],`-2*$SZ`($key)
240 $ST $YY,`-1*$SZ`($key)
241 $POP `-$FRAME+1*$SIZE_T`(%sp),%r4
242 $POP `-$FRAME+2*$SIZE_T`(%sp),%r5
243 $POP `-$FRAME+3*$SIZE_T`(%sp),%r6
244 L\$abort
245 bv (%r2)
246 .EXIT
247 $POPMB -$FRAME(%sp),%r3
248 .PROCEND
249 ___
250
# Append the two remaining entry points.
#
# RC4_set_key reuses $key, $len and $inp as its three arguments.  It zeroes
# the two leading elements of the key structure, then L$1st stores the
# values 0..255 into the 256 following elements.  After rewinding $key,
# L$2nd walks those elements once more: it adds the current element and one
# key byte into a running index (masked with $mask) and exchanges the
# current element with the one at that index.  Key bytes are read with
# "ldbx %r23($inp)", where $inp points at the end of the key data and %r23
# is a negative index that is reset to -$len by the instruction following
# "addi,nuv".
# The template writes one-element slices (@XX[0], @TX[1], ...); inside the
# here-document they interpolate to the same text as $XX[0], $TX[1], ...
#
# RC4_options returns in %r28 the address of L$opts, computed relative to
# L$pic, where two NUL-terminated strings are emitted: "rc4(4x,char)" or
# "rc4(4x,int)" depending on $SZ, followed by the CRYPTOGAMS banner.
251 $code.=<<___;
252
253 .EXPORT RC4_set_key,ENTRY,ARGW0=GR,ARGW1=GR,ARGW2=GR
254 .ALIGN 8
255 RC4_set_key
256 .PROC
257 .CALLINFO NO_CALLS
258 .ENTRY
259 $ST %r0,`0*$SZ`($key)
260 $ST %r0,`1*$SZ`($key)
261 ldo `2*$SZ`($key),$key
262 copy %r0,@XX[0]
263 L\$1st
264 $ST @XX[0],0($key)
265 ldo 1(@XX[0]),@XX[0]
266 bb,>= @XX[0],`31-8`,L\$1st ; @XX[0]<256
267 ldo $SZ($key),$key
268
269 ldo `-256*$SZ`($key),$key ; rewind $key
270 addl $len,$inp,$inp ; $inp to point at the end
271 sub %r0,$len,%r23 ; inverse index
272 copy %r0,@XX[0]
273 copy %r0,@XX[1]
274 ldi 0xff,$mask
275
276 L\$2nd
277 $LDX @XX[0]($key),@TX[0]
278 ldbx %r23($inp),@TX[1]
279 addi,nuv 1,%r23,%r23 ; increment and conditional
280 sub %r0,$len,%r23 ; inverse index
281 addl @TX[0],@XX[1],@XX[1]
282 addl @TX[1],@XX[1],@XX[1]
283 and $mask,@XX[1],@XX[1]
284 $MKX @XX[0],$key,$TY
285 $LDX @XX[1]($key),@TX[1]
286 $MKX @XX[1],$key,$YY
287 ldo 1(@XX[0]),@XX[0]
288 $ST @TX[0],0($YY)
289 bb,>= @XX[0],`31-8`,L\$2nd ; @XX[0]<256
290 $ST @TX[1],0($TY)
291
292 bv,n (%r2)
293 .EXIT
294 nop
295 .PROCEND
296
297 .EXPORT RC4_options,ENTRY
298 .ALIGN 8
299 RC4_options
300 .PROC
301 .CALLINFO NO_CALLS
302 .ENTRY
303 blr %r0,%r28
304 ldi 3,%r1
305 L\$pic
306 andcm %r28,%r1,%r28
307 bv (%r2)
308 .EXIT
309 ldo L\$opts-L\$pic(%r28),%r28
310 .PROCEND
311 .ALIGN 8
312 L\$opts
313 .STRINGZ "rc4(4x,`$SZ==1?"char":"int"`)"
314 .STRINGZ "RC4 for PA-RISC, CRYPTOGAMS by <appro\@openssl.org>"
315 ___
316
# Ask the compiler named by $ENV{CC} to run its assembler verbosely on an
# empty input and look for the GNU banner in the combined output.  $gnuas
# is set to 1 on a match and left untouched otherwise.
my $as_banner = `$ENV{CC} -Wa,-v -c -o /dev/null -x assembler /dev/null 2>&1`;
$gnuas = 1 if ($as_banner =~ /GNU assembler/);
321
# Post-process the accumulated assembly line by line and print it.
foreach (split("\n",$code)) {
    # Replace every `...` span with the value of the Perl expression in it.
    s/\`([^\`]*)\`/eval $1/ge;

    # GNU assembler with the 64-bit flavour: lower-case the ".LEVEL 2.0W"
    # suffix, use ".text" for the $TEXT$ space and drop .SUBSPA lines.
    s/(\.LEVEL\s+2\.0)W/$1w/	if ($gnuas && $SIZE_T==8);
    s/\.SPACE\s+\$TEXT\$/.text/	if ($gnuas && $SIZE_T==8);
    s/\.SUBSPA.*//		if ($gnuas && $SIZE_T==8);

    # 32-bit flavour: "cmpib,*" in the templates becomes "comib,".
    s/cmpib,\*/comib,/		if ($SIZE_T==4);
    # 64-bit flavour: "bv" becomes "bve".
    s/\bbv\b/bve/		if ($SIZE_T==8);

    print $_,"\n";
}

# Output is buffered, so a write failure (full disk, closed pipe, ...) may
# only be reported here.  Fail loudly rather than leave a truncated file.
close STDOUT or die "error closing STDOUT: $!";
331 print $_,"\n";
332 }
333 close STDOUT;