]> git.ipfire.org Git - thirdparty/openssl.git/blob - crypto/sha/asm/sha1-alpha.pl
Following the license change, modify the boilerplates in crypto/sha/
[thirdparty/openssl.git] / crypto / sha / asm / sha1-alpha.pl
1 #! /usr/bin/env perl
2 # Copyright 2009-2016 The OpenSSL Project Authors. All Rights Reserved.
3 #
4 # Licensed under the Apache License 2.0 (the "License"). You may not use
5 # this file except in compliance with the License. You can obtain a copy
6 # in the file LICENSE in the source distribution or at
7 # https://www.openssl.org/source/license.html
8
9
10 # ====================================================================
11 # Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
12 # project. The module is, however, dual licensed under OpenSSL and
13 # CRYPTOGAMS licenses depending on where you obtain it. For further
14 # details see http://www.openssl.org/~appro/cryptogams/.
15 # ====================================================================
16
17 # SHA1 block procedure for Alpha.
18
19 # On 21264 performance is 33% better than code generated by vendor
20 # compiler, and 75% better than GCC [3.4], and in absolute terms is
21 # 8.7 cycles per processed byte. Implementation features vectorized
22 # byte swap, but not Xupdate.
23
# Register allocation.
#
# @X is the sixteen-word message schedule, kept in integer registers
# $0..$15.  Everything else is referred to by symbolic register name; the
# numeric equivalents noted by the original author are kept in the comments.
@X = map { "\$$_" } (0 .. 15);

($ctx, $inp, $num) = ("a0", "a1", "a2");                # a0 is $16
($A, $B, $C, $D, $E) = ("a3", "a4", "a5", "t8", "t9");  # a4 is $20
@V = ($A, $B, $C, $D, $E);      # rotated by the driver after every round
($t0, $t1, $t2, $t3) = ("t10", "t11", "ra", "t12");     # t10 is $24
$K = "AT";                                              # $28
39
# Append the assembly for one of rounds 0..19 to $code.
#
#   $i              round number
#   $a,$b,$c,$d,$e  names of the registers holding the five working
#                   variables, in the order that applies to this round
#
# Rounds 0..14 consume message words straight from the input block.  Words
# are fetched two at a time with ldq_u/extql/extqh, so every even round also
# byte-swaps the freshly fetched pair and splits it into @X[$i] and
# @X[$i+1]; the following odd round only has to use its word.  From round 15
# on the message word for round $j = $i+1 is computed in the same round
# ("forward Xupdate").  The round function is ($b & $c) | ($d & ~$b).
sub BODY_00_19 {
    my ($i,$a,$b,$c,$d,$e)=@_;
    my $j=$i+1;

    if ($i<15) {
        if ($i%2==0) {
            if ($i==0) {
                # Start fetching words 0 and 1 of the block.
                $code.=<<___;
ldq_u @X[0],0+0($inp)
ldq_u @X[1],0+7($inp)
___
            }
            if ($i<14) {
                # Issue the loads for the pair used two rounds from now.
                $code.=<<___;
ldq_u @X[$i+2],($i+2)*4+0($inp)
ldq_u @X[$i+3],($i+2)*4+7($inp)
___
            }
            # Even round: assemble, byte-swap and split the current pair.
            $code.=<<___;
extql @X[$i],$inp,@X[$i]
extqh @X[$i+1],$inp,@X[$i+1]

or @X[$i+1],@X[$i],@X[$i] # pair of 32-bit values are fetched

srl @X[$i],24,$t0 # vectorized byte swap
srl @X[$i],8,$t2

sll @X[$i],8,$t3
sll @X[$i],24,@X[$i]
zapnot $t0,0x11,$t0
zapnot $t2,0x22,$t2

zapnot @X[$i],0x88,@X[$i]
or $t0,$t2,$t0
zapnot $t3,0x44,$t3
sll $a,5,$t1

or @X[$i],$t0,@X[$i]
addl $K,$e,$e
and $b,$c,$t2
zapnot $a,0xf,$a

or @X[$i],$t3,@X[$i]
srl $a,27,$t0
bic $d,$b,$t3
sll $b,30,$b

extll @X[$i],4,@X[$i+1] # extract upper half
or $t2,$t3,$t2
addl @X[$i],$e,$e

addl $t1,$e,$e
srl $b,32,$t3
zapnot @X[$i],0xf,@X[$i]

addl $t0,$e,$e
addl $t2,$e,$e
or $t3,$b,$b
___
        } else {
            # Odd round: the word was already prepared by the previous round.
            $code.=<<___;
sll $a,5,$t1
addl $K,$e,$e
and $b,$c,$t2
zapnot $a,0xf,$a

srl $a,27,$t0
addl @X[$i%16],$e,$e
bic $d,$b,$t3
sll $b,30,$b

or $t2,$t3,$t2
addl $t1,$e,$e
srl $b,32,$t3
zapnot @X[$i],0xf,@X[$i]

addl $t0,$e,$e
addl $t2,$e,$e
or $t3,$b,$b
___
        }
    } else {
        $code.=<<___;   # with forward Xupdate
sll $a,5,$t1
addl $K,$e,$e
and $b,$c,$t2
xor @X[($j+2)%16],@X[$j%16],@X[$j%16]

zapnot $a,0xf,$a
addl @X[$i%16],$e,$e
bic $d,$b,$t3
xor @X[($j+8)%16],@X[$j%16],@X[$j%16]

srl $a,27,$t0
addl $t1,$e,$e
or $t2,$t3,$t2
xor @X[($j+13)%16],@X[$j%16],@X[$j%16]

sll $b,30,$b
addl $t0,$e,$e
srl @X[$j%16],31,$t1

addl $t2,$e,$e
srl $b,32,$t3
addl @X[$j%16],@X[$j%16],@X[$j%16]

or $t3,$b,$b
zapnot @X[$i%16],0xf,@X[$i%16]
or $t1,@X[$j%16],@X[$j%16]
___
    }
}
141
# Append the assembly for one round that uses the $b ^ $c ^ $d round
# function to $code.  The driver calls this for rounds 20..39 and again for
# rounds 60..79.
#
#   $i              round number
#   $a,$b,$c,$d,$e  names of the registers holding the five working
#                   variables, in the order that applies to this round
#
# Every round but the last also computes the message word for round
# $j = $i+1 ("forward Xupdate").  Round 79 has no word left to compute and
# uses the free slots to load the five saved state words from $ctx into
# @X[0..4] instead.
sub BODY_20_39 {
    my ($i,$a,$b,$c,$d,$e)=@_;
    my $j=$i+1;

    if ($i<79) {
        $code.=<<___;   # with forward Xupdate
sll $a,5,$t1
addl $K,$e,$e
zapnot $a,0xf,$a
xor @X[($j+2)%16],@X[$j%16],@X[$j%16]

sll $b,30,$t3
addl $t1,$e,$e
xor $b,$c,$t2
xor @X[($j+8)%16],@X[$j%16],@X[$j%16]

srl $b,2,$b
addl @X[$i%16],$e,$e
xor $d,$t2,$t2
xor @X[($j+13)%16],@X[$j%16],@X[$j%16]

srl @X[$j%16],31,$t1
addl $t2,$e,$e
srl $a,27,$t0
addl @X[$j%16],@X[$j%16],@X[$j%16]

or $t3,$b,$b
addl $t0,$e,$e
or $t1,@X[$j%16],@X[$j%16]
___
        if ($i<77) {
            # Clear the upper half of the word just consumed; this is
            # skipped for the final rounds.
            $code.=<<___;
zapnot @X[$i%16],0xf,@X[$i%16]
___
        }
    } elsif ($i==79) {
        $code.=<<___;   # with context fetch
sll $a,5,$t1
addl $K,$e,$e
zapnot $a,0xf,$a
ldl @X[0],0($ctx)

sll $b,30,$t3
addl $t1,$e,$e
xor $b,$c,$t2
ldl @X[1],4($ctx)

srl $b,2,$b
addl @X[$i%16],$e,$e
xor $d,$t2,$t2
ldl @X[2],8($ctx)

srl $a,27,$t0
addl $t2,$e,$e
ldl @X[3],12($ctx)

or $t3,$b,$b
addl $t0,$e,$e
ldl @X[4],16($ctx)
___
    }
}
198
# Append the assembly for one of rounds 40..59 to $code.
#
#   $i              round number
#   $a,$b,$c,$d,$e  names of the registers holding the five working
#                   variables, in the order that applies to this round
#
# The round function is ($b & $c) | ($b & $d) | ($c & $d).  As in the
# other Xupdate rounds, the message word for round $j = $i+1 is computed
# alongside, and the word consumed by this round has its upper half cleared
# at the end.
sub BODY_40_59 {
    my $i=shift;
    my ($a,$b,$c,$d,$e)=@_;
    my $j=$i+1;

    $code.=<<___;       # with forward Xupdate
sll $a,5,$t1
addl $K,$e,$e
zapnot $a,0xf,$a
xor @X[($j+2)%16],@X[$j%16],@X[$j%16]

srl $a,27,$t0
and $b,$c,$t2
and $b,$d,$t3
xor @X[($j+8)%16],@X[$j%16],@X[$j%16]

sll $b,30,$b
addl $t1,$e,$e
xor @X[($j+13)%16],@X[$j%16],@X[$j%16]

srl @X[$j%16],31,$t1
addl $t0,$e,$e
or $t2,$t3,$t2
and $c,$d,$t3

or $t2,$t3,$t2
srl $b,32,$t3
addl @X[$i%16],$e,$e
addl @X[$j%16],@X[$j%16],@X[$j%16]

or $t3,$b,$b
addl $t2,$e,$e
or $t1,@X[$j%16],@X[$j%16]
zapnot @X[$i%16],0xf,@X[$i%16]
___
}
233
# Function prologue: include the register-name header, reserve a 64-byte
# frame, save ra, s0-s5 and fp, load the five state words from $ctx and turn
# $num (a count of 64-byte blocks) into the end-of-input pointer.  The
# per-block loop starts at .Lloop.
#
# Each round constant is built with an ldah/lda pair:
#   (23170<<16)+31129 = 0x5A827999, (28378<<16)-5215  = 0x6ED9EBA1,
#   (-28900<<16)-17188 -> 0x8F1BBCDC, (-13725<<16)-15914 -> 0xCA62C1D6
# (the last two taken modulo 2^32).
$code=<<___;
#ifdef __linux__
#include <asm/regdef.h>
#else
#include <asm.h>
#include <regdef.h>
#endif

.text

.set noat
.set noreorder
.globl sha1_block_data_order
.align 5
.ent sha1_block_data_order
sha1_block_data_order:
lda sp,-64(sp)
stq ra,0(sp)
stq s0,8(sp)
stq s1,16(sp)
stq s2,24(sp)
stq s3,32(sp)
stq s4,40(sp)
stq s5,48(sp)
stq fp,56(sp)
.mask 0x0400fe00,-64
.frame sp,64,ra
.prologue 0

ldl $A,0($ctx)
ldl $B,4($ctx)
sll $num,6,$num
ldl $C,8($ctx)
ldl $D,12($ctx)
ldl $E,16($ctx)
addq $inp,$num,$num

.Lloop:
.set noreorder
ldah $K,23170(zero)
zapnot $B,0xf,$B
lda $K,31129($K) # K_00_19
___

# Unroll all eighty rounds.  After each round the register list is rotated
# right by one so that the next round sees the variables in their new roles.
$i=0;
while ($i<20) {
    BODY_00_19($i,@V);
    unshift(@V,pop(@V));
    $i++;
}

$code.=<<___;
ldah $K,28378(zero)
lda $K,-5215($K) # K_20_39
___
while ($i<40) {
    BODY_20_39($i,@V);
    unshift(@V,pop(@V));
    $i++;
}

$code.=<<___;
ldah $K,-28900(zero)
lda $K,-17188($K) # K_40_59
___
while ($i<60) {
    BODY_40_59($i,@V);
    unshift(@V,pop(@V));
    $i++;
}

$code.=<<___;
ldah $K,-13725(zero)
lda $K,-15914($K) # K_60_79
___
# Rounds 60..79 reuse the 20..39 body with a different constant.
while ($i<80) {
    BODY_20_39($i,@V);
    unshift(@V,pop(@V));
    $i++;
}
296
# Loop tail and function epilogue: add the state words that round 79 loaded
# into @X[0..4] to the working variables, store the result back to $ctx,
# advance $inp by one 64-byte block and loop while it is below the end
# pointer kept in $num; then restore the saved registers and return.
$code.=<<___;
addl @X[0],$A,$A
addl @X[1],$B,$B
addl @X[2],$C,$C
addl @X[3],$D,$D
addl @X[4],$E,$E
stl $A,0($ctx)
stl $B,4($ctx)
addq $inp,64,$inp
stl $C,8($ctx)
stl $D,12($ctx)
stl $E,16($ctx)
cmpult $inp,$num,$t1
bne $t1,.Lloop

.set noreorder
ldq ra,0(sp)
ldq s0,8(sp)
ldq s1,16(sp)
ldq s2,24(sp)
ldq s3,32(sp)
ldq s4,40(sp)
ldq s5,48(sp)
ldq fp,56(sp)
lda sp,64(sp)
ret (ra)
.end sha1_block_data_order
.ascii "SHA1 block transform for Alpha, CRYPTOGAMS by <appro\@openssl.org>"
.align 2
___

# Write the generated assembly.  The last command-line argument, when
# present, names the output file; otherwise the inherited STDOUT is used.
# Every step is checked so that an unwritable path or a short write fails
# the build instead of leaving a missing or truncated assembly file behind.
$output=pop;
if ($output) {
    # Three-argument open: the file name is never parsed for mode characters.
    open(STDOUT,'>',$output) or die "can't open $output: $!";
}
print $code or die "error writing output: $!";
# Buffered write errors only surface when the handle is closed.
close STDOUT or die "error closing STDOUT: $!";