]>
Commit | Line | Data |
---|---|---|
6aa36e8e RS |
1 | #! /usr/bin/env perl |
2 | # Copyright 2009-2016 The OpenSSL Project Authors. All Rights Reserved. | |
3 | # | |
4 | # Licensed under the OpenSSL license (the "License"). You may not use | |
5 | # this file except in compliance with the License. You can obtain a copy | |
6 | # in the file LICENSE in the source distribution or at | |
7 | # https://www.openssl.org/source/license.html | |
8 | ||
a83f83aa AP |
9 | |
10 | # ==================================================================== | |
e3713c36 | 11 | # Written by Andy Polyakov <appro@openssl.org> for the OpenSSL |
a83f83aa AP |
12 | # project. The module is, however, dual licensed under OpenSSL and |
13 | # CRYPTOGAMS licenses depending on where you obtain it. For further | |
14 | # details see http://www.openssl.org/~appro/cryptogams/. | |
15 | # ==================================================================== | |
16 | ||
17 | # SHA256/512 block procedure for PA-RISC. | |
18 | ||
19 | # June 2009. | |
20 | # | |
21 | # SHA256 performance is >75% better than gcc 3.2 generated code on | |
22 | # PA-7100LC. Compared to code generated by vendor compiler this | |
23 | # implementation is almost 70% faster in 64-bit build, but delivers | |
24 | # virtually same performance in 32-bit build on PA-8600. | |
25 | # | |
26 | # SHA512 performance is >2.9x better than gcc 3.2 generated code on | |
27 | # PA-7100LC, PA-RISC 1.1 processor. The implementation then detects if the | |
28 | # code is executed on PA-RISC 2.0 processor and switches to 64-bit | |
d900a015 | 29 | # code path delivering adequate performance even in "blended" 32-bit |
a83f83aa AP |
30 | # build. Though 64-bit code is not any faster than code generated by |
31 | # vendor compiler on PA-8600... | |
32 | # | |
33 | # Special thanks to polarhome.com for providing HP-UX account. | |
34 | ||
# Command line: <flavour> <output file>.
$flavour = shift;
$output  = shift;

# Redirect the generated assembly to the requested file.  The three-argument
# form keeps characters in $output from being interpreted as part of the
# open mode, and a failed open is fatal instead of silently discarding all
# output (re-opening STDOUT closes it first, so an unchecked failure would
# leave nothing to print to).  Without an output argument STDOUT is left
# untouched.
if (defined $output && length $output) {
    open(STDOUT, '>', $output) or die "can't open $output: $!";
}
38 | ||
# Calling-convention parameters.  A flavour containing "64" selects the
# 64-bit (.LEVEL 2.0W) build; anything else selects the 32-bit build.
($LEVEL, $SIZE_T, $FRAME_MARKER, $SAVED_RP, $PUSH, $PUSHMA, $POP, $POPMB) =
    ($flavour =~ /64/)
    ? ("2.0W", 8, 80, 16, "std", "std,ma", "ldd", "ldd,mb")
    : ("1.0",  4, 48, 20, "stw", "stwm",   "ldw", "ldwm");

# Hash parameters, chosen by whether the output file name contains "512".
# $LAST10BITS is the low 10 bits of the final word of the constant table;
# the generated code compares against it to detect the end of the table.
if ($output =~ /512/) {
    ($func, $SZ, $rounds, $LAST10BITS) =
        ("sha512_block_data_order", 8, 80, 0x017);
    @Sigma0 = (28, 34, 39);
    @Sigma1 = (14, 18, 41);
    @sigma0 = ( 1,  8,  7);
    @sigma1 = (19, 61,  6);
    ($LD, $LDM, $ST) = ("ldd", "ldd,ma", "std");
}
else {
    ($func, $SZ, $rounds, $LAST10BITS) =
        ("sha256_block_data_order", 4, 64, 0x0f2);
    @Sigma0 = ( 2, 13, 22);
    @Sigma1 = ( 6, 11, 25);
    @sigma0 = ( 7, 18,  3);
    @sigma1 = (17, 19, 10);
    ($LD, $LDM, $ST) = ("ldw", "ldwm", "stw");
}

# Stack frame layout.
$XOFF   = 16*$SZ + 32;                         # local variables
$FRAME  = 16*$SIZE_T + $FRAME_MARKER + $XOFF;  # 16 saved regs + frame marker
                                               # [+ argument transfer] + locals
$XOFF  += $FRAME_MARKER;                       # distance between %sp and
                                               # local variables

# Incoming arguments; their registers are reused as scratch once the
# arguments have been saved on the stack.
($ctx, $inp, $num) = ("%r26", "%r25", "%r24"); # zapped by $a0, $a1, $t0
($a0,  $a1,  $t0)  = ("%r26", "%r25", "%r24");
($t1,  $Tbl)       = ("%r29", "%r31");

# Working variables A..H and the 16-entry message schedule (plus $inp as a
# 17th slot used when realigning unaligned input).
($A, $B, $C, $D, $E, $F, $G, $H) =
    ("%r17", "%r18", "%r19", "%r20", "%r21", "%r22", "%r23", "%r28");
@V = ($A, $B, $C, $D, $E, $F, $G, $H);

@X = (map("%r$_", 1 .. 16), $inp);
105 | ||
# ROUND_00_15($i, $a, $b, $c, $d, $e, $f, $g, $h)
#
# Appends the assembly template for one compression round to $code.
# $i is the round index; $a..$h are the register names currently playing
# the roles of the eight working variables.
#
# The emitted code accumulates into $h: $t1, X[$i%16], Ch(e,f,g) and
# Sigma1(e); adds that running $h into $d; then adds Sigma0(a) and
# Maj(a,b,c) into $h.  For $i<15 it also emits an $LDM load from
# $SZ($Tbl) into $t1.
# NOTE(review): $t1 is added to $h at the top of the template, so it
# presumably holds the table constant for this round on entry - confirm.
#
# `_ror` is a made-up mnemonic and the backquoted expressions are Perl;
# both are resolved by the post-processing loop at the end of the file.
106 | sub ROUND_00_15 { | |
107 | my ($i,$a,$b,$c,$d,$e,$f,$g,$h)=@_; | |
108 | $code.=<<___; | |
109 | _ror $e,$Sigma1[0],$a0 | |
110 | and $f,$e,$t0 | |
111 | _ror $e,$Sigma1[1],$a1 | |
112 | addl $t1,$h,$h | |
113 | andcm $g,$e,$t1 | |
114 | xor $a1,$a0,$a0 | |
115 | _ror $a1,`$Sigma1[2]-$Sigma1[1]`,$a1 | |
116 | or $t0,$t1,$t1 ; Ch(e,f,g) | |
117 | addl @X[$i%16],$h,$h | |
118 | xor $a0,$a1,$a1 ; Sigma1(e) | |
119 | addl $t1,$h,$h | |
120 | _ror $a,$Sigma0[0],$a0 | |
121 | addl $a1,$h,$h | |
122 | ||
123 | _ror $a,$Sigma0[1],$a1 | |
124 | and $a,$b,$t0 | |
125 | and $a,$c,$t1 | |
126 | xor $a1,$a0,$a0 | |
127 | _ror $a1,`$Sigma0[2]-$Sigma0[1]`,$a1 | |
128 | xor $t1,$t0,$t0 | |
129 | and $b,$c,$t1 | |
130 | xor $a0,$a1,$a1 ; Sigma0(a) | |
131 | addl $h,$d,$d | |
132 | xor $t1,$t0,$t0 ; Maj(a,b,c) | |
133 | `"$LDM $SZ($Tbl),$t1" if ($i<15)` | |
134 | addl $a1,$h,$h | |
135 | addl $t0,$h,$h | |
136 | ||
137 | ___ | |
138 | } | |
139 | ||
# ROUND_16_xx($i, $a, $b, $c, $d, $e, $f, $g, $h)
#
# Appends the template for a round that first updates the message
# schedule and then performs a regular round.  $i is rebased by 16 so
# that it indexes the 16-entry @X window.
#
# The emitted code adds X[(i+9)%16], sigma0(X[(i+1)%16]) and
# sigma1(X[(i+14)%16]) into X[i] and loads the next table entry into $t1
# with $LDM.  For the last slot of the window (rebased $i == 15) it also
# emits a comparison of the low 10 bits of $t1 against $LAST10BITS
# guarding an increment of $Tbl by 1, which the template's own comment
# describes as signalling the end of the table.
#
# Finally ROUND_00_15 is invoked with the original round number ($i+16),
# which is never below 15, so that call emits no table load of its own.
140 | sub ROUND_16_xx { | |
141 | my ($i,$a,$b,$c,$d,$e,$f,$g,$h)=@_; | |
142 | $i-=16; | |
143 | $code.=<<___; | |
144 | _ror @X[($i+1)%16],$sigma0[0],$a0 | |
145 | _ror @X[($i+1)%16],$sigma0[1],$a1 | |
146 | addl @X[($i+9)%16],@X[$i],@X[$i] | |
147 | _ror @X[($i+14)%16],$sigma1[0],$t0 | |
148 | _ror @X[($i+14)%16],$sigma1[1],$t1 | |
149 | xor $a1,$a0,$a0 | |
150 | _shr @X[($i+1)%16],$sigma0[2],$a1 | |
151 | xor $t1,$t0,$t0 | |
152 | _shr @X[($i+14)%16],$sigma1[2],$t1 | |
153 | xor $a1,$a0,$a0 ; sigma0(X[(i+1)&0x0f]) | |
154 | xor $t1,$t0,$t0 ; sigma1(X[(i+14)&0x0f]) | |
155 | $LDM $SZ($Tbl),$t1 | |
156 | addl $a0,@X[$i],@X[$i] | |
157 | addl $t0,@X[$i],@X[$i] | |
158 | ___ | |
159 | $code.=<<___ if ($i==15); | |
160 | extru $t1,31,10,$a1 | |
161 | comiclr,<> $LAST10BITS,$a1,%r0 | |
162 | ldo 1($Tbl),$Tbl ; signal end of $Tbl | |
163 | ___ | |
164 | &ROUND_00_15($i+16,$a,$b,$c,$d,$e,$f,$g,$h); | |
165 | } | |
166 | ||
167 | $code=<<___; | |
168 | .LEVEL $LEVEL | |
169 | .SPACE \$TEXT\$ | |
170 | .SUBSPA \$CODE\$,QUAD=0,ALIGN=8,ACCESS=0x2C,CODE_ONLY | |
171 | ||
172 | .ALIGN 64 | |
173 | L\$table | |
174 | ___ | |
175 | $code.=<<___ if ($SZ==8); | |
176 | .WORD 0x428a2f98,0xd728ae22,0x71374491,0x23ef65cd | |
177 | .WORD 0xb5c0fbcf,0xec4d3b2f,0xe9b5dba5,0x8189dbbc | |
178 | .WORD 0x3956c25b,0xf348b538,0x59f111f1,0xb605d019 | |
179 | .WORD 0x923f82a4,0xaf194f9b,0xab1c5ed5,0xda6d8118 | |
180 | .WORD 0xd807aa98,0xa3030242,0x12835b01,0x45706fbe | |
181 | .WORD 0x243185be,0x4ee4b28c,0x550c7dc3,0xd5ffb4e2 | |
182 | .WORD 0x72be5d74,0xf27b896f,0x80deb1fe,0x3b1696b1 | |
183 | .WORD 0x9bdc06a7,0x25c71235,0xc19bf174,0xcf692694 | |
184 | .WORD 0xe49b69c1,0x9ef14ad2,0xefbe4786,0x384f25e3 | |
185 | .WORD 0x0fc19dc6,0x8b8cd5b5,0x240ca1cc,0x77ac9c65 | |
186 | .WORD 0x2de92c6f,0x592b0275,0x4a7484aa,0x6ea6e483 | |
187 | .WORD 0x5cb0a9dc,0xbd41fbd4,0x76f988da,0x831153b5 | |
188 | .WORD 0x983e5152,0xee66dfab,0xa831c66d,0x2db43210 | |
189 | .WORD 0xb00327c8,0x98fb213f,0xbf597fc7,0xbeef0ee4 | |
190 | .WORD 0xc6e00bf3,0x3da88fc2,0xd5a79147,0x930aa725 | |
191 | .WORD 0x06ca6351,0xe003826f,0x14292967,0x0a0e6e70 | |
192 | .WORD 0x27b70a85,0x46d22ffc,0x2e1b2138,0x5c26c926 | |
193 | .WORD 0x4d2c6dfc,0x5ac42aed,0x53380d13,0x9d95b3df | |
194 | .WORD 0x650a7354,0x8baf63de,0x766a0abb,0x3c77b2a8 | |
195 | .WORD 0x81c2c92e,0x47edaee6,0x92722c85,0x1482353b | |
196 | .WORD 0xa2bfe8a1,0x4cf10364,0xa81a664b,0xbc423001 | |
197 | .WORD 0xc24b8b70,0xd0f89791,0xc76c51a3,0x0654be30 | |
198 | .WORD 0xd192e819,0xd6ef5218,0xd6990624,0x5565a910 | |
199 | .WORD 0xf40e3585,0x5771202a,0x106aa070,0x32bbd1b8 | |
200 | .WORD 0x19a4c116,0xb8d2d0c8,0x1e376c08,0x5141ab53 | |
201 | .WORD 0x2748774c,0xdf8eeb99,0x34b0bcb5,0xe19b48a8 | |
202 | .WORD 0x391c0cb3,0xc5c95a63,0x4ed8aa4a,0xe3418acb | |
203 | .WORD 0x5b9cca4f,0x7763e373,0x682e6ff3,0xd6b2b8a3 | |
204 | .WORD 0x748f82ee,0x5defb2fc,0x78a5636f,0x43172f60 | |
205 | .WORD 0x84c87814,0xa1f0ab72,0x8cc70208,0x1a6439ec | |
206 | .WORD 0x90befffa,0x23631e28,0xa4506ceb,0xde82bde9 | |
207 | .WORD 0xbef9a3f7,0xb2c67915,0xc67178f2,0xe372532b | |
208 | .WORD 0xca273ece,0xea26619c,0xd186b8c7,0x21c0c207 | |
209 | .WORD 0xeada7dd6,0xcde0eb1e,0xf57d4f7f,0xee6ed178 | |
210 | .WORD 0x06f067aa,0x72176fba,0x0a637dc5,0xa2c898a6 | |
211 | .WORD 0x113f9804,0xbef90dae,0x1b710b35,0x131c471b | |
212 | .WORD 0x28db77f5,0x23047d84,0x32caab7b,0x40c72493 | |
213 | .WORD 0x3c9ebe0a,0x15c9bebc,0x431d67c4,0x9c100d4c | |
214 | .WORD 0x4cc5d4be,0xcb3e42b6,0x597f299c,0xfc657e2a | |
215 | .WORD 0x5fcb6fab,0x3ad6faec,0x6c44198c,0x4a475817 | |
216 | ___ | |
217 | $code.=<<___ if ($SZ==4); | |
218 | .WORD 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5 | |
219 | .WORD 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5 | |
220 | .WORD 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3 | |
221 | .WORD 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174 | |
222 | .WORD 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc | |
223 | .WORD 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da | |
224 | .WORD 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7 | |
225 | .WORD 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967 | |
226 | .WORD 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13 | |
227 | .WORD 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85 | |
228 | .WORD 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3 | |
229 | .WORD 0xd192e819,0xd6990624,0xf40e3585,0x106aa070 | |
230 | .WORD 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5 | |
231 | .WORD 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3 | |
232 | .WORD 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208 | |
233 | .WORD 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2 | |
234 | ___ | |
235 | $code.=<<___; | |
236 | ||
237 | .EXPORT $func,ENTRY,ARGW0=GR,ARGW1=GR,ARGW2=GR | |
238 | .ALIGN 64 | |
239 | $func | |
240 | .PROC | |
241 | .CALLINFO FRAME=`$FRAME-16*$SIZE_T`,NO_CALLS,SAVE_RP,ENTRY_GR=18 | |
242 | .ENTRY | |
243 | $PUSH %r2,-$SAVED_RP(%sp) ; standard prologue | |
244 | $PUSHMA %r3,$FRAME(%sp) | |
245 | $PUSH %r4,`-$FRAME+1*$SIZE_T`(%sp) | |
246 | $PUSH %r5,`-$FRAME+2*$SIZE_T`(%sp) | |
247 | $PUSH %r6,`-$FRAME+3*$SIZE_T`(%sp) | |
248 | $PUSH %r7,`-$FRAME+4*$SIZE_T`(%sp) | |
249 | $PUSH %r8,`-$FRAME+5*$SIZE_T`(%sp) | |
250 | $PUSH %r9,`-$FRAME+6*$SIZE_T`(%sp) | |
251 | $PUSH %r10,`-$FRAME+7*$SIZE_T`(%sp) | |
252 | $PUSH %r11,`-$FRAME+8*$SIZE_T`(%sp) | |
253 | $PUSH %r12,`-$FRAME+9*$SIZE_T`(%sp) | |
254 | $PUSH %r13,`-$FRAME+10*$SIZE_T`(%sp) | |
255 | $PUSH %r14,`-$FRAME+11*$SIZE_T`(%sp) | |
256 | $PUSH %r15,`-$FRAME+12*$SIZE_T`(%sp) | |
257 | $PUSH %r16,`-$FRAME+13*$SIZE_T`(%sp) | |
258 | $PUSH %r17,`-$FRAME+14*$SIZE_T`(%sp) | |
259 | $PUSH %r18,`-$FRAME+15*$SIZE_T`(%sp) | |
260 | ||
261 | _shl $num,`log(16*$SZ)/log(2)`,$num | |
262 | addl $inp,$num,$num ; $num to point at the end of $inp | |
263 | ||
264 | $PUSH $num,`-$FRAME_MARKER-4*$SIZE_T`(%sp) ; save arguments | |
265 | $PUSH $inp,`-$FRAME_MARKER-3*$SIZE_T`(%sp) | |
266 | $PUSH $ctx,`-$FRAME_MARKER-2*$SIZE_T`(%sp) | |
267 | ||
268 | blr %r0,$Tbl | |
269 | ldi 3,$t1 | |
270 | L\$pic | |
271 | andcm $Tbl,$t1,$Tbl ; wipe privilege level | |
272 | ldo L\$table-L\$pic($Tbl),$Tbl | |
273 | ___ | |
274 | $code.=<<___ if ($SZ==8 && $SIZE_T==4); | |
275 | ldi 31,$t1 | |
276 | mtctl $t1,%cr11 | |
277 | extrd,u,*= $t1,%sar,1,$t1 ; executes on PA-RISC 1.0 | |
278 | b L\$parisc1 | |
279 | nop | |
280 | ___ | |
281 | $code.=<<___; | |
282 | $LD `0*$SZ`($ctx),$A ; load context | |
283 | $LD `1*$SZ`($ctx),$B | |
284 | $LD `2*$SZ`($ctx),$C | |
285 | $LD `3*$SZ`($ctx),$D | |
286 | $LD `4*$SZ`($ctx),$E | |
287 | $LD `5*$SZ`($ctx),$F | |
288 | $LD `6*$SZ`($ctx),$G | |
289 | $LD `7*$SZ`($ctx),$H | |
290 | ||
291 | extru $inp,31,`log($SZ)/log(2)`,$t0 | |
292 | sh3addl $t0,%r0,$t0 | |
293 | subi `8*$SZ`,$t0,$t0 | |
294 | mtctl $t0,%cr11 ; load %sar with align factor | |
295 | ||
296 | L\$oop | |
297 | ldi `$SZ-1`,$t0 | |
298 | $LDM $SZ($Tbl),$t1 | |
299 | andcm $inp,$t0,$t0 ; align $inp | |
300 | ___ | |
301 | for ($i=0;$i<15;$i++) { # load input block | |
302 | $code.="\t$LD `$SZ*$i`($t0),@X[$i]\n"; } | |
303 | $code.=<<___; | |
304 | cmpb,*= $inp,$t0,L\$aligned | |
305 | $LD `$SZ*15`($t0),@X[15] | |
306 | $LD `$SZ*16`($t0),@X[16] | |
307 | ___ | |
308 | for ($i=0;$i<16;$i++) { # align data | |
309 | $code.="\t_align @X[$i],@X[$i+1],@X[$i]\n"; } | |
310 | $code.=<<___; | |
311 | L\$aligned | |
312 | nop ; otherwise /usr/ccs/bin/as is confused by below .WORD | |
313 | ___ | |
314 | ||
315 | for($i=0;$i<16;$i++) { &ROUND_00_15($i,@V); unshift(@V,pop(@V)); } | |
316 | $code.=<<___; | |
317 | L\$rounds | |
318 | nop ; otherwise /usr/ccs/bin/as is confused by below .WORD | |
319 | ___ | |
320 | for(;$i<32;$i++) { &ROUND_16_xx($i,@V); unshift(@V,pop(@V)); } | |
321 | $code.=<<___; | |
322 | bb,>= $Tbl,31,L\$rounds ; end of $Tbl signalled? | |
323 | nop | |
324 | ||
325 | $POP `-$FRAME_MARKER-2*$SIZE_T`(%sp),$ctx ; restore arguments | |
326 | $POP `-$FRAME_MARKER-3*$SIZE_T`(%sp),$inp | |
327 | $POP `-$FRAME_MARKER-4*$SIZE_T`(%sp),$num | |
328 | ldo `-$rounds*$SZ-1`($Tbl),$Tbl ; rewind $Tbl | |
329 | ||
330 | $LD `0*$SZ`($ctx),@X[0] ; load context | |
331 | $LD `1*$SZ`($ctx),@X[1] | |
332 | $LD `2*$SZ`($ctx),@X[2] | |
333 | $LD `3*$SZ`($ctx),@X[3] | |
334 | $LD `4*$SZ`($ctx),@X[4] | |
335 | $LD `5*$SZ`($ctx),@X[5] | |
336 | addl @X[0],$A,$A | |
337 | $LD `6*$SZ`($ctx),@X[6] | |
338 | addl @X[1],$B,$B | |
339 | $LD `7*$SZ`($ctx),@X[7] | |
340 | ldo `16*$SZ`($inp),$inp ; advance $inp | |
341 | ||
342 | $ST $A,`0*$SZ`($ctx) ; save context | |
343 | addl @X[2],$C,$C | |
344 | $ST $B,`1*$SZ`($ctx) | |
345 | addl @X[3],$D,$D | |
346 | $ST $C,`2*$SZ`($ctx) | |
347 | addl @X[4],$E,$E | |
348 | $ST $D,`3*$SZ`($ctx) | |
349 | addl @X[5],$F,$F | |
350 | $ST $E,`4*$SZ`($ctx) | |
351 | addl @X[6],$G,$G | |
352 | $ST $F,`5*$SZ`($ctx) | |
353 | addl @X[7],$H,$H | |
354 | $ST $G,`6*$SZ`($ctx) | |
355 | $ST $H,`7*$SZ`($ctx) | |
356 | ||
357 | cmpb,*<>,n $inp,$num,L\$oop | |
358 | $PUSH $inp,`-$FRAME_MARKER-3*$SIZE_T`(%sp) ; save $inp | |
359 | ___ | |
360 | if ($SZ==8 && $SIZE_T==4) # SHA512 for 32-bit PA-RISC 1.0 | |
361 | {{ | |
362 | $code.=<<___; | |
363 | b L\$done | |
364 | nop | |
365 | ||
366 | .ALIGN 64 | |
367 | L\$parisc1 | |
368 | ___ | |
369 | ||
370 | @V=( $Ahi, $Alo, $Bhi, $Blo, $Chi, $Clo, $Dhi, $Dlo, | |
609b0852 | 371 | $Ehi, $Elo, $Fhi, $Flo, $Ghi, $Glo, $Hhi, $Hlo) = |
a83f83aa AP |
372 | ( "%r1", "%r2", "%r3", "%r4", "%r5", "%r6", "%r7", "%r8", |
373 | "%r9","%r10","%r11","%r12","%r13","%r14","%r15","%r16"); | |
374 | $a0 ="%r17"; | |
375 | $a1 ="%r18"; | |
376 | $a2 ="%r19"; | |
377 | $a3 ="%r20"; | |
378 | $t0 ="%r21"; | |
379 | $t1 ="%r22"; | |
380 | $t2 ="%r28"; | |
381 | $t3 ="%r29"; | |
382 | $Tbl="%r31"; | |
383 | ||
384 | @X=("%r23","%r24","%r25","%r26"); # zaps $num,$inp,$ctx | |
385 | ||
# ROUND_00_15_pa1($i, <16 register names: hi/lo halves of a..h>, $flag)
#
# Appends the template for one SHA-512 round on the 32-bit path, where
# every 64-bit working variable is held as a hi/lo register pair and
# additions are done as add (low word) followed by addc (high word).
#
# The first two entries of @X name the registers holding X[i]; the last
# two receive X[i+1].  Unless $flag is set, the template starts by loading
# X[i+1] from its stack slot at -$XOFF+8*(($i+1)%16).  After X[i] has been
# added to h, the same two registers are reused to load K[i] from $Tbl,
# which is advanced by 8 bytes.
#
# With $flag set and $i == 15 the template ends with a comparison of the
# low 10 bits of the K[i] low word against $LAST10BITS, followed by a
# branch back to L$rounds_pa1.
# NOTE(review): comiclr,= presumably nullifies that branch once the last
# table entry is reached - confirm against the PA-RISC manual.
#
# As a side effect @X is rotated by two entries so that the "next"
# register pair becomes the "current" pair for the following round.
386 | sub ROUND_00_15_pa1 { | |
387 | my ($i,$ahi,$alo,$bhi,$blo,$chi,$clo,$dhi,$dlo, | |
388 | $ehi,$elo,$fhi,$flo,$ghi,$glo,$hhi,$hlo,$flag)=@_; | |
389 | my ($Xhi,$Xlo,$Xnhi,$Xnlo) = @X; | |
390 | ||
391 | $code.=<<___ if (!$flag); | |
392 | ldw `-$XOFF+8*(($i+1)%16)`(%sp),$Xnhi | |
393 | ldw `-$XOFF+8*(($i+1)%16)+4`(%sp),$Xnlo ; load X[i+1] | |
394 | ___ | |
395 | $code.=<<___; | |
396 | shd $ehi,$elo,$Sigma1[0],$t0 | |
397 | add $Xlo,$hlo,$hlo | |
398 | shd $elo,$ehi,$Sigma1[0],$t1 | |
399 | addc $Xhi,$hhi,$hhi ; h += X[i] | |
400 | shd $ehi,$elo,$Sigma1[1],$t2 | |
b57599b7 | 401 | ldwm 8($Tbl),$Xhi |
a83f83aa AP |
402 | shd $elo,$ehi,$Sigma1[1],$t3 |
403 | ldw -4($Tbl),$Xlo ; load K[i] | |
404 | xor $t2,$t0,$t0 | |
405 | xor $t3,$t1,$t1 | |
406 | and $flo,$elo,$a0 | |
407 | and $fhi,$ehi,$a1 | |
408 | shd $ehi,$elo,$Sigma1[2],$t2 | |
409 | andcm $glo,$elo,$a2 | |
410 | shd $elo,$ehi,$Sigma1[2],$t3 | |
411 | andcm $ghi,$ehi,$a3 | |
412 | xor $t2,$t0,$t0 | |
413 | xor $t3,$t1,$t1 ; Sigma1(e) | |
414 | add $Xlo,$hlo,$hlo | |
415 | xor $a2,$a0,$a0 | |
416 | addc $Xhi,$hhi,$hhi ; h += K[i] | |
417 | xor $a3,$a1,$a1 ; Ch(e,f,g) | |
418 | ||
419 | add $t0,$hlo,$hlo | |
420 | shd $ahi,$alo,$Sigma0[0],$t0 | |
421 | addc $t1,$hhi,$hhi ; h += Sigma1(e) | |
609b0852 | 422 | shd $alo,$ahi,$Sigma0[0],$t1 |
a83f83aa AP |
423 | add $a0,$hlo,$hlo |
424 | shd $ahi,$alo,$Sigma0[1],$t2 | |
425 | addc $a1,$hhi,$hhi ; h += Ch(e,f,g) | |
426 | shd $alo,$ahi,$Sigma0[1],$t3 | |
427 | ||
428 | xor $t2,$t0,$t0 | |
429 | xor $t3,$t1,$t1 | |
430 | shd $ahi,$alo,$Sigma0[2],$t2 | |
431 | and $alo,$blo,$a0 | |
432 | shd $alo,$ahi,$Sigma0[2],$t3 | |
433 | and $ahi,$bhi,$a1 | |
434 | xor $t2,$t0,$t0 | |
435 | xor $t3,$t1,$t1 ; Sigma0(a) | |
436 | ||
437 | and $alo,$clo,$a2 | |
438 | and $ahi,$chi,$a3 | |
439 | xor $a2,$a0,$a0 | |
440 | add $hlo,$dlo,$dlo | |
441 | xor $a3,$a1,$a1 | |
442 | addc $hhi,$dhi,$dhi ; d += h | |
443 | and $blo,$clo,$a2 | |
444 | add $t0,$hlo,$hlo | |
445 | and $bhi,$chi,$a3 | |
446 | addc $t1,$hhi,$hhi ; h += Sigma0(a) | |
447 | xor $a2,$a0,$a0 | |
448 | add $a0,$hlo,$hlo | |
449 | xor $a3,$a1,$a1 ; Maj(a,b,c) | |
450 | addc $a1,$hhi,$hhi ; h += Maj(a,b,c) | |
451 | ||
452 | ___ | |
453 | $code.=<<___ if ($i==15 && $flag); | |
454 | extru $Xlo,31,10,$Xlo | |
455 | comiclr,= $LAST10BITS,$Xlo,%r0 | |
456 | b L\$rounds_pa1 | |
457 | nop | |
458 | ___ | |
459 | push(@X,shift(@X)); push(@X,shift(@X)); | |
460 | } | |
461 | ||
# ROUND_16_xx_pa1($i, <16 register names: hi/lo halves of a..h>)
#
# Appends the template for a 32-bit-path round that first updates the
# message schedule kept on the stack and then performs a regular round.
# The round number is shifted off @_ and rebased by 16; the remaining
# arguments are passed through to ROUND_00_15_pa1 unchanged.
#
# The emitted code loads X[i+1], X[i+9] and X[i+14] from their stack slots
# (8 bytes each, high word first, indices taken modulo 16), adds X[i+9]
# and the two sigma terms into the register pair holding X[i], and stores
# the result back to the slot for X[i].
#
# ROUND_00_15_pa1 is then called with a trailing flag of 1, which stops it
# from reloading X[i+1] and, for the last slot, enables its end-of-table
# check.
462 | sub ROUND_16_xx_pa1 { | |
463 | my ($Xhi,$Xlo,$Xnhi,$Xnlo) = @X; | |
464 | my ($i)=shift; | |
465 | $i-=16; | |
466 | $code.=<<___; | |
467 | ldw `-$XOFF+8*(($i+1)%16)`(%sp),$Xnhi | |
468 | ldw `-$XOFF+8*(($i+1)%16)+4`(%sp),$Xnlo ; load X[i+1] | |
469 | ldw `-$XOFF+8*(($i+9)%16)`(%sp),$a1 | |
470 | ldw `-$XOFF+8*(($i+9)%16)+4`(%sp),$a0 ; load X[i+9] | |
471 | ldw `-$XOFF+8*(($i+14)%16)`(%sp),$a3 | |
472 | ldw `-$XOFF+8*(($i+14)%16)+4`(%sp),$a2 ; load X[i+14] | |
473 | shd $Xnhi,$Xnlo,$sigma0[0],$t0 | |
474 | shd $Xnlo,$Xnhi,$sigma0[0],$t1 | |
475 | add $a0,$Xlo,$Xlo | |
476 | shd $Xnhi,$Xnlo,$sigma0[1],$t2 | |
477 | addc $a1,$Xhi,$Xhi | |
478 | shd $Xnlo,$Xnhi,$sigma0[1],$t3 | |
479 | xor $t2,$t0,$t0 | |
480 | shd $Xnhi,$Xnlo,$sigma0[2],$t2 | |
481 | xor $t3,$t1,$t1 | |
482 | extru $Xnhi,`31-$sigma0[2]`,`32-$sigma0[2]`,$t3 | |
483 | xor $t2,$t0,$t0 | |
484 | shd $a3,$a2,$sigma1[0],$a0 | |
485 | xor $t3,$t1,$t1 ; sigma0(X[i+1)&0x0f]) | |
486 | shd $a2,$a3,$sigma1[0],$a1 | |
487 | add $t0,$Xlo,$Xlo | |
488 | shd $a3,$a2,$sigma1[1],$t2 | |
489 | addc $t1,$Xhi,$Xhi | |
490 | shd $a2,$a3,$sigma1[1],$t3 | |
491 | xor $t2,$a0,$a0 | |
492 | shd $a3,$a2,$sigma1[2],$t2 | |
493 | xor $t3,$a1,$a1 | |
494 | extru $a3,`31-$sigma1[2]`,`32-$sigma1[2]`,$t3 | |
495 | xor $t2,$a0,$a0 | |
496 | xor $t3,$a1,$a1 ; sigma0(X[i+14)&0x0f]) | |
497 | add $a0,$Xlo,$Xlo | |
498 | addc $a1,$Xhi,$Xhi | |
499 | ||
500 | stw $Xhi,`-$XOFF+8*($i%16)`(%sp) | |
501 | stw $Xlo,`-$XOFF+8*($i%16)+4`(%sp) | |
502 | ___ | |
503 | &ROUND_00_15_pa1($i,@_,1); | |
504 | } | |
505 | $code.=<<___; | |
506 | ldw `0*4`($ctx),$Ahi ; load context | |
507 | ldw `1*4`($ctx),$Alo | |
508 | ldw `2*4`($ctx),$Bhi | |
509 | ldw `3*4`($ctx),$Blo | |
510 | ldw `4*4`($ctx),$Chi | |
511 | ldw `5*4`($ctx),$Clo | |
512 | ldw `6*4`($ctx),$Dhi | |
513 | ldw `7*4`($ctx),$Dlo | |
514 | ldw `8*4`($ctx),$Ehi | |
515 | ldw `9*4`($ctx),$Elo | |
516 | ldw `10*4`($ctx),$Fhi | |
517 | ldw `11*4`($ctx),$Flo | |
518 | ldw `12*4`($ctx),$Ghi | |
519 | ldw `13*4`($ctx),$Glo | |
520 | ldw `14*4`($ctx),$Hhi | |
521 | ldw `15*4`($ctx),$Hlo | |
522 | ||
523 | extru $inp,31,2,$t0 | |
524 | sh3addl $t0,%r0,$t0 | |
525 | subi 32,$t0,$t0 | |
526 | mtctl $t0,%cr11 ; load %sar with align factor | |
527 | ||
528 | L\$oop_pa1 | |
529 | extru $inp,31,2,$a3 | |
530 | comib,= 0,$a3,L\$aligned_pa1 | |
531 | sub $inp,$a3,$inp | |
532 | ||
533 | ldw `0*4`($inp),$X[0] | |
534 | ldw `1*4`($inp),$X[1] | |
535 | ldw `2*4`($inp),$t2 | |
536 | ldw `3*4`($inp),$t3 | |
537 | ldw `4*4`($inp),$a0 | |
538 | ldw `5*4`($inp),$a1 | |
539 | ldw `6*4`($inp),$a2 | |
540 | ldw `7*4`($inp),$a3 | |
541 | vshd $X[0],$X[1],$X[0] | |
542 | vshd $X[1],$t2,$X[1] | |
543 | stw $X[0],`-$XOFF+0*4`(%sp) | |
544 | ldw `8*4`($inp),$t0 | |
545 | vshd $t2,$t3,$t2 | |
546 | stw $X[1],`-$XOFF+1*4`(%sp) | |
547 | ldw `9*4`($inp),$t1 | |
548 | vshd $t3,$a0,$t3 | |
549 | ___ | |
550 | { | |
551 | my @t=($t2,$t3,$a0,$a1,$a2,$a3,$t0,$t1); | |
552 | for ($i=2;$i<=(128/4-8);$i++) { | |
553 | $code.=<<___; | |
554 | stw $t[0],`-$XOFF+$i*4`(%sp) | |
555 | ldw `(8+$i)*4`($inp),$t[0] | |
556 | vshd $t[1],$t[2],$t[1] | |
557 | ___ | |
558 | push(@t,shift(@t)); | |
559 | } | |
560 | for (;$i<(128/4-1);$i++) { | |
561 | $code.=<<___; | |
562 | stw $t[0],`-$XOFF+$i*4`(%sp) | |
563 | vshd $t[1],$t[2],$t[1] | |
564 | ___ | |
565 | push(@t,shift(@t)); | |
566 | } | |
567 | $code.=<<___; | |
568 | b L\$collected_pa1 | |
569 | stw $t[0],`-$XOFF+$i*4`(%sp) | |
570 | ||
571 | ___ | |
572 | } | |
573 | $code.=<<___; | |
574 | L\$aligned_pa1 | |
575 | ldw `0*4`($inp),$X[0] | |
576 | ldw `1*4`($inp),$X[1] | |
577 | ldw `2*4`($inp),$t2 | |
578 | ldw `3*4`($inp),$t3 | |
579 | ldw `4*4`($inp),$a0 | |
580 | ldw `5*4`($inp),$a1 | |
581 | ldw `6*4`($inp),$a2 | |
582 | ldw `7*4`($inp),$a3 | |
583 | stw $X[0],`-$XOFF+0*4`(%sp) | |
584 | ldw `8*4`($inp),$t0 | |
585 | stw $X[1],`-$XOFF+1*4`(%sp) | |
586 | ldw `9*4`($inp),$t1 | |
587 | ___ | |
588 | { | |
589 | my @t=($t2,$t3,$a0,$a1,$a2,$a3,$t0,$t1); | |
590 | for ($i=2;$i<(128/4-8);$i++) { | |
591 | $code.=<<___; | |
592 | stw $t[0],`-$XOFF+$i*4`(%sp) | |
593 | ldw `(8+$i)*4`($inp),$t[0] | |
594 | ___ | |
595 | push(@t,shift(@t)); | |
596 | } | |
597 | for (;$i<128/4;$i++) { | |
598 | $code.=<<___; | |
599 | stw $t[0],`-$XOFF+$i*4`(%sp) | |
600 | ___ | |
601 | push(@t,shift(@t)); | |
602 | } | |
603 | $code.="L\$collected_pa1\n"; | |
604 | } | |
605 | ||
606 | for($i=0;$i<16;$i++) { &ROUND_00_15_pa1($i,@V); unshift(@V,pop(@V)); unshift(@V,pop(@V)); } | |
607 | $code.="L\$rounds_pa1\n"; | |
608 | for(;$i<32;$i++) { &ROUND_16_xx_pa1($i,@V); unshift(@V,pop(@V)); unshift(@V,pop(@V)); } | |
609 | ||
610 | $code.=<<___; | |
611 | $POP `-$FRAME_MARKER-2*$SIZE_T`(%sp),$ctx ; restore arguments | |
612 | $POP `-$FRAME_MARKER-3*$SIZE_T`(%sp),$inp | |
613 | $POP `-$FRAME_MARKER-4*$SIZE_T`(%sp),$num | |
614 | ldo `-$rounds*$SZ`($Tbl),$Tbl ; rewind $Tbl | |
615 | ||
616 | ldw `0*4`($ctx),$t1 ; update context | |
617 | ldw `1*4`($ctx),$t0 | |
618 | ldw `2*4`($ctx),$t3 | |
619 | ldw `3*4`($ctx),$t2 | |
620 | ldw `4*4`($ctx),$a1 | |
621 | ldw `5*4`($ctx),$a0 | |
622 | ldw `6*4`($ctx),$a3 | |
623 | add $t0,$Alo,$Alo | |
624 | ldw `7*4`($ctx),$a2 | |
625 | addc $t1,$Ahi,$Ahi | |
626 | ldw `8*4`($ctx),$t1 | |
627 | add $t2,$Blo,$Blo | |
628 | ldw `9*4`($ctx),$t0 | |
629 | addc $t3,$Bhi,$Bhi | |
630 | ldw `10*4`($ctx),$t3 | |
631 | add $a0,$Clo,$Clo | |
632 | ldw `11*4`($ctx),$t2 | |
633 | addc $a1,$Chi,$Chi | |
634 | ldw `12*4`($ctx),$a1 | |
635 | add $a2,$Dlo,$Dlo | |
636 | ldw `13*4`($ctx),$a0 | |
637 | addc $a3,$Dhi,$Dhi | |
638 | ldw `14*4`($ctx),$a3 | |
639 | add $t0,$Elo,$Elo | |
640 | ldw `15*4`($ctx),$a2 | |
641 | addc $t1,$Ehi,$Ehi | |
642 | stw $Ahi,`0*4`($ctx) | |
643 | add $t2,$Flo,$Flo | |
644 | stw $Alo,`1*4`($ctx) | |
645 | addc $t3,$Fhi,$Fhi | |
646 | stw $Bhi,`2*4`($ctx) | |
647 | add $a0,$Glo,$Glo | |
648 | stw $Blo,`3*4`($ctx) | |
649 | addc $a1,$Ghi,$Ghi | |
650 | stw $Chi,`4*4`($ctx) | |
651 | add $a2,$Hlo,$Hlo | |
652 | stw $Clo,`5*4`($ctx) | |
653 | addc $a3,$Hhi,$Hhi | |
654 | stw $Dhi,`6*4`($ctx) | |
655 | ldo `16*$SZ`($inp),$inp ; advance $inp | |
656 | stw $Dlo,`7*4`($ctx) | |
657 | stw $Ehi,`8*4`($ctx) | |
658 | stw $Elo,`9*4`($ctx) | |
659 | stw $Fhi,`10*4`($ctx) | |
660 | stw $Flo,`11*4`($ctx) | |
661 | stw $Ghi,`12*4`($ctx) | |
662 | stw $Glo,`13*4`($ctx) | |
663 | stw $Hhi,`14*4`($ctx) | |
664 | comb,= $inp,$num,L\$done | |
665 | stw $Hlo,`15*4`($ctx) | |
666 | b L\$oop_pa1 | |
667 | $PUSH $inp,`-$FRAME_MARKER-3*$SIZE_T`(%sp) ; save $inp | |
668 | L\$done | |
669 | ___ | |
670 | }} | |
671 | $code.=<<___; | |
672 | $POP `-$FRAME-$SAVED_RP`(%sp),%r2 ; standard epilogue | |
673 | $POP `-$FRAME+1*$SIZE_T`(%sp),%r4 | |
674 | $POP `-$FRAME+2*$SIZE_T`(%sp),%r5 | |
675 | $POP `-$FRAME+3*$SIZE_T`(%sp),%r6 | |
676 | $POP `-$FRAME+4*$SIZE_T`(%sp),%r7 | |
677 | $POP `-$FRAME+5*$SIZE_T`(%sp),%r8 | |
678 | $POP `-$FRAME+6*$SIZE_T`(%sp),%r9 | |
679 | $POP `-$FRAME+7*$SIZE_T`(%sp),%r10 | |
680 | $POP `-$FRAME+8*$SIZE_T`(%sp),%r11 | |
681 | $POP `-$FRAME+9*$SIZE_T`(%sp),%r12 | |
682 | $POP `-$FRAME+10*$SIZE_T`(%sp),%r13 | |
683 | $POP `-$FRAME+11*$SIZE_T`(%sp),%r14 | |
684 | $POP `-$FRAME+12*$SIZE_T`(%sp),%r15 | |
685 | $POP `-$FRAME+13*$SIZE_T`(%sp),%r16 | |
686 | $POP `-$FRAME+14*$SIZE_T`(%sp),%r17 | |
687 | $POP `-$FRAME+15*$SIZE_T`(%sp),%r18 | |
688 | bv (%r2) | |
689 | .EXIT | |
690 | $POPMB -$FRAME(%sp),%r3 | |
691 | .PROCEND | |
692 | .STRINGZ "SHA`64*$SZ` block transform for PA-RISC, CRYPTOGAMS by <appro\@openssl.org>" | |
693 | ___ | |
694 | ||
695 | # Explicitly encode PA-RISC 2.0 instructions used in this module, so | |
696 | # that it can be compiled with .LEVEL 1.0. It should be noted that I | |
697 | # wouldn't have to do this, if GNU assembler understood .ALLOW 2.0 | |
698 | # directive... | |
699 | ||
# Encoder for "ldd": given the completer string ($mod, e.g. "" or ",ma")
# and the operand string ($args), returns a line of assembler text.
# Operands of the form "disp(%rB),%rT" are emitted as a raw .WORD with the
# original instruction kept as a trailing comment; anything else is passed
# through as plain text.
my $ldd = sub {
    my ($mod, $args) = @_;
    my $text = "ldd$mod\t$args";

    # Not a displacement load we know how to encode: leave it to the
    # assembler.
    my ($disp, $base, $target) =
        $args =~ /(\-?[0-9]+)\(%r([0-9]+)\),%r([0-9]+)/	# format 3 suffices
        or return "\t" . $text;

    my $word = (0x14 << 26) | ($base << 21) | ($target << 16)
             | (($disp & 0x1FF8) << 1)      # displacement bits
             | (($disp >> 13) & 1);         # displacement sign bit
    $word |= (1 << 3) if $mod =~ /^,m/;     # any base-modifying completer
    $word |= (1 << 2) if $mod =~ /^,mb/;    # ...of the "modify before" kind

    return sprintf "\t.WORD\t0x%08x\t; %s", $word, $text;
};
712 | ||
# Encoder for "std": given the completer string ($mod, e.g. "" or ",ma")
# and the operand string ($args), returns a line of assembler text.
# Operands of the form "%rR,disp(%rB)" are emitted as a raw .WORD with the
# original instruction kept as a trailing comment; anything else is passed
# through as plain text.
#
# The ",ma"/",mb" completers are encoded the same way the ldd encoder does
# it, so that a modifying store is never silently turned into a plain one.
# Output for an empty completer is unchanged.
my $std = sub {
  my ($mod,$args) = @_;
  my $orig = "std$mod\t$args";

    if ($args =~ /%r([0-9]+),(\-?[0-9]+)\(%r([0-9]+)\)/) # format 3 suffices
    {	my $opcode=(0x1c<<26)|($3<<21)|($1<<16)|(($2&0x1FF8)<<1)|(($2>>13)&1);
	$opcode|=(1<<3) if ($mod =~ /^,m/);	# base register is modified
	$opcode|=(1<<2) if ($mod =~ /^,mb/);	# ...before the access
	sprintf "\t.WORD\t0x%08x\t; %s",$opcode,$orig;
    }
    else { "\t".$orig; }
};
723 | ||
# Encoder for "extrd": given the completer string and the operand string,
# returns a line of assembler text.  Two operand shapes are turned into a
# raw .WORD (with the original instruction as a trailing comment):
#   %rS,pos,len,%rT    - fixed bit position (format 15)
#   %rS,%sar,len,%rT   - position taken from %sar (format 12)
# Anything else is passed through as plain text.
my $extrd = sub {
    my ($mod, $args) = @_;
    my $text = "extrd$mod\t$args";

    # I only have ",u" completer, it's implicitly encoded...
    if ($args =~ /%r([0-9]+),([0-9]+),([0-9]+),%r([0-9]+)/) {	# format 15
        my ($src, $pos, $width, $dst) = ($1, $2, $3, $4);
        my $clen = 32 - $width;
        my $word = (0x36 << 26) | ($src << 21) | ($dst << 16)
                 | (($pos  & 0x20) << 6) | (($pos & 0x1f) << 5)	# encode pos
                 | (($clen & 0x20) << 7) | ($clen & 0x1f);	# encode len
        return sprintf "\t.WORD\t0x%08x\t; %s", $word, $text;
    }

    if ($args =~ /%r([0-9]+),%sar,([0-9]+),%r([0-9]+)/) {	# format 12
        my ($src, $width, $dst) = ($1, $2, $3);
        my $clen = 32 - $width;
        my $word = (0x34 << 26) | ($src << 21) | ($dst << 16)
                 | (2 << 11) | (1 << 9)
                 | (($clen & 0x20) << 3) | ($clen & 0x1f);	# encode len
        # a "=" condition, optionally prefixed with "*", sets one more bit
        $word |= (1 << 13) if $mod =~ /,\**=/;
        return sprintf "\t.WORD\t0x%08x\t; %s", $word, $text;
    }

    return "\t" . $text;
};
745 | ||
# Encoder for "shrpd": given the completer string and the operand string,
# returns a line of assembler text.  Two operand shapes are turned into a
# raw .WORD (with the original instruction as a trailing comment):
#   %rA,%rB,sa,%rT     - fixed shift amount (format 14)
#   %rA,%rB,%sar,%rT   - shift amount taken from %sar (format 11)
# Anything else is passed through as plain text.
my $shrpd = sub {
    my ($mod, $args) = @_;
    my $text = "shrpd$mod\t$args";

    if ($args =~ /%r([0-9]+),%r([0-9]+),([0-9]+),%r([0-9]+)/) {	# format 14
        my ($r1, $r2, $shift, $dst) = ($1, $2, $3, $4);
        my $cpos = 63 - $shift;
        my $word = (0x34 << 26) | ($r2 << 21) | ($r1 << 16) | (1 << 10) | $dst
                 | (($cpos & 0x20) << 6) | (($cpos & 0x1f) << 5);	# encode sa
        return sprintf "\t.WORD\t0x%08x\t; %s", $word, $text;
    }

    if ($args =~ /%r([0-9]+),%r([0-9]+),%sar,%r([0-9]+)/) {	# format 11
        my ($r1, $r2, $dst) = ($1, $2, $3);
        my $word = (0x34 << 26) | ($r2 << 21) | ($r1 << 16) | (1 << 9) | $dst;
        return sprintf "\t.WORD\t0x%08x\t; %s", $word, $text;
    }

    return "\t" . $text;
};
762 | ||
# assemble($mnemonic, $mod, $args)
#
# Returns the assembler text for one instruction.  If a variable named
# after the mnemonic holds a code reference (the encoders defined above,
# e.g. $ldd), that encoder is called with the completer and operand
# strings; otherwise the instruction is reassembled unchanged as
# "<tab>mnemonic+completer<tab>operands".
sub assemble {
    my ($mnemonic, $mod, $args) = @_;

    # Look the encoder up by name; string eval is what gives access to the
    # file-scoped lexicals.
    my $encoder = eval("\$$mnemonic");

    return &$encoder($mod, $args) if ref($encoder) eq 'CODE';
    return "\t$mnemonic$mod\t$args";
}
769 | ||
# Post-process the accumulated template one line at a time and print it:
#   1. evaluate every `...` span as a Perl expression;
#   2. fix up "shd" shift amounts above 31 by swapping the source registers,
#      or else expand the first made-up mnemonic found on the line
#      (_ror, _shr, _align, _shl) into a real instruction for the current
#      word size;
#   3. in the 32-bit build, pass each instruction line through assemble()
#      so that PA-RISC 2.0 instructions are emitted as raw .WORDs, and
#      rewrite "cmpb,*" into "comb,";
#   4. in the 64-bit build, rewrite "bv" into "bve".
foreach (split("\n",$code)) {
	s/\`([^\`]*)\`/eval $1/ge;

	s/shd\s+(%r[0-9]+),(%r[0-9]+),([0-9]+)/
		$3>31 ? sprintf("shd\t%$2,%$1,%d",$3-32)	# rotation for >=32
		:       sprintf("shd\t%$1,%$2,%d",$3)/e			or
	# translate made up instructions: _ror, _shr, _align, _shl
	s/_ror(\s+)(%r[0-9]+),/
		($SZ==4 ? "shd" : "shrpd")."$1$2,$2,"/e			or

	s/_shr(\s+%r[0-9]+),([0-9]+),/
		$SZ==4 ? sprintf("extru%s,%d,%d,",$1,31-$2,32-$2)
		       : sprintf("extrd,u%s,%d,%d,",$1,63-$2,64-$2)/e	or

	s/_align(\s+%r[0-9]+,%r[0-9]+),/
		($SZ==4 ? "vshd$1," : "shrpd$1,%sar,")/e		or

	s/_shl(\s+%r[0-9]+),([0-9]+),/
		$SIZE_T==4 ? sprintf("zdep%s,%d,%d,",$1,31-$2,32-$2)
			   : sprintf("depd,z%s,%d,%d,",$1,63-$2,64-$2)/e;

	s/^\s+([a-z]+)([\S]*)\s+([\S]*)/&assemble($1,$2,$3)/e if ($SIZE_T==4);

	s/cmpb,\*/comb,/ if ($SIZE_T==4);

	s/\bbv\b/bve/    if ($SIZE_T==8);

	print $_,"\n";
}

# Buffered write errors (e.g. a full disk) only surface when the handle is
# closed, so a failed close must abort the build rather than leave a
# truncated assembly file behind.
close(STDOUT) or die "error closing STDOUT: $!";