]> git.ipfire.org Git - thirdparty/openssl.git/blame - crypto/sha/asm/sha512-parisc.pl
Many spelling fixes/typo's corrected.
[thirdparty/openssl.git] / crypto / sha / asm / sha512-parisc.pl
CommitLineData
6aa36e8e
RS
1#! /usr/bin/env perl
2# Copyright 2009-2016 The OpenSSL Project Authors. All Rights Reserved.
3#
4# Licensed under the OpenSSL license (the "License"). You may not use
5# this file except in compliance with the License. You can obtain a copy
6# in the file LICENSE in the source distribution or at
7# https://www.openssl.org/source/license.html
8
a83f83aa
AP
9
10# ====================================================================
e3713c36 11# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
a83f83aa
AP
12# project. The module is, however, dual licensed under OpenSSL and
13# CRYPTOGAMS licenses depending on where you obtain it. For further
14# details see http://www.openssl.org/~appro/cryptogams/.
15# ====================================================================
16
17# SHA256/512 block procedure for PA-RISC.
18
19# June 2009.
20#
21# SHA256 performance is >75% better than gcc 3.2 generated code on
22# PA-7100LC. Compared to code generated by vendor compiler this
23# implementation is almost 70% faster in 64-bit build, but delivers
24# virtually same performance in 32-bit build on PA-8600.
25#
# SHA512 performance is >2.9x better than gcc 3.2 generated code on
# PA-7100LC, PA-RISC 1.1 processor. The implementation then detects if the
# code is executed on a PA-RISC 2.0 processor and switches to the 64-bit
# code path, delivering adequate performance even in a "blended" 32-bit
# build. Though 64-bit code is not any faster than code generated by
# vendor compiler on PA-8600...
32#
33# Special thanks to polarhome.com for providing HP-UX account.
34
# Command line: <flavour> <output file>.
$flavour = shift;
$output  = shift;

# Send the generated assembly to the requested file.  Re-opening STDOUT
# closes the existing handle first, so a failed open would otherwise discard
# everything this script prints; fail loudly instead.  When no output file
# is given, keep writing to the inherited STDOUT.
if (defined($output) && length($output)) {
	open(STDOUT, '>', $output) or die "can't open $output: $!";
}
38
# ABI selection: a flavour containing "64" selects the 2.0W (wide) settings,
# anything else the 32-bit ones.  $PUSH/$POP and friends are the store/load
# mnemonics used for pointer-sized values in the prologue and epilogue.
if ($flavour =~ /64/) {
	$LEVEL		="2.0W";
	$SIZE_T		=8;
	$FRAME_MARKER	=80;
	$SAVED_RP	=16;
	$PUSH		="std";
	$PUSHMA		="std,ma";
	$POP		="ldd";
	$POPMB		="ldd,mb";
} else {
	$LEVEL		="1.0";
	$SIZE_T		=4;
	$FRAME_MARKER	=48;
	$SAVED_RP	=20;
	$PUSH		="stw";
	$PUSHMA		="stwm";
	$POP		="ldw";
	$POPMB		="ldwm";
}

# Hash selection is keyed off the output file name: "512" in the name
# produces SHA-512, anything else SHA-256.  $SZ is the word size in bytes,
# @Sigma*/@sigma* are the rotate/shift amounts, and $LAST10BITS is compared
# against the low 10 bits of a freshly loaded K word to detect the end of
# the constant table.
if ($output =~ /512/) {
	$func="sha512_block_data_order";
	$SZ=8;
	@Sigma0=(28,34,39);
	@Sigma1=(14,18,41);
	@sigma0=(1,  8, 7);
	@sigma1=(19,61, 6);
	$rounds=80;
	$LAST10BITS=0x017;
	$LD="ldd";
	$LDM="ldd,ma";
	$ST="std";
} else {
	$func="sha256_block_data_order";
	$SZ=4;
	@Sigma0=( 2,13,22);
	@Sigma1=( 6,11,25);
	@sigma0=( 7,18, 3);
	@sigma1=(17,19,10);
	$rounds=64;
	$LAST10BITS=0x0f2;
	$LD="ldw";
	$LDM="ldwm";
	$ST="stw";
}

$FRAME=16*$SIZE_T+$FRAME_MARKER;# 16 saved regs + frame marker
				#                 [+ argument transfer]
$XOFF=16*$SZ+32;		# local variables
$FRAME+=$XOFF;
$XOFF+=$FRAME_MARKER;		# distance between %sp and local variables

# Argument registers.  They double as scratch registers below, so the
# generated code saves the arguments on the stack before the rounds start.
$ctx="%r26";	# zapped by $a0
$inp="%r25";	# zapped by $a1
$num="%r24";	# zapped by $t0

$a0 ="%r26";
$a1 ="%r25";
$t0 ="%r24";
$t1 ="%r29";
$Tbl="%r31";

# Working variables a..h; @V is rotated by the callers after every round.
@V=($A,$B,$C,$D,$E,$F,$G,$H)=("%r17","%r18","%r19","%r20","%r21","%r22","%r23","%r28");

# Message schedule X[0..15]; the 17th slot ($inp) holds the extra word
# loaded when the input has to be realigned.
@X=("%r1", "%r2", "%r3", "%r4", "%r5", "%r6", "%r7", "%r8",
    "%r9", "%r10","%r11","%r12","%r13","%r14","%r15","%r16",$inp);
105
# Append one SHA round to $code.
#
# Arguments: the round index $i followed by the register names currently
# holding the working variables a..h.  The round reads the message word from
# $X[$i%16] and expects the round constant K[i] to be in $t1 already; for
# $i<15 it also emits the post-incrementing load of the next constant.
# _ror is a made-up mnemonic that the output loop at the end of this file
# rewrites into a real instruction; back-quoted expressions are evaluated
# there as well.
sub ROUND_00_15 {
my ($i,$a,$b,$c,$d,$e,$f,$g,$h)=@_;
$code.=<<___;
	_ror	$e,$Sigma1[0],$a0
	and	$f,$e,$t0
	_ror	$e,$Sigma1[1],$a1
	addl	$t1,$h,$h
	andcm	$g,$e,$t1
	xor	$a1,$a0,$a0
	_ror	$a1,`$Sigma1[2]-$Sigma1[1]`,$a1
	or	$t0,$t1,$t1		; Ch(e,f,g)
	addl	@X[$i%16],$h,$h
	xor	$a0,$a1,$a1		; Sigma1(e)
	addl	$t1,$h,$h
	_ror	$a,$Sigma0[0],$a0
	addl	$a1,$h,$h

	_ror	$a,$Sigma0[1],$a1
	and	$a,$b,$t0
	and	$a,$c,$t1
	xor	$a1,$a0,$a0
	_ror	$a1,`$Sigma0[2]-$Sigma0[1]`,$a1
	xor	$t1,$t0,$t0
	and	$b,$c,$t1
	xor	$a0,$a1,$a1		; Sigma0(a)
	addl	$h,$d,$d
	xor	$t1,$t0,$t0		; Maj(a,b,c)
	`"$LDM	$SZ($Tbl),$t1" if ($i<15)`
	addl	$a1,$h,$h
	addl	$t0,$h,$h

___
}
139
# Append one round with message-schedule expansion to $code.
#
# Takes the same arguments as ROUND_00_15, with $i counted from 16.  It
# first updates X[i mod 16] in place from X[i+1], X[i+9] and X[i+14] (all
# indices mod 16) and loads the next round constant into $t1, then emits
# the round proper via ROUND_00_15.  On the last round of each 16-round
# pass the low 10 bits of the constant just loaded are compared against
# $LAST10BITS; on a match $Tbl is bumped by one to flag the end of the table
# to the loop branch emitted by the caller.
sub ROUND_16_xx {
my ($i,$a,$b,$c,$d,$e,$f,$g,$h)=@_;
$i-=16;
$code.=<<___;
	_ror	@X[($i+1)%16],$sigma0[0],$a0
	_ror	@X[($i+1)%16],$sigma0[1],$a1
	addl	@X[($i+9)%16],@X[$i],@X[$i]
	_ror	@X[($i+14)%16],$sigma1[0],$t0
	_ror	@X[($i+14)%16],$sigma1[1],$t1
	xor	$a1,$a0,$a0
	_shr	@X[($i+1)%16],$sigma0[2],$a1
	xor	$t1,$t0,$t0
	_shr	@X[($i+14)%16],$sigma1[2],$t1
	xor	$a1,$a0,$a0		; sigma0(X[(i+1)&0x0f])
	xor	$t1,$t0,$t0		; sigma1(X[(i+14)&0x0f])
	$LDM	$SZ($Tbl),$t1
	addl	$a0,@X[$i],@X[$i]
	addl	$t0,@X[$i],@X[$i]
___
$code.=<<___ if ($i==15);
	extru	$t1,31,10,$a1
	comiclr,<>	$LAST10BITS,$a1,%r0
	ldo	1($Tbl),$Tbl		; signal end of $Tbl
___
&ROUND_00_15($i+16,$a,$b,$c,$d,$e,$f,$g,$h);
}
166
167$code=<<___;
168 .LEVEL $LEVEL
169 .SPACE \$TEXT\$
170 .SUBSPA \$CODE\$,QUAD=0,ALIGN=8,ACCESS=0x2C,CODE_ONLY
171
172 .ALIGN 64
173L\$table
174___
175$code.=<<___ if ($SZ==8);
176 .WORD 0x428a2f98,0xd728ae22,0x71374491,0x23ef65cd
177 .WORD 0xb5c0fbcf,0xec4d3b2f,0xe9b5dba5,0x8189dbbc
178 .WORD 0x3956c25b,0xf348b538,0x59f111f1,0xb605d019
179 .WORD 0x923f82a4,0xaf194f9b,0xab1c5ed5,0xda6d8118
180 .WORD 0xd807aa98,0xa3030242,0x12835b01,0x45706fbe
181 .WORD 0x243185be,0x4ee4b28c,0x550c7dc3,0xd5ffb4e2
182 .WORD 0x72be5d74,0xf27b896f,0x80deb1fe,0x3b1696b1
183 .WORD 0x9bdc06a7,0x25c71235,0xc19bf174,0xcf692694
184 .WORD 0xe49b69c1,0x9ef14ad2,0xefbe4786,0x384f25e3
185 .WORD 0x0fc19dc6,0x8b8cd5b5,0x240ca1cc,0x77ac9c65
186 .WORD 0x2de92c6f,0x592b0275,0x4a7484aa,0x6ea6e483
187 .WORD 0x5cb0a9dc,0xbd41fbd4,0x76f988da,0x831153b5
188 .WORD 0x983e5152,0xee66dfab,0xa831c66d,0x2db43210
189 .WORD 0xb00327c8,0x98fb213f,0xbf597fc7,0xbeef0ee4
190 .WORD 0xc6e00bf3,0x3da88fc2,0xd5a79147,0x930aa725
191 .WORD 0x06ca6351,0xe003826f,0x14292967,0x0a0e6e70
192 .WORD 0x27b70a85,0x46d22ffc,0x2e1b2138,0x5c26c926
193 .WORD 0x4d2c6dfc,0x5ac42aed,0x53380d13,0x9d95b3df
194 .WORD 0x650a7354,0x8baf63de,0x766a0abb,0x3c77b2a8
195 .WORD 0x81c2c92e,0x47edaee6,0x92722c85,0x1482353b
196 .WORD 0xa2bfe8a1,0x4cf10364,0xa81a664b,0xbc423001
197 .WORD 0xc24b8b70,0xd0f89791,0xc76c51a3,0x0654be30
198 .WORD 0xd192e819,0xd6ef5218,0xd6990624,0x5565a910
199 .WORD 0xf40e3585,0x5771202a,0x106aa070,0x32bbd1b8
200 .WORD 0x19a4c116,0xb8d2d0c8,0x1e376c08,0x5141ab53
201 .WORD 0x2748774c,0xdf8eeb99,0x34b0bcb5,0xe19b48a8
202 .WORD 0x391c0cb3,0xc5c95a63,0x4ed8aa4a,0xe3418acb
203 .WORD 0x5b9cca4f,0x7763e373,0x682e6ff3,0xd6b2b8a3
204 .WORD 0x748f82ee,0x5defb2fc,0x78a5636f,0x43172f60
205 .WORD 0x84c87814,0xa1f0ab72,0x8cc70208,0x1a6439ec
206 .WORD 0x90befffa,0x23631e28,0xa4506ceb,0xde82bde9
207 .WORD 0xbef9a3f7,0xb2c67915,0xc67178f2,0xe372532b
208 .WORD 0xca273ece,0xea26619c,0xd186b8c7,0x21c0c207
209 .WORD 0xeada7dd6,0xcde0eb1e,0xf57d4f7f,0xee6ed178
210 .WORD 0x06f067aa,0x72176fba,0x0a637dc5,0xa2c898a6
211 .WORD 0x113f9804,0xbef90dae,0x1b710b35,0x131c471b
212 .WORD 0x28db77f5,0x23047d84,0x32caab7b,0x40c72493
213 .WORD 0x3c9ebe0a,0x15c9bebc,0x431d67c4,0x9c100d4c
214 .WORD 0x4cc5d4be,0xcb3e42b6,0x597f299c,0xfc657e2a
215 .WORD 0x5fcb6fab,0x3ad6faec,0x6c44198c,0x4a475817
216___
217$code.=<<___ if ($SZ==4);
218 .WORD 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
219 .WORD 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5
220 .WORD 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3
221 .WORD 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174
222 .WORD 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc
223 .WORD 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da
224 .WORD 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7
225 .WORD 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967
226 .WORD 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13
227 .WORD 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85
228 .WORD 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3
229 .WORD 0xd192e819,0xd6990624,0xf40e3585,0x106aa070
230 .WORD 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5
231 .WORD 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3
232 .WORD 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
233 .WORD 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
234___
235$code.=<<___;
236
237 .EXPORT $func,ENTRY,ARGW0=GR,ARGW1=GR,ARGW2=GR
238 .ALIGN 64
239$func
240 .PROC
241 .CALLINFO FRAME=`$FRAME-16*$SIZE_T`,NO_CALLS,SAVE_RP,ENTRY_GR=18
242 .ENTRY
243 $PUSH %r2,-$SAVED_RP(%sp) ; standard prologue
244 $PUSHMA %r3,$FRAME(%sp)
245 $PUSH %r4,`-$FRAME+1*$SIZE_T`(%sp)
246 $PUSH %r5,`-$FRAME+2*$SIZE_T`(%sp)
247 $PUSH %r6,`-$FRAME+3*$SIZE_T`(%sp)
248 $PUSH %r7,`-$FRAME+4*$SIZE_T`(%sp)
249 $PUSH %r8,`-$FRAME+5*$SIZE_T`(%sp)
250 $PUSH %r9,`-$FRAME+6*$SIZE_T`(%sp)
251 $PUSH %r10,`-$FRAME+7*$SIZE_T`(%sp)
252 $PUSH %r11,`-$FRAME+8*$SIZE_T`(%sp)
253 $PUSH %r12,`-$FRAME+9*$SIZE_T`(%sp)
254 $PUSH %r13,`-$FRAME+10*$SIZE_T`(%sp)
255 $PUSH %r14,`-$FRAME+11*$SIZE_T`(%sp)
256 $PUSH %r15,`-$FRAME+12*$SIZE_T`(%sp)
257 $PUSH %r16,`-$FRAME+13*$SIZE_T`(%sp)
258 $PUSH %r17,`-$FRAME+14*$SIZE_T`(%sp)
259 $PUSH %r18,`-$FRAME+15*$SIZE_T`(%sp)
260
261 _shl $num,`log(16*$SZ)/log(2)`,$num
262 addl $inp,$num,$num ; $num to point at the end of $inp
263
264 $PUSH $num,`-$FRAME_MARKER-4*$SIZE_T`(%sp) ; save arguments
265 $PUSH $inp,`-$FRAME_MARKER-3*$SIZE_T`(%sp)
266 $PUSH $ctx,`-$FRAME_MARKER-2*$SIZE_T`(%sp)
267
268 blr %r0,$Tbl
269 ldi 3,$t1
270L\$pic
271 andcm $Tbl,$t1,$Tbl ; wipe privilege level
272 ldo L\$table-L\$pic($Tbl),$Tbl
273___
274$code.=<<___ if ($SZ==8 && $SIZE_T==4);
275 ldi 31,$t1
276 mtctl $t1,%cr11
277 extrd,u,*= $t1,%sar,1,$t1 ; executes on PA-RISC 1.0
278 b L\$parisc1
279 nop
280___
281$code.=<<___;
282 $LD `0*$SZ`($ctx),$A ; load context
283 $LD `1*$SZ`($ctx),$B
284 $LD `2*$SZ`($ctx),$C
285 $LD `3*$SZ`($ctx),$D
286 $LD `4*$SZ`($ctx),$E
287 $LD `5*$SZ`($ctx),$F
288 $LD `6*$SZ`($ctx),$G
289 $LD `7*$SZ`($ctx),$H
290
291 extru $inp,31,`log($SZ)/log(2)`,$t0
292 sh3addl $t0,%r0,$t0
293 subi `8*$SZ`,$t0,$t0
294 mtctl $t0,%cr11 ; load %sar with align factor
295
296L\$oop
297 ldi `$SZ-1`,$t0
298 $LDM $SZ($Tbl),$t1
299 andcm $inp,$t0,$t0 ; align $inp
300___
301 for ($i=0;$i<15;$i++) { # load input block
302 $code.="\t$LD `$SZ*$i`($t0),@X[$i]\n"; }
303$code.=<<___;
304 cmpb,*= $inp,$t0,L\$aligned
305 $LD `$SZ*15`($t0),@X[15]
306 $LD `$SZ*16`($t0),@X[16]
307___
308 for ($i=0;$i<16;$i++) { # align data
309 $code.="\t_align @X[$i],@X[$i+1],@X[$i]\n"; }
310$code.=<<___;
311L\$aligned
312 nop ; otherwise /usr/ccs/bin/as is confused by below .WORD
313___
314
315for($i=0;$i<16;$i++) { &ROUND_00_15($i,@V); unshift(@V,pop(@V)); }
316$code.=<<___;
317L\$rounds
318 nop ; otherwise /usr/ccs/bin/as is confused by below .WORD
319___
320for(;$i<32;$i++) { &ROUND_16_xx($i,@V); unshift(@V,pop(@V)); }
321$code.=<<___;
322 bb,>= $Tbl,31,L\$rounds ; end of $Tbl signalled?
323 nop
324
325 $POP `-$FRAME_MARKER-2*$SIZE_T`(%sp),$ctx ; restore arguments
326 $POP `-$FRAME_MARKER-3*$SIZE_T`(%sp),$inp
327 $POP `-$FRAME_MARKER-4*$SIZE_T`(%sp),$num
328 ldo `-$rounds*$SZ-1`($Tbl),$Tbl ; rewind $Tbl
329
330 $LD `0*$SZ`($ctx),@X[0] ; load context
331 $LD `1*$SZ`($ctx),@X[1]
332 $LD `2*$SZ`($ctx),@X[2]
333 $LD `3*$SZ`($ctx),@X[3]
334 $LD `4*$SZ`($ctx),@X[4]
335 $LD `5*$SZ`($ctx),@X[5]
336 addl @X[0],$A,$A
337 $LD `6*$SZ`($ctx),@X[6]
338 addl @X[1],$B,$B
339 $LD `7*$SZ`($ctx),@X[7]
340 ldo `16*$SZ`($inp),$inp ; advance $inp
341
342 $ST $A,`0*$SZ`($ctx) ; save context
343 addl @X[2],$C,$C
344 $ST $B,`1*$SZ`($ctx)
345 addl @X[3],$D,$D
346 $ST $C,`2*$SZ`($ctx)
347 addl @X[4],$E,$E
348 $ST $D,`3*$SZ`($ctx)
349 addl @X[5],$F,$F
350 $ST $E,`4*$SZ`($ctx)
351 addl @X[6],$G,$G
352 $ST $F,`5*$SZ`($ctx)
353 addl @X[7],$H,$H
354 $ST $G,`6*$SZ`($ctx)
355 $ST $H,`7*$SZ`($ctx)
356
357 cmpb,*<>,n $inp,$num,L\$oop
358 $PUSH $inp,`-$FRAME_MARKER-3*$SIZE_T`(%sp) ; save $inp
359___
360if ($SZ==8 && $SIZE_T==4) # SHA512 for 32-bit PA-RISC 1.0
361{{
362$code.=<<___;
363 b L\$done
364 nop
365
366 .ALIGN 64
367L\$parisc1
368___
369
370@V=( $Ahi, $Alo, $Bhi, $Blo, $Chi, $Clo, $Dhi, $Dlo,
609b0852 371 $Ehi, $Elo, $Fhi, $Flo, $Ghi, $Glo, $Hhi, $Hlo) =
a83f83aa
AP
372 ( "%r1", "%r2", "%r3", "%r4", "%r5", "%r6", "%r7", "%r8",
373 "%r9","%r10","%r11","%r12","%r13","%r14","%r15","%r16");
374$a0 ="%r17";
375$a1 ="%r18";
376$a2 ="%r19";
377$a3 ="%r20";
378$t0 ="%r21";
379$t1 ="%r22";
380$t2 ="%r28";
381$t3 ="%r29";
382$Tbl="%r31";
383
384@X=("%r23","%r24","%r25","%r26"); # zaps $num,$inp,$ctx
385
# Append one SHA-512 round for the 32-bit-only code path to $code.
#
# Every 64-bit quantity lives in a hi/lo register pair.  Arguments: the round
# index $i, the sixteen register names holding a..h as (hi,lo) pairs, and an
# optional $flag.  Without $flag the round also loads X[i+1] from the stack
# into the second register pair of @X; with $flag that load is skipped (the
# schedule-expansion code has loaded it already) and, on round 15, the low
# 10 bits of the constant just loaded are compared with $LAST10BITS to decide
# whether to branch back to L$rounds_pa1.
#
# The add/addc pairs form carry chains; the instructions interleaved between
# them are copied in their original order and must not be reordered.
#
# Side effect: rotates @X by two entries so that the next round sees the
# freshly loaded pair as the current X[i].
sub ROUND_00_15_pa1 {
my ($i,$ahi,$alo,$bhi,$blo,$chi,$clo,$dhi,$dlo,
       $ehi,$elo,$fhi,$flo,$ghi,$glo,$hhi,$hlo,$flag)=@_;
my ($Xhi,$Xlo,$Xnhi,$Xnlo) = @X;

$code.=<<___ if (!$flag);
	ldw	`-$XOFF+8*(($i+1)%16)`(%sp),$Xnhi
	ldw	`-$XOFF+8*(($i+1)%16)+4`(%sp),$Xnlo	; load X[i+1]
___
$code.=<<___;
	shd	$ehi,$elo,$Sigma1[0],$t0
	add	$Xlo,$hlo,$hlo
	shd	$elo,$ehi,$Sigma1[0],$t1
	addc	$Xhi,$hhi,$hhi		; h += X[i]
	shd	$ehi,$elo,$Sigma1[1],$t2
	ldwm	8($Tbl),$Xhi
	shd	$elo,$ehi,$Sigma1[1],$t3
	ldw	-4($Tbl),$Xlo		; load K[i]
	xor	$t2,$t0,$t0
	xor	$t3,$t1,$t1
	and	$flo,$elo,$a0
	and	$fhi,$ehi,$a1
	shd	$ehi,$elo,$Sigma1[2],$t2
	andcm	$glo,$elo,$a2
	shd	$elo,$ehi,$Sigma1[2],$t3
	andcm	$ghi,$ehi,$a3
	xor	$t2,$t0,$t0
	xor	$t3,$t1,$t1		; Sigma1(e)
	add	$Xlo,$hlo,$hlo
	xor	$a2,$a0,$a0
	addc	$Xhi,$hhi,$hhi		; h += K[i]
	xor	$a3,$a1,$a1		; Ch(e,f,g)

	add	$t0,$hlo,$hlo
	shd	$ahi,$alo,$Sigma0[0],$t0
	addc	$t1,$hhi,$hhi		; h += Sigma1(e)
	shd	$alo,$ahi,$Sigma0[0],$t1
	add	$a0,$hlo,$hlo
	shd	$ahi,$alo,$Sigma0[1],$t2
	addc	$a1,$hhi,$hhi		; h += Ch(e,f,g)
	shd	$alo,$ahi,$Sigma0[1],$t3

	xor	$t2,$t0,$t0
	xor	$t3,$t1,$t1
	shd	$ahi,$alo,$Sigma0[2],$t2
	and	$alo,$blo,$a0
	shd	$alo,$ahi,$Sigma0[2],$t3
	and	$ahi,$bhi,$a1
	xor	$t2,$t0,$t0
	xor	$t3,$t1,$t1		; Sigma0(a)

	and	$alo,$clo,$a2
	and	$ahi,$chi,$a3
	xor	$a2,$a0,$a0
	add	$hlo,$dlo,$dlo
	xor	$a3,$a1,$a1
	addc	$hhi,$dhi,$dhi		; d += h
	and	$blo,$clo,$a2
	add	$t0,$hlo,$hlo
	and	$bhi,$chi,$a3
	addc	$t1,$hhi,$hhi		; h += Sigma0(a)
	xor	$a2,$a0,$a0
	add	$a0,$hlo,$hlo
	xor	$a3,$a1,$a1		; Maj(a,b,c)
	addc	$a1,$hhi,$hhi		; h += Maj(a,b,c)

___
$code.=<<___ if ($i==15 && $flag);
	extru	$Xlo,31,10,$Xlo
	comiclr,=	$LAST10BITS,$Xlo,%r0
	b	L\$rounds_pa1
	nop
___
push(@X,shift(@X)); push(@X,shift(@X));
}
461
# Append one round with message-schedule expansion for the 32-bit-only
# SHA-512 path to $code.
#
# Arguments: the round index $i (counted from 16) followed by the sixteen
# hi/lo register names for a..h, which are passed through untouched.
# The emitted code loads X[i+1], X[i+9] and X[i+14] from the stack frame,
# adds sigma0(X[i+1]), X[i+9] and sigma1(X[i+14]) into the current X[i]
# register pair, stores the result back to the frame and then emits the
# round itself through ROUND_00_15_pa1 with its flag set (X[i+1] is already
# loaded, and the end-of-table check is wanted on round 15).
sub ROUND_16_xx_pa1 {
my ($Xhi,$Xlo,$Xnhi,$Xnlo) = @X;
my ($i)=shift;
$i-=16;
$code.=<<___;
	ldw	`-$XOFF+8*(($i+1)%16)`(%sp),$Xnhi
	ldw	`-$XOFF+8*(($i+1)%16)+4`(%sp),$Xnlo	; load X[i+1]
	ldw	`-$XOFF+8*(($i+9)%16)`(%sp),$a1
	ldw	`-$XOFF+8*(($i+9)%16)+4`(%sp),$a0	; load X[i+9]
	ldw	`-$XOFF+8*(($i+14)%16)`(%sp),$a3
	ldw	`-$XOFF+8*(($i+14)%16)+4`(%sp),$a2	; load X[i+14]
	shd	$Xnhi,$Xnlo,$sigma0[0],$t0
	shd	$Xnlo,$Xnhi,$sigma0[0],$t1
	add	$a0,$Xlo,$Xlo
	shd	$Xnhi,$Xnlo,$sigma0[1],$t2
	addc	$a1,$Xhi,$Xhi
	shd	$Xnlo,$Xnhi,$sigma0[1],$t3
	xor	$t2,$t0,$t0
	shd	$Xnhi,$Xnlo,$sigma0[2],$t2
	xor	$t3,$t1,$t1
	extru	$Xnhi,`31-$sigma0[2]`,`32-$sigma0[2]`,$t3
	xor	$t2,$t0,$t0
	shd	$a3,$a2,$sigma1[0],$a0
	xor	$t3,$t1,$t1		; sigma0(X[(i+1)&0x0f])
	shd	$a2,$a3,$sigma1[0],$a1
	add	$t0,$Xlo,$Xlo
	shd	$a3,$a2,$sigma1[1],$t2
	addc	$t1,$Xhi,$Xhi
	shd	$a2,$a3,$sigma1[1],$t3
	xor	$t2,$a0,$a0
	shd	$a3,$a2,$sigma1[2],$t2
	xor	$t3,$a1,$a1
	extru	$a3,`31-$sigma1[2]`,`32-$sigma1[2]`,$t3
	xor	$t2,$a0,$a0
	xor	$t3,$a1,$a1		; sigma1(X[(i+14)&0x0f])
	add	$a0,$Xlo,$Xlo
	addc	$a1,$Xhi,$Xhi

	stw	$Xhi,`-$XOFF+8*($i%16)`(%sp)
	stw	$Xlo,`-$XOFF+8*($i%16)+4`(%sp)
___
&ROUND_00_15_pa1($i,@_,1);
}
505$code.=<<___;
506 ldw `0*4`($ctx),$Ahi ; load context
507 ldw `1*4`($ctx),$Alo
508 ldw `2*4`($ctx),$Bhi
509 ldw `3*4`($ctx),$Blo
510 ldw `4*4`($ctx),$Chi
511 ldw `5*4`($ctx),$Clo
512 ldw `6*4`($ctx),$Dhi
513 ldw `7*4`($ctx),$Dlo
514 ldw `8*4`($ctx),$Ehi
515 ldw `9*4`($ctx),$Elo
516 ldw `10*4`($ctx),$Fhi
517 ldw `11*4`($ctx),$Flo
518 ldw `12*4`($ctx),$Ghi
519 ldw `13*4`($ctx),$Glo
520 ldw `14*4`($ctx),$Hhi
521 ldw `15*4`($ctx),$Hlo
522
523 extru $inp,31,2,$t0
524 sh3addl $t0,%r0,$t0
525 subi 32,$t0,$t0
526 mtctl $t0,%cr11 ; load %sar with align factor
527
528L\$oop_pa1
529 extru $inp,31,2,$a3
530 comib,= 0,$a3,L\$aligned_pa1
531 sub $inp,$a3,$inp
532
533 ldw `0*4`($inp),$X[0]
534 ldw `1*4`($inp),$X[1]
535 ldw `2*4`($inp),$t2
536 ldw `3*4`($inp),$t3
537 ldw `4*4`($inp),$a0
538 ldw `5*4`($inp),$a1
539 ldw `6*4`($inp),$a2
540 ldw `7*4`($inp),$a3
541 vshd $X[0],$X[1],$X[0]
542 vshd $X[1],$t2,$X[1]
543 stw $X[0],`-$XOFF+0*4`(%sp)
544 ldw `8*4`($inp),$t0
545 vshd $t2,$t3,$t2
546 stw $X[1],`-$XOFF+1*4`(%sp)
547 ldw `9*4`($inp),$t1
548 vshd $t3,$a0,$t3
549___
550{
551my @t=($t2,$t3,$a0,$a1,$a2,$a3,$t0,$t1);
552for ($i=2;$i<=(128/4-8);$i++) {
553$code.=<<___;
554 stw $t[0],`-$XOFF+$i*4`(%sp)
555 ldw `(8+$i)*4`($inp),$t[0]
556 vshd $t[1],$t[2],$t[1]
557___
558push(@t,shift(@t));
559}
560for (;$i<(128/4-1);$i++) {
561$code.=<<___;
562 stw $t[0],`-$XOFF+$i*4`(%sp)
563 vshd $t[1],$t[2],$t[1]
564___
565push(@t,shift(@t));
566}
567$code.=<<___;
568 b L\$collected_pa1
569 stw $t[0],`-$XOFF+$i*4`(%sp)
570
571___
572}
573$code.=<<___;
574L\$aligned_pa1
575 ldw `0*4`($inp),$X[0]
576 ldw `1*4`($inp),$X[1]
577 ldw `2*4`($inp),$t2
578 ldw `3*4`($inp),$t3
579 ldw `4*4`($inp),$a0
580 ldw `5*4`($inp),$a1
581 ldw `6*4`($inp),$a2
582 ldw `7*4`($inp),$a3
583 stw $X[0],`-$XOFF+0*4`(%sp)
584 ldw `8*4`($inp),$t0
585 stw $X[1],`-$XOFF+1*4`(%sp)
586 ldw `9*4`($inp),$t1
587___
588{
589my @t=($t2,$t3,$a0,$a1,$a2,$a3,$t0,$t1);
590for ($i=2;$i<(128/4-8);$i++) {
591$code.=<<___;
592 stw $t[0],`-$XOFF+$i*4`(%sp)
593 ldw `(8+$i)*4`($inp),$t[0]
594___
595push(@t,shift(@t));
596}
597for (;$i<128/4;$i++) {
598$code.=<<___;
599 stw $t[0],`-$XOFF+$i*4`(%sp)
600___
601push(@t,shift(@t));
602}
603$code.="L\$collected_pa1\n";
604}
605
606for($i=0;$i<16;$i++) { &ROUND_00_15_pa1($i,@V); unshift(@V,pop(@V)); unshift(@V,pop(@V)); }
607$code.="L\$rounds_pa1\n";
608for(;$i<32;$i++) { &ROUND_16_xx_pa1($i,@V); unshift(@V,pop(@V)); unshift(@V,pop(@V)); }
609
610$code.=<<___;
611 $POP `-$FRAME_MARKER-2*$SIZE_T`(%sp),$ctx ; restore arguments
612 $POP `-$FRAME_MARKER-3*$SIZE_T`(%sp),$inp
613 $POP `-$FRAME_MARKER-4*$SIZE_T`(%sp),$num
614 ldo `-$rounds*$SZ`($Tbl),$Tbl ; rewind $Tbl
615
616 ldw `0*4`($ctx),$t1 ; update context
617 ldw `1*4`($ctx),$t0
618 ldw `2*4`($ctx),$t3
619 ldw `3*4`($ctx),$t2
620 ldw `4*4`($ctx),$a1
621 ldw `5*4`($ctx),$a0
622 ldw `6*4`($ctx),$a3
623 add $t0,$Alo,$Alo
624 ldw `7*4`($ctx),$a2
625 addc $t1,$Ahi,$Ahi
626 ldw `8*4`($ctx),$t1
627 add $t2,$Blo,$Blo
628 ldw `9*4`($ctx),$t0
629 addc $t3,$Bhi,$Bhi
630 ldw `10*4`($ctx),$t3
631 add $a0,$Clo,$Clo
632 ldw `11*4`($ctx),$t2
633 addc $a1,$Chi,$Chi
634 ldw `12*4`($ctx),$a1
635 add $a2,$Dlo,$Dlo
636 ldw `13*4`($ctx),$a0
637 addc $a3,$Dhi,$Dhi
638 ldw `14*4`($ctx),$a3
639 add $t0,$Elo,$Elo
640 ldw `15*4`($ctx),$a2
641 addc $t1,$Ehi,$Ehi
642 stw $Ahi,`0*4`($ctx)
643 add $t2,$Flo,$Flo
644 stw $Alo,`1*4`($ctx)
645 addc $t3,$Fhi,$Fhi
646 stw $Bhi,`2*4`($ctx)
647 add $a0,$Glo,$Glo
648 stw $Blo,`3*4`($ctx)
649 addc $a1,$Ghi,$Ghi
650 stw $Chi,`4*4`($ctx)
651 add $a2,$Hlo,$Hlo
652 stw $Clo,`5*4`($ctx)
653 addc $a3,$Hhi,$Hhi
654 stw $Dhi,`6*4`($ctx)
655 ldo `16*$SZ`($inp),$inp ; advance $inp
656 stw $Dlo,`7*4`($ctx)
657 stw $Ehi,`8*4`($ctx)
658 stw $Elo,`9*4`($ctx)
659 stw $Fhi,`10*4`($ctx)
660 stw $Flo,`11*4`($ctx)
661 stw $Ghi,`12*4`($ctx)
662 stw $Glo,`13*4`($ctx)
663 stw $Hhi,`14*4`($ctx)
664 comb,= $inp,$num,L\$done
665 stw $Hlo,`15*4`($ctx)
666 b L\$oop_pa1
667 $PUSH $inp,`-$FRAME_MARKER-3*$SIZE_T`(%sp) ; save $inp
668L\$done
669___
670}}
671$code.=<<___;
672 $POP `-$FRAME-$SAVED_RP`(%sp),%r2 ; standard epilogue
673 $POP `-$FRAME+1*$SIZE_T`(%sp),%r4
674 $POP `-$FRAME+2*$SIZE_T`(%sp),%r5
675 $POP `-$FRAME+3*$SIZE_T`(%sp),%r6
676 $POP `-$FRAME+4*$SIZE_T`(%sp),%r7
677 $POP `-$FRAME+5*$SIZE_T`(%sp),%r8
678 $POP `-$FRAME+6*$SIZE_T`(%sp),%r9
679 $POP `-$FRAME+7*$SIZE_T`(%sp),%r10
680 $POP `-$FRAME+8*$SIZE_T`(%sp),%r11
681 $POP `-$FRAME+9*$SIZE_T`(%sp),%r12
682 $POP `-$FRAME+10*$SIZE_T`(%sp),%r13
683 $POP `-$FRAME+11*$SIZE_T`(%sp),%r14
684 $POP `-$FRAME+12*$SIZE_T`(%sp),%r15
685 $POP `-$FRAME+13*$SIZE_T`(%sp),%r16
686 $POP `-$FRAME+14*$SIZE_T`(%sp),%r17
687 $POP `-$FRAME+15*$SIZE_T`(%sp),%r18
688 bv (%r2)
689 .EXIT
690 $POPMB -$FRAME(%sp),%r3
691 .PROCEND
692 .STRINGZ "SHA`64*$SZ` block transform for PA-RISC, CRYPTOGAMS by <appro\@openssl.org>"
693___
694
695# Explicitly encode PA-RISC 2.0 instructions used in this module, so
696# that it can be compiled with .LEVEL 1.0. It should be noted that I
697# wouldn't have to do this, if GNU assembler understood .ALLOW 2.0
698# directive...
699
# Encoder for "ldd": turns "ldd<mod> disp(%rB),%rT" into a .WORD directive
# carrying the instruction word, with the original text kept as a trailing
# comment.  $mod is the completer string ("" or e.g. ",ma"/",mb"), $args the
# operand string.  Operands that do not match the displacement form are
# returned as plain assembler text.
my $ldd = sub {
    my ($mod,$args) = @_;
    my $orig = "ldd$mod\t$args";

    if ($args =~ /(\-?[0-9]+)\(%r([0-9]+)\),%r([0-9]+)/)	# format 3 suffices
    {	# Copy the captures before the completer matches below reset them.
	my ($disp,$base,$target) = ($1,$2,$3);
	my $opcode=(0x14<<26)|($base<<21)|($target<<16)|(($disp&0x1FF8)<<1)|(($disp>>13)&1);
	$opcode|=(1<<3) if ($mod =~ /^,m/);
	$opcode|=(1<<2) if ($mod =~ /^,mb/);
	return sprintf "\t.WORD\t0x%08x\t; %s",$opcode,$orig;
    }
    else { return "\t".$orig; }
};
712
# Encoder for "std": turns "std %rS,disp(%rB)" into a .WORD directive with
# the original text kept as a trailing comment.  Completers in $mod are only
# echoed in that comment, they are not encoded.  Operands in any other form
# are returned as plain assembler text.
my $std = sub {
    my ($mod,$args) = @_;
    my $orig = "std$mod\t$args";

    if ($args =~ /%r([0-9]+),(\-?[0-9]+)\(%r([0-9]+)\)/)	# format 3 suffices
    {	my ($source,$disp,$base) = ($1,$2,$3);
	my $opcode=(0x1c<<26)|($base<<21)|($source<<16)|(($disp&0x1FF8)<<1)|(($disp>>13)&1);
	return sprintf "\t.WORD\t0x%08x\t; %s",$opcode,$orig;
    }
    else { return "\t".$orig; }
};
723
# Encoder for "extrd": handles the fixed-position form
# "%rS,pos,len,%rT" and the variable-position form "%rS,%sar,len,%rT".
# The result is a .WORD directive with the original text as a trailing
# comment; any other operand form is returned as plain assembler text.
my $extrd = sub {
    my ($mod,$args) = @_;
    my $orig = "extrd$mod\t$args";

    # I only have ",u" completer, it's implicitly encoded...
    if ($args =~ /%r([0-9]+),([0-9]+),([0-9]+),%r([0-9]+)/)	# format 15
    {	my ($source,$pos,$width,$target) = ($1,$2,$3,$4);
	my $opcode=(0x36<<26)|($source<<21)|($target<<16);
	my $len=32-$width;
	$opcode |= (($pos&0x20)<<6)|(($pos&0x1f)<<5);		# encode pos
	$opcode |= (($len&0x20)<<7)|($len&0x1f);		# encode len
	return sprintf "\t.WORD\t0x%08x\t; %s",$opcode,$orig;
    }
    elsif ($args =~ /%r([0-9]+),%sar,([0-9]+),%r([0-9]+)/)	# format 12
    {	my ($source,$width,$target) = ($1,$2,$3);
	my $opcode=(0x34<<26)|($source<<21)|($target<<16)|(2<<11)|(1<<9);
	my $len=32-$width;
	$opcode |= (($len&0x20)<<3)|($len&0x1f);		# encode len
	$opcode |= (1<<13) if ($mod =~ /,\**=/);		# "=" condition
	return sprintf "\t.WORD\t0x%08x\t; %s",$opcode,$orig;
    }
    else { return "\t".$orig; }
};
745
# Encoder for "shrpd": handles the fixed shift form "%rA,%rB,sa,%rT" and the
# variable form "%rA,%rB,%sar,%rT".  The result is a .WORD directive with the
# original text as a trailing comment; any other operand form is returned as
# plain assembler text.
my $shrpd = sub {
    my ($mod,$args) = @_;
    my $orig = "shrpd$mod\t$args";

    if ($args =~ /%r([0-9]+),%r([0-9]+),([0-9]+),%r([0-9]+)/)	# format 14
    {	my ($first,$second,$shift,$target) = ($1,$2,$3,$4);
	my $opcode=(0x34<<26)|($second<<21)|($first<<16)|(1<<10)|$target;
	my $cpos=63-$shift;
	$opcode |= (($cpos&0x20)<<6)|(($cpos&0x1f)<<5);		# encode sa
	return sprintf "\t.WORD\t0x%08x\t; %s",$opcode,$orig;
    }
    elsif ($args =~ /%r([0-9]+),%r([0-9]+),%sar,%r([0-9]+)/)	# format 11
    {	my ($first,$second,$target) = ($1,$2,$3);
	return sprintf "\t.WORD\t0x%08x\t; %s",
		(0x34<<26)|($second<<21)|($first<<16)|(1<<9)|$target,$orig;
    }
    else { return "\t".$orig; }
};
762
# Dispatch one instruction to its hand-written encoder, if there is one.
#
# $mnemonic is the bare instruction name, $mod its completer string and
# $args the operand string.  The encoder is looked up by name: a variable
# called $<mnemonic> holding a code reference is called with ($mod,$args)
# and its result returned.  Otherwise the instruction is re-emitted as
# "\t<mnemonic><mod>\t<args>".  The string eval is deliberate: the encoders
# are lexical variables, so they cannot be reached through the symbol table.
# The only caller visible here passes a mnemonic captured by [a-z]+.
sub assemble {
    my ($mnemonic,$mod,$args)=@_;
    my $encoder = eval("\$$mnemonic");

    return $encoder->($mod,$args) if (ref($encoder) eq 'CODE');
    return "\t$mnemonic$mod\t$args";
}
769
# Post-process the accumulated template line by line and print the result:
#  1. evaluate every back-quoted Perl expression;
#  2. rewrite 32-bit "shd" with a shift amount above 31 into the equivalent
#     form with swapped source registers, or else expand one of the made-up
#     mnemonics (_ror, _shr, _align, _shl) into the real instruction for the
#     selected word and pointer size;
#  3. in 32-bit builds pass each instruction through assemble() and turn
#     "cmpb,*" into "comb,";
#  4. in 64-bit builds turn "bv" into "bve".
foreach (split("\n",$code)) {
	s/\`([^\`]*)\`/eval $1/ge;

	s/shd\s+(%r[0-9]+),(%r[0-9]+),([0-9]+)/
		$3>31 ? sprintf("shd\t%$2,%$1,%d",$3-32)	# rotation for >=32
		:       sprintf("shd\t%$1,%$2,%d",$3)/e			or
	# translate made up instructions: _ror, _shr, _align, _shl
	s/_ror(\s+)(%r[0-9]+),/
		($SZ==4 ? "shd" : "shrpd")."$1$2,$2,"/e			or

	s/_shr(\s+%r[0-9]+),([0-9]+),/
		$SZ==4 ? sprintf("extru%s,%d,%d,",$1,31-$2,32-$2)
		:        sprintf("extrd,u%s,%d,%d,",$1,63-$2,64-$2)/e	or

	s/_align(\s+%r[0-9]+,%r[0-9]+),/
		($SZ==4 ? "vshd$1," : "shrpd$1,%sar,")/e		or

	s/_shl(\s+%r[0-9]+),([0-9]+),/
		$SIZE_T==4 ? sprintf("zdep%s,%d,%d,",$1,31-$2,32-$2)
		:            sprintf("depd,z%s,%d,%d,",$1,63-$2,64-$2)/e;

	s/^\s+([a-z]+)([\S]*)\s+([\S]*)/&assemble($1,$2,$3)/e	if ($SIZE_T==4);

	s/cmpb,\*/comb,/					if ($SIZE_T==4);

	s/\bbv\b/bve/						if ($SIZE_T==8);

	print $_,"\n";
}

# Buffered write errors only surface on close, so do not ignore them.
close STDOUT or die "error closing STDOUT: $!";