]> git.ipfire.org Git - thirdparty/openssl.git/blob - crypto/modes/asm/ghash-parisc.pl
Update copyright year
[thirdparty/openssl.git] / crypto / modes / asm / ghash-parisc.pl
1 #! /usr/bin/env perl
2 # Copyright 2010-2020 The OpenSSL Project Authors. All Rights Reserved.
3 #
4 # Licensed under the Apache License 2.0 (the "License"). You may not use
5 # this file except in compliance with the License. You can obtain a copy
6 # in the file LICENSE in the source distribution or at
7 # https://www.openssl.org/source/license.html
8
9 #
10 # ====================================================================
11 # Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
12 # project. The module is, however, dual licensed under OpenSSL and
13 # CRYPTOGAMS licenses depending on where you obtain it. For further
14 # details see http://www.openssl.org/~appro/cryptogams/.
15 # ====================================================================
16 #
17 # April 2010
18 #
19 # The module implements "4-bit" GCM GHASH function and underlying
20 # single multiplication operation in GF(2^128). "4-bit" means that it
21 # uses 256 bytes per-key table [+128 bytes shared table]. On PA-7100LC
22 # it processes one byte in 19.6 cycles, which is more than twice as
23 # fast as code generated by gcc 3.2. PA-RISC 2.0 loop is scheduled for
24 # 8 cycles, but measured performance on PA-8600 system is ~9 cycles per
25 # processed byte. This is ~2.2x faster than 64-bit code generated by
26 # vendor compiler (which used to be very hard to beat:-).
27 #
28 # Special thanks to polarhome.com for providing HP-UX account.
29
# Command line: [flavour] [output-file]
# $output is the last argument if it looks like a file (it has an extension)
# $flavour is the first argument if it doesn't look like a file
$output = $#ARGV >= 0 && $ARGV[$#ARGV] =~ m|\.\w+$| ? pop : undef;
$flavour = $#ARGV >= 0 && $ARGV[0] !~ m|\.| ? shift : undef;

# Redirect STDOUT to the requested file. Three-argument open keeps the
# file name from being parsed as a mode, and a failure aborts the run
# instead of silently sending the generated assembly to the inherited
# stdout.
if ($output) {
    open(STDOUT, '>', $output) or die "can't open $output: $!";
}
36
# ABI-dependent parameters, chosen by whether $flavour mentions "64".
# In order: value for the .LEVEL directive, $SIZE_T (multiplier for the
# register save slots), frame marker size, offset below %sp at which %r2
# is stored, the store / store-and-modify / load / load-and-modify
# mnemonics used by the prologue and epilogue, and the ENTRY_GR value.
# The non-64 .LEVEL is plain "1.0" (no ".ALLOW 2.0" appended); PA-RISC 2.0
# instructions are emitted as raw .WORDs by the encoders further down.
($LEVEL, $SIZE_T, $FRAME_MARKER, $SAVED_RP,
 $PUSH, $PUSHMA, $POP, $POPMB, $NREGS) =
    ($flavour =~ /64/)
	? ("2.0W", 8, 80, 16, "std", "std,ma", "ldd", "ldd,mb", 6)
	: ("1.0",  4, 48, 20, "stw", "stwm",   "ldw", "ldwm",   11);

# Frame marker plus ten $SIZE_T-sized slots (register save area
# [+ argument transfer]).
$FRAME = $FRAME_MARKER + 10*$SIZE_T;
61
################## volatile registers
($Xi,$Htbl,$inp,$len)=map("%r$_",(26,25,24,23));	# argument block
$Hhh=$Htbl;						# variables
($Hll,$Zhh,$Zll,$cnt)=map("%r$_",(22,21,20,19));
($rem_4bit,$rem,$mask0xf0)=map("%r$_",(28,29,31));

################## preserved registers
($Thh,$Tll,$nlo,$nhi,$byte)=map("%r$_",(1..5));
# The extra 32-bit halves are only named when $SIZE_T is 4.
($Zhl,$Zlh,$Hhl,$Hlh,$Thl,$Tlh)=map("%r$_",(6..11)) if ($SIZE_T==4);
$rem2="%r6";	# used in PA-RISC 2.0 code
91
92 $code.=<<___;
93 .LEVEL $LEVEL
94 .SPACE \$TEXT\$
95 .SUBSPA \$CODE\$,QUAD=0,ALIGN=8,ACCESS=0x2C,CODE_ONLY
96
97 .EXPORT gcm_gmult_4bit,ENTRY,ARGW0=GR,ARGW1=GR
98 .ALIGN 64
99 gcm_gmult_4bit
100 .PROC
101 .CALLINFO FRAME=`$FRAME-10*$SIZE_T`,NO_CALLS,SAVE_RP,ENTRY_GR=$NREGS
102 .ENTRY
103 $PUSH %r2,-$SAVED_RP(%sp) ; standard prologue
104 $PUSHMA %r3,$FRAME(%sp)
105 $PUSH %r4,`-$FRAME+1*$SIZE_T`(%sp)
106 $PUSH %r5,`-$FRAME+2*$SIZE_T`(%sp)
107 $PUSH %r6,`-$FRAME+3*$SIZE_T`(%sp)
108 ___
109 $code.=<<___ if ($SIZE_T==4);
110 $PUSH %r7,`-$FRAME+4*$SIZE_T`(%sp)
111 $PUSH %r8,`-$FRAME+5*$SIZE_T`(%sp)
112 $PUSH %r9,`-$FRAME+6*$SIZE_T`(%sp)
113 $PUSH %r10,`-$FRAME+7*$SIZE_T`(%sp)
114 $PUSH %r11,`-$FRAME+8*$SIZE_T`(%sp)
115 ___
116 $code.=<<___;
117 blr %r0,$rem_4bit
118 ldi 3,$rem
119 L\$pic_gmult
120 andcm $rem_4bit,$rem,$rem_4bit
121 addl $inp,$len,$len
122 ldo L\$rem_4bit-L\$pic_gmult($rem_4bit),$rem_4bit
123 ldi 0xf0,$mask0xf0
124 ___
125 $code.=<<___ if ($SIZE_T==4);
126 ldi 31,$rem
127 mtctl $rem,%cr11
128 extrd,u,*= $rem,%sar,1,$rem ; executes on PA-RISC 1.0
129 b L\$parisc1_gmult
130 nop
131 ___
132 \f
133 $code.=<<___;
134 ldb 15($Xi),$nlo
135 ldo 8($Htbl),$Hll
136
137 and $mask0xf0,$nlo,$nhi
138 depd,z $nlo,59,4,$nlo
139
140 ldd $nlo($Hll),$Zll
141 ldd $nlo($Hhh),$Zhh
142
143 depd,z $Zll,60,4,$rem
144 shrpd $Zhh,$Zll,4,$Zll
145 extrd,u $Zhh,59,60,$Zhh
146 ldb 14($Xi),$nlo
147
148 ldd $nhi($Hll),$Tll
149 ldd $nhi($Hhh),$Thh
150 and $mask0xf0,$nlo,$nhi
151 depd,z $nlo,59,4,$nlo
152
153 xor $Tll,$Zll,$Zll
154 xor $Thh,$Zhh,$Zhh
155 ldd $rem($rem_4bit),$rem
156 b L\$oop_gmult_pa2
157 ldi 13,$cnt
158
159 .ALIGN 8
160 L\$oop_gmult_pa2
161 xor $rem,$Zhh,$Zhh ; moved here to work around gas bug
162 depd,z $Zll,60,4,$rem
163
164 shrpd $Zhh,$Zll,4,$Zll
165 extrd,u $Zhh,59,60,$Zhh
166 ldd $nlo($Hll),$Tll
167 ldd $nlo($Hhh),$Thh
168
169 xor $Tll,$Zll,$Zll
170 xor $Thh,$Zhh,$Zhh
171 ldd $rem($rem_4bit),$rem
172
173 xor $rem,$Zhh,$Zhh
174 depd,z $Zll,60,4,$rem
175 ldbx $cnt($Xi),$nlo
176
177 shrpd $Zhh,$Zll,4,$Zll
178 extrd,u $Zhh,59,60,$Zhh
179 ldd $nhi($Hll),$Tll
180 ldd $nhi($Hhh),$Thh
181
182 and $mask0xf0,$nlo,$nhi
183 depd,z $nlo,59,4,$nlo
184 ldd $rem($rem_4bit),$rem
185
186 xor $Tll,$Zll,$Zll
187 addib,uv -1,$cnt,L\$oop_gmult_pa2
188 xor $Thh,$Zhh,$Zhh
189
190 xor $rem,$Zhh,$Zhh
191 depd,z $Zll,60,4,$rem
192
193 shrpd $Zhh,$Zll,4,$Zll
194 extrd,u $Zhh,59,60,$Zhh
195 ldd $nlo($Hll),$Tll
196 ldd $nlo($Hhh),$Thh
197
198 xor $Tll,$Zll,$Zll
199 xor $Thh,$Zhh,$Zhh
200 ldd $rem($rem_4bit),$rem
201
202 xor $rem,$Zhh,$Zhh
203 depd,z $Zll,60,4,$rem
204
205 shrpd $Zhh,$Zll,4,$Zll
206 extrd,u $Zhh,59,60,$Zhh
207 ldd $nhi($Hll),$Tll
208 ldd $nhi($Hhh),$Thh
209
210 xor $Tll,$Zll,$Zll
211 xor $Thh,$Zhh,$Zhh
212 ldd $rem($rem_4bit),$rem
213
214 xor $rem,$Zhh,$Zhh
215 std $Zll,8($Xi)
216 std $Zhh,0($Xi)
217 ___
218 \f
219 $code.=<<___ if ($SIZE_T==4);
220 b L\$done_gmult
221 nop
222
223 L\$parisc1_gmult
224 ldb 15($Xi),$nlo
225 ldo 12($Htbl),$Hll
226 ldo 8($Htbl),$Hlh
227 ldo 4($Htbl),$Hhl
228
229 and $mask0xf0,$nlo,$nhi
230 zdep $nlo,27,4,$nlo
231
232 ldwx $nlo($Hll),$Zll
233 ldwx $nlo($Hlh),$Zlh
234 ldwx $nlo($Hhl),$Zhl
235 ldwx $nlo($Hhh),$Zhh
236 zdep $Zll,28,4,$rem
237 ldb 14($Xi),$nlo
238 ldwx $rem($rem_4bit),$rem
239 shrpw $Zlh,$Zll,4,$Zll
240 ldwx $nhi($Hll),$Tll
241 shrpw $Zhl,$Zlh,4,$Zlh
242 ldwx $nhi($Hlh),$Tlh
243 shrpw $Zhh,$Zhl,4,$Zhl
244 ldwx $nhi($Hhl),$Thl
245 extru $Zhh,27,28,$Zhh
246 ldwx $nhi($Hhh),$Thh
247 xor $rem,$Zhh,$Zhh
248 and $mask0xf0,$nlo,$nhi
249 zdep $nlo,27,4,$nlo
250
251 xor $Tll,$Zll,$Zll
252 ldwx $nlo($Hll),$Tll
253 xor $Tlh,$Zlh,$Zlh
254 ldwx $nlo($Hlh),$Tlh
255 xor $Thl,$Zhl,$Zhl
256 b L\$oop_gmult_pa1
257 ldi 13,$cnt
258
259 .ALIGN 8
260 L\$oop_gmult_pa1
261 zdep $Zll,28,4,$rem
262 ldwx $nlo($Hhl),$Thl
263 xor $Thh,$Zhh,$Zhh
264 ldwx $rem($rem_4bit),$rem
265 shrpw $Zlh,$Zll,4,$Zll
266 ldwx $nlo($Hhh),$Thh
267 shrpw $Zhl,$Zlh,4,$Zlh
268 ldbx $cnt($Xi),$nlo
269 xor $Tll,$Zll,$Zll
270 ldwx $nhi($Hll),$Tll
271 shrpw $Zhh,$Zhl,4,$Zhl
272 xor $Tlh,$Zlh,$Zlh
273 ldwx $nhi($Hlh),$Tlh
274 extru $Zhh,27,28,$Zhh
275 xor $Thl,$Zhl,$Zhl
276 ldwx $nhi($Hhl),$Thl
277 xor $rem,$Zhh,$Zhh
278 zdep $Zll,28,4,$rem
279 xor $Thh,$Zhh,$Zhh
280 ldwx $nhi($Hhh),$Thh
281 shrpw $Zlh,$Zll,4,$Zll
282 ldwx $rem($rem_4bit),$rem
283 shrpw $Zhl,$Zlh,4,$Zlh
284 shrpw $Zhh,$Zhl,4,$Zhl
285 and $mask0xf0,$nlo,$nhi
286 extru $Zhh,27,28,$Zhh
287 zdep $nlo,27,4,$nlo
288 xor $Tll,$Zll,$Zll
289 ldwx $nlo($Hll),$Tll
290 xor $Tlh,$Zlh,$Zlh
291 ldwx $nlo($Hlh),$Tlh
292 xor $rem,$Zhh,$Zhh
293 addib,uv -1,$cnt,L\$oop_gmult_pa1
294 xor $Thl,$Zhl,$Zhl
295
296 zdep $Zll,28,4,$rem
297 ldwx $nlo($Hhl),$Thl
298 xor $Thh,$Zhh,$Zhh
299 ldwx $rem($rem_4bit),$rem
300 shrpw $Zlh,$Zll,4,$Zll
301 ldwx $nlo($Hhh),$Thh
302 shrpw $Zhl,$Zlh,4,$Zlh
303 xor $Tll,$Zll,$Zll
304 ldwx $nhi($Hll),$Tll
305 shrpw $Zhh,$Zhl,4,$Zhl
306 xor $Tlh,$Zlh,$Zlh
307 ldwx $nhi($Hlh),$Tlh
308 extru $Zhh,27,28,$Zhh
309 xor $rem,$Zhh,$Zhh
310 xor $Thl,$Zhl,$Zhl
311 ldwx $nhi($Hhl),$Thl
312 xor $Thh,$Zhh,$Zhh
313 ldwx $nhi($Hhh),$Thh
314 zdep $Zll,28,4,$rem
315 ldwx $rem($rem_4bit),$rem
316 shrpw $Zlh,$Zll,4,$Zll
317 shrpw $Zhl,$Zlh,4,$Zlh
318 shrpw $Zhh,$Zhl,4,$Zhl
319 extru $Zhh,27,28,$Zhh
320 xor $Tll,$Zll,$Zll
321 xor $Tlh,$Zlh,$Zlh
322 xor $rem,$Zhh,$Zhh
323 stw $Zll,12($Xi)
324 xor $Thl,$Zhl,$Zhl
325 stw $Zlh,8($Xi)
326 xor $Thh,$Zhh,$Zhh
327 stw $Zhl,4($Xi)
328 stw $Zhh,0($Xi)
329 ___
330 $code.=<<___;
331 L\$done_gmult
332 $POP `-$FRAME-$SAVED_RP`(%sp),%r2 ; standard epilogue
333 $POP `-$FRAME+1*$SIZE_T`(%sp),%r4
334 $POP `-$FRAME+2*$SIZE_T`(%sp),%r5
335 $POP `-$FRAME+3*$SIZE_T`(%sp),%r6
336 ___
337 $code.=<<___ if ($SIZE_T==4);
338 $POP `-$FRAME+4*$SIZE_T`(%sp),%r7
339 $POP `-$FRAME+5*$SIZE_T`(%sp),%r8
340 $POP `-$FRAME+6*$SIZE_T`(%sp),%r9
341 $POP `-$FRAME+7*$SIZE_T`(%sp),%r10
342 $POP `-$FRAME+8*$SIZE_T`(%sp),%r11
343 ___
# Return sequence of gcm_gmult_4bit (restoring %r3 in the branch delay
# slot), followed by the entry of gcm_ghash_4bit. ENTRY_GR is $NREGS, as
# in gcm_gmult_4bit, so that the descriptor agrees with what the prologue
# actually stores: %r3-%r6 always, %r7-%r11 only when $SIZE_T is 4.
$code.=<<___;
	bv	(%r2)
	.EXIT
	$POPMB	-$FRAME(%sp),%r3
	.PROCEND

	.EXPORT	gcm_ghash_4bit,ENTRY,ARGW0=GR,ARGW1=GR,ARGW2=GR,ARGW3=GR
	.ALIGN	64
gcm_ghash_4bit
	.PROC
	.CALLINFO	FRAME=`$FRAME-10*$SIZE_T`,NO_CALLS,SAVE_RP,ENTRY_GR=$NREGS
	.ENTRY
	$PUSH	%r2,-$SAVED_RP(%sp)	; standard prologue
	$PUSHMA	%r3,$FRAME(%sp)
	$PUSH	%r4,`-$FRAME+1*$SIZE_T`(%sp)
	$PUSH	%r5,`-$FRAME+2*$SIZE_T`(%sp)
	$PUSH	%r6,`-$FRAME+3*$SIZE_T`(%sp)
___
362 $code.=<<___ if ($SIZE_T==4);
363 $PUSH %r7,`-$FRAME+4*$SIZE_T`(%sp)
364 $PUSH %r8,`-$FRAME+5*$SIZE_T`(%sp)
365 $PUSH %r9,`-$FRAME+6*$SIZE_T`(%sp)
366 $PUSH %r10,`-$FRAME+7*$SIZE_T`(%sp)
367 $PUSH %r11,`-$FRAME+8*$SIZE_T`(%sp)
368 ___
369 $code.=<<___;
370 blr %r0,$rem_4bit
371 ldi 3,$rem
372 L\$pic_ghash
373 andcm $rem_4bit,$rem,$rem_4bit
374 addl $inp,$len,$len
375 ldo L\$rem_4bit-L\$pic_ghash($rem_4bit),$rem_4bit
376 ldi 0xf0,$mask0xf0
377 ___
378 $code.=<<___ if ($SIZE_T==4);
379 ldi 31,$rem
380 mtctl $rem,%cr11
381 extrd,u,*= $rem,%sar,1,$rem ; executes on PA-RISC 1.0
382 b L\$parisc1_ghash
383 nop
384 ___
385 \f\f
386 $code.=<<___;
387 ldb 15($Xi),$nlo
388 ldo 8($Htbl),$Hll
389
390 L\$outer_ghash_pa2
391 ldb 15($inp),$nhi
392 xor $nhi,$nlo,$nlo
393 and $mask0xf0,$nlo,$nhi
394 depd,z $nlo,59,4,$nlo
395
396 ldd $nlo($Hll),$Zll
397 ldd $nlo($Hhh),$Zhh
398
399 depd,z $Zll,60,4,$rem
400 shrpd $Zhh,$Zll,4,$Zll
401 extrd,u $Zhh,59,60,$Zhh
402 ldb 14($Xi),$nlo
403 ldb 14($inp),$byte
404
405 ldd $nhi($Hll),$Tll
406 ldd $nhi($Hhh),$Thh
407 xor $byte,$nlo,$nlo
408 and $mask0xf0,$nlo,$nhi
409 depd,z $nlo,59,4,$nlo
410
411 xor $Tll,$Zll,$Zll
412 xor $Thh,$Zhh,$Zhh
413 ldd $rem($rem_4bit),$rem
414 b L\$oop_ghash_pa2
415 ldi 13,$cnt
416
417 .ALIGN 8
418 L\$oop_ghash_pa2
419 xor $rem,$Zhh,$Zhh ; moved here to work around gas bug
420 depd,z $Zll,60,4,$rem2
421
422 shrpd $Zhh,$Zll,4,$Zll
423 extrd,u $Zhh,59,60,$Zhh
424 ldd $nlo($Hll),$Tll
425 ldd $nlo($Hhh),$Thh
426
427 xor $Tll,$Zll,$Zll
428 xor $Thh,$Zhh,$Zhh
429 ldbx $cnt($Xi),$nlo
430 ldbx $cnt($inp),$byte
431
432 depd,z $Zll,60,4,$rem
433 shrpd $Zhh,$Zll,4,$Zll
434 ldd $rem2($rem_4bit),$rem2
435
436 xor $rem2,$Zhh,$Zhh
437 xor $byte,$nlo,$nlo
438 ldd $nhi($Hll),$Tll
439 ldd $nhi($Hhh),$Thh
440
441 and $mask0xf0,$nlo,$nhi
442 depd,z $nlo,59,4,$nlo
443
444 extrd,u $Zhh,59,60,$Zhh
445 xor $Tll,$Zll,$Zll
446
447 ldd $rem($rem_4bit),$rem
448 addib,uv -1,$cnt,L\$oop_ghash_pa2
449 xor $Thh,$Zhh,$Zhh
450
451 xor $rem,$Zhh,$Zhh
452 depd,z $Zll,60,4,$rem2
453
454 shrpd $Zhh,$Zll,4,$Zll
455 extrd,u $Zhh,59,60,$Zhh
456 ldd $nlo($Hll),$Tll
457 ldd $nlo($Hhh),$Thh
458
459 xor $Tll,$Zll,$Zll
460 xor $Thh,$Zhh,$Zhh
461
462 depd,z $Zll,60,4,$rem
463 shrpd $Zhh,$Zll,4,$Zll
464 ldd $rem2($rem_4bit),$rem2
465
466 xor $rem2,$Zhh,$Zhh
467 ldd $nhi($Hll),$Tll
468 ldd $nhi($Hhh),$Thh
469
470 extrd,u $Zhh,59,60,$Zhh
471 xor $Tll,$Zll,$Zll
472 xor $Thh,$Zhh,$Zhh
473 ldd $rem($rem_4bit),$rem
474
475 xor $rem,$Zhh,$Zhh
476 std $Zll,8($Xi)
477 ldo 16($inp),$inp
478 std $Zhh,0($Xi)
479 cmpb,*<> $inp,$len,L\$outer_ghash_pa2
480 copy $Zll,$nlo
481 ___
482 \f
483 $code.=<<___ if ($SIZE_T==4);
484 b L\$done_ghash
485 nop
486
487 L\$parisc1_ghash
488 ldb 15($Xi),$nlo
489 ldo 12($Htbl),$Hll
490 ldo 8($Htbl),$Hlh
491 ldo 4($Htbl),$Hhl
492
493 L\$outer_ghash_pa1
494 ldb 15($inp),$byte
495 xor $byte,$nlo,$nlo
496 and $mask0xf0,$nlo,$nhi
497 zdep $nlo,27,4,$nlo
498
499 ldwx $nlo($Hll),$Zll
500 ldwx $nlo($Hlh),$Zlh
501 ldwx $nlo($Hhl),$Zhl
502 ldwx $nlo($Hhh),$Zhh
503 zdep $Zll,28,4,$rem
504 ldb 14($Xi),$nlo
505 ldb 14($inp),$byte
506 ldwx $rem($rem_4bit),$rem
507 shrpw $Zlh,$Zll,4,$Zll
508 ldwx $nhi($Hll),$Tll
509 shrpw $Zhl,$Zlh,4,$Zlh
510 ldwx $nhi($Hlh),$Tlh
511 shrpw $Zhh,$Zhl,4,$Zhl
512 ldwx $nhi($Hhl),$Thl
513 extru $Zhh,27,28,$Zhh
514 ldwx $nhi($Hhh),$Thh
515 xor $byte,$nlo,$nlo
516 xor $rem,$Zhh,$Zhh
517 and $mask0xf0,$nlo,$nhi
518 zdep $nlo,27,4,$nlo
519
520 xor $Tll,$Zll,$Zll
521 ldwx $nlo($Hll),$Tll
522 xor $Tlh,$Zlh,$Zlh
523 ldwx $nlo($Hlh),$Tlh
524 xor $Thl,$Zhl,$Zhl
525 b L\$oop_ghash_pa1
526 ldi 13,$cnt
527
528 .ALIGN 8
529 L\$oop_ghash_pa1
530 zdep $Zll,28,4,$rem
531 ldwx $nlo($Hhl),$Thl
532 xor $Thh,$Zhh,$Zhh
533 ldwx $rem($rem_4bit),$rem
534 shrpw $Zlh,$Zll,4,$Zll
535 ldwx $nlo($Hhh),$Thh
536 shrpw $Zhl,$Zlh,4,$Zlh
537 ldbx $cnt($Xi),$nlo
538 xor $Tll,$Zll,$Zll
539 ldwx $nhi($Hll),$Tll
540 shrpw $Zhh,$Zhl,4,$Zhl
541 ldbx $cnt($inp),$byte
542 xor $Tlh,$Zlh,$Zlh
543 ldwx $nhi($Hlh),$Tlh
544 extru $Zhh,27,28,$Zhh
545 xor $Thl,$Zhl,$Zhl
546 ldwx $nhi($Hhl),$Thl
547 xor $rem,$Zhh,$Zhh
548 zdep $Zll,28,4,$rem
549 xor $Thh,$Zhh,$Zhh
550 ldwx $nhi($Hhh),$Thh
551 shrpw $Zlh,$Zll,4,$Zll
552 ldwx $rem($rem_4bit),$rem
553 shrpw $Zhl,$Zlh,4,$Zlh
554 xor $byte,$nlo,$nlo
555 shrpw $Zhh,$Zhl,4,$Zhl
556 and $mask0xf0,$nlo,$nhi
557 extru $Zhh,27,28,$Zhh
558 zdep $nlo,27,4,$nlo
559 xor $Tll,$Zll,$Zll
560 ldwx $nlo($Hll),$Tll
561 xor $Tlh,$Zlh,$Zlh
562 ldwx $nlo($Hlh),$Tlh
563 xor $rem,$Zhh,$Zhh
564 addib,uv -1,$cnt,L\$oop_ghash_pa1
565 xor $Thl,$Zhl,$Zhl
566
567 zdep $Zll,28,4,$rem
568 ldwx $nlo($Hhl),$Thl
569 xor $Thh,$Zhh,$Zhh
570 ldwx $rem($rem_4bit),$rem
571 shrpw $Zlh,$Zll,4,$Zll
572 ldwx $nlo($Hhh),$Thh
573 shrpw $Zhl,$Zlh,4,$Zlh
574 xor $Tll,$Zll,$Zll
575 ldwx $nhi($Hll),$Tll
576 shrpw $Zhh,$Zhl,4,$Zhl
577 xor $Tlh,$Zlh,$Zlh
578 ldwx $nhi($Hlh),$Tlh
579 extru $Zhh,27,28,$Zhh
580 xor $rem,$Zhh,$Zhh
581 xor $Thl,$Zhl,$Zhl
582 ldwx $nhi($Hhl),$Thl
583 xor $Thh,$Zhh,$Zhh
584 ldwx $nhi($Hhh),$Thh
585 zdep $Zll,28,4,$rem
586 ldwx $rem($rem_4bit),$rem
587 shrpw $Zlh,$Zll,4,$Zll
588 shrpw $Zhl,$Zlh,4,$Zlh
589 shrpw $Zhh,$Zhl,4,$Zhl
590 extru $Zhh,27,28,$Zhh
591 xor $Tll,$Zll,$Zll
592 xor $Tlh,$Zlh,$Zlh
593 xor $rem,$Zhh,$Zhh
594 stw $Zll,12($Xi)
595 xor $Thl,$Zhl,$Zhl
596 stw $Zlh,8($Xi)
597 xor $Thh,$Zhh,$Zhh
598 stw $Zhl,4($Xi)
599 ldo 16($inp),$inp
600 stw $Zhh,0($Xi)
601 comb,<> $inp,$len,L\$outer_ghash_pa1
602 copy $Zll,$nlo
603 ___
604 $code.=<<___;
605 L\$done_ghash
606 $POP `-$FRAME-$SAVED_RP`(%sp),%r2 ; standard epilogue
607 $POP `-$FRAME+1*$SIZE_T`(%sp),%r4
608 $POP `-$FRAME+2*$SIZE_T`(%sp),%r5
609 $POP `-$FRAME+3*$SIZE_T`(%sp),%r6
610 ___
611 $code.=<<___ if ($SIZE_T==4);
612 $POP `-$FRAME+4*$SIZE_T`(%sp),%r7
613 $POP `-$FRAME+5*$SIZE_T`(%sp),%r8
614 $POP `-$FRAME+6*$SIZE_T`(%sp),%r9
615 $POP `-$FRAME+7*$SIZE_T`(%sp),%r10
616 $POP `-$FRAME+8*$SIZE_T`(%sp),%r11
617 ___
# Return sequence of gcm_ghash_4bit (restoring %r3 in the branch delay
# slot), then the 64-byte-aligned L$rem_4bit table that both routines
# index via $rem_4bit: sixteen entries, each a 16-bit constant shifted
# into the top half of the first word followed by a zero word. The
# identification string spells "CRYPTOGAMS" correctly.
$code.=<<___;
	bv	(%r2)
	.EXIT
	$POPMB	-$FRAME(%sp),%r3
	.PROCEND

	.ALIGN	64
L\$rem_4bit
	.WORD	`0x0000<<16`,0,`0x1C20<<16`,0,`0x3840<<16`,0,`0x2460<<16`,0
	.WORD	`0x7080<<16`,0,`0x6CA0<<16`,0,`0x48C0<<16`,0,`0x54E0<<16`,0
	.WORD	`0xE100<<16`,0,`0xFD20<<16`,0,`0xD940<<16`,0,`0xC560<<16`,0
	.WORD	`0x9180<<16`,0,`0x8DA0<<16`,0,`0xA9C0<<16`,0,`0xB5E0<<16`,0
	.STRINGZ "GHASH for PA-RISC, CRYPTOGAMS by <appro\@openssl.org>"
	.ALIGN	64
___
633
634 # Explicitly encode PA-RISC 2.0 instructions used in this module, so
635 # that it can be compiled with .LEVEL 1.0. It should be noted that I
636 # wouldn't have to do this, if GNU assembler understood .ALLOW 2.0
637 # directive...
638
# Encoder for "ldd": given the completer string and the operand string,
# return either a ".WORD" line carrying the hand-assembled opcode (with
# the original instruction as a trailing comment) or, when the operands
# match neither supported form, the instruction text unchanged.
my $ldd = sub {
    my ($completer,$operands) = @_;
    my $text = "ldd$completer\t$operands";
    my $insn;

    if ($operands =~ /%r([0-9]+)\(%r([0-9]+)\),%r([0-9]+)/) {		# format 4
	my ($index,$base,$target) = ($1,$2,$3);
	$insn = (0x03<<26)|($base<<21)|($index<<16)|(3<<6)|$target;
    }
    elsif ($operands =~ /(\-?[0-9]+)\(%r([0-9]+)\),%r([0-9]+)/) {	# format 5
	my ($disp,$base,$target) = ($1,$2,$3);
	$insn  = (0x03<<26)|($base<<21)|(1<<12)|(3<<6)|$target;
	# encode offset: low four bits go to bits 17-20, bit 4 to bit 16
	$insn |= (($disp&0xF)<<17)|(($disp&0x10)<<12);
	$insn |= (1<<5)  if ($completer =~ /^,m/);
	$insn |= (1<<13) if ($completer =~ /^,mb/);
    }

    return "\t".$text unless (defined $insn);
    return sprintf "\t.WORD\t0x%08x\t; %s",$insn,$text;
};
656
# Encoder for "std": operands of the form "%rS,disp(%rB)" are turned into
# a ".WORD" line with the hand-assembled opcode and the original text as
# a comment; anything else is returned as the plain instruction.
my $std = sub {
    my ($completer,$operands) = @_;
    my $text = "std$completer\t$operands";

    # format 3 suffices
    return "\t".$text
	unless ($operands =~ /%r([0-9]+),(\-?[0-9]+)\(%r([0-9]+)\)/);

    my ($source,$disp,$base) = ($1,$2,$3);
    my $insn = (0x1c<<26)|($base<<21)|($source<<16);
    $insn |= (($disp&0x1FF8)<<1)|(($disp>>13)&1);	# displacement bits

    return sprintf "\t.WORD\t0x%08x\t; %s",$insn,$text;
};
667
# Encoder for "extrd". Two operand shapes are recognised: a fixed
# position ("%rS,pos,len,%rT") and a %sar-relative one
# ("%rS,%sar,len,%rT"). Each yields a ".WORD" line with the original
# instruction as a comment; other operands pass through untouched.
my $extrd = sub {
    my ($completer,$operands) = @_;
    my $text = "extrd$completer\t$operands";
    my $insn;

    # I only have ",u" completer, it's implicitly encoded...
    if ($operands =~ /%r([0-9]+),([0-9]+),([0-9]+),%r([0-9]+)/) {	# format 15
	my ($source,$pos,$width,$target) = ($1,$2,$3,$4);
	my $clen = 32-$width;
	$insn  = (0x36<<26)|($source<<21)|($target<<16);
	$insn |= (($pos&0x20)<<6)|(($pos&0x1f)<<5);		# encode pos
	$insn |= (($clen&0x20)<<7)|($clen&0x1f);		# encode len
    }
    elsif ($operands =~ /%r([0-9]+),%sar,([0-9]+),%r([0-9]+)/) {	# format 12
	my ($source,$width,$target) = ($1,$2,$3);
	my $clen = 32-$width;
	$insn  = (0x34<<26)|($source<<21)|($target<<16)|(2<<11)|(1<<9);
	$insn |= (($clen&0x20)<<3)|($clen&0x1f);		# encode len
	# an "=" condition (optionally "*"-prefixed) sets bit 13
	$insn |= (1<<13) if ($completer =~ /,\**=/);
    }

    return "\t".$text unless (defined $insn);
    return sprintf "\t.WORD\t0x%08x\t; %s",$insn,$text;
};
689
# Encoder for "shrpd". Handles a literal shift amount
# ("%rA,%rB,sa,%rT") and a %sar-controlled shift ("%rA,%rB,%sar,%rT"),
# returning a ".WORD" line with the original instruction as a comment.
# Unrecognised operands are returned as the plain instruction.
my $shrpd = sub {
    my ($completer,$operands) = @_;
    my $text = "shrpd$completer\t$operands";
    my $insn;

    if ($operands =~ /%r([0-9]+),%r([0-9]+),([0-9]+),%r([0-9]+)/) {	# format 14
	my ($first,$second,$shift,$target) = ($1,$2,$3,$4);
	my $cpos = 63-$shift;
	$insn  = (0x34<<26)|($second<<21)|($first<<16)|(1<<10)|$target;
	$insn |= (($cpos&0x20)<<6)|(($cpos&0x1f)<<5);		# encode sa
    }
    elsif ($operands =~ /%r([0-9]+),%r([0-9]+),%sar,%r([0-9]+)/) {	# format 11
	my ($first,$second,$target) = ($1,$2,$3);
	$insn = (0x34<<26)|($second<<21)|($first<<16)|(1<<9)|$target;
    }

    return "\t".$text unless (defined $insn);
    return sprintf "\t.WORD\t0x%08x\t; %s",$insn,$text;
};
706
# Encoder for "depd": operands of the form "%rS,pos,len,%rT" become a
# ".WORD" line with the hand-assembled opcode and the original text as a
# comment; anything else is returned as the plain instruction.
my $depd = sub {
    my ($completer,$operands) = @_;
    my $text = "depd$completer\t$operands";

    # I only have ",z" completer, it's implicitly encoded...
    return "\t".$text
	unless ($operands =~ /%r([0-9]+),([0-9]+),([0-9]+),%r([0-9]+)/);	# format 16

    my ($source,$pos,$width,$target) = ($1,$2,$3,$4);
    my $cpos = 63-$pos;
    my $clen = 32-$width;
    my $insn = (0x3c<<26)|($target<<21)|($source<<16);
    $insn |= (($cpos&0x20)<<6)|(($cpos&0x1f)<<5);	# encode pos
    $insn |= (($clen&0x20)<<7)|($clen&0x1f);		# encode len

    return sprintf "\t.WORD\t0x%08x\t; %s",$insn,$text;
};
722
# assemble(mnemonic, completers, operands)
#
# Returns the text for one instruction line. If an encoder exists for
# the mnemonic, its result is returned (a raw ".WORD" line, or the
# instruction itself when the encoder does not recognise the operands).
# Every other mnemonic is reassembled as "\t<mnemonic><completers>\t<operands>".
sub assemble {
    my ($mnemonic,$mod,$args)=@_;

    # Explicit dispatch table rather than a string eval of "\$$mnemonic":
    # only the encoders defined in this file can be reached, and no code
    # is compiled per instruction.
    my %encoder_for = (
	ldd   => $ldd,
	std   => $std,
	extrd => $extrd,
	shrpd => $shrpd,
	depd  => $depd,
    );
    my $opcode = $encoder_for{$mnemonic};

    return ref($opcode) eq 'CODE' ? $opcode->($mod,$args)
				  : "\t$mnemonic$mod\t$args";
}
729
# Run the compiler driver named by the CC environment variable on an
# empty assembler input with "-Wa,-v", capturing stdout and stderr, and
# set $gnuas when the output mentions "GNU assembler".
my $as_banner = `$ENV{CC} -Wa,-v -c -o /dev/null -x assembler /dev/null 2>&1`;
$gnuas = 1 if ($as_banner =~ /GNU assembler/);
734
# Post-process the accumulated template line by line and print it.
foreach my $line (split("\n",$code)) {
    # Evaluate every `...` expression embedded in the template.
    $line =~ s/\`([^\`]*)\`/eval $1/ge;

    if ($SIZE_T==4) {
	# Run each indented lower-case instruction through assemble(),
	# then strip the "*" from condition completers (cmpb becomes comb).
	$line =~ s/^\s+([a-z]+)([\S]*)\s+([\S]*)/assemble($1,$2,$3)/e;
	$line =~ s/cmpb,\*/comb,/;
	$line =~ s/,\*/,/;
    }
    elsif ($SIZE_T==8) {
	if ($gnuas) {
	    # Directive spellings adjusted when the GNU assembler was detected.
	    $line =~ s/(\.LEVEL\s+2\.0)W/$1w/;
	    $line =~ s/\.SPACE\s+\$TEXT\$/.text/;
	    $line =~ s/\.SUBSPA.*//;
	}
	$line =~ s/\bbv\b/bve/;
    }

    print $line,"\n";
}

# Buffered write errors surface here, so the close is checked.
close STDOUT or die "error closing STDOUT: $!";