]> git.ipfire.org Git - thirdparty/openssl.git/blame - crypto/modes/asm/ghash-parisc.pl
Many spelling fixes/typo's corrected.
[thirdparty/openssl.git] / crypto / modes / asm / ghash-parisc.pl
CommitLineData
6aa36e8e
RS
#! /usr/bin/env perl
# Copyright 2010-2016 The OpenSSL Project Authors. All Rights Reserved.
#
# Licensed under the OpenSSL license (the "License").  You may not use
# this file except in compliance with the License.  You can obtain a copy
# in the file LICENSE in the source distribution or at
# https://www.openssl.org/source/license.html

#
# ====================================================================
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
# project. The module is, however, dual licensed under OpenSSL and
# CRYPTOGAMS licenses depending on where you obtain it. For further
# details see http://www.openssl.org/~appro/cryptogams/.
# ====================================================================
#
# April 2010
#
# The module implements "4-bit" GCM GHASH function and underlying
# single multiplication operation in GF(2^128). "4-bit" means that it
# uses 256 bytes per-key table [+128 bytes shared table]. On PA-7100LC
# it processes one byte in 19.6 cycles, which is more than twice as
# fast as code generated by gcc 3.2. PA-RISC 2.0 loop is scheduled for
# 8 cycles, but measured performance on PA-8600 system is ~9 cycles per
# processed byte. This is ~2.2x faster than 64-bit code generated by
# vendor compiler (which used to be very hard to beat:-).
#
# Special thanks to polarhome.com for providing HP-UX account.

# Command line: <flavour> <output file>.  A flavour containing "64" selects
# the 64-bit parameter set further down; anything else selects the 32-bit one.
$flavour = shift;
$output  = shift;

# Send the generated assembly to the named file.  Three-argument open keeps
# characters in the file name from being interpreted as an open mode, and a
# failure is fatal instead of being silently ignored.  When no output file is
# given nothing is reopened and the assembly goes to the existing STDOUT.
if (defined($output) && $output ne "") {
    open(STDOUT, ">", $output) or die "can't open $output: $!";
}

# ABI-dependent parameters substituted into the assembly templates below:
#   $LEVEL        operand of the .LEVEL directive
#   $SIZE_T       size in bytes of one register save slot
#   $FRAME_MARKER bytes added to the save area when computing $FRAME
#   $SAVED_RP     offset (negated in the prologue) at which %r2 is stored
#   $PUSH/$POP    store/load mnemonics used for register save/restore
#   $PUSHMA/$POPMB  the store/load variants used for %r3 together with the
#                 $FRAME-sized stack adjustment
#   $NREGS        value passed as ENTRY_GR in .CALLINFO
if ($flavour =~ /64/) {
    $LEVEL        = "2.0W";
    $SIZE_T       = 8;
    $FRAME_MARKER = 80;
    $SAVED_RP     = 16;
    $PUSH         = "std";
    $PUSHMA       = "std,ma";
    $POP          = "ldd";
    $POPMB        = "ldd,mb";
    $NREGS        = 6;
} else {
    $LEVEL        = "1.0";	#"\n\t.ALLOW\t2.0";
    $SIZE_T       = 4;
    $FRAME_MARKER = 48;
    $SAVED_RP     = 20;
    $PUSH         = "stw";
    $PUSHMA       = "stwm";
    $POP          = "ldw";
    $POPMB        = "ldwm";
    $NREGS        = 11;
}

# Stack frame size: ten save slots of $SIZE_T bytes plus the frame marker.
# (Original note: NREGS saved regs + frame marker [+ argument transfer].)
$FRAME = 10*$SIZE_T + $FRAME_MARKER;

################# volatile registers
$Xi       = "%r26";	# argument block
$Htbl     = "%r25";
$inp      = "%r24";
$len      = "%r23";
$Hhh      = $Htbl;	# variables
$Hll      = "%r22";
$Zhh      = "%r21";
$Zll      = "%r20";
$cnt      = "%r19";
$rem_4bit = "%r28";
$rem      = "%r29";
$mask0xf0 = "%r31";

################# preserved registers
$Thh  = "%r1";
$Tll  = "%r2";
$nlo  = "%r3";
$nhi  = "%r4";
$byte = "%r5";
# The 32-bit build keeps the 128-bit values in four 32-bit words each and so
# needs extra names for the middle words.
if ($SIZE_T==4) {
    $Zhl = "%r6";
    $Zlh = "%r7";
    $Hhl = "%r8";
    $Hlh = "%r9";
    $Thl = "%r10";
    $Tlh = "%r11";
}
# Same register as $Zhl above; $rem2 appears only in the PA-RISC 2.0 ghash
# loop and $Zhl only in the PA-RISC 1.0 code paths.
$rem2 = "%r6";		# used in PA-RISC 2.0 code

# gcm_gmult_4bit: section directives, entry point and register-saving
# prologue.  Labels start in column 0 and instructions are indented; the
# translation loop at the end of the file relies on that leading whitespace
# to recognise instruction lines.  Expressions in `backticks` are evaluated
# by that same loop.
$code.=<<___;
	.LEVEL	$LEVEL
	.SPACE	\$TEXT\$
	.SUBSPA	\$CODE\$,QUAD=0,ALIGN=8,ACCESS=0x2C,CODE_ONLY

	.EXPORT	gcm_gmult_4bit,ENTRY,ARGW0=GR,ARGW1=GR
	.ALIGN	64
gcm_gmult_4bit
	.PROC
	.CALLINFO	FRAME=`$FRAME-10*$SIZE_T`,NO_CALLS,SAVE_RP,ENTRY_GR=$NREGS
	.ENTRY
	$PUSH	%r2,-$SAVED_RP(%sp)	; standard prologue
	$PUSHMA	%r3,$FRAME(%sp)
	$PUSH	%r4,`-$FRAME+1*$SIZE_T`(%sp)
	$PUSH	%r5,`-$FRAME+2*$SIZE_T`(%sp)
	$PUSH	%r6,`-$FRAME+3*$SIZE_T`(%sp)
___
# The 32-bit build additionally saves %r7-%r11.
$code.=<<___ if ($SIZE_T==4);
	$PUSH	%r7,`-$FRAME+4*$SIZE_T`(%sp)
	$PUSH	%r8,`-$FRAME+5*$SIZE_T`(%sp)
	$PUSH	%r9,`-$FRAME+6*$SIZE_T`(%sp)
	$PUSH	%r10,`-$FRAME+7*$SIZE_T`(%sp)
	$PUSH	%r11,`-$FRAME+8*$SIZE_T`(%sp)
___
# Position-independent address of the L$rem_4bit table: blr leaves an
# address in $rem_4bit, andcm clears its two low bits (mask 3), and ldo adds
# the assemble-time distance from L$pic_gmult to the table.
$code.=<<___;
	blr	%r0,$rem_4bit
	ldi	3,$rem
L\$pic_gmult
	andcm	$rem_4bit,$rem,$rem_4bit
	addl	$inp,$len,$len
	ldo	L\$rem_4bit-L\$pic_gmult($rem_4bit),$rem_4bit
	ldi	0xf0,$mask0xf0
___
# 32-bit build only: choose between the PA-RISC 2.0 and 1.0 code paths at
# run time.  NOTE(review): going by the in-line comment, the hand-encoded
# extrd executes harmlessly on a 1.0 CPU so the branch is taken, while on
# 2.0 it presumably nullifies the branch -- confirm against the ISA manual.
$code.=<<___ if ($SIZE_T==4);
	ldi	31,$rem
	mtctl	$rem,%cr11
	extrd,u,*=	$rem,%sar,1,$rem	; executes on PA-RISC 1.0
	b	L\$parisc1_gmult
	nop
___

# gcm_gmult_4bit, PA-RISC 2.0 code path (64-bit ldd/std/depd/shrpd/extrd).
# Bytes 15 and 14 of Xi are handled before the loop, L$oop_gmult_pa2 walks
# the remaining bytes with $cnt counting down from 13, and the code after
# the loop finishes the last byte and stores the result back to Xi.  In the
# 32-bit build the 2.0 instructions are replaced by hand-encoded .WORDs by
# the translation loop at the end of this file.
$code.=<<___;
	ldb	15($Xi),$nlo
	ldo	8($Htbl),$Hll

	and	$mask0xf0,$nlo,$nhi
	depd,z	$nlo,59,4,$nlo

	ldd	$nlo($Hll),$Zll
	ldd	$nlo($Hhh),$Zhh

	depd,z	$Zll,60,4,$rem
	shrpd	$Zhh,$Zll,4,$Zll
	extrd,u	$Zhh,59,60,$Zhh
	ldb	14($Xi),$nlo

	ldd	$nhi($Hll),$Tll
	ldd	$nhi($Hhh),$Thh
	and	$mask0xf0,$nlo,$nhi
	depd,z	$nlo,59,4,$nlo

	xor	$Tll,$Zll,$Zll
	xor	$Thh,$Zhh,$Zhh
	ldd	$rem($rem_4bit),$rem
	b	L\$oop_gmult_pa2
	ldi	13,$cnt

	.ALIGN	8
L\$oop_gmult_pa2
	xor	$rem,$Zhh,$Zhh		; moved here to work around gas bug
	depd,z	$Zll,60,4,$rem

	shrpd	$Zhh,$Zll,4,$Zll
	extrd,u	$Zhh,59,60,$Zhh
	ldd	$nlo($Hll),$Tll
	ldd	$nlo($Hhh),$Thh

	xor	$Tll,$Zll,$Zll
	xor	$Thh,$Zhh,$Zhh
	ldd	$rem($rem_4bit),$rem

	xor	$rem,$Zhh,$Zhh
	depd,z	$Zll,60,4,$rem
	ldbx	$cnt($Xi),$nlo

	shrpd	$Zhh,$Zll,4,$Zll
	extrd,u	$Zhh,59,60,$Zhh
	ldd	$nhi($Hll),$Tll
	ldd	$nhi($Hhh),$Thh

	and	$mask0xf0,$nlo,$nhi
	depd,z	$nlo,59,4,$nlo
	ldd	$rem($rem_4bit),$rem

	xor	$Tll,$Zll,$Zll
	addib,uv	-1,$cnt,L\$oop_gmult_pa2
	xor	$Thh,$Zhh,$Zhh

	xor	$rem,$Zhh,$Zhh
	depd,z	$Zll,60,4,$rem

	shrpd	$Zhh,$Zll,4,$Zll
	extrd,u	$Zhh,59,60,$Zhh
	ldd	$nlo($Hll),$Tll
	ldd	$nlo($Hhh),$Thh

	xor	$Tll,$Zll,$Zll
	xor	$Thh,$Zhh,$Zhh
	ldd	$rem($rem_4bit),$rem

	xor	$rem,$Zhh,$Zhh
	depd,z	$Zll,60,4,$rem

	shrpd	$Zhh,$Zll,4,$Zll
	extrd,u	$Zhh,59,60,$Zhh
	ldd	$nhi($Hll),$Tll
	ldd	$nhi($Hhh),$Thh

	xor	$Tll,$Zll,$Zll
	xor	$Thh,$Zhh,$Zhh
	ldd	$rem($rem_4bit),$rem

	xor	$rem,$Zhh,$Zhh
	std	$Zll,8($Xi)
	std	$Zhh,0($Xi)
___

# gcm_gmult_4bit, PA-RISC 1.0 code path; emitted for the 32-bit build only.
# It opens with the branch that lets the preceding 2.0 path skip over it.
# The 128-bit values are kept as four 32-bit words (hh/hl/lh/ll), and the
# loop structure matches the 2.0 path: bytes 15 and 14 before the loop,
# $cnt counting down from 13, then a tail that stores the four words to Xi.
$code.=<<___ if ($SIZE_T==4);
	b	L\$done_gmult
	nop

L\$parisc1_gmult
	ldb	15($Xi),$nlo
	ldo	12($Htbl),$Hll
	ldo	8($Htbl),$Hlh
	ldo	4($Htbl),$Hhl

	and	$mask0xf0,$nlo,$nhi
	zdep	$nlo,27,4,$nlo

	ldwx	$nlo($Hll),$Zll
	ldwx	$nlo($Hlh),$Zlh
	ldwx	$nlo($Hhl),$Zhl
	ldwx	$nlo($Hhh),$Zhh
	zdep	$Zll,28,4,$rem
	ldb	14($Xi),$nlo
	ldwx	$rem($rem_4bit),$rem
	shrpw	$Zlh,$Zll,4,$Zll
	ldwx	$nhi($Hll),$Tll
	shrpw	$Zhl,$Zlh,4,$Zlh
	ldwx	$nhi($Hlh),$Tlh
	shrpw	$Zhh,$Zhl,4,$Zhl
	ldwx	$nhi($Hhl),$Thl
	extru	$Zhh,27,28,$Zhh
	ldwx	$nhi($Hhh),$Thh
	xor	$rem,$Zhh,$Zhh
	and	$mask0xf0,$nlo,$nhi
	zdep	$nlo,27,4,$nlo

	xor	$Tll,$Zll,$Zll
	ldwx	$nlo($Hll),$Tll
	xor	$Tlh,$Zlh,$Zlh
	ldwx	$nlo($Hlh),$Tlh
	xor	$Thl,$Zhl,$Zhl
	b	L\$oop_gmult_pa1
	ldi	13,$cnt

	.ALIGN	8
L\$oop_gmult_pa1
	zdep	$Zll,28,4,$rem
	ldwx	$nlo($Hhl),$Thl
	xor	$Thh,$Zhh,$Zhh
	ldwx	$rem($rem_4bit),$rem
	shrpw	$Zlh,$Zll,4,$Zll
	ldwx	$nlo($Hhh),$Thh
	shrpw	$Zhl,$Zlh,4,$Zlh
	ldbx	$cnt($Xi),$nlo
	xor	$Tll,$Zll,$Zll
	ldwx	$nhi($Hll),$Tll
	shrpw	$Zhh,$Zhl,4,$Zhl
	xor	$Tlh,$Zlh,$Zlh
	ldwx	$nhi($Hlh),$Tlh
	extru	$Zhh,27,28,$Zhh
	xor	$Thl,$Zhl,$Zhl
	ldwx	$nhi($Hhl),$Thl
	xor	$rem,$Zhh,$Zhh
	zdep	$Zll,28,4,$rem
	xor	$Thh,$Zhh,$Zhh
	ldwx	$nhi($Hhh),$Thh
	shrpw	$Zlh,$Zll,4,$Zll
	ldwx	$rem($rem_4bit),$rem
	shrpw	$Zhl,$Zlh,4,$Zlh
	shrpw	$Zhh,$Zhl,4,$Zhl
	and	$mask0xf0,$nlo,$nhi
	extru	$Zhh,27,28,$Zhh
	zdep	$nlo,27,4,$nlo
	xor	$Tll,$Zll,$Zll
	ldwx	$nlo($Hll),$Tll
	xor	$Tlh,$Zlh,$Zlh
	ldwx	$nlo($Hlh),$Tlh
	xor	$rem,$Zhh,$Zhh
	addib,uv	-1,$cnt,L\$oop_gmult_pa1
	xor	$Thl,$Zhl,$Zhl

	zdep	$Zll,28,4,$rem
	ldwx	$nlo($Hhl),$Thl
	xor	$Thh,$Zhh,$Zhh
	ldwx	$rem($rem_4bit),$rem
	shrpw	$Zlh,$Zll,4,$Zll
	ldwx	$nlo($Hhh),$Thh
	shrpw	$Zhl,$Zlh,4,$Zlh
	xor	$Tll,$Zll,$Zll
	ldwx	$nhi($Hll),$Tll
	shrpw	$Zhh,$Zhl,4,$Zhl
	xor	$Tlh,$Zlh,$Zlh
	ldwx	$nhi($Hlh),$Tlh
	extru	$Zhh,27,28,$Zhh
	xor	$rem,$Zhh,$Zhh
	xor	$Thl,$Zhl,$Zhl
	ldwx	$nhi($Hhl),$Thl
	xor	$Thh,$Zhh,$Zhh
	ldwx	$nhi($Hhh),$Thh
	zdep	$Zll,28,4,$rem
	ldwx	$rem($rem_4bit),$rem
	shrpw	$Zlh,$Zll,4,$Zll
	shrpw	$Zhl,$Zlh,4,$Zlh
	shrpw	$Zhh,$Zhl,4,$Zhl
	extru	$Zhh,27,28,$Zhh
	xor	$Tll,$Zll,$Zll
	xor	$Tlh,$Zlh,$Zlh
	xor	$rem,$Zhh,$Zhh
	stw	$Zll,12($Xi)
	xor	$Thl,$Zhl,$Zhl
	stw	$Zlh,8($Xi)
	xor	$Thh,$Zhh,$Zhh
	stw	$Zhl,4($Xi)
	stw	$Zhh,0($Xi)
___

# gcm_gmult_4bit epilogue: restore the return pointer and saved registers.
$code.=<<___;
L\$done_gmult
	$POP	`-$FRAME-$SAVED_RP`(%sp),%r2		; standard epilogue
	$POP	`-$FRAME+1*$SIZE_T`(%sp),%r4
	$POP	`-$FRAME+2*$SIZE_T`(%sp),%r5
	$POP	`-$FRAME+3*$SIZE_T`(%sp),%r6
___
$code.=<<___ if ($SIZE_T==4);
	$POP	`-$FRAME+4*$SIZE_T`(%sp),%r7
	$POP	`-$FRAME+5*$SIZE_T`(%sp),%r8
	$POP	`-$FRAME+6*$SIZE_T`(%sp),%r9
	$POP	`-$FRAME+7*$SIZE_T`(%sp),%r10
	$POP	`-$FRAME+8*$SIZE_T`(%sp),%r11
___
# Return from gcm_gmult_4bit (%r3 is reloaded in the branch delay slot),
# then the entry point and prologue of gcm_ghash_4bit.  ENTRY_GR uses $NREGS
# like gcm_gmult_4bit does: the 64-bit prologue below saves only %r3-%r6,
# which a hard-coded 11 would misdescribe; the 32-bit value is still 11.
$code.=<<___;
	bv	(%r2)
	.EXIT
	$POPMB	-$FRAME(%sp),%r3
	.PROCEND

	.EXPORT	gcm_ghash_4bit,ENTRY,ARGW0=GR,ARGW1=GR,ARGW2=GR,ARGW3=GR
	.ALIGN	64
gcm_ghash_4bit
	.PROC
	.CALLINFO	FRAME=`$FRAME-10*$SIZE_T`,NO_CALLS,SAVE_RP,ENTRY_GR=$NREGS
	.ENTRY
	$PUSH	%r2,-$SAVED_RP(%sp)	; standard prologue
	$PUSHMA	%r3,$FRAME(%sp)
	$PUSH	%r4,`-$FRAME+1*$SIZE_T`(%sp)
	$PUSH	%r5,`-$FRAME+2*$SIZE_T`(%sp)
	$PUSH	%r6,`-$FRAME+3*$SIZE_T`(%sp)
___
$code.=<<___ if ($SIZE_T==4);
	$PUSH	%r7,`-$FRAME+4*$SIZE_T`(%sp)
	$PUSH	%r8,`-$FRAME+5*$SIZE_T`(%sp)
	$PUSH	%r9,`-$FRAME+6*$SIZE_T`(%sp)
	$PUSH	%r10,`-$FRAME+7*$SIZE_T`(%sp)
	$PUSH	%r11,`-$FRAME+8*$SIZE_T`(%sp)
___
# Position-independent address of L$rem_4bit (same sequence as in
# gcm_gmult_4bit); addl turns $len into the end-of-input pointer that the
# outer loops compare $inp against.
$code.=<<___;
	blr	%r0,$rem_4bit
	ldi	3,$rem
L\$pic_ghash
	andcm	$rem_4bit,$rem,$rem_4bit
	addl	$inp,$len,$len
	ldo	L\$rem_4bit-L\$pic_ghash($rem_4bit),$rem_4bit
	ldi	0xf0,$mask0xf0
___
# 32-bit build only: run-time selection of the PA-RISC 1.0 code path.
$code.=<<___ if ($SIZE_T==4);
	ldi	31,$rem
	mtctl	$rem,%cr11
	extrd,u,*=	$rem,%sar,1,$rem	; executes on PA-RISC 1.0
	b	L\$parisc1_ghash
	nop
___

# gcm_ghash_4bit, PA-RISC 2.0 code path.  L$outer_ghash_pa2 runs once per
# 16-byte input block: each Xi byte is XORed with the matching input byte
# before the table lookups, the inner loop runs $cnt down from 13, and the
# tail stores Xi, advances $inp by 16 and repeats until $inp reaches $len
# (set to the end-of-input pointer in the prologue).  $rem2 serves as a
# second remainder register alongside $rem.
$code.=<<___;
	ldb	15($Xi),$nlo
	ldo	8($Htbl),$Hll

L\$outer_ghash_pa2
	ldb	15($inp),$nhi
	xor	$nhi,$nlo,$nlo
	and	$mask0xf0,$nlo,$nhi
	depd,z	$nlo,59,4,$nlo

	ldd	$nlo($Hll),$Zll
	ldd	$nlo($Hhh),$Zhh

	depd,z	$Zll,60,4,$rem
	shrpd	$Zhh,$Zll,4,$Zll
	extrd,u	$Zhh,59,60,$Zhh
	ldb	14($Xi),$nlo
	ldb	14($inp),$byte

	ldd	$nhi($Hll),$Tll
	ldd	$nhi($Hhh),$Thh
	xor	$byte,$nlo,$nlo
	and	$mask0xf0,$nlo,$nhi
	depd,z	$nlo,59,4,$nlo

	xor	$Tll,$Zll,$Zll
	xor	$Thh,$Zhh,$Zhh
	ldd	$rem($rem_4bit),$rem
	b	L\$oop_ghash_pa2
	ldi	13,$cnt

	.ALIGN	8
L\$oop_ghash_pa2
	xor	$rem,$Zhh,$Zhh		; moved here to work around gas bug
	depd,z	$Zll,60,4,$rem2

	shrpd	$Zhh,$Zll,4,$Zll
	extrd,u	$Zhh,59,60,$Zhh
	ldd	$nlo($Hll),$Tll
	ldd	$nlo($Hhh),$Thh

	xor	$Tll,$Zll,$Zll
	xor	$Thh,$Zhh,$Zhh
	ldbx	$cnt($Xi),$nlo
	ldbx	$cnt($inp),$byte

	depd,z	$Zll,60,4,$rem
	shrpd	$Zhh,$Zll,4,$Zll
	ldd	$rem2($rem_4bit),$rem2

	xor	$rem2,$Zhh,$Zhh
	xor	$byte,$nlo,$nlo
	ldd	$nhi($Hll),$Tll
	ldd	$nhi($Hhh),$Thh

	and	$mask0xf0,$nlo,$nhi
	depd,z	$nlo,59,4,$nlo

	extrd,u	$Zhh,59,60,$Zhh
	xor	$Tll,$Zll,$Zll

	ldd	$rem($rem_4bit),$rem
	addib,uv	-1,$cnt,L\$oop_ghash_pa2
	xor	$Thh,$Zhh,$Zhh

	xor	$rem,$Zhh,$Zhh
	depd,z	$Zll,60,4,$rem2

	shrpd	$Zhh,$Zll,4,$Zll
	extrd,u	$Zhh,59,60,$Zhh
	ldd	$nlo($Hll),$Tll
	ldd	$nlo($Hhh),$Thh

	xor	$Tll,$Zll,$Zll
	xor	$Thh,$Zhh,$Zhh

	depd,z	$Zll,60,4,$rem
	shrpd	$Zhh,$Zll,4,$Zll
	ldd	$rem2($rem_4bit),$rem2

	xor	$rem2,$Zhh,$Zhh
	ldd	$nhi($Hll),$Tll
	ldd	$nhi($Hhh),$Thh

	extrd,u	$Zhh,59,60,$Zhh
	xor	$Tll,$Zll,$Zll
	xor	$Thh,$Zhh,$Zhh
	ldd	$rem($rem_4bit),$rem

	xor	$rem,$Zhh,$Zhh
	std	$Zll,8($Xi)
	ldo	16($inp),$inp
	std	$Zhh,0($Xi)
	cmpb,*<>	$inp,$len,L\$outer_ghash_pa2
	copy	$Zll,$nlo
___

# gcm_ghash_4bit, PA-RISC 1.0 code path; emitted for the 32-bit build only.
# It opens with the branch that lets the preceding 2.0 path skip over it.
# Structure mirrors the 2.0 path, using four 32-bit words per 128-bit value:
# L$outer_ghash_pa1 iterates per 16-byte block until $inp reaches $len.
$code.=<<___ if ($SIZE_T==4);
	b	L\$done_ghash
	nop

L\$parisc1_ghash
	ldb	15($Xi),$nlo
	ldo	12($Htbl),$Hll
	ldo	8($Htbl),$Hlh
	ldo	4($Htbl),$Hhl

L\$outer_ghash_pa1
	ldb	15($inp),$byte
	xor	$byte,$nlo,$nlo
	and	$mask0xf0,$nlo,$nhi
	zdep	$nlo,27,4,$nlo

	ldwx	$nlo($Hll),$Zll
	ldwx	$nlo($Hlh),$Zlh
	ldwx	$nlo($Hhl),$Zhl
	ldwx	$nlo($Hhh),$Zhh
	zdep	$Zll,28,4,$rem
	ldb	14($Xi),$nlo
	ldb	14($inp),$byte
	ldwx	$rem($rem_4bit),$rem
	shrpw	$Zlh,$Zll,4,$Zll
	ldwx	$nhi($Hll),$Tll
	shrpw	$Zhl,$Zlh,4,$Zlh
	ldwx	$nhi($Hlh),$Tlh
	shrpw	$Zhh,$Zhl,4,$Zhl
	ldwx	$nhi($Hhl),$Thl
	extru	$Zhh,27,28,$Zhh
	ldwx	$nhi($Hhh),$Thh
	xor	$byte,$nlo,$nlo
	xor	$rem,$Zhh,$Zhh
	and	$mask0xf0,$nlo,$nhi
	zdep	$nlo,27,4,$nlo

	xor	$Tll,$Zll,$Zll
	ldwx	$nlo($Hll),$Tll
	xor	$Tlh,$Zlh,$Zlh
	ldwx	$nlo($Hlh),$Tlh
	xor	$Thl,$Zhl,$Zhl
	b	L\$oop_ghash_pa1
	ldi	13,$cnt

	.ALIGN	8
L\$oop_ghash_pa1
	zdep	$Zll,28,4,$rem
	ldwx	$nlo($Hhl),$Thl
	xor	$Thh,$Zhh,$Zhh
	ldwx	$rem($rem_4bit),$rem
	shrpw	$Zlh,$Zll,4,$Zll
	ldwx	$nlo($Hhh),$Thh
	shrpw	$Zhl,$Zlh,4,$Zlh
	ldbx	$cnt($Xi),$nlo
	xor	$Tll,$Zll,$Zll
	ldwx	$nhi($Hll),$Tll
	shrpw	$Zhh,$Zhl,4,$Zhl
	ldbx	$cnt($inp),$byte
	xor	$Tlh,$Zlh,$Zlh
	ldwx	$nhi($Hlh),$Tlh
	extru	$Zhh,27,28,$Zhh
	xor	$Thl,$Zhl,$Zhl
	ldwx	$nhi($Hhl),$Thl
	xor	$rem,$Zhh,$Zhh
	zdep	$Zll,28,4,$rem
	xor	$Thh,$Zhh,$Zhh
	ldwx	$nhi($Hhh),$Thh
	shrpw	$Zlh,$Zll,4,$Zll
	ldwx	$rem($rem_4bit),$rem
	shrpw	$Zhl,$Zlh,4,$Zlh
	xor	$byte,$nlo,$nlo
	shrpw	$Zhh,$Zhl,4,$Zhl
	and	$mask0xf0,$nlo,$nhi
	extru	$Zhh,27,28,$Zhh
	zdep	$nlo,27,4,$nlo
	xor	$Tll,$Zll,$Zll
	ldwx	$nlo($Hll),$Tll
	xor	$Tlh,$Zlh,$Zlh
	ldwx	$nlo($Hlh),$Tlh
	xor	$rem,$Zhh,$Zhh
	addib,uv	-1,$cnt,L\$oop_ghash_pa1
	xor	$Thl,$Zhl,$Zhl

	zdep	$Zll,28,4,$rem
	ldwx	$nlo($Hhl),$Thl
	xor	$Thh,$Zhh,$Zhh
	ldwx	$rem($rem_4bit),$rem
	shrpw	$Zlh,$Zll,4,$Zll
	ldwx	$nlo($Hhh),$Thh
	shrpw	$Zhl,$Zlh,4,$Zlh
	xor	$Tll,$Zll,$Zll
	ldwx	$nhi($Hll),$Tll
	shrpw	$Zhh,$Zhl,4,$Zhl
	xor	$Tlh,$Zlh,$Zlh
	ldwx	$nhi($Hlh),$Tlh
	extru	$Zhh,27,28,$Zhh
	xor	$rem,$Zhh,$Zhh
	xor	$Thl,$Zhl,$Zhl
	ldwx	$nhi($Hhl),$Thl
	xor	$Thh,$Zhh,$Zhh
	ldwx	$nhi($Hhh),$Thh
	zdep	$Zll,28,4,$rem
	ldwx	$rem($rem_4bit),$rem
	shrpw	$Zlh,$Zll,4,$Zll
	shrpw	$Zhl,$Zlh,4,$Zlh
	shrpw	$Zhh,$Zhl,4,$Zhl
	extru	$Zhh,27,28,$Zhh
	xor	$Tll,$Zll,$Zll
	xor	$Tlh,$Zlh,$Zlh
	xor	$rem,$Zhh,$Zhh
	stw	$Zll,12($Xi)
	xor	$Thl,$Zhl,$Zhl
	stw	$Zlh,8($Xi)
	xor	$Thh,$Zhh,$Zhh
	stw	$Zhl,4($Xi)
	ldo	16($inp),$inp
	stw	$Zhh,0($Xi)
	comb,<>	$inp,$len,L\$outer_ghash_pa1
	copy	$Zll,$nlo
___

# gcm_ghash_4bit epilogue: restore the return pointer and saved registers.
$code.=<<___;
L\$done_ghash
	$POP	`-$FRAME-$SAVED_RP`(%sp),%r2		; standard epilogue
	$POP	`-$FRAME+1*$SIZE_T`(%sp),%r4
	$POP	`-$FRAME+2*$SIZE_T`(%sp),%r5
	$POP	`-$FRAME+3*$SIZE_T`(%sp),%r6
___
$code.=<<___ if ($SIZE_T==4);
	$POP	`-$FRAME+4*$SIZE_T`(%sp),%r7
	$POP	`-$FRAME+5*$SIZE_T`(%sp),%r8
	$POP	`-$FRAME+6*$SIZE_T`(%sp),%r9
	$POP	`-$FRAME+7*$SIZE_T`(%sp),%r10
	$POP	`-$FRAME+8*$SIZE_T`(%sp),%r11
___
# Return, followed by the shared L$rem_4bit table: sixteen 8-byte entries,
# each a 16-bit constant in the top half of the first word followed by a
# zero word (the 128-byte shared table mentioned in the file header).  The
# identification string previously misspelled CRYPTOGAMS as "GRYPTOGAMS".
$code.=<<___;
	bv	(%r2)
	.EXIT
	$POPMB	-$FRAME(%sp),%r3
	.PROCEND

	.ALIGN	64
L\$rem_4bit
	.WORD	`0x0000<<16`,0,`0x1C20<<16`,0,`0x3840<<16`,0,`0x2460<<16`,0
	.WORD	`0x7080<<16`,0,`0x6CA0<<16`,0,`0x48C0<<16`,0,`0x54E0<<16`,0
	.WORD	`0xE100<<16`,0,`0xFD20<<16`,0,`0xD940<<16`,0,`0xC560<<16`,0
	.WORD	`0x9180<<16`,0,`0x8DA0<<16`,0,`0xA9C0<<16`,0,`0xB5E0<<16`,0
	.STRINGZ "GHASH for PA-RISC, CRYPTOGAMS by <appro\@openssl.org>"
	.ALIGN	64
___

# Explicitly encode PA-RISC 2.0 instructions used in this module, so
# that it can be compiled with .LEVEL 1.0. It should be noted that I
# wouldn't have to do this, if GNU assembler understood .ALLOW 2.0
# directive...

# Encoder for "ldd".
#   $mod   completer text following the mnemonic ("" or e.g. ",mb")
#   $args  operand string
# Returns a ".WORD 0x........" line carrying the machine encoding, with the
# original instruction appended as a comment, for the two operand shapes
# handled here (register-indexed and short displacement); any other operand
# shape is returned as the unmodified instruction.
my $ldd = sub {
    my ($mod,$args) = @_;
    my $orig = "ldd$mod\t$args";

    if ($args =~ /%r([0-9]+)\(%r([0-9]+)\),%r([0-9]+)/) {		# format 4
	my ($index,$base,$target) = ($1,$2,$3);
	my $opcode = (0x03<<26)|($base<<21)|($index<<16)|(3<<6)|$target;
	return sprintf "\t.WORD\t0x%08x\t; %s",$opcode,$orig;
    }

    if ($args =~ /(\-?[0-9]+)\(%r([0-9]+)\),%r([0-9]+)/) {		# format 5
	my ($disp,$base,$target) = ($1,$2,$3);
	my $opcode = (0x03<<26)|($base<<21)|(1<<12)|(3<<6)|$target;
	# encode offset: low four bits, then bit 4 moved below them
	$opcode |= (($disp&0xF)<<17)|(($disp&0x10)<<12);
	$opcode |= (1<<5)  if ($mod =~ /^,m/);
	$opcode |= (1<<13) if ($mod =~ /^,mb/);
	return sprintf "\t.WORD\t0x%08x\t; %s",$opcode,$orig;
    }

    return "\t".$orig;
};

# Encoder for "std" with a "%rS,disp(%rB)" operand.  Returns a ".WORD" line
# with the machine encoding and the original instruction as a comment; any
# other operand shape is returned as the unmodified instruction.
my $std = sub {
    my ($mod,$args) = @_;
    my $orig = "std$mod\t$args";

    if ($args =~ /%r([0-9]+),(\-?[0-9]+)\(%r([0-9]+)\)/) {	# format 3 suffices
	my ($source,$disp,$base) = ($1,$2,$3);
	my $opcode = (0x1c<<26)|($base<<21)|($source<<16)
		   |(($disp&0x1FF8)<<1)|(($disp>>13)&1);
	return sprintf "\t.WORD\t0x%08x\t; %s",$opcode,$orig;
    }

    return "\t".$orig;
};

# Encoder for "extrd".  Handles a fixed bit position ("%rS,pos,len,%rT") and
# a position taken from %sar ("%rS,%sar,len,%rT"); returns a ".WORD" line
# with the original instruction as a comment, or the unmodified instruction
# for any other operand shape.
my $extrd = sub {
    my ($mod,$args) = @_;
    my $orig = "extrd$mod\t$args";

    # I only have ",u" completer, it's implicitly encoded...
    if ($args =~ /%r([0-9]+),([0-9]+),([0-9]+),%r([0-9]+)/) {	# format 15
	my ($source,$pos,$width,$target) = ($1,$2,$3,$4);
	my $opcode = (0x36<<26)|($source<<21)|($target<<16);
	my $clen = 32-$width;
	$opcode |= (($pos&0x20)<<6)|(($pos&0x1f)<<5);		# encode pos
	$opcode |= (($clen&0x20)<<7)|($clen&0x1f);		# encode len
	return sprintf "\t.WORD\t0x%08x\t; %s",$opcode,$orig;
    }

    if ($args =~ /%r([0-9]+),%sar,([0-9]+),%r([0-9]+)/) {	# format 12
	my ($source,$width,$target) = ($1,$2,$3);
	my $opcode = (0x34<<26)|($source<<21)|($target<<16)|(2<<11)|(1<<9);
	my $clen = 32-$width;
	$opcode |= (($clen&0x20)<<3)|($clen&0x1f);		# encode len
	# a "=" condition completer (optionally "*=") sets bit 13
	$opcode |= (1<<13) if ($mod =~ /,\**=/);
	return sprintf "\t.WORD\t0x%08x\t; %s",$opcode,$orig;
    }

    return "\t".$orig;
};

# Encoder for "shrpd".  Handles a fixed shift amount ("%rA,%rB,sa,%rT") and
# a shift amount taken from %sar ("%rA,%rB,%sar,%rT"); returns a ".WORD"
# line with the original instruction as a comment, or the unmodified
# instruction for any other operand shape.
my $shrpd = sub {
    my ($mod,$args) = @_;
    my $orig = "shrpd$mod\t$args";

    if ($args =~ /%r([0-9]+),%r([0-9]+),([0-9]+),%r([0-9]+)/) {	# format 14
	my ($high,$low,$shift,$target) = ($1,$2,$3,$4);
	my $opcode = (0x34<<26)|($low<<21)|($high<<16)|(1<<10)|$target;
	my $cpos = 63-$shift;
	$opcode |= (($cpos&0x20)<<6)|(($cpos&0x1f)<<5);		# encode sa
	return sprintf "\t.WORD\t0x%08x\t; %s",$opcode,$orig;
    }

    if ($args =~ /%r([0-9]+),%r([0-9]+),%sar,%r([0-9]+)/) {	# format 11
	my ($high,$low,$target) = ($1,$2,$3);
	return sprintf "\t.WORD\t0x%08x\t; %s",
		(0x34<<26)|($low<<21)|($high<<16)|(1<<9)|$target,$orig;
    }

    return "\t".$orig;
};

# Encoder for "depd" with a "%rS,pos,len,%rT" operand.  Returns a ".WORD"
# line with the machine encoding and the original instruction as a comment;
# any other operand shape is returned as the unmodified instruction.
my $depd = sub {
    my ($mod,$args) = @_;
    my $orig = "depd$mod\t$args";

    # I only have ",z" completer, it's implicitly encoded...
    if ($args =~ /%r([0-9]+),([0-9]+),([0-9]+),%r([0-9]+)/) {	# format 16
	my ($source,$pos,$width,$target) = ($1,$2,$3,$4);
	my $opcode = (0x3c<<26)|($target<<21)|($source<<16);
	my $cpos = 63-$pos;
	my $clen = 32-$width;
	$opcode |= (($cpos&0x20)<<6)|(($cpos&0x1f)<<5);		# encode pos
	$opcode |= (($clen&0x20)<<7)|($clen&0x1f);		# encode len
	return sprintf "\t.WORD\t0x%08x\t; %s",$opcode,$orig;
    }

    return "\t".$orig;
};

# Translate one instruction for the 32-bit build.
#   $mnemonic  instruction name
#   $mod       completer text attached to the mnemonic (may be empty)
#   $args      operand string
# If a code reference is stored in the variable named after the mnemonic
# ($ldd, $std, ...), it is called to produce the replacement line; otherwise
# the instruction is re-emitted unchanged in "\tmnemonic<mod>\targs" form.
sub assemble {
    my ($mnemonic,$mod,$args)=@_;

    # The encoders are file-scoped lexicals, so the lookup is done by name
    # with a string eval.  Restrict it to plain lower-case names so nothing
    # other than a simple variable fetch can ever be evaluated.
    my $opcode = $mnemonic =~ /\A[a-z]+\z/ ? eval("\$$mnemonic") : undef;

    return ref($opcode) eq 'CODE' ? $opcode->($mod,$args)
				  : "\t$mnemonic$mod\t$args";
}

# Post-process the accumulated template line by line and print it.
foreach (split("\n",$code)) {
    # Evaluate arithmetic written in `backticks` in the templates.
    s/\`([^\`]*)\`/eval $1/ge;

    if ($SIZE_T==4) {
	# 32-bit build: pass every indented instruction through assemble()
	# so PA-RISC 2.0 instructions become hand-encoded .WORDs, then
	# rewrite "cmpb,*" as "comb," and drop the "*" from any remaining
	# ",*" completer.
	s/^\s+([a-z]+)([\S]*)\s+([\S]*)/assemble($1,$2,$3)/e;
	s/cmpb,\*/comb,/;
	s/,\*/,/;
    }

    # 64-bit build: use bve in place of bv.
    s/\bbv\b/bve/	if ($SIZE_T==8);

    print $_,"\n";
}

# Buffered write errors (e.g. a full disk) only surface at close; treat them
# as fatal so a truncated assembly file is not silently accepted.
close STDOUT or die "error closing STDOUT: $!";