]> git.ipfire.org Git - thirdparty/openssl.git/blob - crypto/bn/asm/bn-alpha.pl
Import of old SSLeay release: SSLeay 0.9.1b (unreleased)
[thirdparty/openssl.git] / crypto / bn / asm / bn-alpha.pl
1 #!/usr/local/bin/perl
2 # I have this in perl so I can use more usefull register names and then convert
3 # them into alpha registers.
4 #
5
6 $d=&data();
7 $d =~ s/CC/0/g;
8 $d =~ s/R1/1/g;
9 $d =~ s/R2/2/g;
10 $d =~ s/R3/3/g;
11 $d =~ s/R4/4/g;
12 $d =~ s/L1/5/g;
13 $d =~ s/L2/6/g;
14 $d =~ s/L3/7/g;
15 $d =~ s/L4/8/g;
16 $d =~ s/O1/22/g;
17 $d =~ s/O2/23/g;
18 $d =~ s/O3/24/g;
19 $d =~ s/O4/25/g;
20 $d =~ s/A1/20/g;
21 $d =~ s/A2/21/g;
22 $d =~ s/A3/27/g;
23 $d =~ s/A4/28/g;
24 if (0){
25 }
26
27 print $d;
28
29 sub data
30 {
31 local($data)=<<'EOF';
32
33 # DEC Alpha assember
34 # The bn_div_words is actually gcc output but the other parts are hand done.
35 # Thanks to tzeruch@ceddec.com for sending me the gcc output for
36 # bn_div_words.
37 # I've gone back and re-done most of routines.
38 # The key thing to remeber for the 164 CPU is that while a
39 # multiply operation takes 8 cycles, another one can only be issued
40 # after 4 cycles have elapsed. I've done modification to help
41 # improve this. Also, normally, a ld instruction will not be available
42 # for about 3 cycles.
43 .file 1 "bn_asm.c"
44 .set noat
45 gcc2_compiled.:
46 __gnu_compiled_c:
47 .text
48 .align 3
49 .globl bn_mul_add_words
50 .ent bn_mul_add_words
51 bn_mul_add_words:
52 bn_mul_add_words..ng:
53 .frame $30,0,$26,0
54 .prologue 0
55 .align 5
56 subq $18,4,$18
57 bis $31,$31,$CC
58 blt $18,$43 # if we are -1, -2, -3 or -4 goto tail code
59 ldq $A1,0($17) # 1 1
60 ldq $R1,0($16) # 1 1
61 .align 3
62 $42:
63 mulq $A1,$19,$L1 # 1 2 1 ######
64 ldq $A2,8($17) # 2 1
65 ldq $R2,8($16) # 2 1
66 umulh $A1,$19,$A1 # 1 2 ######
67 ldq $A3,16($17) # 3 1
68 ldq $R3,16($16) # 3 1
69 mulq $A2,$19,$L2 # 2 2 1 ######
70 ldq $A4,24($17) # 4 1
71 addq $R1,$L1,$R1 # 1 2 2
72 ldq $R4,24($16) # 4 1
73 umulh $A2,$19,$A2 # 2 2 ######
74 cmpult $R1,$L1,$O1 # 1 2 3 1
75 addq $A1,$O1,$A1 # 1 3 1
76 addq $R1,$CC,$R1 # 1 2 3 1
77 mulq $A3,$19,$L3 # 3 2 1 ######
78 cmpult $R1,$CC,$CC # 1 2 3 2
79 addq $R2,$L2,$R2 # 2 2 2
80 addq $A1,$CC,$CC # 1 3 2
81 cmpult $R2,$L2,$O2 # 2 2 3 1
82 addq $A2,$O2,$A2 # 2 3 1
83 umulh $A3,$19,$A3 # 3 2 ######
84 addq $R2,$CC,$R2 # 2 2 3 1
85 cmpult $R2,$CC,$CC # 2 2 3 2
86 subq $18,4,$18
87 mulq $A4,$19,$L4 # 4 2 1 ######
88 addq $A2,$CC,$CC # 2 3 2
89 addq $R3,$L3,$R3 # 3 2 2
90 addq $16,32,$16
91 cmpult $R3,$L3,$O3 # 3 2 3 1
92 stq $R1,-32($16) # 1 2 4
93 umulh $A4,$19,$A4 # 4 2 ######
94 addq $A3,$O3,$A3 # 3 3 1
95 addq $R3,$CC,$R3 # 3 2 3 1
96 stq $R2,-24($16) # 2 2 4
97 cmpult $R3,$CC,$CC # 3 2 3 2
98 stq $R3,-16($16) # 3 2 4
99 addq $R4,$L4,$R4 # 4 2 2
100 addq $A3,$CC,$CC # 3 3 2
101 cmpult $R4,$L4,$O4 # 4 2 3 1
102 addq $17,32,$17
103 addq $A4,$O4,$A4 # 4 3 1
104 addq $R4,$CC,$R4 # 4 2 3 1
105 cmpult $R4,$CC,$CC # 4 2 3 2
106 stq $R4,-8($16) # 4 2 4
107 addq $A4,$CC,$CC # 4 3 2
108 blt $18,$43
109
110 ldq $A1,0($17) # 1 1
111 ldq $R1,0($16) # 1 1
112
113 br $42
114
115 .align 4
116 $45:
117 ldq $A1,0($17) # 4 1
118 ldq $R1,0($16) # 4 1
119 mulq $A1,$19,$L1 # 4 2 1
120 subq $18,1,$18
121 addq $16,8,$16
122 addq $17,8,$17
123 umulh $A1,$19,$A1 # 4 2
124 addq $R1,$L1,$R1 # 4 2 2
125 cmpult $R1,$L1,$O1 # 4 2 3 1
126 addq $A1,$O1,$A1 # 4 3 1
127 addq $R1,$CC,$R1 # 4 2 3 1
128 cmpult $R1,$CC,$CC # 4 2 3 2
129 addq $A1,$CC,$CC # 4 3 2
130 stq $R1,-8($16) # 4 2 4
131 bgt $18,$45
132 ret $31,($26),1 # else exit
133
134 .align 4
135 $43:
136 addq $18,4,$18
137 bgt $18,$45 # goto tail code
138 ret $31,($26),1 # else exit
139
140 .end bn_mul_add_words
141 .align 3
142 .globl bn_mul_words
143 .ent bn_mul_words
144 bn_mul_words:
145 bn_mul_words..ng:
146 .frame $30,0,$26,0
147 .prologue 0
148 .align 5
149 subq $18,4,$18
150 bis $31,$31,$CC
151 blt $18,$143 # if we are -1, -2, -3 or -4 goto tail code
152 ldq $A1,0($17) # 1 1
153 .align 3
154 $142:
155
156 mulq $A1,$19,$L1 # 1 2 1 #####
157 ldq $A2,8($17) # 2 1
158 ldq $A3,16($17) # 3 1
159 umulh $A1,$19,$A1 # 1 2 #####
160 ldq $A4,24($17) # 4 1
161 mulq $A2,$19,$L2 # 2 2 1 #####
162 addq $L1,$CC,$L1 # 1 2 3 1
163 subq $18,4,$18
164 cmpult $L1,$CC,$CC # 1 2 3 2
165 umulh $A2,$19,$A2 # 2 2 #####
166 addq $A1,$CC,$CC # 1 3 2
167 addq $17,32,$17
168 addq $L2,$CC,$L2 # 2 2 3 1
169 mulq $A3,$19,$L3 # 3 2 1 #####
170 cmpult $L2,$CC,$CC # 2 2 3 2
171 addq $A2,$CC,$CC # 2 3 2
172 addq $16,32,$16
173 umulh $A3,$19,$A3 # 3 2 #####
174 stq $L1,-32($16) # 1 2 4
175 mulq $A4,$19,$L4 # 4 2 1 #####
176 addq $L3,$CC,$L3 # 3 2 3 1
177 stq $L2,-24($16) # 2 2 4
178 cmpult $L3,$CC,$CC # 3 2 3 2
179 umulh $A4,$19,$A4 # 4 2 #####
180 addq $A3,$CC,$CC # 3 3 2
181 stq $L3,-16($16) # 3 2 4
182 addq $L4,$CC,$L4 # 4 2 3 1
183 cmpult $L4,$CC,$CC # 4 2 3 2
184
185 addq $A4,$CC,$CC # 4 3 2
186
187 stq $L4,-8($16) # 4 2 4
188
189 blt $18,$143
190
191 ldq $A1,0($17) # 1 1
192
193 br $142
194
195 .align 4
196 $145:
197 ldq $A1,0($17) # 4 1
198 mulq $A1,$19,$L1 # 4 2 1
199 subq $18,1,$18
200 umulh $A1,$19,$A1 # 4 2
201 addq $L1,$CC,$L1 # 4 2 3 1
202 addq $16,8,$16
203 cmpult $L1,$CC,$CC # 4 2 3 2
204 addq $17,8,$17
205 addq $A1,$CC,$CC # 4 3 2
206 stq $L1,-8($16) # 4 2 4
207
208 bgt $18,$145
209 ret $31,($26),1 # else exit
210
211 .align 4
212 $143:
213 addq $18,4,$18
214 bgt $18,$145 # goto tail code
215 ret $31,($26),1 # else exit
216
217 .end bn_mul_words
218 .align 3
219 .globl bn_sqr_words
220 .ent bn_sqr_words
221 bn_sqr_words:
222 bn_sqr_words..ng:
223 .frame $30,0,$26,0
224 .prologue 0
225
226 subq $18,4,$18
227 blt $18,$543 # if we are -1, -2, -3 or -4 goto tail code
228 ldq $A1,0($17) # 1 1
229 .align 3
230 $542:
231 mulq $A1,$A1,$L1 ######
232 ldq $A2,8($17) # 1 1
233 subq $18,4
234 umulh $A1,$A1,$R1 ######
235 ldq $A3,16($17) # 1 1
236 mulq $A2,$A2,$L2 ######
237 ldq $A4,24($17) # 1 1
238 stq $L1,0($16) # r[0]
239 umulh $A2,$A2,$R2 ######
240 stq $R1,8($16) # r[1]
241 mulq $A3,$A3,$L3 ######
242 stq $L2,16($16) # r[0]
243 umulh $A3,$A3,$R3 ######
244 stq $R2,24($16) # r[1]
245 mulq $A4,$A4,$L4 ######
246 stq $L3,32($16) # r[0]
247 umulh $A4,$A4,$R4 ######
248 stq $R3,40($16) # r[1]
249
250 addq $16,64,$16
251 addq $17,32,$17
252 stq $L4,-16($16) # r[0]
253 stq $R4,-8($16) # r[1]
254
255 blt $18,$543
256 ldq $A1,0($17) # 1 1
257 br $542
258
259 $442:
260 ldq $A1,0($17) # a[0]
261 mulq $A1,$A1,$L1 # a[0]*w low part r2
262 addq $16,16,$16
263 addq $17,8,$17
264 subq $18,1,$18
265 umulh $A1,$A1,$R1 # a[0]*w high part r3
266 stq $L1,-16($16) # r[0]
267 stq $R1,-8($16) # r[1]
268
269 bgt $18,$442
270 ret $31,($26),1 # else exit
271
272 .align 4
273 $543:
274 addq $18,4,$18
275 bgt $18,$442 # goto tail code
276 ret $31,($26),1 # else exit
277 .end bn_sqr_words
278
279 .align 3
280 .globl bn_add_words
281 .ent bn_add_words
282 bn_add_words:
283 bn_add_words..ng:
284 .frame $30,0,$26,0
285 .prologue 0
286
287 subq $19,4,$19
288 bis $31,$31,$CC # carry = 0
289 blt $19,$900
290 ldq $L1,0($17) # a[0]
291 ldq $R1,0($18) # b[1]
292 .align 3
293 $901:
294 addq $R1,$L1,$R1 # r=a+b;
295 ldq $L2,8($17) # a[1]
296 cmpult $R1,$L1,$O1 # did we overflow?
297 ldq $R2,8($18) # b[1]
298 addq $R1,$CC,$R1 # c+= overflow
299 ldq $L3,16($17) # a[2]
300 cmpult $R1,$CC,$CC # overflow?
301 ldq $R3,16($18) # b[2]
302 addq $CC,$O1,$CC
303 ldq $L4,24($17) # a[3]
304 addq $R2,$L2,$R2 # r=a+b;
305 ldq $R4,24($18) # b[3]
306 cmpult $R2,$L2,$O2 # did we overflow?
307 addq $R3,$L3,$R3 # r=a+b;
308 addq $R2,$CC,$R2 # c+= overflow
309 cmpult $R3,$L3,$O3 # did we overflow?
310 cmpult $R2,$CC,$CC # overflow?
311 addq $R4,$L4,$R4 # r=a+b;
312 addq $CC,$O2,$CC
313 cmpult $R4,$L4,$O4 # did we overflow?
314 addq $R3,$CC,$R3 # c+= overflow
315 stq $R1,0($16) # r[0]=c
316 cmpult $R3,$CC,$CC # overflow?
317 stq $R2,8($16) # r[1]=c
318 addq $CC,$O3,$CC
319 stq $R3,16($16) # r[2]=c
320 addq $R4,$CC,$R4 # c+= overflow
321 subq $19,4,$19 # loop--
322 cmpult $R4,$CC,$CC # overflow?
323 addq $17,32,$17 # a++
324 addq $CC,$O4,$CC
325 stq $R4,24($16) # r[3]=c
326 addq $18,32,$18 # b++
327 addq $16,32,$16 # r++
328
329 blt $19,$900
330 ldq $L1,0($17) # a[0]
331 ldq $R1,0($18) # b[1]
332 br $901
333 .align 4
334 $945:
335 ldq $L1,0($17) # a[0]
336 ldq $R1,0($18) # b[1]
337 addq $R1,$L1,$R1 # r=a+b;
338 subq $19,1,$19 # loop--
339 addq $R1,$CC,$R1 # c+= overflow
340 addq $17,8,$17 # a++
341 cmpult $R1,$L1,$O1 # did we overflow?
342 cmpult $R1,$CC,$CC # overflow?
343 addq $18,8,$18 # b++
344 stq $R1,0($16) # r[0]=c
345 addq $CC,$O1,$CC
346 addq $16,8,$16 # r++
347
348 bgt $19,$945
349 ret $31,($26),1 # else exit
350
351 $900:
352 addq $19,4,$19
353 bgt $19,$945 # goto tail code
354 ret $31,($26),1 # else exit
355 .end bn_add_words
356
357 .align 3
358 .globl bn_sub_words
359 .ent bn_sub_words
360 bn_sub_words:
361 bn_sub_words..ng:
362 .frame $30,0,$26,0
363 .prologue 0
364
365 subq $19,4,$19
366 bis $31,$31,$CC # carry = 0
367 br $800
368 blt $19,$800
369 ldq $L1,0($17) # a[0]
370 ldq $R1,0($18) # b[1]
371 .align 3
372 $801:
373 addq $R1,$L1,$R1 # r=a+b;
374 ldq $L2,8($17) # a[1]
375 cmpult $R1,$L1,$O1 # did we overflow?
376 ldq $R2,8($18) # b[1]
377 addq $R1,$CC,$R1 # c+= overflow
378 ldq $L3,16($17) # a[2]
379 cmpult $R1,$CC,$CC # overflow?
380 ldq $R3,16($18) # b[2]
381 addq $CC,$O1,$CC
382 ldq $L4,24($17) # a[3]
383 addq $R2,$L2,$R2 # r=a+b;
384 ldq $R4,24($18) # b[3]
385 cmpult $R2,$L2,$O2 # did we overflow?
386 addq $R3,$L3,$R3 # r=a+b;
387 addq $R2,$CC,$R2 # c+= overflow
388 cmpult $R3,$L3,$O3 # did we overflow?
389 cmpult $R2,$CC,$CC # overflow?
390 addq $R4,$L4,$R4 # r=a+b;
391 addq $CC,$O2,$CC
392 cmpult $R4,$L4,$O4 # did we overflow?
393 addq $R3,$CC,$R3 # c+= overflow
394 stq $R1,0($16) # r[0]=c
395 cmpult $R3,$CC,$CC # overflow?
396 stq $R2,8($16) # r[1]=c
397 addq $CC,$O3,$CC
398 stq $R3,16($16) # r[2]=c
399 addq $R4,$CC,$R4 # c+= overflow
400 subq $19,4,$19 # loop--
401 cmpult $R4,$CC,$CC # overflow?
402 addq $17,32,$17 # a++
403 addq $CC,$O4,$CC
404 stq $R4,24($16) # r[3]=c
405 addq $18,32,$18 # b++
406 addq $16,32,$16 # r++
407
408 blt $19,$800
409 ldq $L1,0($17) # a[0]
410 ldq $R1,0($18) # b[1]
411 br $801
412 .align 4
413 $845:
414 ldq $L1,0($17) # a[0]
415 ldq $R1,0($18) # b[1]
416 cmpult $L1,$R1,$O1 # will we borrow?
417 subq $L1,$R1,$R1 # r=a-b;
418 subq $19,1,$19 # loop--
419 cmpult $R1,$CC,$O2 # will we borrow?
420 subq $R1,$CC,$R1 # c+= overflow
421 addq $17,8,$17 # a++
422 addq $18,8,$18 # b++
423 stq $R1,0($16) # r[0]=c
424 addq $O2,$O1,$CC
425 addq $16,8,$16 # r++
426
427 bgt $19,$845
428 ret $31,($26),1 # else exit
429
430 $800:
431 addq $19,4,$19
432 bgt $19,$845 # goto tail code
433 ret $31,($26),1 # else exit
434 .end bn_sub_words
435
436 #
437 # What follows was taken directly from the C compiler with a few
438 # hacks to redo the lables.
439 #
440 .text
441 .align 3
442 .globl bn_div_words
443 .ent bn_div_words
444 bn_div_words:
445 ldgp $29,0($27)
446 bn_div_words..ng:
447 lda $30,-48($30)
448 .frame $30,48,$26,0
449 stq $26,0($30)
450 stq $9,8($30)
451 stq $10,16($30)
452 stq $11,24($30)
453 stq $12,32($30)
454 stq $13,40($30)
455 .mask 0x4003e00,-48
456 .prologue 1
457 bis $16,$16,$9
458 bis $17,$17,$10
459 bis $18,$18,$11
460 bis $31,$31,$13
461 bis $31,2,$12
462 bne $11,$119
463 lda $0,-1
464 br $31,$136
465 .align 4
466 $119:
467 bis $11,$11,$16
468 jsr $26,BN_num_bits_word
469 ldgp $29,0($26)
470 subq $0,64,$1
471 beq $1,$120
472 bis $31,1,$1
473 sll $1,$0,$1
474 cmpule $9,$1,$1
475 bne $1,$120
476 # lda $16,_IO_stderr_
477 # lda $17,$C32
478 # bis $0,$0,$18
479 # jsr $26,fprintf
480 # ldgp $29,0($26)
481 jsr $26,abort
482 ldgp $29,0($26)
483 .align 4
484 $120:
485 bis $31,64,$3
486 cmpult $9,$11,$2
487 subq $3,$0,$1
488 addl $1,$31,$0
489 subq $9,$11,$1
490 cmoveq $2,$1,$9
491 beq $0,$122
492 zapnot $0,15,$2
493 subq $3,$0,$1
494 sll $11,$2,$11
495 sll $9,$2,$3
496 srl $10,$1,$1
497 sll $10,$2,$10
498 bis $3,$1,$9
499 $122:
500 srl $11,32,$5
501 zapnot $11,15,$6
502 lda $7,-1
503 .align 5
504 $123:
505 srl $9,32,$1
506 subq $1,$5,$1
507 bne $1,$126
508 zapnot $7,15,$27
509 br $31,$127
510 .align 4
511 $126:
512 bis $9,$9,$24
513 bis $5,$5,$25
514 divqu $24,$25,$27
515 $127:
516 srl $10,32,$4
517 .align 5
518 $128:
519 mulq $27,$5,$1
520 subq $9,$1,$3
521 zapnot $3,240,$1
522 bne $1,$129
523 mulq $6,$27,$2
524 sll $3,32,$1
525 addq $1,$4,$1
526 cmpule $2,$1,$2
527 bne $2,$129
528 subq $27,1,$27
529 br $31,$128
530 .align 4
531 $129:
532 mulq $27,$6,$1
533 mulq $27,$5,$4
534 srl $1,32,$3
535 sll $1,32,$1
536 addq $4,$3,$4
537 cmpult $10,$1,$2
538 subq $10,$1,$10
539 addq $2,$4,$2
540 cmpult $9,$2,$1
541 bis $2,$2,$4
542 beq $1,$134
543 addq $9,$11,$9
544 subq $27,1,$27
545 $134:
546 subl $12,1,$12
547 subq $9,$4,$9
548 beq $12,$124
549 sll $27,32,$13
550 sll $9,32,$2
551 srl $10,32,$1
552 sll $10,32,$10
553 bis $2,$1,$9
554 br $31,$123
555 .align 4
556 $124:
557 bis $13,$27,$0
558 $136:
559 ldq $26,0($30)
560 ldq $9,8($30)
561 ldq $10,16($30)
562 ldq $11,24($30)
563 ldq $12,32($30)
564 ldq $13,40($30)
565 addq $30,48,$30
566 ret $31,($26),1
567 .end bn_div_words
568 EOF
569 return($data);
570 }
571