]> git.ipfire.org Git - thirdparty/openssl.git/blame - crypto/bn/asm/sparcv9-mont.pl
Update copyright year
[thirdparty/openssl.git] / crypto / bn / asm / sparcv9-mont.pl
CommitLineData
6aa36e8e 1#! /usr/bin/env perl
33388b44 2# Copyright 2005-2020 The OpenSSL Project Authors. All Rights Reserved.
6aa36e8e 3#
367ace68 4# Licensed under the Apache License 2.0 (the "License"). You may not use
6aa36e8e
RS
5# this file except in compliance with the License. You can obtain a copy
6# in the file LICENSE in the source distribution or at
7# https://www.openssl.org/source/license.html
8
68ea6068
AP
9
10# ====================================================================
e3713c36 11# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
7d9cf7c0
AP
12# project. The module is, however, dual licensed under OpenSSL and
13# CRYPTOGAMS licenses depending on where you obtain it. For further
14# details see http://www.openssl.org/~appro/cryptogams/.
68ea6068
AP
15# ====================================================================
16
17# December 2005
18#
19# Pure SPARCv9/8+ and IALU-only bn_mul_mont implementation. The reasons
20# for undertaking this effort are several. First of all, UltraSPARC is not
21# the whole SPARCv9 universe and other VIS-free implementations deserve
22# optimized code as much. Secondly, newly introduced UltraSPARC T1,
60250017 23# a.k.a. Niagara, has shared FPU and concurrent FPU-intensive paths,
68ea6068
AP
24# such as sparcv9a-mont, will simply sink it. Yes, T1 is equipped with
25# several integrated RSA/DSA accelerator circuits accessible through
26# kernel driver [only(*)], but having decent user-land software
27# implementation is important too. Finally, there was the desire to
28# experiment with a dedicated squaring procedure. Yes, this module
29# implements one, because it was easiest to draft it in SPARCv9
30# instructions...
31
32# (*) Engine accessing the driver in question is on my TODO list.
69687aa8 33# For reference, accelerator is estimated to give 6 to 10 times
68ea6068
AP
34# improvement on single-threaded RSA sign. It should be noted
35# that 6-10x improvement coefficient does not actually mean
36# something extraordinary in terms of absolute [single-threaded]
37# performance, as SPARCv9 instruction set is by all means least
38# suitable for high performance crypto among other 64 bit
39# platforms. 6-10x factor simply places T1 in same performance
40# domain as, say, AMD64 and IA-64. Improvement of RSA verify doesn't
41# appear impressive at all, but it's the sign operation which is
42# far more critical/interesting.
43
44# You might notice that inner loops are modulo-scheduled:-) This has
45# essentially negligible impact on UltraSPARC performance, it's
46# Fujitsu SPARC64 V users who should notice and hopefully appreciate
47# the advantage... Currently this module surpasses sparcv9a-mont.pl
48# by ~20% on UltraSPARC-III and later cores, but recall that sparcv9a
49# module still has hidden potential [see TODO list there], which is
50# estimated to be larger than 20%...
51
# Output selection: the last command-line argument, if there is one, is taken
# as the output file name and STDOUT is reopened onto it.  Without an argument
# the generated code goes to the inherited STDOUT.
1aa89a7a 52$output = pop and open STDOUT,">$output";
6bd7a4d9 53
68ea6068
AP
# Argument registers.  These are the %i (input) names, i.e. how the arguments
# are addressed after the "save" at .Lenter; the short-vector check at the
# routine entry runs before that save and therefore reads num from %o5.
# The trailing comments spell out the C prototype.
54# int bn_mul_mont(
55$rp="%i0"; # BN_ULONG *rp,		result vector
56$ap="%i1"; # const BN_ULONG *ap,	first operand
57$bp="%i2"; # const BN_ULONG *bp,	second operand (ap == bp selects squaring)
58$np="%i3"; # const BN_ULONG *np,	modulus
59$n0="%i4"; # const BN_ULONG *n0,	pointer; replaced by the loaded word at .Lenter
60$num="%i5"; # int num);		word count; scaled to a byte count at .Lenter
61
eb77e888
AP
# Symbolic stack constants pasted verbatim into the assembly.  They are not
# defined in this file; presumably they are macros supplied by sparc_arch.h,
# which the generated code includes -- confirm against that header.
62$frame="STACK_FRAME";
63$bias="STACK_BIAS";
68ea6068
AP
64
# Scratch registers used by the arithmetic:
#   car0/car1 - running carries of the ap[]*multiplier chain and of the
#               np[]*mul1 (reduction) chain respectively
#   car2      - top overflow bit carried from one outer pass to the next
#   acc0/acc1 - column products / partial sums about to be added in
#   mask      - constant 0xffffffff used to isolate 32-bit words
#   tmp0/tmp1 - temporaries (products of the next column, loaded tp words)
65$car0="%o0";
66$car1="%o1";
67$car2="%o2"; # 1 bit
68$acc0="%o3";
69$acc1="%o4";
70$mask="%g1"; # 32 bits, what a waste...
71$tmp0="%g4";
72$tmp1="%g5";
73
# Local registers:
#   i, j     - outer and inner loop indices, kept as byte offsets (step 4)
#   mul0     - current multiplier word (bp[i], or ap[i] when squaring)
#   mul1     - per-pass reduction multiplier, (n0 * low word) masked to 32 bits
#   tp       - cursor into the temporary vector allocated on the stack
#   apj/npj/tpj - words of ap[], np[] and tp[] loaded ahead of their use
74$i="%l0";
75$j="%l1";
76$mul0="%l2";
77$mul1="%l3";
78$tp="%l4";
79$apj="%l5";
80$npj="%l6";
81$tpj="%l7";
82
# Name of the emitted global symbol; interpolated into the .global, label,
# .type and .size directives of the generated assembly.
a00e414f 83$fname="bn_mul_mont_int";
68ea6068
AP
84
# Generated assembly, part 1: entry stub, generic Montgomery multiplication
# and the final-subtraction tail that the squaring path also jumps to.
# The heredoc is interpolating, so the Perl register variables above are
# substituted into the text.
85$code=<<___;
eb77e888
AP
86#include "sparc_arch.h"
87
68ea6068
AP
88.section ".text",#alloc,#execinstr
89
90.global $fname
91.align 32
92$fname:
! Entry check, executed before any register window is allocated, which is
! why num is read from %o5 here.  Vectors shorter than 4 words are rejected
! by returning 0 straight away.
93 cmp %o5,4 ! 128 bits minimum
94 bge,pt %icc,.Lenter
95 sethi %hi(0xffffffff),$mask ! delay slot: upper part of the 32-bit mask
96 retl
97 clr %o0 ! delay slot: return value 0
! Main entry.  Allocates a register window, turns num into a byte count,
! completes the 32-bit mask, replaces the n0 pointer by the word it points
! to and carves the temporary vector out of the stack.  The first operand
! words are loaded in between to overlap the loads with the address math.
98.align 32
99.Lenter:
100 save %sp,-$frame,%sp
101 sll $num,2,$num ! num*=4
102 or $mask,%lo(0xffffffff),$mask
103 ld [$n0],$n0 ! n0 = *n0
104 cmp $ap,$bp ! flags consumed by the be,pt below; nothing in between alters them
105 and $num,$mask,$num ! keep the low 32 bits of the byte count
106 ld [$bp],$mul0 ! bp[0]
68ea6068
AP
107 nop
108
109 add %sp,$bias,%o7 ! real top of stack
7a5dbeb7 110 ld [$ap],$car0 ! ap[0] ! redundant in squaring context
68ea6068
AP
111 sub %o7,$num,%o7 ! make room for num bytes
112 ld [$ap+4],$apj ! ap[1]
113 and %o7,-1024,%o7 ! round down to a 1024-byte boundary
114 ld [$np],$car1 ! np[0]
115 sub %o7,$bias,%sp ! alloca
116 ld [$np+4],$npj ! np[1]
! ap == bp selects the dedicated squaring code.  The delay-slot mov is not
! annulled, so j=12 (byte offset of word 3) holds on both paths.
eb77e888 117 be,pt SIZE_T_CC,.Lbn_sqr_mont
68ea6068
AP
118 mov 12,$j
119
! Generic multiplication, first pass (multiplier bp[0]).
! Column 0 is handled here: its low word determines mul1, the multiple of
! the modulus that is added in this pass.  The products for column 1 are
! formed ahead of time so that the .L1st loop can start pipelined.
120 mulx $car0,$mul0,$car0 ! ap[0]*bp[0]
121 mulx $apj,$mul0,$tmp0 !prologue! ap[1]*bp[0]
122 and $car0,$mask,$acc0 ! low word of column 0
123 add %sp,$bias+$frame,$tp ! tp = start of the temporary vector
124 ld [$ap+8],$apj !prologue!
125
126 mulx $n0,$acc0,$mul1 ! "t[0]"*n0
127 and $mul1,$mask,$mul1
128
129 mulx $car1,$mul1,$car1 ! np[0]*"t[0]"*n0
130 mulx $npj,$mul1,$acc1 !prologue! np[1]*"t[0]"*n0
131 srlx $car0,32,$car0 ! carry out of column 0, ap chain
132 add $acc0,$car1,$car1
133 ld [$np+8],$npj !prologue!
134 srlx $car1,32,$car1 ! carry out of column 0, np chain (low word is not stored)
135 mov $tmp0,$acc0 !prologue!
136
! First-pass inner loop.  Each trip adds the products formed on the previous
! trip (acc0 for the ap chain, acc1 for the np chain) into the two carries,
! stores one 32-bit word of tp and starts the products of the next column.
! j is a byte offset; the loop runs while j < num.
137.L1st:
138 mulx $apj,$mul0,$tmp0
139 mulx $npj,$mul1,$tmp1
140 add $acc0,$car0,$car0
141 ld [$ap+$j],$apj ! ap[j]
142 and $car0,$mask,$acc0
143 add $acc1,$car1,$car1
144 ld [$np+$j],$npj ! np[j]
145 srlx $car0,32,$car0
146 add $acc0,$car1,$car1
147 add $j,4,$j ! j++
148 mov $tmp0,$acc0
149 st $car1,[$tp]
150 cmp $j,$num
151 mov $tmp1,$acc1
152 srlx $car1,32,$car1
153 bl %icc,.L1st
154 add $tp,4,$tp ! tp++
155!.L1st
156
! Loop epilogue: drain the pipeline.  The products still in flight are added
! and two more words are stored at tp and tp+4.
157 mulx $apj,$mul0,$tmp0 !epilogue!
158 mulx $npj,$mul1,$tmp1
159 add $acc0,$car0,$car0
160 and $car0,$mask,$acc0
161 add $acc1,$car1,$car1
162 srlx $car0,32,$car0
163 add $acc0,$car1,$car1
164 st $car1,[$tp]
165 srlx $car1,32,$car1
166
167 add $tmp0,$car0,$car0
168 and $car0,$mask,$acc0
169 add $tmp1,$car1,$car1
170 srlx $car0,32,$car0
171 add $acc0,$car1,$car1
172 st $car1,[$tp+4]
173 srlx $car1,32,$car1
174
! Top word: sum of the two final carries; its overflow bit is kept in car2
! for the next pass.
175 add $car0,$car1,$car1
176 st $car1,[$tp+8]
177 srlx $car1,32,$car2
178\f
! Outer loop over the remaining words of bp.  i is a byte offset and starts
! at 4, i.e. at bp[1].  Each pass adds ap[]*bp[i] and mul1*np[] to the
! temporary vector and writes the sum back one word lower.
179 mov 4,$i ! i++
180 ld [$bp+4],$mul0 ! bp[1]
181.Louter:
182 add %sp,$bias+$frame,$tp ! rewind tp to the start of the vector
183 ld [$ap],$car0 ! ap[0]
184 ld [$ap+4],$apj ! ap[1]
185 ld [$np],$car1 ! np[0]
186 ld [$np+4],$npj ! np[1]
187 ld [$tp],$tmp1 ! tp[0]
188 ld [$tp+4],$tpj ! tp[1]
189 mov 12,$j
190
! Column 0 of this pass: ap[0]*bp[i] + tp[0]; its low word yields mul1.
191 mulx $car0,$mul0,$car0
192 mulx $apj,$mul0,$tmp0 !prologue!
193 add $tmp1,$car0,$car0
194 ld [$ap+8],$apj !prologue!
195 and $car0,$mask,$acc0
196
197 mulx $n0,$acc0,$mul1
198 and $mul1,$mask,$mul1
199
200 mulx $car1,$mul1,$car1
201 mulx $npj,$mul1,$acc1 !prologue!
202 srlx $car0,32,$car0
203 add $acc0,$car1,$car1
204 ld [$np+8],$npj !prologue!
205 srlx $car1,32,$car1
206 mov $tmp0,$acc0 !prologue!
207
! Inner loop of the generic path.  Same pipeline as .L1st, with the existing
! tp word (tpj, loaded one trip ahead from tp+8) added into the ap chain.
! The result is stored at tp, one word below the word that was read.
208.Linner:
209 mulx $apj,$mul0,$tmp0
210 mulx $npj,$mul1,$tmp1
211 add $tpj,$car0,$car0
212 ld [$ap+$j],$apj ! ap[j]
213 add $acc0,$car0,$car0
214 add $acc1,$car1,$car1
215 ld [$np+$j],$npj ! np[j]
216 and $car0,$mask,$acc0
217 ld [$tp+8],$tpj ! tp[j]
218 srlx $car0,32,$car0
219 add $acc0,$car1,$car1
220 add $j,4,$j ! j++
221 mov $tmp0,$acc0
222 st $car1,[$tp] ! tp[j-1]
223 srlx $car1,32,$car1
224 mov $tmp1,$acc1
225 cmp $j,$num
226 bl %icc,.Linner
227 add $tp,4,$tp ! tp++
228!.Linner
229
! Epilogue: drain the pipeline, storing the words at tp and tp+4.
230 mulx $apj,$mul0,$tmp0 !epilogue!
231 mulx $npj,$mul1,$tmp1
232 add $tpj,$car0,$car0
233 add $acc0,$car0,$car0
234 ld [$tp+8],$tpj ! tp[j]
235 and $car0,$mask,$acc0
236 add $acc1,$car1,$car1
237 srlx $car0,32,$car0
238 add $acc0,$car1,$car1
239 st $car1,[$tp] ! tp[j-1]
240 srlx $car1,32,$car1
241
242 add $tpj,$car0,$car0
243 add $tmp0,$car0,$car0
244 and $car0,$mask,$acc0
245 add $tmp1,$car1,$car1
246 add $acc0,$car1,$car1
247 st $car1,[$tp+4] ! tp[j-1]
248 srlx $car0,32,$car0
249 add $i,4,$i ! i++
250 srlx $car1,32,$car1
251
! Top word: both carries plus the overflow bit of the previous pass.  The
! cmp sets the flags for the bl,a below; the add, st and srlx in between do
! not modify them.
252 add $car0,$car1,$car1
253 cmp $i,$num
254 add $car2,$car1,$car1
255 st $car1,[$tp+8]
256
257 srlx $car1,32,$car2
! Annulled branch: the delay-slot load of the next multiplier word executes
! only when the loop continues.
258 bl,a %icc,.Louter
259 ld [$bp+$i],$mul0 ! bp[i]
260!.Louter
261
! The last word was stored at tp+8, so tp+12 is the end of the vector,
! which is what .Ltail expects.
262 add $tp,12,$tp
263\f
! Common tail, reached by fall-through from the generic path and by a jump
! from the squaring path.  On entry tp points just past the temporary vector
! and car2 holds its top overflow bit.  np and rp are advanced to their ends
! as well, so all three vectors are walked with one negative index in %o7
! that counts up to zero.
264.Ltail:
265 add $np,$num,$np
266 add $rp,$num,$rp
68ea6068 267 sub %g0,$num,%o7 ! k=-num
23296942
AP
268 ba .Lsub
269 subcc %g0,%g0,%g0 ! clear %icc.c
270.align 16
68ea6068
AP
! rp[] = tp[] - np[] with the borrow propagated through the carry flag.  The
! add and brnz between iterations leave the condition codes alone, so the
! borrow survives from one word to the next.
271.Lsub:
272 ld [$tp+%o7],%o0
273 ld [$np+%o7],%o1
7d9cf7c0 274 subccc %o0,%o1,%o1 ! tp[j]-np[j]
23296942 275 add $rp,%o7,$i
68ea6068
AP
276 add %o7,4,%o7
277 brnz %o7,.Lsub
23296942 278 st %o1,[$i]
! Fold the final borrow into the overflow bit.  Carry set afterwards means
! the subtraction went negative, i.e. tp was already below np.
774ff8fe 279 subccc $car2,0,$car2 ! handle upmost overflow bit
68ea6068
AP
280 sub %g0,$num,%o7
281
! Select the result without branching on the data: every word of both
! candidates is loaded, movcs picks the tp word when carry is set and
! otherwise keeps the difference already in rp.  tp is wiped along the way.
68ea6068 282.Lcopy:
774ff8fe
AP
283 ld [$tp+%o7],%o1 ! conditional copy
284 ld [$rp+%o7],%o0
7d9cf7c0 285 st %g0,[$tp+%o7] ! zap tp
774ff8fe 286 movcs %icc,%o1,%o0
68ea6068
AP
287 st %o0,[$rp+%o7]
288 add %o7,4,%o7
289 brnz %o7,.Lcopy
290 nop
68ea6068
AP
! Return 1; restore in the delay slot pops the register window.
291 mov 1,%i0
292 ret
293 restore
294___
295\f
296########
a00e414f
AP
297######## .Lbn_sqr_mont gives up to 20% *overall* improvement over
298######## code without the dedicated squaring procedure that follows.
68ea6068 299########
# Extra register for the squaring code: holds the bit(s) shifted out when a
# cross product is doubled, to be added into the next word.  It is %o5 of the
# window allocated at .Lenter (num lives in %i5 there).
120a9e1a 300$sbit="%o5";
68ea6068
AP
301
# Generated assembly, part 2: dedicated squaring path, appended to $code.
302$code.=<<___;
303.align 32
304.Lbn_sqr_mont:
68ea6068
AP
! Squaring, first pass (multiplier ap[0]).  Entered from .Lenter with
! mul0 = bp[0] (same word as ap[0] since ap == bp), apj = ap[1],
! car1 = np[0], npj = np[1] and j = 12.
! The upper half of ap[0]*ap[0] is halved and its lowest bit parked in
! sbit; the cross products ap[j]*ap[0] are then accumulated on top of it
! and doubled word by word in the loop that follows.
305 mulx $mul0,$mul0,$car0 ! ap[0]*ap[0]
306 mulx $apj,$mul0,$tmp0 !prologue!
307 and $car0,$mask,$acc0
308 add %sp,$bias+$frame,$tp
309 ld [$ap+8],$apj !prologue!
310
311 mulx $n0,$acc0,$mul1 ! "t[0]"*n0
312 srlx $car0,32,$car0
313 and $mul1,$mask,$mul1
314
315 mulx $car1,$mul1,$car1 ! np[0]*"t[0]"*n0
316 mulx $npj,$mul1,$acc1 !prologue!
317 and $car0,1,$sbit ! bit that the halving below drops
318 ld [$np+8],$npj !prologue!
319 srlx $car0,1,$car0 ! halve; re-doubled together with the cross products
320 add $acc0,$car1,$car1
321 srlx $car1,32,$car1
322 mov $tmp0,$acc0 !prologue!
323
! First squaring pass, inner loop.  car0 accumulates the undoubled cross
! products.  Each low word is doubled (add acc0,acc0), the pending bit in
! sbit is or-ed in, the bit shifted out becomes the new sbit, and the 32-bit
! result joins the np chain in car1 before one word of tp is stored.
324.Lsqr_1st:
325 mulx $apj,$mul0,$tmp0
326 mulx $npj,$mul1,$tmp1
327 add $acc0,$car0,$car0 ! ap[j]*a0+c0
328 add $acc1,$car1,$car1
329 ld [$ap+$j],$apj ! ap[j]
330 and $car0,$mask,$acc0
331 ld [$np+$j],$npj ! np[j]
332 srlx $car0,32,$car0
333 add $acc0,$acc0,$acc0
334 or $sbit,$acc0,$acc0
335 mov $tmp1,$acc1
336 srlx $acc0,32,$sbit
337 add $j,4,$j ! j++
338 and $acc0,$mask,$acc0
339 cmp $j,$num
340 add $acc0,$car1,$car1
341 st $car1,[$tp]
342 mov $tmp0,$acc0
343 srlx $car1,32,$car1
344 bl %icc,.Lsqr_1st
345 add $tp,4,$tp ! tp++
346!.Lsqr_1st
347
! Epilogue: same step for the two products still in flight (words at tp and
! tp+4), without starting new loads.
348 mulx $apj,$mul0,$tmp0 ! epilogue
349 mulx $npj,$mul1,$tmp1
350 add $acc0,$car0,$car0 ! ap[j]*a0+c0
351 add $acc1,$car1,$car1
352 and $car0,$mask,$acc0
353 srlx $car0,32,$car0
354 add $acc0,$acc0,$acc0
355 or $sbit,$acc0,$acc0
356 srlx $acc0,32,$sbit
357 and $acc0,$mask,$acc0
358 add $acc0,$car1,$car1
359 st $car1,[$tp]
360 srlx $car1,32,$car1
361
362 add $tmp0,$car0,$car0 ! ap[j]*a0+c0
363 add $tmp1,$car1,$car1
364 and $car0,$mask,$acc0
365 srlx $car0,32,$car0
366 add $acc0,$acc0,$acc0
367 or $sbit,$acc0,$acc0
368 srlx $acc0,32,$sbit
369 and $acc0,$mask,$acc0
370 add $acc0,$car1,$car1
371 st $car1,[$tp+4]
372 srlx $car1,32,$car1
373
! Top word: the remaining carry is doubled too, merged with sbit and the np
! carry; the overflow bit goes to car2.
374 add $car0,$car0,$car0
375 or $sbit,$car0,$car0
376 add $car0,$car1,$car1
377 st $car1,[$tp+8]
378 srlx $car1,32,$car2
379\f
! Second squaring pass (multiplier ap[1]), setup.  mul1 is derived from the
! current tp[0].  Column 0 only receives np[0]*mul1 and is then shifted out.
! Column 1 receives the old tp[1], the low word of ap[1]*ap[1] and
! np[1]*mul1, and is stored as the new tp[0].
380 ld [%sp+$bias+$frame],$tmp0 ! tp[0]
381 ld [%sp+$bias+$frame+4],$tmp1 ! tp[1]
382 ld [%sp+$bias+$frame+8],$tpj ! tp[2]
383 ld [$ap+4],$mul0 ! ap[1]
384 ld [$ap+8],$apj ! ap[2]
385 ld [$np],$car1 ! np[0]
386 ld [$np+4],$npj ! np[1]
387 mulx $n0,$tmp0,$mul1
388
389 mulx $mul0,$mul0,$car0 ! ap[1]*ap[1]
390 and $mul1,$mask,$mul1
391
392 mulx $car1,$mul1,$car1
393 mulx $npj,$mul1,$acc1
394 add $tmp0,$car1,$car1
395 and $car0,$mask,$acc0
396 ld [$np+8],$npj ! np[2]
397 srlx $car1,32,$car1
398 add $tmp1,$car1,$car1
399 srlx $car0,32,$car0
400 add $acc0,$car1,$car1
401 and $car0,1,$sbit ! same halving scheme as in the first pass
402 add $acc1,$car1,$car1
403 srlx $car0,1,$car0
404 mov 12,$j
405 st $car1,[%sp+$bias+$frame] ! tp[0]=
406 srlx $car1,32,$car1
407 add %sp,$bias+$frame+4,$tp
408
! Second squaring pass, inner loop.  Unlike .Lsqr_1st the products are not
! pipelined across iterations.  The existing tp word is folded into sbit,
! which is then added (not or-ed) to the doubled cross product, so sbit may
! carry more than one bit here.
409.Lsqr_2nd:
410 mulx $apj,$mul0,$acc0
411 mulx $npj,$mul1,$acc1
412 add $acc0,$car0,$car0
120a9e1a 413 add $tpj,$sbit,$sbit
68ea6068
AP
414 ld [$ap+$j],$apj ! ap[j]
415 and $car0,$mask,$acc0
416 ld [$np+$j],$npj ! np[j]
417 srlx $car0,32,$car0
418 add $acc1,$car1,$car1
419 ld [$tp+8],$tpj ! tp[j]
420 add $acc0,$acc0,$acc0
421 add $j,4,$j ! j++
120a9e1a 422 add $sbit,$acc0,$acc0
68ea6068
AP
423 srlx $acc0,32,$sbit
424 and $acc0,$mask,$acc0
425 cmp $j,$num
426 add $acc0,$car1,$car1
427 st $car1,[$tp] ! tp[j-1]
428 srlx $car1,32,$car1
429 bl %icc,.Lsqr_2nd
430 add $tp,4,$tp ! tp++
431!.Lsqr_2nd
432
! Last column of the pass, same step without further loads.
433 mulx $apj,$mul0,$acc0
434 mulx $npj,$mul1,$acc1
435 add $acc0,$car0,$car0
120a9e1a 436 add $tpj,$sbit,$sbit
68ea6068
AP
437 and $car0,$mask,$acc0
438 srlx $car0,32,$car0
439 add $acc1,$car1,$car1
440 add $acc0,$acc0,$acc0
120a9e1a 441 add $sbit,$acc0,$acc0
68ea6068
AP
442 srlx $acc0,32,$sbit
443 and $acc0,$mask,$acc0
444 add $acc0,$car1,$car1
445 st $car1,[$tp] ! tp[j-1]
446 srlx $car1,32,$car1
447
! Top word: doubled carry, pending sbit, np carry and the overflow bit of
! the previous pass; the new overflow bit goes to car2.
448 add $car0,$car0,$car0
120a9e1a 449 add $sbit,$car0,$car0
68ea6068
AP
450 add $car0,$car1,$car1
451 add $car2,$car1,$car1
452 st $car1,[$tp+4]
453 srlx $car1,32,$car2
454\f
! Setup for the general squaring passes, starting with multiplier ap[2]
! (i = 8 as a byte offset).  mul1 comes from the current tp[0].  acc0 keeps
! the low word of ap[2]*ap[2], while the halved upper word and its dropped
! bit go to car0 and sbit as in the earlier passes.
455 ld [%sp+$bias+$frame],$tmp1 ! tp[0]
456 ld [%sp+$bias+$frame+4],$tpj ! tp[1]
457 ld [$ap+8],$mul0 ! ap[2]
458 ld [$np],$car1 ! np[0]
459 ld [$np+4],$npj ! np[1]
460 mulx $n0,$tmp1,$mul1
461 and $mul1,$mask,$mul1
462 mov 8,$i
463
464 mulx $mul0,$mul0,$car0
465 mulx $car1,$mul1,$car1
466 and $car0,$mask,$acc0
467 add $tmp1,$car1,$car1
468 srlx $car0,32,$car0
469 add %sp,$bias+$frame,$tp
470 srlx $car1,32,$car1
471 and $car0,1,$sbit
472 srlx $car0,1,$car0
473 mov 4,$j
474
! General squaring pass.  .Lsqr_inner1 covers the columns with j < i, where
! only the reduction term np[j]*mul1 is added to tp.  No ap product is formed
! for them, presumably because the cross products for those positions were
! already added in earlier passes.
475.Lsqr_outer:
476.Lsqr_inner1:
477 mulx $npj,$mul1,$acc1
478 add $tpj,$car1,$car1
479 add $j,4,$j
480 ld [$tp+8],$tpj
481 cmp $j,$i
482 add $acc1,$car1,$car1
483 ld [$np+$j],$npj
484 st $car1,[$tp]
485 srlx $car1,32,$car1
486 bl %icc,.Lsqr_inner1
487 add $tp,4,$tp
488!.Lsqr_inner1
489
! Column j == i: this is where the low word of the square (acc0, formed
! before the pass started) is added.  The carry out of tp + np carry is
! moved into sbit so it is accounted for in the next column.
490 add $j,4,$j
491 ld [$ap+$j],$apj ! ap[j]
492 mulx $npj,$mul1,$acc1
493 add $tpj,$car1,$car1
494 ld [$np+$j],$npj ! np[j]
f55ef97b
AP
495 srlx $car1,32,$tmp0
496 and $car1,$mask,$car1
497 add $tmp0,$sbit,$sbit
68ea6068
AP
498 add $acc0,$car1,$car1
499 ld [$tp+8],$tpj ! tp[j]
500 add $acc1,$car1,$car1
501 st $car1,[$tp]
502 srlx $car1,32,$car1
503
! When only one column is left the pipelined loop is skipped; the tp
! increment in the delay slot happens on both paths.
504 add $j,4,$j
505 cmp $j,$num
506 be,pn %icc,.Lsqr_no_inner2
507 add $tp,4,$tp
508
! Columns above i: same doubling step as .Lsqr_2nd.  Cross product
! ap[j]*mul0 is accumulated in car0, the low word is doubled and combined
! with sbit (which also absorbed the old tp word), and the np term is added
! before the word is stored.
509.Lsqr_inner2:
510 mulx $apj,$mul0,$acc0
511 mulx $npj,$mul1,$acc1
120a9e1a 512 add $tpj,$sbit,$sbit
68ea6068
AP
513 add $acc0,$car0,$car0
514 ld [$ap+$j],$apj ! ap[j]
515 and $car0,$mask,$acc0
516 ld [$np+$j],$npj ! np[j]
517 srlx $car0,32,$car0
518 add $acc0,$acc0,$acc0
519 ld [$tp+8],$tpj ! tp[j]
120a9e1a 520 add $sbit,$acc0,$acc0
68ea6068
AP
521 add $j,4,$j ! j++
522 srlx $acc0,32,$sbit
523 and $acc0,$mask,$acc0
524 cmp $j,$num
525 add $acc0,$car1,$car1
526 add $acc1,$car1,$car1
527 st $car1,[$tp] ! tp[j-1]
528 srlx $car1,32,$car1
529 bl %icc,.Lsqr_inner2
530 add $tp,4,$tp ! tp++
531
! Last column of the pass; also the direct target when .Lsqr_inner2 is
! skipped.  Same step without further loads.
532.Lsqr_no_inner2:
533 mulx $apj,$mul0,$acc0
534 mulx $npj,$mul1,$acc1
120a9e1a 535 add $tpj,$sbit,$sbit
68ea6068
AP
536 add $acc0,$car0,$car0
537 and $car0,$mask,$acc0
538 srlx $car0,32,$car0
539 add $acc0,$acc0,$acc0
120a9e1a 540 add $sbit,$acc0,$acc0
68ea6068
AP
541 srlx $acc0,32,$sbit
542 and $acc0,$mask,$acc0
543 add $acc0,$car1,$car1
544 add $acc1,$car1,$car1
545 st $car1,[$tp] ! tp[j-1]
546 srlx $car1,32,$car1
547
! Top word: doubled carry, pending sbit, np carry and the previous overflow
! bit; the new overflow bit goes to car2.
548 add $car0,$car0,$car0
120a9e1a 549 add $sbit,$car0,$car0
68ea6068
AP
550 add $car0,$car1,$car1
551 add $car2,$car1,$car1
552 st $car1,[$tp+4]
553 srlx $car1,32,$car2
554\f
! Advance to the next multiplier word ap[i] and redo the per-pass setup:
! mul1 from the current tp[0], column 0 of the np chain, the square of the
! new multiplier split into acc0 (low word), car0 (halved upper word) and
! sbit (dropped bit), and tp rewound to the start of the vector.
555 add $i,4,$i ! i++
556 ld [%sp+$bias+$frame],$tmp1 ! tp[0]
557 ld [%sp+$bias+$frame+4],$tpj ! tp[1]
558 ld [$ap+$i],$mul0 ! ap[j]
559 ld [$np],$car1 ! np[0]
560 ld [$np+4],$npj ! np[1]
561 mulx $n0,$tmp1,$mul1
562 and $mul1,$mask,$mul1
563 add $i,4,$tmp0
564
565 mulx $mul0,$mul0,$car0
566 mulx $car1,$mul1,$car1
567 and $car0,$mask,$acc0
568 add $tmp1,$car1,$car1
569 srlx $car0,32,$car0
570 add %sp,$bias+$frame,$tp
571 srlx $car1,32,$car1
572 and $car0,1,$sbit
573 srlx $car0,1,$car0
574
! tmp0 = i+4, so the general pass repeats while i is not the last word.  The
! last word falls through to .Lsqr_last.  The delay-slot mov resets j on
! both paths.
575 cmp $tmp0,$num ! i<num-1
576 bl %icc,.Lsqr_outer
577 mov 4,$j
578\f
! Final squaring pass (multiplier is the last word of ap).  No cross
! products remain, so the loop only adds the reduction terms np[j]*mul1 to
! tp for the columns with j < i.
579.Lsqr_last:
580 mulx $npj,$mul1,$acc1
581 add $tpj,$car1,$car1
582 add $j,4,$j
583 ld [$tp+8],$tpj
584 cmp $j,$i
585 add $acc1,$car1,$car1
586 ld [$np+$j],$npj
587 st $car1,[$tp]
588 srlx $car1,32,$car1
589 bl %icc,.Lsqr_last
590 add $tp,4,$tp
591!.Lsqr_last
592
! Last column: the low word of the square (acc0) plus the old tp word; the
! carry out of that sum is moved into sbit.
593 mulx $npj,$mul1,$acc1
120a9e1a
AP
594 add $tpj,$acc0,$acc0
595 srlx $acc0,32,$tmp0
596 and $acc0,$mask,$acc0
597 add $tmp0,$sbit,$sbit
68ea6068
AP
598 add $acc0,$car1,$car1
599 add $acc1,$car1,$car1
600 st $car1,[$tp]
601 srlx $car1,32,$car1
602
! Top word: undo the halving of the upper word of the square, add sbit, the
! np carry and the previous overflow bit; the new overflow bit goes to car2.
603 add $car0,$car0,$car0 ! recover $car0
120a9e1a 604 add $sbit,$car0,$car0
68ea6068
AP
605 add $car0,$car1,$car1
606 add $car2,$car1,$car1
607 st $car1,[$tp+4]
608 srlx $car1,32,$car2
609
! Join the common tail.  The last word was stored at tp+4, so the
! delay-slot add leaves tp just past the end of the vector.
610 ba .Ltail
611 add $tp,8,$tp
612.type $fname,#function
613.size $fname,(.-$fname)
46f4e1be 614.asciz "Montgomery Multiplication for SPARCv9, CRYPTOGAMS by <appro\@openssl.org>"
23296942 615.align 32
68ea6068
AP
616___
# Post-processing and output: every backtick-enclosed expression in the
# generated text is replaced by the result of evaluating it as Perl, then the
# text is printed.  A failing close (e.g. a write error on the output file)
# aborts with a diagnostic instead of passing silently.
617$code =~ s/\`([^\`]*)\`/eval($1)/gem;
618print $code;
a21314db 619close STDOUT or die "error closing STDOUT: $!";