]>
Commit | Line | Data |
---|---|---|
fccbb9b3 | 1 | .ident "sparcv8.s, Version 1.4" |
e3713c36 | 2 | .ident "SPARC v8 ISA artwork by Andy Polyakov <appro@openssl.org>" |
5e85b6ab UM |
3 | |
4 | /* | |
5 | * ==================================================================== | |
44c8a5e2 | 6 | * Copyright 1999-2016 The OpenSSL Project Authors. All Rights Reserved. |
5e85b6ab | 7 | * |
367ace68 | 8 | * Licensed under the Apache License 2.0 (the "License"). You may not use |
44c8a5e2 RS |
9 | * this file except in compliance with the License. You can obtain a copy |
10 | * in the file LICENSE in the source distribution or at | |
11 | * https://www.openssl.org/source/license.html | |
5e85b6ab UM |
12 | * ==================================================================== |
13 | */ | |
14 | ||
15 | /* | |
46f4e1be | 16 | * This is my modest contribution to the OpenSSL project (see |
5e85b6ab UM |
17 | * http://www.openssl.org/ for more information about it) and is |
18 | * a drop-in SuperSPARC ISA replacement for the crypto/bn/bn_asm.c | |
19 | * module. For updates see http://fy.chalmers.se/~appro/hpe/. | |
20 | * | |
21 | * See bn_asm.sparc.v8plus.S for more details. | |
22 | */ | |
23 | ||
24 | /* | |
25 | * Revision history. | |
26 | * | |
4f5fac80 UM |
27 | * 1.1 - new loop unrolling model(*); |
28 | * 1.2 - made gas friendly; | |
98f1c689 | 29 | * 1.3 - fixed problem with /usr/ccs/lib/cpp; |
fccbb9b3 | 30 | * 1.4 - some retunes; |
5e85b6ab UM |
31 | * |
32 | * (*) see bn_asm.sparc.v8plus.S for details | |
33 | */ | |
34 | ||
35 | .section ".text",#alloc,#execinstr | |
4f5fac80 | 36 | .file "bn_asm.sparc.v8.S" |
5e85b6ab UM |
37 | |
38 | .align 32 | |
39 | ||
/*
 * BN_ULONG bn_mul_add_words(BN_ULONG *rp, BN_ULONG *ap, int num, BN_ULONG w)
 *
 * For each of the num words: rp[i] = low32(rp[i] + ap[i]*w + carry), the
 * overflow being carried into the next word.  The final carry word is
 * returned in %o0.  num <= 0 returns 0 and performs no memory access
 * (the preload in the first delay slot is annulled).
 *
 * Leaf routine, no register window.
 *   %o0 rp (advanced by 16 per unrolled pass)    %o1 ap (likewise)
 *   %o2 words still to process                   %o3 w
 *   %o4 current rp word                          %o5 running carry
 *   %g2/%g3 alternate between "current" and "prefetched" ap word and are
 *   overwritten by the low product word; %g1 takes the high word from %y.
 */
	.global	bn_mul_add_words
bn_mul_add_words:
	cmp	%o2,0
	bg,a	.L_bn_mul_add_words_proceed
	ld	[%o1],%g2		! taken only: preload ap[0]
	retl				! num <= 0
	mov	%g0,%o0			!   -> return 0

.L_bn_mul_add_words_proceed:
	andcc	%o2,-4,%g0		! fewer than 4 words?
	be	.L_bn_mul_add_words_tail
	mov	%g0,%o5			! carry = 0 (executed on both paths)

	! ---- main loop: four words per pass ----
.L_bn_mul_add_words_loop:
	ld	[%o0],%o4		! word 0
	ld	[%o1+4],%g3		! prefetch ap[1]
	umul	%o3,%g2,%g2		! %y:%g2 = w * ap[0]
	rd	%y,%g1
	addcc	%o4,%o5,%o4		! rp word + incoming carry
	addx	%g1,0,%g1		!   fold carry-out into high word
	addcc	%o4,%g2,%o4		! + low product word
	st	%o4,[%o0]
	addx	%g1,0,%o5		! new carry

	ld	[%o0+4],%o4		! word 1
	ld	[%o1+8],%g2		! prefetch ap[2]
	umul	%o3,%g3,%g3
	sub	%o2,4,%o2		! four words accounted for
	rd	%y,%g1
	addcc	%o4,%o5,%o4
	addx	%g1,0,%g1
	addcc	%o4,%g3,%o4
	st	%o4,[%o0+4]
	addx	%g1,0,%o5

	ld	[%o0+8],%o4		! word 2
	ld	[%o1+12],%g3		! prefetch ap[3]
	umul	%o3,%g2,%g2
	add	%o1,16,%o1		! ap += 4 words
	rd	%y,%g1
	addcc	%o4,%o5,%o4
	addx	%g1,0,%g1
	addcc	%o4,%g2,%o4
	st	%o4,[%o0+8]
	addx	%g1,0,%o5

	ld	[%o0+12],%o4		! word 3
	umul	%o3,%g3,%g3
	add	%o0,16,%o0		! rp += 4 words
	rd	%y,%g1
	addcc	%o4,%o5,%o4
	addx	%g1,0,%g1
	addcc	%o4,%g3,%o4
	st	%o4,[%o0-4]		! rp already advanced
	addx	%g1,0,%o5
	andcc	%o2,-4,%g0		! at least 4 words left?
	bne,a	.L_bn_mul_add_words_loop
	ld	[%o1],%g2		! taken only: preload next ap[0]

	tst	%o2			! 1..3 words left?
	bne,a	.L_bn_mul_add_words_tail
	ld	[%o1],%g2		! taken only: preload ap[0]
.L_bn_mul_add_words_return:
	retl
	mov	%o5,%o0			! return carry
	nop

	! ---- tail: up to three remaining words ----
.L_bn_mul_add_words_tail:
	ld	[%o0],%o4
	umul	%o3,%g2,%g2
	addcc	%o4,%o5,%o4
	rd	%y,%g1
	addx	%g1,0,%g1
	addcc	%o4,%g2,%o4
	addx	%g1,0,%o5
	subcc	%o2,1,%o2
	be	.L_bn_mul_add_words_return
	st	%o4,[%o0]		! delay slot: store on both paths

	ld	[%o1+4],%g2
	ld	[%o0+4],%o4
	umul	%o3,%g2,%g2
	rd	%y,%g1
	addcc	%o4,%o5,%o4
	addx	%g1,0,%g1
	addcc	%o4,%g2,%o4
	addx	%g1,0,%o5
	subcc	%o2,1,%o2
	be	.L_bn_mul_add_words_return
	st	%o4,[%o0+4]

	ld	[%o1+8],%g2		! third and last tail word
	ld	[%o0+8],%o4
	umul	%o3,%g2,%g2
	rd	%y,%g1
	addcc	%o4,%o5,%o4
	addx	%g1,0,%g1
	addcc	%o4,%g2,%o4
	st	%o4,[%o0+8]
	retl
	addx	%g1,0,%o0		! return carry directly

	.type	bn_mul_add_words,#function
	.size	bn_mul_add_words,(.-bn_mul_add_words)
150 | ||
151 | .align 32 | |
152 | ||
/*
 * BN_ULONG bn_mul_words(BN_ULONG *rp, BN_ULONG *ap, int num, BN_ULONG w)
 *
 * For each of the num words: rp[i] = low32(ap[i]*w + carry); the high
 * product word plus any overflow becomes the carry into the next word.
 * The final carry word is returned in %o0; num <= 0 returns 0 and
 * performs no memory access.
 *
 * Leaf routine, no register window.
 *   %o0 rp   %o1 ap   %o2 words still to process   %o3 w
 *   %o5 running carry
 *   %g2/%g3 alternate between current and prefetched ap word and are
 *   overwritten by the low product word; %g1 takes the high word from %y.
 * The stand-alone nop instructions are kept exactly where the original
 * had them.
 */
	.global	bn_mul_words
bn_mul_words:
	cmp	%o2,0
	bg,a	.L_bn_mul_words_proceed
	ld	[%o1],%g2		! taken only: preload ap[0]
	retl				! num <= 0
	mov	%g0,%o0			!   -> return 0

.L_bn_mul_words_proceed:
	andcc	%o2,-4,%g0		! fewer than 4 words?
	be	.L_bn_mul_words_tail
	mov	%g0,%o5			! carry = 0 (executed on both paths)

	! ---- main loop: four words per pass ----
.L_bn_mul_words_loop:
	ld	[%o1+4],%g3		! prefetch ap[1]
	umul	%o3,%g2,%g2		! %y:%g2 = w * ap[0]
	addcc	%g2,%o5,%g2		! low word + incoming carry
	rd	%y,%g1
	addx	%g1,0,%o5		! carry = high word + overflow
	st	%g2,[%o0]

	ld	[%o1+8],%g2		! prefetch ap[2]
	umul	%o3,%g3,%g3
	addcc	%g3,%o5,%g3
	rd	%y,%g1
	sub	%o2,4,%o2		! four words accounted for (flags untouched)
	addx	%g1,0,%o5
	st	%g3,[%o0+4]

	ld	[%o1+12],%g3		! prefetch ap[3]
	umul	%o3,%g2,%g2
	addcc	%g2,%o5,%g2
	rd	%y,%g1
	add	%o1,16,%o1		! ap += 4 words (flags untouched)
	st	%g2,[%o0+8]
	addx	%g1,0,%o5

	umul	%o3,%g3,%g3
	addcc	%g3,%o5,%g3
	rd	%y,%g1
	add	%o0,16,%o0		! rp += 4 words (flags untouched)
	addx	%g1,0,%o5
	st	%g3,[%o0-4]		! rp already advanced
	andcc	%o2,-4,%g0		! at least 4 words left?
	nop
	bne,a	.L_bn_mul_words_loop
	ld	[%o1],%g2		! taken only: preload next ap[0]

	tst	%o2			! 1..3 words left?
	bne,a	.L_bn_mul_words_tail
	ld	[%o1],%g2		! taken only: preload ap[0]
.L_bn_mul_words_return:
	retl
	mov	%o5,%o0			! return carry
	nop

	! ---- tail: up to three remaining words ----
.L_bn_mul_words_tail:
	umul	%o3,%g2,%g2
	addcc	%g2,%o5,%g2
	rd	%y,%g1
	addx	%g1,0,%o5
	subcc	%o2,1,%o2
	be	.L_bn_mul_words_return
	st	%g2,[%o0]		! delay slot: store on both paths
	nop

	ld	[%o1+4],%g2
	umul	%o3,%g2,%g2
	addcc	%g2,%o5,%g2
	rd	%y,%g1
	addx	%g1,0,%o5
	subcc	%o2,1,%o2
	be	.L_bn_mul_words_return
	st	%g2,[%o0+4]

	ld	[%o1+8],%g2		! third and last tail word
	umul	%o3,%g2,%g2
	addcc	%g2,%o5,%g2
	rd	%y,%g1
	st	%g2,[%o0+8]
	retl
	addx	%g1,0,%o0		! return carry directly

	.type	bn_mul_words,#function
	.size	bn_mul_words,(.-bn_mul_words)
244 | ||
245 | .align 32 | |
/*
 * void bn_sqr_words(BN_ULONG *r, BN_ULONG *a, int n)
 *
 * For each of the n input words stores the 64-bit square as two words:
 * r[2*i] = low word of a[i]*a[i], r[2*i+1] = high word (taken from %y).
 * n <= 0 performs no memory access.  Although the prototype is void,
 * %o0 is cleared on every return path.
 *
 * Leaf routine, no register window.
 *   %o0 r (advanced by 32 per unrolled pass)   %o1 a (advanced by 16)
 *   %o2 words still to process
 *   %g2/%g3 alternate between current and prefetched input word
 *   %o4 low product word, %o5 high product word
 * The stand-alone nop instructions are kept exactly where the original
 * had them.
 */
	.global	bn_sqr_words
bn_sqr_words:
	cmp	%o2,0
	bg,a	.L_bn_sqr_words_proceed
	ld	[%o1],%g2		! taken only: preload a[0]
	retl				! n <= 0
	clr	%o0

.L_bn_sqr_words_proceed:
	andcc	%o2,-4,%g0		! fewer than 4 words?
	bz	.L_bn_sqr_words_tail
	clr	%o5

	! ---- main loop: four input words (eight output words) per pass ----
.L_bn_sqr_words_loop:
	ld	[%o1+4],%g3		! prefetch a[1]
	umul	%g2,%g2,%o4		! %y:%o4 = a[0]^2
	st	%o4,[%o0]
	rd	%y,%o5
	st	%o5,[%o0+4]

	ld	[%o1+8],%g2		! prefetch a[2]
	umul	%g3,%g3,%o4
	dec	4,%o2			! four words accounted for
	st	%o4,[%o0+8]
	rd	%y,%o5
	st	%o5,[%o0+12]
	nop

	ld	[%o1+12],%g3		! prefetch a[3]
	umul	%g2,%g2,%o4
	st	%o4,[%o0+16]
	rd	%y,%o5
	inc	16,%o1			! a += 4 words
	st	%o5,[%o0+20]

	umul	%g3,%g3,%o4
	inc	32,%o0			! r += 8 words
	st	%o4,[%o0-8]		! r already advanced
	rd	%y,%o5
	st	%o5,[%o0-4]
	! Only the condition codes of this test are needed.  The result is
	! discarded into %g0, as in the sibling routines, instead of being
	! written to %g2, which both successor paths reload before use.
	andcc	%o2,-4,%g0
	bnz,a	.L_bn_sqr_words_loop
	ld	[%o1],%g2		! taken only: preload next a[0]

	tst	%o2			! 1..3 words left?
	nop
	bnz,a	.L_bn_sqr_words_tail
	ld	[%o1],%g2		! taken only: preload a[0]
.L_bn_sqr_words_return:
	retl
	clr	%o0

	! ---- tail: up to three remaining words ----
.L_bn_sqr_words_tail:
	umul	%g2,%g2,%o4
	st	%o4,[%o0]
	deccc	%o2
	rd	%y,%o5			! does not disturb the flags set by deccc
	bz	.L_bn_sqr_words_return
	st	%o5,[%o0+4]		! delay slot: store on both paths

	ld	[%o1+4],%g2
	umul	%g2,%g2,%o4
	st	%o4,[%o0+8]
	deccc	%o2
	rd	%y,%o5
	nop
	bz	.L_bn_sqr_words_return
	st	%o5,[%o0+12]

	ld	[%o1+8],%g2		! third and last tail word
	umul	%g2,%g2,%o4
	st	%o4,[%o0+16]
	rd	%y,%o5
	st	%o5,[%o0+20]
	retl
	clr	%o0

	.type	bn_sqr_words,#function
	.size	bn_sqr_words,(.-bn_sqr_words)
330 | ||
331 | .align 32 | |
332 | ||
/*
 * BN_ULONG bn_div_words(BN_ULONG h, BN_ULONG l, BN_ULONG d)
 *
 * Returns the quotient of the 64-bit value h:l divided by d, computed by
 * a single UDIV with h placed in the Y register as the upper half of the
 * dividend.
 *
 * NOTE(review): the quotient only fits in 32 bits when h < d, and d must
 * be non-zero; callers are presumably expected to guarantee both.
 *
 * Leaf routine, no register window.
 *   in:  %o0 = h, %o1 = l, %o2 = d
 *   out: %o0 = quotient
 */
	.global	bn_div_words
bn_div_words:
	wr	%o0,%y			! Y = h
	! WRY is a delayed-write instruction on SPARC V8: a read of Y by any
	! of the next three instructions is not guaranteed to see the new
	! value.  Pad so that UDIV is the fourth instruction after the write.
	nop
	nop
	nop
	udiv	%o1,%o2,%o0		! %o0 = (Y:%o1) / %o2
	retl
	nop

	.type	bn_div_words,#function
	.size	bn_div_words,(.-bn_div_words)
346 | ||
347 | .align 32 | |
348 | ||
/*
 * BN_ULONG bn_add_words(BN_ULONG *rp, BN_ULONG *ap, BN_ULONG *bp, int n)
 *
 * rp[i] = ap[i] + bp[i] + carry for i in [0, n); the final carry (0 or 1)
 * is returned in %o0.  n <= 0 returns 0 and performs no memory access.
 *
 * Leaf routine, no register window.
 *   %o0 rp   %o1 ap   %o2 bp   %o3 words still to process
 *   %o4/%g3 words from ap, %o5/%g4 words from bp
 *   %g1 carry saved as 0/1 whenever the condition codes are about to be
 *   overwritten by a loop test; "addcc %g1,-1,%g0" puts it back into the
 *   carry flag (1 + 0xffffffff carries out, 0 + 0xffffffff does not).
 */
	.global	bn_add_words
bn_add_words:
	cmp	%o3,0
	bg,a	.L_bn_add_words_proceed
	ld	[%o1],%o4		! taken only: preload ap[0]
	retl				! n <= 0
	clr	%o0

.L_bn_add_words_proceed:
	andcc	%o3,-4,%g0		! fewer than 4 words?
	bz	.L_bn_add_words_tail
	clr	%g1			! saved carry = 0 (executed on both paths)
	ba	.L_bn_add_words_warm_loop	! ap[0] is already in %o4
	addcc	%g0,0,%g0	! clear carry flag

	! ---- main loop: four words per pass ----
.L_bn_add_words_loop:
	ld	[%o1],%o4
.L_bn_add_words_warm_loop:
	ld	[%o2],%o5
	ld	[%o1+4],%g3
	ld	[%o2+4],%g4
	dec	4,%o3			! four words accounted for (flags untouched)
	addxcc	%o5,%o4,%o5
	st	%o5,[%o0]

	ld	[%o1+8],%o4
	ld	[%o2+8],%o5
	inc	16,%o1			! ap += 4 words (flags untouched)
	addxcc	%g3,%g4,%g3
	st	%g3,[%o0+4]

	ld	[%o1-4],%g3		! ap already advanced: old ap[3]
	ld	[%o2+12],%g4
	inc	16,%o2			! bp += 4 words
	addxcc	%o5,%o4,%o5
	st	%o5,[%o0+8]

	inc	16,%o0			! rp += 4 words
	addxcc	%g3,%g4,%g3
	st	%g3,[%o0-4]		! rp already advanced
	addx	%g0,0,%g1		! save carry before andcc destroys it
	andcc	%o3,-4,%g0		! at least 4 words left?
	bnz,a	.L_bn_add_words_loop
	addcc	%g1,-1,%g0		! taken only: restore carry flag

	tst	%o3			! 1..3 words left?
	bnz,a	.L_bn_add_words_tail
	ld	[%o1],%o4		! taken only: preload ap[0]
.L_bn_add_words_return:
	retl
	mov	%g1,%o0			! return saved carry

	! ---- tail: up to three remaining words ----
.L_bn_add_words_tail:
	addcc	%g1,-1,%g0		! restore carry flag
	ld	[%o2],%o5
	addxcc	%o5,%o4,%o5
	addx	%g0,0,%g1		! save carry before deccc destroys it
	deccc	%o3
	bz	.L_bn_add_words_return
	st	%o5,[%o0]		! delay slot: store on both paths

	ld	[%o1+4],%o4
	addcc	%g1,-1,%g0
	ld	[%o2+4],%o5
	addxcc	%o5,%o4,%o5
	addx	%g0,0,%g1
	deccc	%o3
	bz	.L_bn_add_words_return
	st	%o5,[%o0+4]

	ld	[%o1+8],%o4		! third and last tail word
	addcc	%g1,-1,%g0
	ld	[%o2+8],%o5
	addxcc	%o5,%o4,%o5
	st	%o5,[%o0+8]
	retl
	addx	%g0,0,%o0		! return carry directly

	.type	bn_add_words,#function
	.size	bn_add_words,(.-bn_add_words)
434 | ||
435 | .align 32 | |
436 | ||
/*
 * BN_ULONG bn_sub_words(BN_ULONG *rp, BN_ULONG *ap, BN_ULONG *bp, int n)
 *
 * rp[i] = ap[i] - bp[i] - borrow for i in [0, n); the final borrow
 * (0 or 1) is returned in %o0.  n <= 0 returns 0 and performs no memory
 * access.
 *
 * Leaf routine, no register window.
 *   %o0 rp   %o1 ap   %o2 bp   %o3 words still to process
 *   %o4/%g3 words from ap, %o5/%g4 words from bp (and the difference)
 *   %g1 borrow saved as 0/1 whenever the condition codes are about to be
 *   overwritten by a loop test; "addcc %g1,-1,%g0" puts it back into the
 *   carry flag.
 * The stand-alone nop instructions are kept exactly where the original
 * had them.
 */
	.global	bn_sub_words
bn_sub_words:
	cmp	%o3,0
	bg,a	.L_bn_sub_words_proceed
	ld	[%o1],%o4		! taken only: preload ap[0]
	retl				! n <= 0
	mov	%g0,%o0			!   -> return 0

.L_bn_sub_words_proceed:
	andcc	%o3,-4,%g0		! fewer than 4 words?
	be	.L_bn_sub_words_tail
	mov	%g0,%g1			! saved borrow = 0 (executed on both paths)
	ba	.L_bn_sub_words_warm_loop	! ap[0] is already in %o4
	addcc	%g0,0,%g0		! clear carry flag

	! ---- main loop: four words per pass ----
.L_bn_sub_words_loop:
	ld	[%o1],%o4
.L_bn_sub_words_warm_loop:
	ld	[%o2],%o5
	ld	[%o1+4],%g3
	ld	[%o2+4],%g4
	sub	%o3,4,%o3		! four words accounted for (flags untouched)
	subxcc	%o4,%o5,%o5
	st	%o5,[%o0]

	ld	[%o1+8],%o4
	ld	[%o2+8],%o5
	add	%o1,16,%o1		! ap += 4 words (flags untouched)
	subxcc	%g3,%g4,%g4
	st	%g4,[%o0+4]

	ld	[%o1-4],%g3		! ap already advanced: old ap[3]
	ld	[%o2+12],%g4
	add	%o2,16,%o2		! bp += 4 words
	subxcc	%o4,%o5,%o5
	st	%o5,[%o0+8]

	add	%o0,16,%o0		! rp += 4 words
	subxcc	%g3,%g4,%g4
	st	%g4,[%o0-4]		! rp already advanced
	addx	%g0,0,%g1		! save borrow before andcc destroys it
	andcc	%o3,-4,%g0		! at least 4 words left?
	bne,a	.L_bn_sub_words_loop
	addcc	%g1,-1,%g0		! taken only: restore carry flag

	tst	%o3			! 1..3 words left?
	nop
	bne,a	.L_bn_sub_words_tail
	ld	[%o1],%o4		! taken only: preload ap[0]
.L_bn_sub_words_return:
	retl
	mov	%g1,%o0			! return saved borrow

	! ---- tail: up to three remaining words ----
.L_bn_sub_words_tail:
	addcc	%g1,-1,%g0		! restore carry flag
	ld	[%o2],%o5
	subxcc	%o4,%o5,%o5
	addx	%g0,0,%g1		! save borrow before subcc destroys it
	subcc	%o3,1,%o3
	be	.L_bn_sub_words_return
	st	%o5,[%o0]		! delay slot: store on both paths
	nop

	ld	[%o1+4],%o4
	addcc	%g1,-1,%g0
	ld	[%o2+4],%o5
	subxcc	%o4,%o5,%o5
	addx	%g0,0,%g1
	subcc	%o3,1,%o3
	be	.L_bn_sub_words_return
	st	%o5,[%o0+4]

	ld	[%o1+8],%o4		! third and last tail word
	addcc	%g1,-1,%g0
	ld	[%o2+8],%o5
	subxcc	%o4,%o5,%o5
	st	%o5,[%o0+8]
	retl
	addx	%g0,0,%o0		! return borrow directly

	.type	bn_sub_words,#function
	.size	bn_sub_words,(.-bn_sub_words)
524 | ||
/*
 * Stack adjustment handed to "save" by the windowed routines below.
 * NOTE(review): 96 bytes looks like the minimum V8 ABI frame rounded up
 * to the required alignment; confirm against the ABI supplement.
 */
#define FRAME_SIZE	-96

/*
 * Here is the register usage map for *all* routines below.
 *
 * t_1/t_2  low/high word of the partial product just computed
 * c_1..c_3 the three accumulator words, whose roles rotate
 * ap/bp/rp word I of the a, b and r arrays (incoming arguments, seen as
 *          %i1, %i2 and %i0 once the register window has been opened)
 * a_N/b_N  registers caching a[N] and b[N]
 *
 * b_4..b_7 live in the global registers %g1..%g4.
 */
#define t_1	%o0
#define t_2	%o1
#define c_1	%o2
#define c_2	%o3
#define c_3	%o4

#define ap(I)	[%i1+4*I]
#define bp(I)	[%i2+4*I]
#define rp(I)	[%i0+4*I]

#define a_0	%l0
#define a_1	%l1
#define a_2	%l2
#define a_3	%l3
#define a_4	%l4
#define a_5	%l5
#define a_6	%l6
#define a_7	%l7

#define b_0	%i3
#define b_1	%i4
#define b_2	%i5
#define b_3	%o5
#define b_4	%g1
#define b_5	%g2
#define b_6	%g3
#define b_7	%g4
5e85b6ab UM |
557 | |
558 | .align 32 | |
/*
 * void bn_mul_comba8(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b)
 *
 * Fully unrolled 8x8-word multiplication: r[0..15] = a[0..7] * b[0..7].
 * The partial products are taken one result word at a time (all
 * a[i]*b[j] with the same i+j).  Each product is added into a three-word
 * accumulator: addcc adds the low word, addxcc the high word plus carry,
 * addx collects the final carry.  The first product of each group resets
 * the top accumulator word with "addx %g0,%g0,...".  After a result word
 * is stored the three accumulator registers rotate roles, as recorded in
 * the trailing mul_add_c(...) comments.
 *
 * Opens a register window with save/restore; the words of a and b are
 * loaded into registers on first use, interleaved with the arithmetic
 * (loads and stores do not alter the condition codes).  %o0 of the
 * caller is zeroed by the final restore.
 *
 * The "!=" markers in the trailing comments come from the original
 * source and mark every fourth instruction.
 */
	.global	bn_mul_comba8
bn_mul_comba8:
	save	%sp,FRAME_SIZE,%sp
	/* result word 0 */
	ld	ap(0),a_0
	ld	bp(0),b_0
	umul	a_0,b_0,c_1	!=!mul_add_c(a[0],b[0],c1,c2,c3);
	ld	bp(1),b_1
	rd	%y,c_2
	st	c_1,rp(0)	!r[0]=c1;

	/* result word 1 */
	umul	a_0,b_1,t_1	!=!mul_add_c(a[0],b[1],c2,c3,c1);
	ld	ap(1),a_1
	addcc	c_2,t_1,c_2
	rd	%y,t_2
	addxcc	%g0,t_2,c_3	!=
	addx	%g0,%g0,c_1
	ld	ap(2),a_2
	umul	a_1,b_0,t_1	!mul_add_c(a[1],b[0],c2,c3,c1);
	addcc	c_2,t_1,c_2	!=
	rd	%y,t_2
	addxcc	c_3,t_2,c_3
	st	c_2,rp(1)	!r[1]=c2;
	addx	c_1,%g0,c_1	!=

	/* result word 2 */
	umul	a_2,b_0,t_1	!mul_add_c(a[2],b[0],c3,c1,c2);
	addcc	c_3,t_1,c_3
	rd	%y,t_2
	addxcc	c_1,t_2,c_1	!=
	addx	%g0,%g0,c_2
	ld	bp(2),b_2
	umul	a_1,b_1,t_1	!mul_add_c(a[1],b[1],c3,c1,c2);
	addcc	c_3,t_1,c_3	!=
	rd	%y,t_2
	addxcc	c_1,t_2,c_1
	ld	bp(3),b_3
	addx	c_2,%g0,c_2	!=
	umul	a_0,b_2,t_1	!mul_add_c(a[0],b[2],c3,c1,c2);
	addcc	c_3,t_1,c_3
	rd	%y,t_2
	addxcc	c_1,t_2,c_1	!=
	addx	c_2,%g0,c_2
	st	c_3,rp(2)	!r[2]=c3;

	/* result word 3 */
	umul	a_0,b_3,t_1	!mul_add_c(a[0],b[3],c1,c2,c3);
	addcc	c_1,t_1,c_1	!=
	rd	%y,t_2
	addxcc	c_2,t_2,c_2
	addx	%g0,%g0,c_3
	umul	a_1,b_2,t_1	!=!mul_add_c(a[1],b[2],c1,c2,c3);
	addcc	c_1,t_1,c_1
	rd	%y,t_2
	addxcc	c_2,t_2,c_2
	addx	c_3,%g0,c_3	!=
	ld	ap(3),a_3
	umul	a_2,b_1,t_1	!mul_add_c(a[2],b[1],c1,c2,c3);
	addcc	c_1,t_1,c_1
	rd	%y,t_2		!=
	addxcc	c_2,t_2,c_2
	addx	c_3,%g0,c_3
	ld	ap(4),a_4
	umul	a_3,b_0,t_1	!mul_add_c(a[3],b[0],c1,c2,c3);!=
	addcc	c_1,t_1,c_1
	rd	%y,t_2
	addxcc	c_2,t_2,c_2
	addx	c_3,%g0,c_3	!=
	st	c_1,rp(3)	!r[3]=c1;

	/* result word 4 */
	umul	a_4,b_0,t_1	!mul_add_c(a[4],b[0],c2,c3,c1);
	addcc	c_2,t_1,c_2
	rd	%y,t_2		!=
	addxcc	c_3,t_2,c_3
	addx	%g0,%g0,c_1
	umul	a_3,b_1,t_1	!mul_add_c(a[3],b[1],c2,c3,c1);
	addcc	c_2,t_1,c_2	!=
	rd	%y,t_2
	addxcc	c_3,t_2,c_3
	addx	c_1,%g0,c_1
	umul	a_2,b_2,t_1	!=!mul_add_c(a[2],b[2],c2,c3,c1);
	addcc	c_2,t_1,c_2
	rd	%y,t_2
	addxcc	c_3,t_2,c_3
	addx	c_1,%g0,c_1	!=
	ld	bp(4),b_4
	umul	a_1,b_3,t_1	!mul_add_c(a[1],b[3],c2,c3,c1);
	addcc	c_2,t_1,c_2
	rd	%y,t_2		!=
	addxcc	c_3,t_2,c_3
	addx	c_1,%g0,c_1
	ld	bp(5),b_5
	umul	a_0,b_4,t_1	!=!mul_add_c(a[0],b[4],c2,c3,c1);
	addcc	c_2,t_1,c_2
	rd	%y,t_2
	addxcc	c_3,t_2,c_3
	addx	c_1,%g0,c_1	!=
	st	c_2,rp(4)	!r[4]=c2;

	/* result word 5 */
	umul	a_0,b_5,t_1	!mul_add_c(a[0],b[5],c3,c1,c2);
	addcc	c_3,t_1,c_3
	rd	%y,t_2		!=
	addxcc	c_1,t_2,c_1
	addx	%g0,%g0,c_2
	umul	a_1,b_4,t_1	!mul_add_c(a[1],b[4],c3,c1,c2);
	addcc	c_3,t_1,c_3	!=
	rd	%y,t_2
	addxcc	c_1,t_2,c_1
	addx	c_2,%g0,c_2
	umul	a_2,b_3,t_1	!=!mul_add_c(a[2],b[3],c3,c1,c2);
	addcc	c_3,t_1,c_3
	rd	%y,t_2
	addxcc	c_1,t_2,c_1
	addx	c_2,%g0,c_2	!=
	umul	a_3,b_2,t_1	!mul_add_c(a[3],b[2],c3,c1,c2);
	addcc	c_3,t_1,c_3
	rd	%y,t_2
	addxcc	c_1,t_2,c_1	!=
	addx	c_2,%g0,c_2
	ld	ap(5),a_5
	umul	a_4,b_1,t_1	!mul_add_c(a[4],b[1],c3,c1,c2);
	addcc	c_3,t_1,c_3	!=
	rd	%y,t_2
	addxcc	c_1,t_2,c_1
	ld	ap(6),a_6
	addx	c_2,%g0,c_2	!=
	umul	a_5,b_0,t_1	!mul_add_c(a[5],b[0],c3,c1,c2);
	addcc	c_3,t_1,c_3
	rd	%y,t_2
	addxcc	c_1,t_2,c_1	!=
	addx	c_2,%g0,c_2
	st	c_3,rp(5)	!r[5]=c3;

	/* result word 6 */
	umul	a_6,b_0,t_1	!mul_add_c(a[6],b[0],c1,c2,c3);
	addcc	c_1,t_1,c_1	!=
	rd	%y,t_2
	addxcc	c_2,t_2,c_2
	addx	%g0,%g0,c_3
	umul	a_5,b_1,t_1	!=!mul_add_c(a[5],b[1],c1,c2,c3);
	addcc	c_1,t_1,c_1
	rd	%y,t_2
	addxcc	c_2,t_2,c_2
	addx	c_3,%g0,c_3	!=
	umul	a_4,b_2,t_1	!mul_add_c(a[4],b[2],c1,c2,c3);
	addcc	c_1,t_1,c_1
	rd	%y,t_2
	addxcc	c_2,t_2,c_2	!=
	addx	c_3,%g0,c_3
	umul	a_3,b_3,t_1	!mul_add_c(a[3],b[3],c1,c2,c3);
	addcc	c_1,t_1,c_1
	rd	%y,t_2		!=
	addxcc	c_2,t_2,c_2
	addx	c_3,%g0,c_3
	umul	a_2,b_4,t_1	!mul_add_c(a[2],b[4],c1,c2,c3);
	addcc	c_1,t_1,c_1	!=
	rd	%y,t_2
	addxcc	c_2,t_2,c_2
	ld	bp(6),b_6
	addx	c_3,%g0,c_3	!=
	umul	a_1,b_5,t_1	!mul_add_c(a[1],b[5],c1,c2,c3);
	addcc	c_1,t_1,c_1
	rd	%y,t_2
	addxcc	c_2,t_2,c_2	!=
	addx	c_3,%g0,c_3
	ld	bp(7),b_7
	umul	a_0,b_6,t_1	!mul_add_c(a[0],b[6],c1,c2,c3);
	addcc	c_1,t_1,c_1	!=
	rd	%y,t_2
	addxcc	c_2,t_2,c_2
	st	c_1,rp(6)	!r[6]=c1;
	addx	c_3,%g0,c_3	!=

	/* result word 7 */
	umul	a_0,b_7,t_1	!mul_add_c(a[0],b[7],c2,c3,c1);
	addcc	c_2,t_1,c_2
	rd	%y,t_2
	addxcc	c_3,t_2,c_3	!=
	addx	%g0,%g0,c_1
	umul	a_1,b_6,t_1	!mul_add_c(a[1],b[6],c2,c3,c1);
	addcc	c_2,t_1,c_2
	rd	%y,t_2		!=
	addxcc	c_3,t_2,c_3
	addx	c_1,%g0,c_1
	umul	a_2,b_5,t_1	!mul_add_c(a[2],b[5],c2,c3,c1);
	addcc	c_2,t_1,c_2	!=
	rd	%y,t_2
	addxcc	c_3,t_2,c_3
	addx	c_1,%g0,c_1
	umul	a_3,b_4,t_1	!=!mul_add_c(a[3],b[4],c2,c3,c1);
	addcc	c_2,t_1,c_2
	rd	%y,t_2
	addxcc	c_3,t_2,c_3
	addx	c_1,%g0,c_1	!=
	umul	a_4,b_3,t_1	!mul_add_c(a[4],b[3],c2,c3,c1);
	addcc	c_2,t_1,c_2
	rd	%y,t_2
	addxcc	c_3,t_2,c_3	!=
	addx	c_1,%g0,c_1
	umul	a_5,b_2,t_1	!mul_add_c(a[5],b[2],c2,c3,c1);
	addcc	c_2,t_1,c_2
	rd	%y,t_2		!=
	addxcc	c_3,t_2,c_3
	addx	c_1,%g0,c_1
	ld	ap(7),a_7
	umul	a_6,b_1,t_1	!=!mul_add_c(a[6],b[1],c2,c3,c1);
	addcc	c_2,t_1,c_2
	rd	%y,t_2
	addxcc	c_3,t_2,c_3
	addx	c_1,%g0,c_1	!=
	umul	a_7,b_0,t_1	!mul_add_c(a[7],b[0],c2,c3,c1);
	addcc	c_2,t_1,c_2
	rd	%y,t_2
	addxcc	c_3,t_2,c_3	!=
	addx	c_1,%g0,c_1
	st	c_2,rp(7)	!r[7]=c2;

	/* result word 8 */
	umul	a_7,b_1,t_1	!mul_add_c(a[7],b[1],c3,c1,c2);
	addcc	c_3,t_1,c_3	!=
	rd	%y,t_2
	addxcc	c_1,t_2,c_1
	addx	%g0,%g0,c_2
	umul	a_6,b_2,t_1	!=!mul_add_c(a[6],b[2],c3,c1,c2);
	addcc	c_3,t_1,c_3
	rd	%y,t_2
	addxcc	c_1,t_2,c_1
	addx	c_2,%g0,c_2	!=
	umul	a_5,b_3,t_1	!mul_add_c(a[5],b[3],c3,c1,c2);
	addcc	c_3,t_1,c_3
	rd	%y,t_2
	addxcc	c_1,t_2,c_1	!=
	addx	c_2,%g0,c_2
	umul	a_4,b_4,t_1	!mul_add_c(a[4],b[4],c3,c1,c2);
	addcc	c_3,t_1,c_3
	rd	%y,t_2		!=
	addxcc	c_1,t_2,c_1
	addx	c_2,%g0,c_2
	umul	a_3,b_5,t_1	!mul_add_c(a[3],b[5],c3,c1,c2);
	addcc	c_3,t_1,c_3	!=
	rd	%y,t_2
	addxcc	c_1,t_2,c_1
	addx	c_2,%g0,c_2
	umul	a_2,b_6,t_1	!=!mul_add_c(a[2],b[6],c3,c1,c2);
	addcc	c_3,t_1,c_3
	rd	%y,t_2
	addxcc	c_1,t_2,c_1
	addx	c_2,%g0,c_2	!=
	umul	a_1,b_7,t_1	!mul_add_c(a[1],b[7],c3,c1,c2);
	addcc	c_3,t_1,c_3
	rd	%y,t_2
	addxcc	c_1,t_2,c_1	!
	addx	c_2,%g0,c_2
	st	c_3,rp(8)	!r[8]=c3;

	/* result word 9 */
	umul	a_2,b_7,t_1	!mul_add_c(a[2],b[7],c1,c2,c3);
	addcc	c_1,t_1,c_1	!=
	rd	%y,t_2
	addxcc	c_2,t_2,c_2
	addx	%g0,%g0,c_3
	umul	a_3,b_6,t_1	!=!mul_add_c(a[3],b[6],c1,c2,c3);
	addcc	c_1,t_1,c_1
	rd	%y,t_2
	addxcc	c_2,t_2,c_2
	addx	c_3,%g0,c_3	!=
	umul	a_4,b_5,t_1	!mul_add_c(a[4],b[5],c1,c2,c3);
	addcc	c_1,t_1,c_1
	rd	%y,t_2
	addxcc	c_2,t_2,c_2	!=
	addx	c_3,%g0,c_3
	umul	a_5,b_4,t_1	!mul_add_c(a[5],b[4],c1,c2,c3);
	addcc	c_1,t_1,c_1
	rd	%y,t_2		!=
	addxcc	c_2,t_2,c_2
	addx	c_3,%g0,c_3
	umul	a_6,b_3,t_1	!mul_add_c(a[6],b[3],c1,c2,c3);
	addcc	c_1,t_1,c_1	!=
	rd	%y,t_2
	addxcc	c_2,t_2,c_2
	addx	c_3,%g0,c_3
	umul	a_7,b_2,t_1	!=!mul_add_c(a[7],b[2],c1,c2,c3);
	addcc	c_1,t_1,c_1
	rd	%y,t_2
	addxcc	c_2,t_2,c_2
	addx	c_3,%g0,c_3	!=
	st	c_1,rp(9)	!r[9]=c1;

	/* result word 10 */
	umul	a_7,b_3,t_1	!mul_add_c(a[7],b[3],c2,c3,c1);
	addcc	c_2,t_1,c_2
	rd	%y,t_2		!=
	addxcc	c_3,t_2,c_3
	addx	%g0,%g0,c_1
	umul	a_6,b_4,t_1	!mul_add_c(a[6],b[4],c2,c3,c1);
	addcc	c_2,t_1,c_2	!=
	rd	%y,t_2
	addxcc	c_3,t_2,c_3
	addx	c_1,%g0,c_1
	umul	a_5,b_5,t_1	!=!mul_add_c(a[5],b[5],c2,c3,c1);
	addcc	c_2,t_1,c_2
	rd	%y,t_2
	addxcc	c_3,t_2,c_3
	addx	c_1,%g0,c_1	!=
	umul	a_4,b_6,t_1	!mul_add_c(a[4],b[6],c2,c3,c1);
	addcc	c_2,t_1,c_2
	rd	%y,t_2
	addxcc	c_3,t_2,c_3	!=
	addx	c_1,%g0,c_1
	umul	a_3,b_7,t_1	!mul_add_c(a[3],b[7],c2,c3,c1);
	addcc	c_2,t_1,c_2
	rd	%y,t_2		!=
	addxcc	c_3,t_2,c_3
	addx	c_1,%g0,c_1
	st	c_2,rp(10)	!r[10]=c2;

	/* result word 11 */
	umul	a_4,b_7,t_1	!=!mul_add_c(a[4],b[7],c3,c1,c2);
	addcc	c_3,t_1,c_3
	rd	%y,t_2
	addxcc	c_1,t_2,c_1
	addx	%g0,%g0,c_2	!=
	umul	a_5,b_6,t_1	!mul_add_c(a[5],b[6],c3,c1,c2);
	addcc	c_3,t_1,c_3
	rd	%y,t_2
	addxcc	c_1,t_2,c_1	!=
	addx	c_2,%g0,c_2
	umul	a_6,b_5,t_1	!mul_add_c(a[6],b[5],c3,c1,c2);
	addcc	c_3,t_1,c_3
	rd	%y,t_2		!=
	addxcc	c_1,t_2,c_1
	addx	c_2,%g0,c_2
	umul	a_7,b_4,t_1	!mul_add_c(a[7],b[4],c3,c1,c2);
	addcc	c_3,t_1,c_3	!=
	rd	%y,t_2
	addxcc	c_1,t_2,c_1
	st	c_3,rp(11)	!r[11]=c3;
	addx	c_2,%g0,c_2	!=

	/* result word 12 */
	umul	a_7,b_5,t_1	!mul_add_c(a[7],b[5],c1,c2,c3);
	addcc	c_1,t_1,c_1
	rd	%y,t_2
	addxcc	c_2,t_2,c_2	!=
	addx	%g0,%g0,c_3
	umul	a_6,b_6,t_1	!mul_add_c(a[6],b[6],c1,c2,c3);
	addcc	c_1,t_1,c_1
	rd	%y,t_2		!=
	addxcc	c_2,t_2,c_2
	addx	c_3,%g0,c_3
	umul	a_5,b_7,t_1	!mul_add_c(a[5],b[7],c1,c2,c3);
	addcc	c_1,t_1,c_1	!=
	rd	%y,t_2
	addxcc	c_2,t_2,c_2
	st	c_1,rp(12)	!r[12]=c1;
	addx	c_3,%g0,c_3	!=

	/* result word 13 */
	umul	a_6,b_7,t_1	!mul_add_c(a[6],b[7],c2,c3,c1);
	addcc	c_2,t_1,c_2
	rd	%y,t_2
	addxcc	c_3,t_2,c_3	!=
	addx	%g0,%g0,c_1
	umul	a_7,b_6,t_1	!mul_add_c(a[7],b[6],c2,c3,c1);
	addcc	c_2,t_1,c_2
	rd	%y,t_2		!=
	addxcc	c_3,t_2,c_3
	addx	c_1,%g0,c_1
	st	c_2,rp(13)	!r[13]=c2;

	/* result words 14 and 15 */
	umul	a_7,b_7,t_1	!=!mul_add_c(a[7],b[7],c3,c1,c2);
	addcc	c_3,t_1,c_3
	rd	%y,t_2
	addxcc	c_1,t_2,c_1
	nop			!=
	st	c_3,rp(14)	!r[14]=c3;
	st	c_1,rp(15)	!r[15]=c1;

	ret
	restore	%g0,%g0,%o0

	.type	bn_mul_comba8,#function
	.size	bn_mul_comba8,(.-bn_mul_comba8)
935 | ||
936 | .align 32 | |
937 | ||
/*
 * void bn_mul_comba4(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b)
 *
 * Fully unrolled 4x4-word multiplication: r[0..7] = a[0..3] * b[0..3].
 * The partial products are taken one result word at a time (all
 * a[i]*b[j] with the same i+j).  Each product is added into a three-word
 * accumulator: addcc adds the low word, addxcc the high word plus carry,
 * addx collects the final carry.  The first product of each group resets
 * the top accumulator word with "addx %g0,%g0,...".  After a result word
 * is stored the three accumulator registers rotate roles, as recorded in
 * the trailing mul_add_c(...) comments.
 *
 * Opens a register window with save/restore; the words of a and b are
 * loaded into registers on first use.  %o0 of the caller is zeroed by
 * the final restore.
 *
 * The "!=" markers in the trailing comments come from the original
 * source and mark every fourth instruction.
 */
	.global	bn_mul_comba4
bn_mul_comba4:
	save	%sp,FRAME_SIZE,%sp
	/* result word 0 */
	ld	ap(0),a_0
	ld	bp(0),b_0
	umul	a_0,b_0,c_1	!=!mul_add_c(a[0],b[0],c1,c2,c3);
	ld	bp(1),b_1
	rd	%y,c_2
	st	c_1,rp(0)	!r[0]=c1;

	/* result word 1 */
	umul	a_0,b_1,t_1	!=!mul_add_c(a[0],b[1],c2,c3,c1);
	ld	ap(1),a_1
	addcc	c_2,t_1,c_2
	rd	%y,t_2		!=
	addxcc	%g0,t_2,c_3
	addx	%g0,%g0,c_1
	ld	ap(2),a_2
	umul	a_1,b_0,t_1	!=!mul_add_c(a[1],b[0],c2,c3,c1);
	addcc	c_2,t_1,c_2
	rd	%y,t_2
	addxcc	c_3,t_2,c_3
	addx	c_1,%g0,c_1	!=
	st	c_2,rp(1)	!r[1]=c2;

	/* result word 2 */
	umul	a_2,b_0,t_1	!mul_add_c(a[2],b[0],c3,c1,c2);
	addcc	c_3,t_1,c_3
	rd	%y,t_2		!=
	addxcc	c_1,t_2,c_1
	addx	%g0,%g0,c_2
	ld	bp(2),b_2
	umul	a_1,b_1,t_1	!=!mul_add_c(a[1],b[1],c3,c1,c2);
	addcc	c_3,t_1,c_3
	rd	%y,t_2
	addxcc	c_1,t_2,c_1
	addx	c_2,%g0,c_2	!=
	ld	bp(3),b_3
	umul	a_0,b_2,t_1	!mul_add_c(a[0],b[2],c3,c1,c2);
	addcc	c_3,t_1,c_3
	rd	%y,t_2		!=
	addxcc	c_1,t_2,c_1
	addx	c_2,%g0,c_2
	st	c_3,rp(2)	!r[2]=c3;

	/* result word 3 */
	umul	a_0,b_3,t_1	!=!mul_add_c(a[0],b[3],c1,c2,c3);
	addcc	c_1,t_1,c_1
	rd	%y,t_2
	addxcc	c_2,t_2,c_2
	addx	%g0,%g0,c_3	!=
	umul	a_1,b_2,t_1	!mul_add_c(a[1],b[2],c1,c2,c3);
	addcc	c_1,t_1,c_1
	rd	%y,t_2
	addxcc	c_2,t_2,c_2	!=
	addx	c_3,%g0,c_3
	ld	ap(3),a_3
	umul	a_2,b_1,t_1	!mul_add_c(a[2],b[1],c1,c2,c3);
	addcc	c_1,t_1,c_1	!=
	rd	%y,t_2
	addxcc	c_2,t_2,c_2
	addx	c_3,%g0,c_3
	umul	a_3,b_0,t_1	!=!mul_add_c(a[3],b[0],c1,c2,c3);
	addcc	c_1,t_1,c_1
	rd	%y,t_2
	addxcc	c_2,t_2,c_2
	addx	c_3,%g0,c_3	!=
	st	c_1,rp(3)	!r[3]=c1;

	/* result word 4 */
	umul	a_3,b_1,t_1	!mul_add_c(a[3],b[1],c2,c3,c1);
	addcc	c_2,t_1,c_2
	rd	%y,t_2		!=
	addxcc	c_3,t_2,c_3
	addx	%g0,%g0,c_1
	umul	a_2,b_2,t_1	!mul_add_c(a[2],b[2],c2,c3,c1);
	addcc	c_2,t_1,c_2	!=
	rd	%y,t_2
	addxcc	c_3,t_2,c_3
	addx	c_1,%g0,c_1
	umul	a_1,b_3,t_1	!=!mul_add_c(a[1],b[3],c2,c3,c1);
	addcc	c_2,t_1,c_2
	rd	%y,t_2
	addxcc	c_3,t_2,c_3
	addx	c_1,%g0,c_1	!=
	st	c_2,rp(4)	!r[4]=c2;

	/* result word 5 */
	umul	a_2,b_3,t_1	!mul_add_c(a[2],b[3],c3,c1,c2);
	addcc	c_3,t_1,c_3
	rd	%y,t_2		!=
	addxcc	c_1,t_2,c_1
	addx	%g0,%g0,c_2
	umul	a_3,b_2,t_1	!mul_add_c(a[3],b[2],c3,c1,c2);
	addcc	c_3,t_1,c_3	!=
	rd	%y,t_2
	addxcc	c_1,t_2,c_1
	st	c_3,rp(5)	!r[5]=c3;
	addx	c_2,%g0,c_2	!=

	/* result words 6 and 7 */
	umul	a_3,b_3,t_1	!mul_add_c(a[3],b[3],c1,c2,c3);
	addcc	c_1,t_1,c_1
	rd	%y,t_2
	addxcc	c_2,t_2,c_2	!=
	st	c_1,rp(6)	!r[6]=c1;
	st	c_2,rp(7)	!r[7]=c2;

	ret
	restore	%g0,%g0,%o0

	.type	bn_mul_comba4,#function
	.size	bn_mul_comba4,(.-bn_mul_comba4)
1049 | ||
1050 | .align 32 | |
1051 | ||
1052 | .global bn_sqr_comba8 | |
/*
 * bn_sqr_comba8: fully unrolled squaring of an 8-word operand.
 *
 * NOTE(review): no prototype is given here; presumably it mirrors the
 * one documented for bn_sqr_comba4, i.e. void bn_sqr_comba8(r,a) with
 * BN_ULONG *r,*a -- confirm against the C declaration.
 *
 * The routine loads ap(0)..ap(7) into a_0..a_7 and stores the sixteen
 * result words rp(0)..rp(15), one result column at a time. The names
 * ap, rp, a_N, c_N, t_N and FRAME_SIZE are defined outside this part of
 * the file.
 *
 * c_1, c_2 and c_3 form a three-word running sum whose roles rotate
 * from column to column, as spelled out by the (c?,c?,c?) arguments in
 * the trailing annotations. For every product, umul leaves the low
 * word in its destination and the high word is fetched with rd %y;
 * both are then folded in with addcc/addxcc, and addx collects the
 * carry in the third word. The first product of a column clears that
 * third word with addx %g0,%g0 instead of accumulating into it.
 *
 * sqr_add_c  - square term a[i]*a[i], added once.
 * sqr_add_c2 - cross term a[i]*a[j], added twice: the addcc/addxcc/addx
 *              group is simply repeated with the same t_1/t_2.
 *
 * Loads of the next a[] word and stores of finished r[] words are
 * interleaved with the carry chain; ld and st leave the condition
 * codes untouched, so an addx following a st still consumes the carry
 * produced by the preceding addxcc.
 *
 * Column 0 (a[0]^2) is computed directly into c_1 (low word, stored to
 * r[0]) and c_2 (high word, seed of the next column).
 */
1053 | bn_sqr_comba8: | |
1054 | save %sp,FRAME_SIZE,%sp | |
98f1c689 UM |
1055 | ld ap(0),a_0 |
1056 | ld ap(1),a_1 | |
5e85b6ab UM |
1057 | umul a_0,a_0,c_1 !=!sqr_add_c(a,0,c1,c2,c3); |
1058 | rd %y,c_2 | |
98f1c689 | 1059 | st c_1,rp(0) !r[0]=c1; |
5e85b6ab | 1060 | |
/* Column 1: 2*a[1]*a[0]; low word of the running sum goes to r[1]. */
98f1c689 | 1061 | ld ap(2),a_2 |
5e85b6ab UM |
1062 | umul a_0,a_1,t_1 !=!sqr_add_c2(a,1,0,c2,c3,c1); |
1063 | addcc c_2,t_1,c_2 | |
1064 | rd %y,t_2 | |
1065 | addxcc %g0,t_2,c_3 | |
1066 | addx %g0,%g0,c_1 != | |
1067 | addcc c_2,t_1,c_2 | |
1068 | addxcc c_3,t_2,c_3 | |
98f1c689 | 1069 | st c_2,rp(1) !r[1]=c2; |
5e85b6ab UM |
1070 | addx c_1,%g0,c_1 != |
1071 | ||
/* Column 2: 2*a[2]*a[0] + a[1]^2 -> r[2]. */
1072 | umul a_2,a_0,t_1 !sqr_add_c2(a,2,0,c3,c1,c2); | |
1073 | addcc c_3,t_1,c_3 | |
1074 | rd %y,t_2 | |
1075 | addxcc c_1,t_2,c_1 != | |
1076 | addx %g0,%g0,c_2 | |
1077 | addcc c_3,t_1,c_3 | |
1078 | addxcc c_1,t_2,c_1 | |
1079 | addx c_2,%g0,c_2 != | |
98f1c689 | 1080 | ld ap(3),a_3 |
5e85b6ab UM |
1081 | umul a_1,a_1,t_1 !sqr_add_c(a,1,c3,c1,c2); |
1082 | addcc c_3,t_1,c_3 | |
1083 | rd %y,t_2 != | |
1084 | addxcc c_1,t_2,c_1 | |
1085 | addx c_2,%g0,c_2 | |
98f1c689 | 1086 | st c_3,rp(2) !r[2]=c3; |
5e85b6ab UM |
1087 | |
/* Column 3: 2*(a[3]*a[0] + a[2]*a[1]) -> r[3]. */
1088 | umul a_0,a_3,t_1 !=!sqr_add_c2(a,3,0,c1,c2,c3); | |
1089 | addcc c_1,t_1,c_1 | |
1090 | rd %y,t_2 | |
1091 | addxcc c_2,t_2,c_2 | |
1092 | addx %g0,%g0,c_3 != | |
1093 | addcc c_1,t_1,c_1 | |
1094 | addxcc c_2,t_2,c_2 | |
98f1c689 | 1095 | ld ap(4),a_4 |
5e85b6ab UM |
1096 | addx c_3,%g0,c_3 != |
1097 | umul a_1,a_2,t_1 !sqr_add_c2(a,2,1,c1,c2,c3); | |
1098 | addcc c_1,t_1,c_1 | |
1099 | rd %y,t_2 | |
1100 | addxcc c_2,t_2,c_2 != | |
1101 | addx c_3,%g0,c_3 | |
1102 | addcc c_1,t_1,c_1 | |
1103 | addxcc c_2,t_2,c_2 | |
1104 | addx c_3,%g0,c_3 != | |
98f1c689 | 1105 | st c_1,rp(3) !r[3]=c1; |
5e85b6ab UM |
1106 | |
/* Column 4: 2*(a[4]*a[0] + a[3]*a[1]) + a[2]^2 -> r[4]. */
1107 | umul a_4,a_0,t_1 !sqr_add_c2(a,4,0,c2,c3,c1); | |
1108 | addcc c_2,t_1,c_2 | |
1109 | rd %y,t_2 != | |
1110 | addxcc c_3,t_2,c_3 | |
1111 | addx %g0,%g0,c_1 | |
1112 | addcc c_2,t_1,c_2 | |
1113 | addxcc c_3,t_2,c_3 != | |
1114 | addx c_1,%g0,c_1 | |
1115 | umul a_3,a_1,t_1 !sqr_add_c2(a,3,1,c2,c3,c1); | |
1116 | addcc c_2,t_1,c_2 | |
1117 | rd %y,t_2 != | |
1118 | addxcc c_3,t_2,c_3 | |
1119 | addx c_1,%g0,c_1 | |
1120 | addcc c_2,t_1,c_2 | |
1121 | addxcc c_3,t_2,c_3 != | |
1122 | addx c_1,%g0,c_1 | |
98f1c689 | 1123 | ld ap(5),a_5 |
5e85b6ab UM |
1124 | umul a_2,a_2,t_1 !sqr_add_c(a,2,c2,c3,c1); |
1125 | addcc c_2,t_1,c_2 != | |
1126 | rd %y,t_2 | |
1127 | addxcc c_3,t_2,c_3 | |
98f1c689 | 1128 | st c_2,rp(4) !r[4]=c2; |
5e85b6ab UM |
1129 | addx c_1,%g0,c_1 != |
1130 | ||
/* Column 5: 2*(a[5]*a[0] + a[4]*a[1] + a[3]*a[2]) -> r[5]. */
1131 | umul a_0,a_5,t_1 !sqr_add_c2(a,5,0,c3,c1,c2); | |
1132 | addcc c_3,t_1,c_3 | |
1133 | rd %y,t_2 | |
1134 | addxcc c_1,t_2,c_1 != | |
1135 | addx %g0,%g0,c_2 | |
1136 | addcc c_3,t_1,c_3 | |
1137 | addxcc c_1,t_2,c_1 | |
1138 | addx c_2,%g0,c_2 != | |
1139 | umul a_1,a_4,t_1 !sqr_add_c2(a,4,1,c3,c1,c2); | |
1140 | addcc c_3,t_1,c_3 | |
1141 | rd %y,t_2 | |
1142 | addxcc c_1,t_2,c_1 != | |
1143 | addx c_2,%g0,c_2 | |
1144 | addcc c_3,t_1,c_3 | |
1145 | addxcc c_1,t_2,c_1 | |
1146 | addx c_2,%g0,c_2 != | |
98f1c689 | 1147 | ld ap(6),a_6 |
5e85b6ab UM |
1148 | umul a_2,a_3,t_1 !sqr_add_c2(a,3,2,c3,c1,c2); |
1149 | addcc c_3,t_1,c_3 | |
1150 | rd %y,t_2 != | |
1151 | addxcc c_1,t_2,c_1 | |
1152 | addx c_2,%g0,c_2 | |
1153 | addcc c_3,t_1,c_3 | |
1154 | addxcc c_1,t_2,c_1 != | |
1155 | addx c_2,%g0,c_2 | |
98f1c689 | 1156 | st c_3,rp(5) !r[5]=c3; |
5e85b6ab UM |
1157 | |
/* Column 6: 2*(a[6]*a[0] + a[5]*a[1] + a[4]*a[2]) + a[3]^2 -> r[6]. */
1158 | umul a_6,a_0,t_1 !sqr_add_c2(a,6,0,c1,c2,c3); | |
1159 | addcc c_1,t_1,c_1 != | |
1160 | rd %y,t_2 | |
1161 | addxcc c_2,t_2,c_2 | |
1162 | addx %g0,%g0,c_3 | |
1163 | addcc c_1,t_1,c_1 != | |
1164 | addxcc c_2,t_2,c_2 | |
1165 | addx c_3,%g0,c_3 | |
1166 | umul a_5,a_1,t_1 !sqr_add_c2(a,5,1,c1,c2,c3); | |
1167 | addcc c_1,t_1,c_1 != | |
1168 | rd %y,t_2 | |
1169 | addxcc c_2,t_2,c_2 | |
1170 | addx c_3,%g0,c_3 | |
1171 | addcc c_1,t_1,c_1 != | |
1172 | addxcc c_2,t_2,c_2 | |
1173 | addx c_3,%g0,c_3 | |
1174 | umul a_4,a_2,t_1 !sqr_add_c2(a,4,2,c1,c2,c3); | |
1175 | addcc c_1,t_1,c_1 != | |
1176 | rd %y,t_2 | |
1177 | addxcc c_2,t_2,c_2 | |
1178 | addx c_3,%g0,c_3 | |
1179 | addcc c_1,t_1,c_1 != | |
1180 | addxcc c_2,t_2,c_2 | |
1181 | addx c_3,%g0,c_3 | |
98f1c689 | 1182 | ld ap(7),a_7 |
5e85b6ab UM |
1183 | umul a_3,a_3,t_1 !=!sqr_add_c(a,3,c1,c2,c3); |
1184 | addcc c_1,t_1,c_1 | |
1185 | rd %y,t_2 | |
1186 | addxcc c_2,t_2,c_2 | |
1187 | addx c_3,%g0,c_3 != | |
98f1c689 | 1188 | st c_1,rp(6) !r[6]=c1; |
5e85b6ab UM |
1189 | |
/* Column 7: 2*(a[7]*a[0] + a[6]*a[1] + a[5]*a[2] + a[4]*a[3]) -> r[7]. */
1190 | umul a_0,a_7,t_1 !sqr_add_c2(a,7,0,c2,c3,c1); | |
1191 | addcc c_2,t_1,c_2 | |
1192 | rd %y,t_2 != | |
1193 | addxcc c_3,t_2,c_3 | |
1194 | addx %g0,%g0,c_1 | |
1195 | addcc c_2,t_1,c_2 | |
1196 | addxcc c_3,t_2,c_3 != | |
1197 | addx c_1,%g0,c_1 | |
1198 | umul a_1,a_6,t_1 !sqr_add_c2(a,6,1,c2,c3,c1); | |
1199 | addcc c_2,t_1,c_2 | |
1200 | rd %y,t_2 != | |
1201 | addxcc c_3,t_2,c_3 | |
1202 | addx c_1,%g0,c_1 | |
1203 | addcc c_2,t_1,c_2 | |
1204 | addxcc c_3,t_2,c_3 != | |
1205 | addx c_1,%g0,c_1 | |
1206 | umul a_2,a_5,t_1 !sqr_add_c2(a,5,2,c2,c3,c1); | |
1207 | addcc c_2,t_1,c_2 | |
1208 | rd %y,t_2 != | |
1209 | addxcc c_3,t_2,c_3 | |
1210 | addx c_1,%g0,c_1 | |
1211 | addcc c_2,t_1,c_2 | |
1212 | addxcc c_3,t_2,c_3 != | |
1213 | addx c_1,%g0,c_1 | |
1214 | umul a_3,a_4,t_1 !sqr_add_c2(a,4,3,c2,c3,c1); | |
1215 | addcc c_2,t_1,c_2 | |
1216 | rd %y,t_2 != | |
1217 | addxcc c_3,t_2,c_3 | |
1218 | addx c_1,%g0,c_1 | |
1219 | addcc c_2,t_1,c_2 | |
1220 | addxcc c_3,t_2,c_3 != | |
1221 | addx c_1,%g0,c_1 | |
98f1c689 | 1222 | st c_2,rp(7) !r[7]=c2; |
5e85b6ab UM |
1223 | |
/* Column 8: 2*(a[7]*a[1] + a[6]*a[2] + a[5]*a[3]) + a[4]^2 -> r[8]. */
1224 | umul a_7,a_1,t_1 !sqr_add_c2(a,7,1,c3,c1,c2); | |
1225 | addcc c_3,t_1,c_3 != | |
1226 | rd %y,t_2 | |
1227 | addxcc c_1,t_2,c_1 | |
1228 | addx %g0,%g0,c_2 | |
1229 | addcc c_3,t_1,c_3 != | |
1230 | addxcc c_1,t_2,c_1 | |
1231 | addx c_2,%g0,c_2 | |
1232 | umul a_6,a_2,t_1 !sqr_add_c2(a,6,2,c3,c1,c2); | |
1233 | addcc c_3,t_1,c_3 != | |
1234 | rd %y,t_2 | |
1235 | addxcc c_1,t_2,c_1 | |
1236 | addx c_2,%g0,c_2 | |
1237 | addcc c_3,t_1,c_3 != | |
1238 | addxcc c_1,t_2,c_1 | |
1239 | addx c_2,%g0,c_2 | |
1240 | umul a_5,a_3,t_1 !sqr_add_c2(a,5,3,c3,c1,c2); | |
1241 | addcc c_3,t_1,c_3 != | |
1242 | rd %y,t_2 | |
1243 | addxcc c_1,t_2,c_1 | |
1244 | addx c_2,%g0,c_2 | |
1245 | addcc c_3,t_1,c_3 != | |
1246 | addxcc c_1,t_2,c_1 | |
1247 | addx c_2,%g0,c_2 | |
1248 | umul a_4,a_4,t_1 !sqr_add_c(a,4,c3,c1,c2); | |
1249 | addcc c_3,t_1,c_3 != | |
1250 | rd %y,t_2 | |
1251 | addxcc c_1,t_2,c_1 | |
98f1c689 | 1252 | st c_3,rp(8) !r[8]=c3; |
5e85b6ab UM |
1253 | addx c_2,%g0,c_2 != |
1254 | ||
/* Column 9: 2*(a[7]*a[2] + a[6]*a[3] + a[5]*a[4]) -> r[9]. */
1255 | umul a_2,a_7,t_1 !sqr_add_c2(a,7,2,c1,c2,c3); | |
1256 | addcc c_1,t_1,c_1 | |
1257 | rd %y,t_2 | |
1258 | addxcc c_2,t_2,c_2 != | |
1259 | addx %g0,%g0,c_3 | |
1260 | addcc c_1,t_1,c_1 | |
1261 | addxcc c_2,t_2,c_2 | |
1262 | addx c_3,%g0,c_3 != | |
1263 | umul a_3,a_6,t_1 !sqr_add_c2(a,6,3,c1,c2,c3); | |
1264 | addcc c_1,t_1,c_1 | |
1265 | rd %y,t_2 | |
1266 | addxcc c_2,t_2,c_2 != | |
1267 | addx c_3,%g0,c_3 | |
1268 | addcc c_1,t_1,c_1 | |
1269 | addxcc c_2,t_2,c_2 | |
1270 | addx c_3,%g0,c_3 != | |
1271 | umul a_4,a_5,t_1 !sqr_add_c2(a,5,4,c1,c2,c3); | |
1272 | addcc c_1,t_1,c_1 | |
1273 | rd %y,t_2 | |
1274 | addxcc c_2,t_2,c_2 != | |
1275 | addx c_3,%g0,c_3 | |
1276 | addcc c_1,t_1,c_1 | |
1277 | addxcc c_2,t_2,c_2 | |
1278 | addx c_3,%g0,c_3 != | |
98f1c689 | 1279 | st c_1,rp(9) !r[9]=c1; |
5e85b6ab UM |
1280 | |
/* Column 10: 2*(a[7]*a[3] + a[6]*a[4]) + a[5]^2 -> r[10]. */
1281 | umul a_7,a_3,t_1 !sqr_add_c2(a,7,3,c2,c3,c1); | |
1282 | addcc c_2,t_1,c_2 | |
1283 | rd %y,t_2 != | |
1284 | addxcc c_3,t_2,c_3 | |
1285 | addx %g0,%g0,c_1 | |
1286 | addcc c_2,t_1,c_2 | |
1287 | addxcc c_3,t_2,c_3 != | |
1288 | addx c_1,%g0,c_1 | |
1289 | umul a_6,a_4,t_1 !sqr_add_c2(a,6,4,c2,c3,c1); | |
1290 | addcc c_2,t_1,c_2 | |
1291 | rd %y,t_2 != | |
1292 | addxcc c_3,t_2,c_3 | |
1293 | addx c_1,%g0,c_1 | |
1294 | addcc c_2,t_1,c_2 | |
1295 | addxcc c_3,t_2,c_3 != | |
1296 | addx c_1,%g0,c_1 | |
1297 | umul a_5,a_5,t_1 !sqr_add_c(a,5,c2,c3,c1); | |
1298 | addcc c_2,t_1,c_2 | |
1299 | rd %y,t_2 != | |
1300 | addxcc c_3,t_2,c_3 | |
1301 | addx c_1,%g0,c_1 | |
98f1c689 | 1302 | st c_2,rp(10) !r[10]=c2; |
5e85b6ab UM |
1303 | |
/* Column 11: 2*(a[7]*a[4] + a[6]*a[5]) -> r[11]. */
1304 | umul a_4,a_7,t_1 !=!sqr_add_c2(a,7,4,c3,c1,c2); | |
1305 | addcc c_3,t_1,c_3 | |
1306 | rd %y,t_2 | |
1307 | addxcc c_1,t_2,c_1 | |
1308 | addx %g0,%g0,c_2 != | |
1309 | addcc c_3,t_1,c_3 | |
1310 | addxcc c_1,t_2,c_1 | |
1311 | addx c_2,%g0,c_2 | |
1312 | umul a_5,a_6,t_1 !=!sqr_add_c2(a,6,5,c3,c1,c2); | |
1313 | addcc c_3,t_1,c_3 | |
1314 | rd %y,t_2 | |
1315 | addxcc c_1,t_2,c_1 | |
1316 | addx c_2,%g0,c_2 != | |
1317 | addcc c_3,t_1,c_3 | |
1318 | addxcc c_1,t_2,c_1 | |
98f1c689 | 1319 | st c_3,rp(11) !r[11]=c3; |
5e85b6ab UM |
1320 | addx c_2,%g0,c_2 != |
1321 | ||
/* Column 12: 2*a[7]*a[5] + a[6]^2 -> r[12]. */
1322 | umul a_7,a_5,t_1 !sqr_add_c2(a,7,5,c1,c2,c3); | |
1323 | addcc c_1,t_1,c_1 | |
1324 | rd %y,t_2 | |
1325 | addxcc c_2,t_2,c_2 != | |
1326 | addx %g0,%g0,c_3 | |
1327 | addcc c_1,t_1,c_1 | |
1328 | addxcc c_2,t_2,c_2 | |
1329 | addx c_3,%g0,c_3 != | |
1330 | umul a_6,a_6,t_1 !sqr_add_c(a,6,c1,c2,c3); | |
1331 | addcc c_1,t_1,c_1 | |
1332 | rd %y,t_2 | |
1333 | addxcc c_2,t_2,c_2 != | |
1334 | addx c_3,%g0,c_3 | |
98f1c689 | 1335 | st c_1,rp(12) !r[12]=c1; |
5e85b6ab UM |
1336 | |
/* Column 13: 2*a[7]*a[6] -> r[13]. */
1337 | umul a_6,a_7,t_1 !sqr_add_c2(a,7,6,c2,c3,c1); | |
1338 | addcc c_2,t_1,c_2 != | |
1339 | rd %y,t_2 | |
1340 | addxcc c_3,t_2,c_3 | |
1341 | addx %g0,%g0,c_1 | |
1342 | addcc c_2,t_1,c_2 != | |
5e85b6ab | 1343 | addxcc c_3,t_2,c_3 |
98f1c689 | 1344 | st c_2,rp(13) !r[13]=c2; |
5e85b6ab UM |
1345 | addx c_1,%g0,c_1 != |
1346 | ||
/*
 * Column 14: a[7]^2 -> r[14]; the remaining high word goes to r[15].
 * No further carry word is collected after the last addxcc.
 */
1347 | umul a_7,a_7,t_1 !sqr_add_c(a,7,c3,c1,c2); | |
1348 | addcc c_3,t_1,c_3 | |
1349 | rd %y,t_2 | |
1350 | addxcc c_1,t_2,c_1 != | |
98f1c689 UM |
1351 | st c_3,rp(14) !r[14]=c3; |
1352 | st c_1,rp(15) !r[15]=c1; | |
5e85b6ab UM |
1353 | |
/* Return; the restore in the delay slot of ret writes 0 to the caller's %o0. */
1354 | ret | |
1355 | restore %g0,%g0,%o0 | |
1356 | ||
1357 | .type bn_sqr_comba8,#function | |
1358 | .size bn_sqr_comba8,(.-bn_sqr_comba8) | |
1359 | ||
1360 | .align 32 | |
1361 | ||
1362 | .global bn_sqr_comba4 | |
1363 | /* | |
1364 | * void bn_sqr_comba4(r,a) | |
1365 | * BN_ULONG *r,*a; | |
1366 | */ | |
/*
 * Fully unrolled squaring of a 4-word operand: loads ap(0)..ap(3) into
 * a_0..a_3 and stores the eight result words rp(0)..rp(7), one result
 * column at a time. The names ap, rp, a_N, c_N, t_N and FRAME_SIZE are
 * defined outside this part of the file.
 *
 * c_1, c_2 and c_3 form a three-word running sum whose roles rotate
 * from column to column, as spelled out by the (c?,c?,c?) arguments in
 * the trailing annotations. For every product, umul leaves the low
 * word in its destination and the high word is fetched with rd %y;
 * both are then folded in with addcc/addxcc, and addx collects the
 * carry in the third word. The first product of a column clears that
 * third word with addx %g0,%g0 instead of accumulating into it.
 *
 * sqr_add_c  - square term a[i]*a[i], added once.
 * sqr_add_c2 - cross term a[i]*a[j], added twice: the addcc/addxcc/addx
 *              group is simply repeated with the same t_1/t_2.
 *
 * Loads and stores are interleaved with the carry chain; ld and st
 * leave the condition codes untouched, so an addx following a st still
 * consumes the carry produced by the preceding addxcc.
 *
 * Column 0 (a[0]^2) is computed directly into c_1 (low word, stored to
 * r[0]) and c_2 (high word, seed of the next column).
 */
1367 | bn_sqr_comba4: | |
1368 | save %sp,FRAME_SIZE,%sp | |
98f1c689 | 1369 | ld ap(0),a_0 |
5e85b6ab | 1370 | umul a_0,a_0,c_1 !sqr_add_c(a,0,c1,c2,c3); |
98f1c689 | 1371 | ld ap(1),a_1 != |
5e85b6ab | 1372 | rd %y,c_2 |
98f1c689 | 1373 | st c_1,rp(0) !r[0]=c1; |
5e85b6ab | 1374 | |
/* Column 1: 2*a[1]*a[0]; low word of the running sum goes to r[1]. */
fccbb9b3 | 1375 | ld ap(2),a_2 |
5e85b6ab UM |
1376 | umul a_0,a_1,t_1 !=!sqr_add_c2(a,1,0,c2,c3,c1); |
1377 | addcc c_2,t_1,c_2 | |
1378 | rd %y,t_2 | |
1379 | addxcc %g0,t_2,c_3 | |
1380 | addx %g0,%g0,c_1 != | |
5e85b6ab UM |
1381 | addcc c_2,t_1,c_2 |
1382 | addxcc c_3,t_2,c_3 | |
1383 | addx c_1,%g0,c_1 != | |
98f1c689 | 1384 | st c_2,rp(1) !r[1]=c2; |
5e85b6ab UM |
1385 | |
/* Column 2: 2*a[2]*a[0] + a[1]^2 -> r[2]. */
1386 | umul a_2,a_0,t_1 !sqr_add_c2(a,2,0,c3,c1,c2); | |
1387 | addcc c_3,t_1,c_3 | |
1388 | rd %y,t_2 != | |
1389 | addxcc c_1,t_2,c_1 | |
1390 | addx %g0,%g0,c_2 | |
1391 | addcc c_3,t_1,c_3 | |
1392 | addxcc c_1,t_2,c_1 != | |
1393 | addx c_2,%g0,c_2 | |
98f1c689 | 1394 | ld ap(3),a_3 |
5e85b6ab UM |
1395 | umul a_1,a_1,t_1 !sqr_add_c(a,1,c3,c1,c2); |
1396 | addcc c_3,t_1,c_3 != | |
1397 | rd %y,t_2 | |
1398 | addxcc c_1,t_2,c_1 | |
98f1c689 | 1399 | st c_3,rp(2) !r[2]=c3; |
5e85b6ab UM |
1400 | addx c_2,%g0,c_2 != |
1401 | ||
/* Column 3: 2*(a[3]*a[0] + a[2]*a[1]) -> r[3]. */
1402 | umul a_0,a_3,t_1 !sqr_add_c2(a,3,0,c1,c2,c3); | |
1403 | addcc c_1,t_1,c_1 | |
1404 | rd %y,t_2 | |
1405 | addxcc c_2,t_2,c_2 != | |
1406 | addx %g0,%g0,c_3 | |
1407 | addcc c_1,t_1,c_1 | |
1408 | addxcc c_2,t_2,c_2 | |
1409 | addx c_3,%g0,c_3 != | |
1410 | umul a_1,a_2,t_1 !sqr_add_c2(a,2,1,c1,c2,c3); | |
1411 | addcc c_1,t_1,c_1 | |
1412 | rd %y,t_2 | |
1413 | addxcc c_2,t_2,c_2 != | |
1414 | addx c_3,%g0,c_3 | |
1415 | addcc c_1,t_1,c_1 | |
1416 | addxcc c_2,t_2,c_2 | |
1417 | addx c_3,%g0,c_3 != | |
98f1c689 | 1418 | st c_1,rp(3) !r[3]=c1; |
5e85b6ab UM |
1419 | |
/* Column 4: 2*a[3]*a[1] + a[2]^2 -> r[4]. */
1420 | umul a_3,a_1,t_1 !sqr_add_c2(a,3,1,c2,c3,c1); | |
1421 | addcc c_2,t_1,c_2 | |
1422 | rd %y,t_2 != | |
1423 | addxcc c_3,t_2,c_3 | |
1424 | addx %g0,%g0,c_1 | |
1425 | addcc c_2,t_1,c_2 | |
1426 | addxcc c_3,t_2,c_3 != | |
1427 | addx c_1,%g0,c_1 | |
1428 | umul a_2,a_2,t_1 !sqr_add_c(a,2,c2,c3,c1); | |
1429 | addcc c_2,t_1,c_2 | |
1430 | rd %y,t_2 != | |
1431 | addxcc c_3,t_2,c_3 | |
1432 | addx c_1,%g0,c_1 | |
98f1c689 | 1433 | st c_2,rp(4) !r[4]=c2; |
5e85b6ab UM |
1434 | |
/* Column 5: 2*a[3]*a[2] -> r[5]. */
1435 | umul a_2,a_3,t_1 !=!sqr_add_c2(a,3,2,c3,c1,c2); | |
1436 | addcc c_3,t_1,c_3 | |
1437 | rd %y,t_2 | |
1438 | addxcc c_1,t_2,c_1 | |
1439 | addx %g0,%g0,c_2 != | |
1440 | addcc c_3,t_1,c_3 | |
1441 | addxcc c_1,t_2,c_1 | |
98f1c689 | 1442 | st c_3,rp(5) !r[5]=c3; |
5e85b6ab UM |
1443 | addx c_2,%g0,c_2 != |
1444 | ||
/*
 * Column 6: a[3]^2 -> r[6]; the remaining high word goes to r[7].
 * No further carry word is collected after the last addxcc.
 */
1445 | umul a_3,a_3,t_1 !sqr_add_c(a,3,c1,c2,c3); | |
1446 | addcc c_1,t_1,c_1 | |
1447 | rd %y,t_2 | |
1448 | addxcc c_2,t_2,c_2 != | |
98f1c689 UM |
1449 | st c_1,rp(6) !r[6]=c1; |
1450 | st c_2,rp(7) !r[7]=c2; | |
1287dabd | 1451 | |
/* Return; the restore in the delay slot of ret writes 0 to the caller's %o0. */
5e85b6ab UM |
1452 | ret |
1453 | restore %g0,%g0,%o0 | |
1454 | ||
1455 | .type bn_sqr_comba4,#function | |
1456 | .size bn_sqr_comba4,(.-bn_sqr_comba4) | |
5e85b6ab | 1457 | |
4f5fac80 | 1458 | .align 32 |