]>
Commit | Line | Data |
---|---|---|
dd4a0af3 | 1 | .ident "s390x.S, version 1.1" |
1c7f8707 | 2 | // ==================================================================== |
44c8a5e2 | 3 | // Copyright 2007-2016 The OpenSSL Project Authors. All Rights Reserved. |
1c7f8707 | 4 | // |
44c8a5e2 RS |
5 | // Licensed under the OpenSSL license (the "License"). You may not use |
6 | // this file except in compliance with the License. You can obtain a copy | |
7 | // in the file LICENSE in the source distribution or at | |
8 | // https://www.openssl.org/source/license.html | |
1c7f8707 AP |
9 | // ==================================================================== |
10 | ||
11 | .text | |
12 | ||
// `zero` is an alias for %r0. Routines that rely on it load it with 0 themselves
// (lghi zero,0) and then use it as the all-zero addend when folding the carry
// flag into a register (alcgr x,zero). Other routines use %r0 as plain scratch.
13 | #define zero %r0 | |
14 | ||
// BN_ULONG bn_mul_add_words(BN_ULONG *r2,BN_ULONG *r3,int r4,BN_ULONG r5);
//
// For i in [0,len): rp[i] += ap[i] * w, propagating the carry upwards.
//   In:   %r2 = rp, %r3 = ap, %r4 = len (int), %r5 = w
//   Out:  %r2 = carry word out of the top limb (0 when len <= 0)
//   Uses: %r0 (zero), %r1 = rp-ap so that rp[i] is addressed as d(%r3,%r1),
//         %r6..%r13 as four even:odd register pairs holding 128-bit products
//         (saved at 48(%r15) and restored before returning).
//
// The main loop handles four limbs per pass and is software-pipelined: the
// first multiply of a group is issued at the end of the previous group. The
// carry flag stays live from each "alg" to the following "alcgr"; none of the
// instructions in between (stg/mlgr/lg/la/brct) alter the condition code.
.globl	bn_mul_add_words
.type	bn_mul_add_words,@function
.align	4
bn_mul_add_words:
	lghi	zero,0			// %r0 := 0
	la	%r1,0(%r2)		// move rp out of %r2, which is reused below
	lghi	%r2,0			// result for the early exit
	ltgfr	%r4,%r4			// sign-extend len and test it
	bler	%r14			// len <= 0: return 0

	stmg	%r6,%r13,48(%r15)	// preserve %r6-%r13
	lghi	%r2,3
	lghi	%r12,0			// running carry word = 0
	slgr	%r1,%r3			// %r1 = rp - ap
	nr	%r2,%r4			// %r2 = len % 4
	sra	%r4,2			// %r4 = len / 4 (sets the condition code)
	jz	.Lmadd_single		// no full group; CC 0 doubles as "no carry"
	algr	zero,zero		// 0 + 0: start with the carry flag clear

	// pipeline prologue: fetch the first two limbs, start the first product
	lg	%r7,0(%r3)		// ap[0]
	lg	%r9,8(%r3)		// ap[1]
	mlgr	%r6,%r5			// %r6:%r7 = ap[0] * w
	brct	%r4,.Lmadd_quad		// more than one group: enter steady state
	j	.Lmadd_quad_last	// exactly one group: drain only

.Lmadd_quad:
	mlgr	%r8,%r5			// %r8:%r9 = ap[i+1] * w
	lg	%r11,16(%r3)		// ap[i+2]
	alcgr	%r7,%r12		// low word += previous high word + carry
	alcgr	%r6,zero		// fold carry into this high word
	alg	%r7,0(%r3,%r1)		// += rp[i]
	stg	%r7,0(%r3,%r1)		// rp[i] = result

	mlgr	%r10,%r5		// %r10:%r11 = ap[i+2] * w
	lg	%r13,24(%r3)		// ap[i+3]
	alcgr	%r9,%r6
	alcgr	%r8,zero
	alg	%r9,8(%r3,%r1)		// += rp[i+1]
	stg	%r9,8(%r3,%r1)

	mlgr	%r12,%r5		// %r12:%r13 = ap[i+3] * w
	lg	%r7,32(%r3)		// ap[i+4], first limb of the next group
	alcgr	%r11,%r8
	alcgr	%r10,zero
	alg	%r11,16(%r3,%r1)	// += rp[i+2]
	stg	%r11,16(%r3,%r1)

	mlgr	%r6,%r5			// %r6:%r7 = ap[i+4] * w (for the next pass)
	lg	%r9,40(%r3)		// ap[i+5]
	alcgr	%r13,%r10
	alcgr	%r12,zero
	alg	%r13,24(%r3,%r1)	// += rp[i+3]
	stg	%r13,24(%r3,%r1)

	la	%r3,32(%r3)		// advance four limbs
	brct	%r4,.Lmadd_quad

.Lmadd_quad_last:
	// final group: same arithmetic, but nothing is prefetched past it
	mlgr	%r8,%r5
	lg	%r11,16(%r3)
	alcgr	%r7,%r12		// low word += previous high word + carry
	alcgr	%r6,zero
	alg	%r7,0(%r3,%r1)		// += rp[i]
	stg	%r7,0(%r3,%r1)

	mlgr	%r10,%r5
	lg	%r13,24(%r3)
	alcgr	%r9,%r6
	alcgr	%r8,zero
	alg	%r9,8(%r3,%r1)
	stg	%r9,8(%r3,%r1)

	mlgr	%r12,%r5
	alcgr	%r11,%r8
	alcgr	%r10,zero
	alg	%r11,16(%r3,%r1)
	stg	%r11,16(%r3,%r1)

	alcgr	%r13,%r10
	alcgr	%r12,zero
	alg	%r13,24(%r3,%r1)
	stg	%r13,24(%r3,%r1)

	la	%r3,32(%r3)		// advance four limbs

	la	%r2,1(%r2)		// +1 then brct: tests len%4 != 0 ...
	brct	%r2,.Lmadd_single	// ... while leaving the carry flag intact

.Lmadd_ret:
	lgr	%r2,zero
	alcgr	%r2,%r12		// return high word + pending carry flag
	lmg	%r6,%r13,48(%r15)
	br	%r14

.Lmadd_single:
	// one limb per pass, used for the len%4 leftovers
	lg	%r7,0(%r3)		// ap[i]
	mlgr	%r6,%r5			// %r6:%r7 = ap[i] * w
	alcgr	%r7,%r12		// += carry word + carry flag
	alcgr	%r6,zero
	alg	%r7,0(%r3,%r1)		// += rp[i]
	stg	%r7,0(%r3,%r1)

	lgr	%r12,%r6		// high word becomes the next carry word
	la	%r3,8(%r3)		// next limb
	brct	%r2,.Lmadd_single

	j	.Lmadd_ret
.size	bn_mul_add_words,.-bn_mul_add_words
124 | ||
// BN_ULONG bn_mul_words(BN_ULONG *r2,BN_ULONG *r3,int r4,BN_ULONG r5);
//
// For i in [0,len): rp[i] = low64(ap[i] * w + carry); carry = high word.
//   In:   %r2 = rp, %r3 = ap, %r4 = len (int), %r5 = w
//   Out:  %r2 = final carry word (0 when len <= 0)
//   Uses: %r0 (zero), %r1 = rp, %r2 = byte offset i*8 while looping,
//         %r6:%r7 and %r8:%r9 as alternating product pairs, %r10 = len%4
//         (%r6-%r10 are saved at 48(%r15) and restored before returning).
//
// Each alcgr adds the previous high word plus the carry flag left by the
// previous alcgr; stg/lg/mlgr/la/brct in between leave that flag untouched.
.globl	bn_mul_words
.type	bn_mul_words,@function
.align	4
bn_mul_words:
	lghi	zero,0			// %r0 := 0
	la	%r1,0(%r2)		// keep rp in %r1
	lghi	%r2,0			// offset = 0, also the early-exit result
	ltgfr	%r4,%r4			// sign-extend len and test it
	bler	%r14			// len <= 0: return 0

	stmg	%r6,%r10,48(%r15)	// preserve %r6-%r10
	lghi	%r10,3
	lghi	%r8,0			// carry word = 0
	nr	%r10,%r4		// %r10 = len % 4
	sra	%r4,2			// %r4 = len / 4 (sets the condition code)
	jz	.Lmul_single		// no full group; CC 0 doubles as "no carry"
	algr	zero,zero		// 0 + 0: start with the carry flag clear

.Lmul_quad:
	lg	%r7,0(%r2,%r3)		// ap[i]
	mlgr	%r6,%r5			// %r6:%r7 = ap[i] * w
	alcgr	%r7,%r8			// += previous high word + carry
	stg	%r7,0(%r2,%r1)		// rp[i]

	lg	%r9,8(%r2,%r3)		// ap[i+1]
	mlgr	%r8,%r5			// %r8:%r9 = ap[i+1] * w
	alcgr	%r9,%r6
	stg	%r9,8(%r2,%r1)		// rp[i+1]

	lg	%r7,16(%r2,%r3)		// ap[i+2]
	mlgr	%r6,%r5
	alcgr	%r7,%r8
	stg	%r7,16(%r2,%r1)		// rp[i+2]

	lg	%r9,24(%r2,%r3)		// ap[i+3]
	mlgr	%r8,%r5
	alcgr	%r9,%r6
	stg	%r9,24(%r2,%r1)		// rp[i+3]

	la	%r2,32(%r2)		// advance four limbs
	brct	%r4,.Lmul_quad

	la	%r10,1(%r10)		// +1 then brct: tests len%4 != 0 ...
	brct	%r10,.Lmul_single	// ... while leaving the carry flag intact

.Lmul_ret:
	alcgr	%r8,zero		// fold the pending carry flag into the top word
	lgr	%r2,%r8			// return it
	lmg	%r6,%r10,48(%r15)
	br	%r14

.Lmul_single:
	// one limb per pass, used for the len%4 leftovers
	lg	%r7,0(%r2,%r3)		// ap[i]
	mlgr	%r6,%r5			// %r6:%r7 = ap[i] * w
	alcgr	%r7,%r8			// += carry word + carry flag
	stg	%r7,0(%r2,%r1)		// rp[i]

	lgr	%r8,%r6			// high word becomes the next carry word
	la	%r2,8(%r2)		// next limb
	brct	%r10,.Lmul_single

	j	.Lmul_ret
.size	bn_mul_words,.-bn_mul_words
189 | ||
// void bn_sqr_words(BN_ULONG *r2,BN_ULONG *r3,int r4)
//
// For i in [0,len): (r[2i+1]:r[2i]) = a[i] * a[i]  (128-bit square per limb).
//   In:   %r2 = r (2*len words written), %r3 = a, %r4 = len (int)
//   Out:  nothing; returns immediately when len <= 0
//   Uses: %r1 as the group counter, %r6:%r7 as the product pair
//         (%r6/%r7 are saved at 48(%r15) and restored before returning).
.globl	bn_sqr_words
.type	bn_sqr_words,@function
.align	4
bn_sqr_words:
	ltgfr	%r4,%r4			// sign-extend len and test it
	bler	%r14			// len <= 0: nothing to do

	stmg	%r6,%r7,48(%r15)	// preserve the product pair
	srag	%r1,%r4,2		// %r1 = len / 4
	jz	.Lsqr_single		// fewer than four limbs in total

.Lsqr_quad:
	lg	%r7,0(%r3)		// a[i]
	mlgr	%r6,%r7			// %r6:%r7 = a[i]^2
	stg	%r7,0(%r2)		// low half
	stg	%r6,8(%r2)		// high half

	lg	%r7,8(%r3)		// a[i+1]
	mlgr	%r6,%r7
	stg	%r7,16(%r2)
	stg	%r6,24(%r2)

	lg	%r7,16(%r3)		// a[i+2]
	mlgr	%r6,%r7
	stg	%r7,32(%r2)
	stg	%r6,40(%r2)

	lg	%r7,24(%r3)		// a[i+3]
	mlgr	%r6,%r7
	stg	%r7,48(%r2)
	stg	%r6,56(%r2)

	la	%r3,32(%r3)		// four input limbs consumed
	la	%r2,64(%r2)		// eight output words produced
	brct	%r1,.Lsqr_quad

	lghi	%r1,3
	nr	%r4,%r1			// %r4 = len % 4
	jz	.Lsqr_ret		// no leftovers

.Lsqr_single:
	lg	%r7,0(%r3)		// a[i]
	mlgr	%r6,%r7			// %r6:%r7 = a[i]^2
	stg	%r7,0(%r2)
	stg	%r6,8(%r2)

	la	%r3,8(%r3)
	la	%r2,16(%r2)
	brct	%r4,.Lsqr_single

.Lsqr_ret:
	lmg	%r6,%r7,48(%r15)
	br	%r14
.size	bn_sqr_words,.-bn_sqr_words
245 | ||
// BN_ULONG bn_div_words(BN_ULONG h,BN_ULONG l,BN_ULONG d);
//
// Divides the 128-bit value h:l by d with a single dlgr and returns the
// 64-bit quotient.
//   In:   %r2 = h (high word), %r3 = l (low word), %r4 = d
//   Out:  %r2 = quotient
// dlgr takes the dividend in the %r2:%r3 pair and leaves the remainder in
// %r2 and the quotient in %r3, hence the final register copy. No checks are
// made here for d == 0 or for a quotient that does not fit in 64 bits.
.globl	bn_div_words
.type	bn_div_words,@function
.align	4
bn_div_words:
	dlgr	%r2,%r4			// %r3 = (h:l) / d, %r2 = (h:l) % d
	lgr	%r2,%r3			// hand back the quotient
	br	%r14
.size	bn_div_words,.-bn_div_words
255 | ||
// BN_ULONG bn_add_words(BN_ULONG *r2,BN_ULONG *r3,BN_ULONG *r4,int r5);
//
// For i in [0,len): rp[i] = ap[i] + bp[i] + carry; returns the final carry.
//   In:   %r2 = rp, %r3 = ap, %r4 = bp, %r5 = len (int)
//   Out:  %r2 = 1 if the top limb carried out, else 0 (0 when len <= 0)
//   Uses: %r0 as the limb scratch register, %r1 = rp, %r2 = byte offset i*8
//         while looping, %r6 = len%4 (saved at 48(%r15) and restored).
//
// The carry lives in the condition code across the whole loop; la and brct
// are used for pointer and counter updates because they do not modify it.
.globl	bn_add_words
.type	bn_add_words,@function
.align	4
bn_add_words:
	la	%r1,0(%r2)		// keep rp in %r1
	lghi	%r2,0			// offset = 0, also the early-exit result
	ltgfr	%r5,%r5			// sign-extend len and test it
	bler	%r14			// len <= 0: return 0

	stg	%r6,48(%r15)		// preserve %r6
	lghi	%r6,3
	nr	%r6,%r5			// %r6 = len % 4
	sra	%r5,2			// %r5 = len / 4; sra is chosen since it sets CC
	jz	.Ladd_single		// no full group; CC 0 doubles as "no carry"
	algr	%r2,%r2			// 0 + 0: start with the carry flag clear

.Ladd_quad:
	lg	%r0,0(%r2,%r3)		// ap[i]
	alcg	%r0,0(%r2,%r4)		// + bp[i] + carry
	stg	%r0,0(%r2,%r1)		// rp[i]
	lg	%r0,8(%r2,%r3)
	alcg	%r0,8(%r2,%r4)
	stg	%r0,8(%r2,%r1)
	lg	%r0,16(%r2,%r3)
	alcg	%r0,16(%r2,%r4)
	stg	%r0,16(%r2,%r1)
	lg	%r0,24(%r2,%r3)
	alcg	%r0,24(%r2,%r4)
	stg	%r0,24(%r2,%r1)

	la	%r2,32(%r2)		// advance four limbs
	brct	%r5,.Ladd_quad

	la	%r6,1(%r6)		// +1 then brct: tests len%4 != 0 ...
	brct	%r6,.Ladd_single	// ... while leaving the carry flag intact

.Ladd_ret:
	lghi	%r2,0
	alcgr	%r2,%r2			// 0 + 0 + carry: materialise the carry flag
	lg	%r6,48(%r15)
	br	%r14

.Ladd_single:
	// one limb per pass, used for the len%4 leftovers
	lg	%r0,0(%r2,%r3)		// ap[i]
	alcg	%r0,0(%r2,%r4)		// + bp[i] + carry
	stg	%r0,0(%r2,%r1)		// rp[i]

	la	%r2,8(%r2)		// next limb
	brct	%r6,.Ladd_single

	j	.Ladd_ret
.size	bn_add_words,.-bn_add_words
309 | ||
// BN_ULONG bn_sub_words(BN_ULONG *r2,BN_ULONG *r3,BN_ULONG *r4,int r5);
//
// For i in [0,len): rp[i] = ap[i] - bp[i] - borrow; returns the final borrow.
//   In:   %r2 = rp, %r3 = ap, %r4 = bp, %r5 = len (int)
//   Out:  %r2 = 1 if the top limb borrowed, else 0 (0 when len <= 0)
//   Uses: %r0 as the limb scratch register, %r1 = rp, %r2 = byte offset i*8
//         while looping, %r6 = len%4 (saved at 48(%r15) and restored).
//
// The borrow lives in the condition code across the whole loop; la and brct
// are used for pointer and counter updates because they do not modify it.
.globl	bn_sub_words
.type	bn_sub_words,@function
.align	4
bn_sub_words:
	la	%r1,0(%r2)		// keep rp in %r1
	lghi	%r2,0			// offset = 0, also the early-exit result
	ltgfr	%r5,%r5			// sign-extend len and test it
	bler	%r14			// len <= 0: return 0

	stg	%r6,48(%r15)		// preserve %r6
	lghi	%r6,3
	nr	%r6,%r5			// %r6 = len % 4
	sra	%r5,2			// %r5 = len / 4; sra is chosen since it sets CC
	jnz	.Lsub_quad		// positive result: CC already reads "no borrow"
	slgr	%r2,%r2			// 0 - 0: start with the borrow clear

.Lsub_single:
	// one limb per pass: short inputs and the len%4 leftovers
	lg	%r0,0(%r2,%r3)		// ap[i]
	slbg	%r0,0(%r2,%r4)		// - bp[i] - borrow
	stg	%r0,0(%r2,%r1)		// rp[i]

	la	%r2,8(%r2)		// next limb
	brct	%r6,.Lsub_single
	j	.Lsub_ret

.Lsub_quad:
	lg	%r0,0(%r2,%r3)		// ap[i]
	slbg	%r0,0(%r2,%r4)		// - bp[i] - borrow
	stg	%r0,0(%r2,%r1)		// rp[i]
	lg	%r0,8(%r2,%r3)
	slbg	%r0,8(%r2,%r4)
	stg	%r0,8(%r2,%r1)
	lg	%r0,16(%r2,%r3)
	slbg	%r0,16(%r2,%r4)
	stg	%r0,16(%r2,%r1)
	lg	%r0,24(%r2,%r3)
	slbg	%r0,24(%r2,%r4)
	stg	%r0,24(%r2,%r1)

	la	%r2,32(%r2)		// advance four limbs
	brct	%r5,.Lsub_quad

	la	%r6,1(%r6)		// +1 then brct: tests len%4 != 0 ...
	brct	%r6,.Lsub_single	// ... while leaving the borrow intact

.Lsub_ret:
	lghi	%r2,0
	slbgr	%r2,%r2			// 0 - 0 - borrow: gives 0 or -1
	lcgr	%r2,%r2			// negate, so the result is 0 or 1
	lg	%r6,48(%r15)
	br	%r14
.size	bn_sub_words,.-bn_sub_words
363 | ||
// Three-word column accumulator shared by the comba routines below. The
// callers rotate the roles (c1,c2,c3) -> (c2,c3,c1) -> (c3,c1,c2) from one
// result column to the next.
#define c1	%r1
#define c2	%r5
#define c3	%r8

// mul_add_c(ia,ib,lo,mid,hi): (hi:mid:lo) += a[ia] * b[ib]
//   a is addressed through %r3 and b through %r4; %r6:%r7 receive the
//   128-bit product and are clobbered. `zero` must hold 0 on entry.
#define mul_add_c(ia,ib,lo,mid,hi)	\
	lg	%r7,ia*8(%r3);		\
	mlg	%r6,ib*8(%r4);		\
	algr	lo,%r7;			\
	alcgr	mid,%r6;		\
	alcgr	hi,zero
374 | ||
// void bn_mul_comba8(BN_ULONG *r2,BN_ULONG *r3,BN_ULONG *r4);
//
// 8x8-limb column-wise ("comba") multiplication: r[0..15] = a[0..7] * b[0..7].
//   In:   %r2 = r (16 words written), %r3 = a, %r4 = b
//   Uses: c1/c2/c3 (%r1/%r5/%r8) as the rotating three-word accumulator,
//         %r6:%r7 as the product pair inside mul_add_c, %r0 as zero
//         (%r6-%r8 are saved at 48(%r15) and restored before returning).
//
// Result column k sums every a[i]*b[k-i]; within a column the partial products
// are listed with i ascending. After a column is stored, its lowest
// accumulator word is cleared and becomes the top word of the next column.
.globl	bn_mul_comba8
.type	bn_mul_comba8,@function
.align	4
bn_mul_comba8:
	stmg	%r6,%r8,48(%r15)

	lghi	c1,0
	lghi	c2,0
	lghi	c3,0
	lghi	zero,0

	// column 0
	mul_add_c(0,0,c1,c2,c3)
	stg	c1,0*8(%r2)
	lghi	c1,0

	// column 1
	mul_add_c(0,1,c2,c3,c1)
	mul_add_c(1,0,c2,c3,c1)
	stg	c2,1*8(%r2)
	lghi	c2,0

	// column 2
	mul_add_c(0,2,c3,c1,c2)
	mul_add_c(1,1,c3,c1,c2)
	mul_add_c(2,0,c3,c1,c2)
	stg	c3,2*8(%r2)
	lghi	c3,0

	// column 3
	mul_add_c(0,3,c1,c2,c3)
	mul_add_c(1,2,c1,c2,c3)
	mul_add_c(2,1,c1,c2,c3)
	mul_add_c(3,0,c1,c2,c3)
	stg	c1,3*8(%r2)
	lghi	c1,0

	// column 4
	mul_add_c(0,4,c2,c3,c1)
	mul_add_c(1,3,c2,c3,c1)
	mul_add_c(2,2,c2,c3,c1)
	mul_add_c(3,1,c2,c3,c1)
	mul_add_c(4,0,c2,c3,c1)
	stg	c2,4*8(%r2)
	lghi	c2,0

	// column 5
	mul_add_c(0,5,c3,c1,c2)
	mul_add_c(1,4,c3,c1,c2)
	mul_add_c(2,3,c3,c1,c2)
	mul_add_c(3,2,c3,c1,c2)
	mul_add_c(4,1,c3,c1,c2)
	mul_add_c(5,0,c3,c1,c2)
	stg	c3,5*8(%r2)
	lghi	c3,0

	// column 6
	mul_add_c(0,6,c1,c2,c3)
	mul_add_c(1,5,c1,c2,c3)
	mul_add_c(2,4,c1,c2,c3)
	mul_add_c(3,3,c1,c2,c3)
	mul_add_c(4,2,c1,c2,c3)
	mul_add_c(5,1,c1,c2,c3)
	mul_add_c(6,0,c1,c2,c3)
	stg	c1,6*8(%r2)
	lghi	c1,0

	// column 7
	mul_add_c(0,7,c2,c3,c1)
	mul_add_c(1,6,c2,c3,c1)
	mul_add_c(2,5,c2,c3,c1)
	mul_add_c(3,4,c2,c3,c1)
	mul_add_c(4,3,c2,c3,c1)
	mul_add_c(5,2,c2,c3,c1)
	mul_add_c(6,1,c2,c3,c1)
	mul_add_c(7,0,c2,c3,c1)
	stg	c2,7*8(%r2)
	lghi	c2,0

	// column 8
	mul_add_c(1,7,c3,c1,c2)
	mul_add_c(2,6,c3,c1,c2)
	mul_add_c(3,5,c3,c1,c2)
	mul_add_c(4,4,c3,c1,c2)
	mul_add_c(5,3,c3,c1,c2)
	mul_add_c(6,2,c3,c1,c2)
	mul_add_c(7,1,c3,c1,c2)
	stg	c3,8*8(%r2)
	lghi	c3,0

	// column 9
	mul_add_c(2,7,c1,c2,c3)
	mul_add_c(3,6,c1,c2,c3)
	mul_add_c(4,5,c1,c2,c3)
	mul_add_c(5,4,c1,c2,c3)
	mul_add_c(6,3,c1,c2,c3)
	mul_add_c(7,2,c1,c2,c3)
	stg	c1,9*8(%r2)
	lghi	c1,0

	// column 10
	mul_add_c(3,7,c2,c3,c1)
	mul_add_c(4,6,c2,c3,c1)
	mul_add_c(5,5,c2,c3,c1)
	mul_add_c(6,4,c2,c3,c1)
	mul_add_c(7,3,c2,c3,c1)
	stg	c2,10*8(%r2)
	lghi	c2,0

	// column 11
	mul_add_c(4,7,c3,c1,c2)
	mul_add_c(5,6,c3,c1,c2)
	mul_add_c(6,5,c3,c1,c2)
	mul_add_c(7,4,c3,c1,c2)
	stg	c3,11*8(%r2)
	lghi	c3,0

	// column 12
	mul_add_c(5,7,c1,c2,c3)
	mul_add_c(6,6,c1,c2,c3)
	mul_add_c(7,5,c1,c2,c3)
	stg	c1,12*8(%r2)
	lghi	c1,0

	// column 13
	mul_add_c(6,7,c2,c3,c1)
	mul_add_c(7,6,c2,c3,c1)
	stg	c2,13*8(%r2)
	lghi	c2,0

	// column 14, plus the final carry-out word as r[15]
	mul_add_c(7,7,c3,c1,c2)
	stg	c3,14*8(%r2)
	stg	c1,15*8(%r2)

	lmg	%r6,%r8,48(%r15)
	br	%r14
.size	bn_mul_comba8,.-bn_mul_comba8
500 | ||
// void bn_mul_comba4(BN_ULONG *r2,BN_ULONG *r3,BN_ULONG *r4);
//
// 4x4-limb column-wise ("comba") multiplication: r[0..7] = a[0..3] * b[0..3].
//   In:   %r2 = r (8 words written), %r3 = a, %r4 = b
//   Uses: c1/c2/c3 (%r1/%r5/%r8) as the rotating three-word accumulator,
//         %r6:%r7 as the product pair inside mul_add_c, %r0 as zero
//         (%r6-%r8 are saved at 48(%r15) and restored before returning).
//
// Two defects of the previous version are corrected here:
//   * result limb 0 was stored through %r3, i.e. into a[0], which corrupted
//     the input operand for the later columns and left r[0] unwritten;
//   * the epilogue used stmg instead of lmg, so the saved %r6-%r8 were
//     overwritten rather than restored for the caller.
.globl	bn_mul_comba4
.type	bn_mul_comba4,@function
.align	4
bn_mul_comba4:
	stmg	%r6,%r8,48(%r15)	// preserve %r6-%r8

	lghi	c1,0
	lghi	c2,0
	lghi	c3,0
	lghi	zero,0

	// column 0
	mul_add_c(0,0,c1,c2,c3);
	stg	c1,0*8(%r2)		// r[0], stored through the result pointer
	lghi	c1,0

	// column 1
	mul_add_c(0,1,c2,c3,c1);
	mul_add_c(1,0,c2,c3,c1);
	stg	c2,1*8(%r2)
	lghi	c2,0

	// column 2
	mul_add_c(2,0,c3,c1,c2);
	mul_add_c(1,1,c3,c1,c2);
	mul_add_c(0,2,c3,c1,c2);
	stg	c3,2*8(%r2)
	lghi	c3,0

	// column 3
	mul_add_c(0,3,c1,c2,c3);
	mul_add_c(1,2,c1,c2,c3);
	mul_add_c(2,1,c1,c2,c3);
	mul_add_c(3,0,c1,c2,c3);
	stg	c1,3*8(%r2)
	lghi	c1,0

	// column 4
	mul_add_c(3,1,c2,c3,c1);
	mul_add_c(2,2,c2,c3,c1);
	mul_add_c(1,3,c2,c3,c1);
	stg	c2,4*8(%r2)
	lghi	c2,0

	// column 5
	mul_add_c(2,3,c3,c1,c2);
	mul_add_c(3,2,c3,c1,c2);
	stg	c3,5*8(%r2)
	lghi	c3,0

	// column 6, plus the final carry-out word as r[7]
	mul_add_c(3,3,c1,c2,c3);
	stg	c1,6*8(%r2)
	stg	c2,7*8(%r2)

	lmg	%r6,%r8,48(%r15)	// restore %r6-%r8 for the caller
	br	%r14
.size	bn_mul_comba4,.-bn_mul_comba4
553 | ||
// sqr_add_c(ia,lo,mid,hi): (hi:mid:lo) += a[ia] * a[ia]
//   a is addressed through %r3; %r6:%r7 receive the 128-bit product and are
//   clobbered. `zero` must hold 0 on entry.
#define sqr_add_c(ia,lo,mid,hi)		\
	lg	%r7,ia*8(%r3);		\
	mlgr	%r6,%r7;		\
	algr	lo,%r7;			\
	alcgr	mid,%r6;		\
	alcgr	hi,zero

// sqr_add_c2(ia,ja,lo,mid,hi): (hi:mid:lo) += 2 * a[ia] * a[ja]
//   The product is computed once and accumulated twice, each time with full
//   carry propagation into hi. Same register usage as sqr_add_c.
#define sqr_add_c2(ia,ja,lo,mid,hi)	\
	lg	%r7,ia*8(%r3);		\
	mlg	%r6,ja*8(%r3);		\
	algr	lo,%r7;			\
	alcgr	mid,%r6;		\
	alcgr	hi,zero;		\
	algr	lo,%r7;			\
	alcgr	mid,%r6;		\
	alcgr	hi,zero
570 | ||
// void bn_sqr_comba8(BN_ULONG *r2,BN_ULONG *r3);
//
// 8-limb column-wise ("comba") squaring: r[0..15] = a[0..7]^2.
//   In:   %r2 = r (16 words written), %r3 = a
//   Uses: c1/c2/c3 (%r1/%r5/%r8) as the rotating three-word accumulator,
//         %r6:%r7 as the product pair inside the macros, %r0 as zero
//         (%r6-%r8 are saved at 48(%r15) and restored before returning).
//
// Result column k sums 2*a[i]*a[j] for every i > j with i+j == k (listed with
// i descending) and, for even k, the square a[k/2]^2 (listed last).
.globl	bn_sqr_comba8
.type	bn_sqr_comba8,@function
.align	4
bn_sqr_comba8:
	stmg	%r6,%r8,48(%r15)

	lghi	c1,0
	lghi	c2,0
	lghi	c3,0
	lghi	zero,0

	// column 0
	sqr_add_c(0,c1,c2,c3)
	stg	c1,0*8(%r2)
	lghi	c1,0

	// column 1
	sqr_add_c2(1,0,c2,c3,c1)
	stg	c2,1*8(%r2)
	lghi	c2,0

	// column 2
	sqr_add_c2(2,0,c3,c1,c2)
	sqr_add_c(1,c3,c1,c2)
	stg	c3,2*8(%r2)
	lghi	c3,0

	// column 3
	sqr_add_c2(3,0,c1,c2,c3)
	sqr_add_c2(2,1,c1,c2,c3)
	stg	c1,3*8(%r2)
	lghi	c1,0

	// column 4
	sqr_add_c2(4,0,c2,c3,c1)
	sqr_add_c2(3,1,c2,c3,c1)
	sqr_add_c(2,c2,c3,c1)
	stg	c2,4*8(%r2)
	lghi	c2,0

	// column 5
	sqr_add_c2(5,0,c3,c1,c2)
	sqr_add_c2(4,1,c3,c1,c2)
	sqr_add_c2(3,2,c3,c1,c2)
	stg	c3,5*8(%r2)
	lghi	c3,0

	// column 6
	sqr_add_c2(6,0,c1,c2,c3)
	sqr_add_c2(5,1,c1,c2,c3)
	sqr_add_c2(4,2,c1,c2,c3)
	sqr_add_c(3,c1,c2,c3)
	stg	c1,6*8(%r2)
	lghi	c1,0

	// column 7
	sqr_add_c2(7,0,c2,c3,c1)
	sqr_add_c2(6,1,c2,c3,c1)
	sqr_add_c2(5,2,c2,c3,c1)
	sqr_add_c2(4,3,c2,c3,c1)
	stg	c2,7*8(%r2)
	lghi	c2,0

	// column 8
	sqr_add_c2(7,1,c3,c1,c2)
	sqr_add_c2(6,2,c3,c1,c2)
	sqr_add_c2(5,3,c3,c1,c2)
	sqr_add_c(4,c3,c1,c2)
	stg	c3,8*8(%r2)
	lghi	c3,0

	// column 9
	sqr_add_c2(7,2,c1,c2,c3)
	sqr_add_c2(6,3,c1,c2,c3)
	sqr_add_c2(5,4,c1,c2,c3)
	stg	c1,9*8(%r2)
	lghi	c1,0

	// column 10
	sqr_add_c2(7,3,c2,c3,c1)
	sqr_add_c2(6,4,c2,c3,c1)
	sqr_add_c(5,c2,c3,c1)
	stg	c2,10*8(%r2)
	lghi	c2,0

	// column 11
	sqr_add_c2(7,4,c3,c1,c2)
	sqr_add_c2(6,5,c3,c1,c2)
	stg	c3,11*8(%r2)
	lghi	c3,0

	// column 12
	sqr_add_c2(7,5,c1,c2,c3)
	sqr_add_c(6,c1,c2,c3)
	stg	c1,12*8(%r2)
	lghi	c1,0

	// column 13
	sqr_add_c2(7,6,c2,c3,c1)
	stg	c2,13*8(%r2)
	lghi	c2,0

	// column 14, plus the final carry-out word as r[15]
	sqr_add_c(7,c3,c1,c2)
	stg	c3,14*8(%r2)
	stg	c1,15*8(%r2)

	lmg	%r6,%r8,48(%r15)
	br	%r14
.size	bn_sqr_comba8,.-bn_sqr_comba8
667 | ||
// void bn_sqr_comba4(BN_ULONG *r2,BN_ULONG *r3);
//
// 4-limb column-wise ("comba") squaring: r[0..7] = a[0..3]^2.
//   In:   %r2 = r (8 words written), %r3 = a
//   Uses: c1/c2/c3 (%r1/%r5/%r8) as the rotating three-word accumulator,
//         %r6:%r7 as the product pair inside the macros, %r0 as zero
//         (%r6-%r8 are saved at 48(%r15) and restored before returning).
//
// Result column k sums 2*a[i]*a[j] for every i > j with i+j == k (listed with
// i descending) and, for even k, the square a[k/2]^2 (listed last).
.globl	bn_sqr_comba4
.type	bn_sqr_comba4,@function
.align	4
bn_sqr_comba4:
	stmg	%r6,%r8,48(%r15)

	lghi	c1,0
	lghi	c2,0
	lghi	c3,0
	lghi	zero,0

	// column 0
	sqr_add_c(0,c1,c2,c3)
	stg	c1,0*8(%r2)
	lghi	c1,0

	// column 1
	sqr_add_c2(1,0,c2,c3,c1)
	stg	c2,1*8(%r2)
	lghi	c2,0

	// column 2
	sqr_add_c2(2,0,c3,c1,c2)
	sqr_add_c(1,c3,c1,c2)
	stg	c3,2*8(%r2)
	lghi	c3,0

	// column 3
	sqr_add_c2(3,0,c1,c2,c3)
	sqr_add_c2(2,1,c1,c2,c3)
	stg	c1,3*8(%r2)
	lghi	c1,0

	// column 4
	sqr_add_c2(3,1,c2,c3,c1)
	sqr_add_c(2,c2,c3,c1)
	stg	c2,4*8(%r2)
	lghi	c2,0

	// column 5
	sqr_add_c2(3,2,c3,c1,c2)
	stg	c3,5*8(%r2)
	lghi	c3,0

	// column 6, plus the final carry-out word as r[7]
	sqr_add_c(3,c1,c2,c3)
	stg	c1,6*8(%r2)
	stg	c2,7*8(%r2)

	lmg	%r6,%r8,48(%r15)
	br	%r14
.size	bn_sqr_comba4,.-bn_sqr_comba4