]> git.ipfire.org Git - thirdparty/openssl.git/blob - crypto/bn/bn_mul.c
dde0919218e06f61f023df962dfc6fb4a59d6222
[thirdparty/openssl.git] / crypto / bn / bn_mul.c
1 /* crypto/bn/bn_mul.c */
2 /* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
3 * All rights reserved.
4 *
5 * This package is an SSL implementation written
6 * by Eric Young (eay@cryptsoft.com).
7 * The implementation was written so as to conform with Netscapes SSL.
8 *
9 * This library is free for commercial and non-commercial use as long as
10 * the following conditions are aheared to. The following conditions
11 * apply to all code found in this distribution, be it the RC4, RSA,
12 * lhash, DES, etc., code; not just the SSL code. The SSL documentation
13 * included with this distribution is covered by the same copyright terms
14 * except that the holder is Tim Hudson (tjh@cryptsoft.com).
15 *
16 * Copyright remains Eric Young's, and as such any Copyright notices in
17 * the code are not to be removed.
18 * If this package is used in a product, Eric Young should be given attribution
19 * as the author of the parts of the library used.
20 * This can be in the form of a textual message at program startup or
21 * in documentation (online or textual) provided with the package.
22 *
23 * Redistribution and use in source and binary forms, with or without
24 * modification, are permitted provided that the following conditions
25 * are met:
26 * 1. Redistributions of source code must retain the copyright
27 * notice, this list of conditions and the following disclaimer.
28 * 2. Redistributions in binary form must reproduce the above copyright
29 * notice, this list of conditions and the following disclaimer in the
30 * documentation and/or other materials provided with the distribution.
31 * 3. All advertising materials mentioning features or use of this software
32 * must display the following acknowledgement:
33 * "This product includes cryptographic software written by
34 * Eric Young (eay@cryptsoft.com)"
35 * The word 'cryptographic' can be left out if the rouines from the library
36 * being used are not cryptographic related :-).
37 * 4. If you include any Windows specific code (or a derivative thereof) from
38 * the apps directory (application code) you must include an acknowledgement:
39 * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
40 *
41 * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
42 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
43 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
44 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
45 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
46 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
47 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
48 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
49 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
50 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
51 * SUCH DAMAGE.
52 *
53 * The licence and distribution terms for any publically available version or
54 * derivative of this code cannot be changed. i.e. this code cannot simply be
55 * copied and put under another distribution licence
56 * [including the GNU Public Licence.]
57 */
58
59 #ifndef BN_DEBUG
60 # undef NDEBUG /* avoid conflicting definitions */
61 # define NDEBUG
62 #endif
63
64 #include <assert.h>
65 #include "cryptlib.h"
66 #include "bn_lcl.h"
67
68 #if defined(OPENSSL_NO_ASM) || !defined(OPENSSL_BN_ASM_PART_WORDS)
69 /* Here follows specialised variants of bn_add_words() and
70 bn_sub_words(). They have the property performing operations on
71 arrays of different sizes. The sizes of those arrays is expressed through
72 cl, which is the common length ( basicall, min(len(a),len(b)) ), and dl,
73 which is the delta between the two lengths, calculated as len(a)-len(b).
74 All lengths are the number of BN_ULONGs... For the operations that require
75 a result array as parameter, it must have the length cl+abs(dl).
76 These functions should probably end up in bn_asm.c as soon as there are
77 assembler counterparts for the systems that use assembler files. */
78
79 BN_ULONG bn_sub_part_words(BN_ULONG *r,
80 const BN_ULONG *a, const BN_ULONG *b,
81 int cl, int dl)
82 {
83 BN_ULONG c, t;
84
85 assert(cl >= 0);
86 c = bn_sub_words(r, a, b, cl);
87
88 if (dl == 0)
89 return c;
90
91 r += cl;
92 a += cl;
93 b += cl;
94
95 if (dl < 0)
96 {
97 for (;;)
98 {
99 t = b[0];
100 r[0] = (0-t-c)&BN_MASK2;
101 if (t != 0) c=1;
102 if (++dl >= 0) break;
103
104 t = b[1];
105 r[1] = (0-t-c)&BN_MASK2;
106 if (t != 0) c=1;
107 if (++dl >= 0) break;
108
109 t = b[2];
110 r[2] = (0-t-c)&BN_MASK2;
111 if (t != 0) c=1;
112 if (++dl >= 0) break;
113
114 t = b[3];
115 r[3] = (0-t-c)&BN_MASK2;
116 if (t != 0) c=1;
117 if (++dl >= 0) break;
118
119 b += 4;
120 r += 4;
121 }
122 }
123 else
124 {
125 int save_dl = dl;
126 while(c)
127 {
128 t = a[0];
129 r[0] = (t-c)&BN_MASK2;
130 if (t != 0) c=0;
131 if (--dl <= 0) break;
132
133 t = a[1];
134 r[1] = (t-c)&BN_MASK2;
135 if (t != 0) c=0;
136 if (--dl <= 0) break;
137
138 t = a[2];
139 r[2] = (t-c)&BN_MASK2;
140 if (t != 0) c=0;
141 if (--dl <= 0) break;
142
143 t = a[3];
144 r[3] = (t-c)&BN_MASK2;
145 if (t != 0) c=0;
146 if (--dl <= 0) break;
147
148 save_dl = dl;
149 a += 4;
150 r += 4;
151 }
152 if (dl > 0)
153 {
154 if (save_dl > dl)
155 {
156 switch (save_dl - dl)
157 {
158 case 1:
159 r[1] = a[1];
160 if (--dl <= 0) break;
161 case 2:
162 r[2] = a[2];
163 if (--dl <= 0) break;
164 case 3:
165 r[3] = a[3];
166 if (--dl <= 0) break;
167 }
168 a += 4;
169 r += 4;
170 }
171 }
172 if (dl > 0)
173 {
174 for(;;)
175 {
176 r[0] = a[0];
177 if (--dl <= 0) break;
178 r[1] = a[1];
179 if (--dl <= 0) break;
180 r[2] = a[2];
181 if (--dl <= 0) break;
182 r[3] = a[3];
183 if (--dl <= 0) break;
184
185 a += 4;
186 r += 4;
187 }
188 }
189 }
190 return c;
191 }
192 #endif
193
194 BN_ULONG bn_add_part_words(BN_ULONG *r,
195 const BN_ULONG *a, const BN_ULONG *b,
196 int cl, int dl)
197 {
198 BN_ULONG c, l, t;
199
200 assert(cl >= 0);
201 c = bn_add_words(r, a, b, cl);
202
203 if (dl == 0)
204 return c;
205
206 r += cl;
207 a += cl;
208 b += cl;
209
210 if (dl < 0)
211 {
212 int save_dl = dl;
213 while (c)
214 {
215 l=(c+b[0])&BN_MASK2;
216 c=(l < c);
217 r[0]=l;
218 if (++dl >= 0) break;
219
220 l=(c+b[1])&BN_MASK2;
221 c=(l < c);
222 r[1]=l;
223 if (++dl >= 0) break;
224
225 l=(c+b[2])&BN_MASK2;
226 c=(l < c);
227 r[2]=l;
228 if (++dl >= 0) break;
229
230 l=(c+b[3])&BN_MASK2;
231 c=(l < c);
232 r[3]=l;
233 if (++dl >= 0) break;
234
235 save_dl = dl;
236 b+=4;
237 r+=4;
238 }
239 if (dl < 0)
240 {
241 if (save_dl < dl)
242 {
243 switch (dl - save_dl)
244 {
245 case 1:
246 r[1] = b[1];
247 if (++dl >= 0) break;
248 case 2:
249 r[2] = b[2];
250 if (++dl >= 0) break;
251 case 3:
252 r[3] = b[3];
253 if (++dl >= 0) break;
254 }
255 b += 4;
256 r += 4;
257 }
258 }
259 if (dl < 0)
260 {
261 for(;;)
262 {
263 r[0] = b[0];
264 if (++dl >= 0) break;
265 r[1] = b[1];
266 if (++dl >= 0) break;
267 r[2] = b[2];
268 if (++dl >= 0) break;
269 r[3] = b[3];
270 if (++dl >= 0) break;
271
272 b += 4;
273 r += 4;
274 }
275 }
276 }
277 else
278 {
279 int save_dl = dl;
280 while (c)
281 {
282 t=(a[0]+c)&BN_MASK2;
283 c=(t < c);
284 r[0]=t;
285 if (--dl <= 0) break;
286
287 t=(a[1]+c)&BN_MASK2;
288 c=(t < c);
289 r[1]=t;
290 if (--dl <= 0) break;
291
292 t=(a[2]+c)&BN_MASK2;
293 c=(t < c);
294 r[2]=t;
295 if (--dl <= 0) break;
296
297 t=(a[3]+c)&BN_MASK2;
298 c=(t < c);
299 r[3]=t;
300 if (--dl <= 0) break;
301
302 save_dl = dl;
303 a+=4;
304 r+=4;
305 }
306 if (dl > 0)
307 {
308 if (save_dl > dl)
309 {
310 switch (save_dl - dl)
311 {
312 case 1:
313 r[1] = a[1];
314 if (--dl <= 0) break;
315 case 2:
316 r[2] = a[2];
317 if (--dl <= 0) break;
318 case 3:
319 r[3] = a[3];
320 if (--dl <= 0) break;
321 }
322 a += 4;
323 r += 4;
324 }
325 }
326 if (dl > 0)
327 {
328 for(;;)
329 {
330 r[0] = a[0];
331 if (--dl <= 0) break;
332 r[1] = a[1];
333 if (--dl <= 0) break;
334 r[2] = a[2];
335 if (--dl <= 0) break;
336 r[3] = a[3];
337 if (--dl <= 0) break;
338
339 a += 4;
340 r += 4;
341 }
342 }
343 }
344 return c;
345 }
346
347 #ifdef BN_RECURSION
348 /* Karatsuba recursive multiplication algorithm
349 * (cf. Knuth, The Art of Computer Programming, Vol. 2) */
350
351 /* r is 2*n2 words in size,
352 * a and b are both n2 words in size.
353 * n2 must be a power of 2.
354 * We multiply and return the result.
355 * t must be 2*n2 words in size
356 * We calculate
357 * a[0]*b[0]
358 * a[0]*b[0]+a[1]*b[1]+(a[0]-a[1])*(b[1]-b[0])
359 * a[1]*b[1]
360 */
361 /* dnX may not be positive, but n2/2+dnX has to be */
362 void bn_mul_recursive(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n2,
363 int dna, int dnb, BN_ULONG *t)
364 {
365 int n=n2/2,c1,c2;
366 int tna=n+dna, tnb=n+dnb;
367 unsigned int neg,zero;
368 BN_ULONG ln,lo,*p;
369
370 # ifdef BN_MUL_COMBA
371 # if 0
372 if (n2 == 4)
373 {
374 bn_mul_comba4(r,a,b);
375 return;
376 }
377 # endif
378 /* Only call bn_mul_comba 8 if n2 == 8 and the
379 * two arrays are complete [steve]
380 */
381 if (n2 == 8 && dna == 0 && dnb == 0)
382 {
383 bn_mul_comba8(r,a,b);
384 return;
385 }
386 # endif /* BN_MUL_COMBA */
387 /* Else do normal multiply */
388 if (n2 < BN_MUL_RECURSIVE_SIZE_NORMAL)
389 {
390 bn_mul_normal(r,a,n2+dna,b,n2+dnb);
391 if ((dna + dnb) < 0)
392 memset(&r[2*n2 + dna + dnb], 0,
393 sizeof(BN_ULONG) * -(dna + dnb));
394 return;
395 }
396 /* r=(a[0]-a[1])*(b[1]-b[0]) */
397 c1=bn_cmp_part_words(a,&(a[n]),tna,n-tna);
398 c2=bn_cmp_part_words(&(b[n]),b,tnb,tnb-n);
399 zero=neg=0;
400 switch (c1*3+c2)
401 {
402 case -4:
403 bn_sub_part_words(t, &(a[n]),a, tna,tna-n); /* - */
404 bn_sub_part_words(&(t[n]),b, &(b[n]),tnb,n-tnb); /* - */
405 break;
406 case -3:
407 zero=1;
408 break;
409 case -2:
410 bn_sub_part_words(t, &(a[n]),a, tna,tna-n); /* - */
411 bn_sub_part_words(&(t[n]),&(b[n]),b, tnb,tnb-n); /* + */
412 neg=1;
413 break;
414 case -1:
415 case 0:
416 case 1:
417 zero=1;
418 break;
419 case 2:
420 bn_sub_part_words(t, a, &(a[n]),tna,n-tna); /* + */
421 bn_sub_part_words(&(t[n]),b, &(b[n]),tnb,n-tnb); /* - */
422 neg=1;
423 break;
424 case 3:
425 zero=1;
426 break;
427 case 4:
428 bn_sub_part_words(t, a, &(a[n]),tna,n-tna);
429 bn_sub_part_words(&(t[n]),&(b[n]),b, tnb,tnb-n);
430 break;
431 }
432
433 # ifdef BN_MUL_COMBA
434 if (n == 4 && dna == 0 && dnb == 0) /* XXX: bn_mul_comba4 could take
435 extra args to do this well */
436 {
437 if (!zero)
438 bn_mul_comba4(&(t[n2]),t,&(t[n]));
439 else
440 memset(&(t[n2]),0,8*sizeof(BN_ULONG));
441
442 bn_mul_comba4(r,a,b);
443 bn_mul_comba4(&(r[n2]),&(a[n]),&(b[n]));
444 }
445 else if (n == 8 && dna == 0 && dnb == 0) /* XXX: bn_mul_comba8 could
446 take extra args to do this
447 well */
448 {
449 if (!zero)
450 bn_mul_comba8(&(t[n2]),t,&(t[n]));
451 else
452 memset(&(t[n2]),0,16*sizeof(BN_ULONG));
453
454 bn_mul_comba8(r,a,b);
455 bn_mul_comba8(&(r[n2]),&(a[n]),&(b[n]));
456 }
457 else
458 # endif /* BN_MUL_COMBA */
459 {
460 p= &(t[n2*2]);
461 if (!zero)
462 bn_mul_recursive(&(t[n2]),t,&(t[n]),n,0,0,p);
463 else
464 memset(&(t[n2]),0,n2*sizeof(BN_ULONG));
465 bn_mul_recursive(r,a,b,n,0,0,p);
466 bn_mul_recursive(&(r[n2]),&(a[n]),&(b[n]),n,dna,dnb,p);
467 }
468
469 /* t[32] holds (a[0]-a[1])*(b[1]-b[0]), c1 is the sign
470 * r[10] holds (a[0]*b[0])
471 * r[32] holds (b[1]*b[1])
472 */
473
474 c1=(int)(bn_add_words(t,r,&(r[n2]),n2));
475
476 if (neg) /* if t[32] is negative */
477 {
478 c1-=(int)(bn_sub_words(&(t[n2]),t,&(t[n2]),n2));
479 }
480 else
481 {
482 /* Might have a carry */
483 c1+=(int)(bn_add_words(&(t[n2]),&(t[n2]),t,n2));
484 }
485
486 /* t[32] holds (a[0]-a[1])*(b[1]-b[0])+(a[0]*b[0])+(a[1]*b[1])
487 * r[10] holds (a[0]*b[0])
488 * r[32] holds (b[1]*b[1])
489 * c1 holds the carry bits
490 */
491 c1+=(int)(bn_add_words(&(r[n]),&(r[n]),&(t[n2]),n2));
492 if (c1)
493 {
494 p= &(r[n+n2]);
495 lo= *p;
496 ln=(lo+c1)&BN_MASK2;
497 *p=ln;
498
499 /* The overflow will stop before we over write
500 * words we should not overwrite */
501 if (ln < (BN_ULONG)c1)
502 {
503 do {
504 p++;
505 lo= *p;
506 ln=(lo+1)&BN_MASK2;
507 *p=ln;
508 } while (ln == 0);
509 }
510 }
511 }
512
513 /* n+tn is the word length
514 * t needs to be n*4 is size, as does r */
515 /* tnX may not be negative but less than n */
516 void bn_mul_part_recursive(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n,
517 int tna, int tnb, BN_ULONG *t)
518 {
519 int i,j,n2=n*2;
520 int c1,c2,neg;
521 BN_ULONG ln,lo,*p;
522
523 if (n < 8)
524 {
525 bn_mul_normal(r,a,n+tna,b,n+tnb);
526 return;
527 }
528
529 /* r=(a[0]-a[1])*(b[1]-b[0]) */
530 c1=bn_cmp_part_words(a,&(a[n]),tna,n-tna);
531 c2=bn_cmp_part_words(&(b[n]),b,tnb,tnb-n);
532 neg=0;
533 switch (c1*3+c2)
534 {
535 case -4:
536 bn_sub_part_words(t, &(a[n]),a, tna,tna-n); /* - */
537 bn_sub_part_words(&(t[n]),b, &(b[n]),tnb,n-tnb); /* - */
538 break;
539 case -3:
540 /* break; */
541 case -2:
542 bn_sub_part_words(t, &(a[n]),a, tna,tna-n); /* - */
543 bn_sub_part_words(&(t[n]),&(b[n]),b, tnb,tnb-n); /* + */
544 neg=1;
545 break;
546 case -1:
547 case 0:
548 case 1:
549 /* break; */
550 case 2:
551 bn_sub_part_words(t, a, &(a[n]),tna,n-tna); /* + */
552 bn_sub_part_words(&(t[n]),b, &(b[n]),tnb,n-tnb); /* - */
553 neg=1;
554 break;
555 case 3:
556 /* break; */
557 case 4:
558 bn_sub_part_words(t, a, &(a[n]),tna,n-tna);
559 bn_sub_part_words(&(t[n]),&(b[n]),b, tnb,tnb-n);
560 break;
561 }
562 /* The zero case isn't yet implemented here. The speedup
563 would probably be negligible. */
564 # if 0
565 if (n == 4)
566 {
567 bn_mul_comba4(&(t[n2]),t,&(t[n]));
568 bn_mul_comba4(r,a,b);
569 bn_mul_normal(&(r[n2]),&(a[n]),tn,&(b[n]),tn);
570 memset(&(r[n2+tn*2]),0,sizeof(BN_ULONG)*(n2-tn*2));
571 }
572 else
573 # endif
574 if (n == 8)
575 {
576 bn_mul_comba8(&(t[n2]),t,&(t[n]));
577 bn_mul_comba8(r,a,b);
578 bn_mul_normal(&(r[n2]),&(a[n]),tna,&(b[n]),tnb);
579 memset(&(r[n2+tna+tnb]),0,sizeof(BN_ULONG)*(n2-tna-tnb));
580 }
581 else
582 {
583 p= &(t[n2*2]);
584 bn_mul_recursive(&(t[n2]),t,&(t[n]),n,0,0,p);
585 bn_mul_recursive(r,a,b,n,0,0,p);
586 i=n/2;
587 /* If there is only a bottom half to the number,
588 * just do it */
589 if (tna > tnb)
590 j = tna - i;
591 else
592 j = tnb - i;
593 if (j == 0)
594 {
595 bn_mul_recursive(&(r[n2]),&(a[n]),&(b[n]),
596 i,tna-i,tnb-i,p);
597 memset(&(r[n2+i*2]),0,sizeof(BN_ULONG)*(n2-i*2));
598 }
599 else if (j > 0) /* eg, n == 16, i == 8 and tn == 11 */
600 {
601 bn_mul_part_recursive(&(r[n2]),&(a[n]),&(b[n]),
602 i,tna-i,tnb-i,p);
603 memset(&(r[n2+tna+tnb]),0,
604 sizeof(BN_ULONG)*(n2-tna-tnb));
605 }
606 else /* (j < 0) eg, n == 16, i == 8 and tn == 5 */
607 {
608 memset(&(r[n2]),0,sizeof(BN_ULONG)*n2);
609 if (tna < BN_MUL_RECURSIVE_SIZE_NORMAL
610 && tnb < BN_MUL_RECURSIVE_SIZE_NORMAL)
611 {
612 bn_mul_normal(&(r[n2]),&(a[n]),tna,&(b[n]),tnb);
613 }
614 else
615 {
616 for (;;)
617 {
618 i/=2;
619 /* these simplified conditions work
620 * exclusively because difference
621 * between tna and tnb is 1 or 0 */
622 if (i < tna || i < tnb)
623 {
624 bn_mul_part_recursive(&(r[n2]),
625 &(a[n]),&(b[n]),
626 i,tna-i,tnb-i,p);
627 break;
628 }
629 else if (i == tna || i == tnb)
630 {
631 bn_mul_recursive(&(r[n2]),
632 &(a[n]),&(b[n]),
633 i,tna-i,tnb-i,p);
634 break;
635 }
636 }
637 }
638 }
639 }
640
641 /* t[32] holds (a[0]-a[1])*(b[1]-b[0]), c1 is the sign
642 * r[10] holds (a[0]*b[0])
643 * r[32] holds (b[1]*b[1])
644 */
645
646 c1=(int)(bn_add_words(t,r,&(r[n2]),n2));
647
648 if (neg) /* if t[32] is negative */
649 {
650 c1-=(int)(bn_sub_words(&(t[n2]),t,&(t[n2]),n2));
651 }
652 else
653 {
654 /* Might have a carry */
655 c1+=(int)(bn_add_words(&(t[n2]),&(t[n2]),t,n2));
656 }
657
658 /* t[32] holds (a[0]-a[1])*(b[1]-b[0])+(a[0]*b[0])+(a[1]*b[1])
659 * r[10] holds (a[0]*b[0])
660 * r[32] holds (b[1]*b[1])
661 * c1 holds the carry bits
662 */
663 c1+=(int)(bn_add_words(&(r[n]),&(r[n]),&(t[n2]),n2));
664 if (c1)
665 {
666 p= &(r[n+n2]);
667 lo= *p;
668 ln=(lo+c1)&BN_MASK2;
669 *p=ln;
670
671 /* The overflow will stop before we over write
672 * words we should not overwrite */
673 if (ln < (BN_ULONG)c1)
674 {
675 do {
676 p++;
677 lo= *p;
678 ln=(lo+1)&BN_MASK2;
679 *p=ln;
680 } while (ln == 0);
681 }
682 }
683 }
684
685 /* a and b must be the same size, which is n2.
686 * r needs to be n2 words and t needs to be n2*2
687 */
688 void bn_mul_low_recursive(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n2,
689 BN_ULONG *t)
690 {
691 int n=n2/2;
692
693 bn_mul_recursive(r,a,b,n,0,0,&(t[0]));
694 if (n >= BN_MUL_LOW_RECURSIVE_SIZE_NORMAL)
695 {
696 bn_mul_low_recursive(&(t[0]),&(a[0]),&(b[n]),n,&(t[n2]));
697 bn_add_words(&(r[n]),&(r[n]),&(t[0]),n);
698 bn_mul_low_recursive(&(t[0]),&(a[n]),&(b[0]),n,&(t[n2]));
699 bn_add_words(&(r[n]),&(r[n]),&(t[0]),n);
700 }
701 else
702 {
703 bn_mul_low_normal(&(t[0]),&(a[0]),&(b[n]),n);
704 bn_mul_low_normal(&(t[n]),&(a[n]),&(b[0]),n);
705 bn_add_words(&(r[n]),&(r[n]),&(t[0]),n);
706 bn_add_words(&(r[n]),&(r[n]),&(t[n]),n);
707 }
708 }
709
710 /* a and b must be the same size, which is n2.
711 * r needs to be n2 words and t needs to be n2*2
712 * l is the low words of the output.
713 * t needs to be n2*3
714 */
715 void bn_mul_high(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, BN_ULONG *l, int n2,
716 BN_ULONG *t)
717 {
718 int i,n;
719 int c1,c2;
720 int neg,oneg,zero;
721 BN_ULONG ll,lc,*lp,*mp;
722
723 n=n2/2;
724
725 /* Calculate (al-ah)*(bh-bl) */
726 neg=zero=0;
727 c1=bn_cmp_words(&(a[0]),&(a[n]),n);
728 c2=bn_cmp_words(&(b[n]),&(b[0]),n);
729 switch (c1*3+c2)
730 {
731 case -4:
732 bn_sub_words(&(r[0]),&(a[n]),&(a[0]),n);
733 bn_sub_words(&(r[n]),&(b[0]),&(b[n]),n);
734 break;
735 case -3:
736 zero=1;
737 break;
738 case -2:
739 bn_sub_words(&(r[0]),&(a[n]),&(a[0]),n);
740 bn_sub_words(&(r[n]),&(b[n]),&(b[0]),n);
741 neg=1;
742 break;
743 case -1:
744 case 0:
745 case 1:
746 zero=1;
747 break;
748 case 2:
749 bn_sub_words(&(r[0]),&(a[0]),&(a[n]),n);
750 bn_sub_words(&(r[n]),&(b[0]),&(b[n]),n);
751 neg=1;
752 break;
753 case 3:
754 zero=1;
755 break;
756 case 4:
757 bn_sub_words(&(r[0]),&(a[0]),&(a[n]),n);
758 bn_sub_words(&(r[n]),&(b[n]),&(b[0]),n);
759 break;
760 }
761
762 oneg=neg;
763 /* t[10] = (a[0]-a[1])*(b[1]-b[0]) */
764 /* r[10] = (a[1]*b[1]) */
765 # ifdef BN_MUL_COMBA
766 if (n == 8)
767 {
768 bn_mul_comba8(&(t[0]),&(r[0]),&(r[n]));
769 bn_mul_comba8(r,&(a[n]),&(b[n]));
770 }
771 else
772 # endif
773 {
774 bn_mul_recursive(&(t[0]),&(r[0]),&(r[n]),n,0,0,&(t[n2]));
775 bn_mul_recursive(r,&(a[n]),&(b[n]),n,0,0,&(t[n2]));
776 }
777
778 /* s0 == low(al*bl)
779 * s1 == low(ah*bh)+low((al-ah)*(bh-bl))+low(al*bl)+high(al*bl)
780 * We know s0 and s1 so the only unknown is high(al*bl)
781 * high(al*bl) == s1 - low(ah*bh+s0+(al-ah)*(bh-bl))
782 * high(al*bl) == s1 - (r[0]+l[0]+t[0])
783 */
784 if (l != NULL)
785 {
786 lp= &(t[n2+n]);
787 c1=(int)(bn_add_words(lp,&(r[0]),&(l[0]),n));
788 }
789 else
790 {
791 c1=0;
792 lp= &(r[0]);
793 }
794
795 if (neg)
796 neg=(int)(bn_sub_words(&(t[n2]),lp,&(t[0]),n));
797 else
798 {
799 bn_add_words(&(t[n2]),lp,&(t[0]),n);
800 neg=0;
801 }
802
803 if (l != NULL)
804 {
805 bn_sub_words(&(t[n2+n]),&(l[n]),&(t[n2]),n);
806 }
807 else
808 {
809 lp= &(t[n2+n]);
810 mp= &(t[n2]);
811 for (i=0; i<n; i++)
812 lp[i]=((~mp[i])+1)&BN_MASK2;
813 }
814
815 /* s[0] = low(al*bl)
816 * t[3] = high(al*bl)
817 * t[10] = (a[0]-a[1])*(b[1]-b[0]) neg is the sign
818 * r[10] = (a[1]*b[1])
819 */
820 /* R[10] = al*bl
821 * R[21] = al*bl + ah*bh + (a[0]-a[1])*(b[1]-b[0])
822 * R[32] = ah*bh
823 */
824 /* R[1]=t[3]+l[0]+r[0](+-)t[0] (have carry/borrow)
825 * R[2]=r[0]+t[3]+r[1](+-)t[1] (have carry/borrow)
826 * R[3]=r[1]+(carry/borrow)
827 */
828 if (l != NULL)
829 {
830 lp= &(t[n2]);
831 c1= (int)(bn_add_words(lp,&(t[n2+n]),&(l[0]),n));
832 }
833 else
834 {
835 lp= &(t[n2+n]);
836 c1=0;
837 }
838 c1+=(int)(bn_add_words(&(t[n2]),lp, &(r[0]),n));
839 if (oneg)
840 c1-=(int)(bn_sub_words(&(t[n2]),&(t[n2]),&(t[0]),n));
841 else
842 c1+=(int)(bn_add_words(&(t[n2]),&(t[n2]),&(t[0]),n));
843
844 c2 =(int)(bn_add_words(&(r[0]),&(r[0]),&(t[n2+n]),n));
845 c2+=(int)(bn_add_words(&(r[0]),&(r[0]),&(r[n]),n));
846 if (oneg)
847 c2-=(int)(bn_sub_words(&(r[0]),&(r[0]),&(t[n]),n));
848 else
849 c2+=(int)(bn_add_words(&(r[0]),&(r[0]),&(t[n]),n));
850
851 if (c1 != 0) /* Add starting at r[0], could be +ve or -ve */
852 {
853 i=0;
854 if (c1 > 0)
855 {
856 lc=c1;
857 do {
858 ll=(r[i]+lc)&BN_MASK2;
859 r[i++]=ll;
860 lc=(lc > ll);
861 } while (lc);
862 }
863 else
864 {
865 lc= -c1;
866 do {
867 ll=r[i];
868 r[i++]=(ll-lc)&BN_MASK2;
869 lc=(lc > ll);
870 } while (lc);
871 }
872 }
873 if (c2 != 0) /* Add starting at r[1] */
874 {
875 i=n;
876 if (c2 > 0)
877 {
878 lc=c2;
879 do {
880 ll=(r[i]+lc)&BN_MASK2;
881 r[i++]=ll;
882 lc=(lc > ll);
883 } while (lc);
884 }
885 else
886 {
887 lc= -c2;
888 do {
889 ll=r[i];
890 r[i++]=(ll-lc)&BN_MASK2;
891 lc=(lc > ll);
892 } while (lc);
893 }
894 }
895 }
896 #endif /* BN_RECURSION */
897
898 int BN_mul(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, BN_CTX *ctx)
899 {
900 int ret=0;
901 int top,al,bl;
902 BIGNUM *rr;
903 #if defined(BN_MUL_COMBA) || defined(BN_RECURSION)
904 int i;
905 #endif
906 #ifdef BN_RECURSION
907 BIGNUM *t=NULL;
908 int j=0,k;
909 #endif
910
911 bn_check_top(a);
912 bn_check_top(b);
913 bn_check_top(r);
914
915 al=a->top;
916 bl=b->top;
917
918 if ((al == 0) || (bl == 0))
919 {
920 BN_zero(r);
921 return(1);
922 }
923 top=al+bl;
924
925 BN_CTX_start(ctx);
926 if ((r == a) || (r == b))
927 {
928 if ((rr = BN_CTX_get(ctx)) == NULL) goto err;
929 }
930 else
931 rr = r;
932 rr->neg=a->neg^b->neg;
933
934 #if defined(BN_MUL_COMBA) || defined(BN_RECURSION)
935 i = al-bl;
936 #endif
937 #ifdef BN_MUL_COMBA
938 if (i == 0)
939 {
940 # if 0
941 if (al == 4)
942 {
943 if (bn_wexpand(rr,8) == NULL) goto err;
944 rr->top=8;
945 bn_mul_comba4(rr->d,a->d,b->d);
946 goto end;
947 }
948 # endif
949 if (al == 8)
950 {
951 if (bn_wexpand(rr,16) == NULL) goto err;
952 rr->top=16;
953 bn_mul_comba8(rr->d,a->d,b->d);
954 goto end;
955 }
956 }
957 #endif /* BN_MUL_COMBA */
958 #ifdef BN_RECURSION
959 if ((al >= BN_MULL_SIZE_NORMAL) && (bl >= BN_MULL_SIZE_NORMAL))
960 {
961 if (i >= -1 && i <= 1)
962 {
963 /* Find out the power of two lower or equal
964 to the longest of the two numbers */
965 if (i >= 0)
966 {
967 j = BN_num_bits_word((BN_ULONG)al);
968 }
969 if (i == -1)
970 {
971 j = BN_num_bits_word((BN_ULONG)bl);
972 }
973 j = 1<<(j-1);
974 assert(j <= al || j <= bl);
975 k = j+j;
976 t = BN_CTX_get(ctx);
977 if (t == NULL)
978 goto err;
979 if (al > j || bl > j)
980 {
981 if (bn_wexpand(t,k*4) == NULL) goto err;
982 if (bn_wexpand(rr,k*4) == NULL) goto err;
983 bn_mul_part_recursive(rr->d,a->d,b->d,
984 j,al-j,bl-j,t->d);
985 }
986 else /* al <= j || bl <= j */
987 {
988 if (bn_wexpand(t,k*2) == NULL) goto err;
989 if (bn_wexpand(rr,k*2) == NULL) goto err;
990 bn_mul_recursive(rr->d,a->d,b->d,
991 j,al-j,bl-j,t->d);
992 }
993 rr->top=top;
994 goto end;
995 }
996 #if 0
997 if (i == 1 && !BN_get_flags(b,BN_FLG_STATIC_DATA))
998 {
999 BIGNUM *tmp_bn = (BIGNUM *)b;
1000 if (bn_wexpand(tmp_bn,al) == NULL) goto err;
1001 tmp_bn->d[bl]=0;
1002 bl++;
1003 i--;
1004 }
1005 else if (i == -1 && !BN_get_flags(a,BN_FLG_STATIC_DATA))
1006 {
1007 BIGNUM *tmp_bn = (BIGNUM *)a;
1008 if (bn_wexpand(tmp_bn,bl) == NULL) goto err;
1009 tmp_bn->d[al]=0;
1010 al++;
1011 i++;
1012 }
1013 if (i == 0)
1014 {
1015 /* symmetric and > 4 */
1016 /* 16 or larger */
1017 j=BN_num_bits_word((BN_ULONG)al);
1018 j=1<<(j-1);
1019 k=j+j;
1020 t = BN_CTX_get(ctx);
1021 if (al == j) /* exact multiple */
1022 {
1023 if (bn_wexpand(t,k*2) == NULL) goto err;
1024 if (bn_wexpand(rr,k*2) == NULL) goto err;
1025 bn_mul_recursive(rr->d,a->d,b->d,al,t->d);
1026 }
1027 else
1028 {
1029 if (bn_wexpand(t,k*4) == NULL) goto err;
1030 if (bn_wexpand(rr,k*4) == NULL) goto err;
1031 bn_mul_part_recursive(rr->d,a->d,b->d,al-j,j,t->d);
1032 }
1033 rr->top=top;
1034 goto end;
1035 }
1036 #endif
1037 }
1038 #endif /* BN_RECURSION */
1039 if (bn_wexpand(rr,top) == NULL) goto err;
1040 rr->top=top;
1041 bn_mul_normal(rr->d,a->d,al,b->d,bl);
1042
1043 #if defined(BN_MUL_COMBA) || defined(BN_RECURSION)
1044 end:
1045 #endif
1046 bn_correct_top(rr);
1047 if (r != rr) BN_copy(r,rr);
1048 ret=1;
1049 err:
1050 bn_check_top(r);
1051 BN_CTX_end(ctx);
1052 return(ret);
1053 }
1054
1055 void bn_mul_normal(BN_ULONG *r, BN_ULONG *a, int na, BN_ULONG *b, int nb)
1056 {
1057 BN_ULONG *rr;
1058
1059 if (na < nb)
1060 {
1061 int itmp;
1062 BN_ULONG *ltmp;
1063
1064 itmp=na; na=nb; nb=itmp;
1065 ltmp=a; a=b; b=ltmp;
1066
1067 }
1068 rr= &(r[na]);
1069 if (nb <= 0)
1070 {
1071 (void)bn_mul_words(r,a,na,0);
1072 return;
1073 }
1074 else
1075 rr[0]=bn_mul_words(r,a,na,b[0]);
1076
1077 for (;;)
1078 {
1079 if (--nb <= 0) return;
1080 rr[1]=bn_mul_add_words(&(r[1]),a,na,b[1]);
1081 if (--nb <= 0) return;
1082 rr[2]=bn_mul_add_words(&(r[2]),a,na,b[2]);
1083 if (--nb <= 0) return;
1084 rr[3]=bn_mul_add_words(&(r[3]),a,na,b[3]);
1085 if (--nb <= 0) return;
1086 rr[4]=bn_mul_add_words(&(r[4]),a,na,b[4]);
1087 rr+=4;
1088 r+=4;
1089 b+=4;
1090 }
1091 }
1092
1093 void bn_mul_low_normal(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n)
1094 {
1095 bn_mul_words(r,a,n,b[0]);
1096
1097 for (;;)
1098 {
1099 if (--n <= 0) return;
1100 bn_mul_add_words(&(r[1]),a,n,b[1]);
1101 if (--n <= 0) return;
1102 bn_mul_add_words(&(r[2]),a,n,b[2]);
1103 if (--n <= 0) return;
1104 bn_mul_add_words(&(r[3]),a,n,b[3]);
1105 if (--n <= 0) return;
1106 bn_mul_add_words(&(r[4]),a,n,b[4]);
1107 r+=4;
1108 b+=4;
1109 }
1110 }