]> git.ipfire.org Git - thirdparty/openssl.git/blame - crypto/bn/bn_mul.c
Run util/openssl-format-source -v -c .
[thirdparty/openssl.git] / crypto / bn / bn_mul.c
CommitLineData
d02b48c6 1/* crypto/bn/bn_mul.c */
58964a49 2/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
d02b48c6
RE
3 * All rights reserved.
4 *
5 * This package is an SSL implementation written
6 * by Eric Young (eay@cryptsoft.com).
7 * The implementation was written so as to conform with Netscapes SSL.
40720ce3 8 *
d02b48c6
RE
9 * This library is free for commercial and non-commercial use as long as
10 * the following conditions are aheared to. The following conditions
11 * apply to all code found in this distribution, be it the RC4, RSA,
12 * lhash, DES, etc., code; not just the SSL code. The SSL documentation
13 * included with this distribution is covered by the same copyright terms
14 * except that the holder is Tim Hudson (tjh@cryptsoft.com).
40720ce3 15 *
d02b48c6
RE
16 * Copyright remains Eric Young's, and as such any Copyright notices in
17 * the code are not to be removed.
18 * If this package is used in a product, Eric Young should be given attribution
19 * as the author of the parts of the library used.
20 * This can be in the form of a textual message at program startup or
21 * in documentation (online or textual) provided with the package.
40720ce3 22 *
d02b48c6
RE
23 * Redistribution and use in source and binary forms, with or without
24 * modification, are permitted provided that the following conditions
25 * are met:
26 * 1. Redistributions of source code must retain the copyright
27 * notice, this list of conditions and the following disclaimer.
28 * 2. Redistributions in binary form must reproduce the above copyright
29 * notice, this list of conditions and the following disclaimer in the
30 * documentation and/or other materials provided with the distribution.
31 * 3. All advertising materials mentioning features or use of this software
32 * must display the following acknowledgement:
33 * "This product includes cryptographic software written by
34 * Eric Young (eay@cryptsoft.com)"
35 * The word 'cryptographic' can be left out if the rouines from the library
36 * being used are not cryptographic related :-).
40720ce3 37 * 4. If you include any Windows specific code (or a derivative thereof) from
d02b48c6
RE
38 * the apps directory (application code) you must include an acknowledgement:
39 * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
40720ce3 40 *
d02b48c6
RE
41 * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
42 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
43 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
44 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
45 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
46 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
47 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
48 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
49 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
50 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
51 * SUCH DAMAGE.
40720ce3 52 *
d02b48c6
RE
53 * The licence and distribution terms for any publically available version or
54 * derivative of this code cannot be changed. i.e. this code cannot simply be
55 * copied and put under another distribution licence
56 * [including the GNU Public Licence.]
57 */
58
90e7ce36 59#ifndef BN_DEBUG
40720ce3 60# undef NDEBUG /* avoid conflicting definitions */
90e7ce36
UM
61# define NDEBUG
62#endif
63
d02b48c6 64#include <stdio.h>
baa257f1 65#include <assert.h>
d02b48c6
RE
66#include "cryptlib.h"
67#include "bn_lcl.h"
68
699543e4 69#if defined(OPENSSL_NO_ASM) || !defined(OPENSSL_BN_ASM_PART_WORDS)
40720ce3
MC
70/*
71 * Here follows specialised variants of bn_add_words() and bn_sub_words().
72 * They have the property performing operations on arrays of different sizes.
73 * The sizes of those arrays is expressed through cl, which is the common
74 * length ( basicall, min(len(a),len(b)) ), and dl, which is the delta
75 * between the two lengths, calculated as len(a)-len(b). All lengths are the
76 * number of BN_ULONGs... For the operations that require a result array as
77 * parameter, it must have the length cl+abs(dl). These functions should
78 * probably end up in bn_asm.c as soon as there are assembler counterparts
79 * for the systems that use assembler files.
80 */
baa257f1 81
baa257f1 82BN_ULONG bn_sub_part_words(BN_ULONG *r,
40720ce3
MC
83 const BN_ULONG *a, const BN_ULONG *b,
84 int cl, int dl)
85{
86 BN_ULONG c, t;
baa257f1 87
40720ce3
MC
88 assert(cl >= 0);
89 c = bn_sub_words(r, a, b, cl);
baa257f1 90
40720ce3
MC
91 if (dl == 0)
92 return c;
baa257f1 93
40720ce3
MC
94 r += cl;
95 a += cl;
96 b += cl;
baa257f1 97
40720ce3
MC
98 if (dl < 0) {
99# ifdef BN_COUNT
100 fprintf(stderr, " bn_sub_part_words %d + %d (dl < 0, c = %d)\n", cl,
101 dl, c);
102# endif
103 for (;;) {
104 t = b[0];
105 r[0] = (0 - t - c) & BN_MASK2;
106 if (t != 0)
107 c = 1;
108 if (++dl >= 0)
109 break;
110
111 t = b[1];
112 r[1] = (0 - t - c) & BN_MASK2;
113 if (t != 0)
114 c = 1;
115 if (++dl >= 0)
116 break;
117
118 t = b[2];
119 r[2] = (0 - t - c) & BN_MASK2;
120 if (t != 0)
121 c = 1;
122 if (++dl >= 0)
123 break;
124
125 t = b[3];
126 r[3] = (0 - t - c) & BN_MASK2;
127 if (t != 0)
128 c = 1;
129 if (++dl >= 0)
130 break;
131
132 b += 4;
133 r += 4;
134 }
135 } else {
136 int save_dl = dl;
137# ifdef BN_COUNT
138 fprintf(stderr, " bn_sub_part_words %d + %d (dl > 0, c = %d)\n", cl,
139 dl, c);
140# endif
141 while (c) {
142 t = a[0];
143 r[0] = (t - c) & BN_MASK2;
144 if (t != 0)
145 c = 0;
146 if (--dl <= 0)
147 break;
148
149 t = a[1];
150 r[1] = (t - c) & BN_MASK2;
151 if (t != 0)
152 c = 0;
153 if (--dl <= 0)
154 break;
155
156 t = a[2];
157 r[2] = (t - c) & BN_MASK2;
158 if (t != 0)
159 c = 0;
160 if (--dl <= 0)
161 break;
162
163 t = a[3];
164 r[3] = (t - c) & BN_MASK2;
165 if (t != 0)
166 c = 0;
167 if (--dl <= 0)
168 break;
169
170 save_dl = dl;
171 a += 4;
172 r += 4;
173 }
174 if (dl > 0) {
175# ifdef BN_COUNT
176 fprintf(stderr, " bn_sub_part_words %d + %d (dl > 0, c == 0)\n",
177 cl, dl);
178# endif
179 if (save_dl > dl) {
180 switch (save_dl - dl) {
181 case 1:
182 r[1] = a[1];
183 if (--dl <= 0)
184 break;
185 case 2:
186 r[2] = a[2];
187 if (--dl <= 0)
188 break;
189 case 3:
190 r[3] = a[3];
191 if (--dl <= 0)
192 break;
193 }
194 a += 4;
195 r += 4;
196 }
197 }
198 if (dl > 0) {
199# ifdef BN_COUNT
200 fprintf(stderr, " bn_sub_part_words %d + %d (dl > 0, copy)\n",
201 cl, dl);
202# endif
203 for (;;) {
204 r[0] = a[0];
205 if (--dl <= 0)
206 break;
207 r[1] = a[1];
208 if (--dl <= 0)
209 break;
210 r[2] = a[2];
211 if (--dl <= 0)
212 break;
213 r[3] = a[3];
214 if (--dl <= 0)
215 break;
216
217 a += 4;
218 r += 4;
219 }
220 }
221 }
222 return c;
223}
240f5169 224#endif
baa257f1
RL
225
226BN_ULONG bn_add_part_words(BN_ULONG *r,
40720ce3
MC
227 const BN_ULONG *a, const BN_ULONG *b,
228 int cl, int dl)
229{
230 BN_ULONG c, l, t;
baa257f1 231
40720ce3
MC
232 assert(cl >= 0);
233 c = bn_add_words(r, a, b, cl);
baa257f1 234
40720ce3
MC
235 if (dl == 0)
236 return c;
baa257f1 237
40720ce3
MC
238 r += cl;
239 a += cl;
240 b += cl;
baa257f1 241
40720ce3
MC
242 if (dl < 0) {
243 int save_dl = dl;
baa257f1 244#ifdef BN_COUNT
40720ce3
MC
245 fprintf(stderr, " bn_add_part_words %d + %d (dl < 0, c = %d)\n", cl,
246 dl, c);
baa257f1 247#endif
40720ce3
MC
248 while (c) {
249 l = (c + b[0]) & BN_MASK2;
250 c = (l < c);
251 r[0] = l;
252 if (++dl >= 0)
253 break;
254
255 l = (c + b[1]) & BN_MASK2;
256 c = (l < c);
257 r[1] = l;
258 if (++dl >= 0)
259 break;
260
261 l = (c + b[2]) & BN_MASK2;
262 c = (l < c);
263 r[2] = l;
264 if (++dl >= 0)
265 break;
266
267 l = (c + b[3]) & BN_MASK2;
268 c = (l < c);
269 r[3] = l;
270 if (++dl >= 0)
271 break;
272
273 save_dl = dl;
274 b += 4;
275 r += 4;
276 }
277 if (dl < 0) {
baa257f1 278#ifdef BN_COUNT
40720ce3
MC
279 fprintf(stderr, " bn_add_part_words %d + %d (dl < 0, c == 0)\n",
280 cl, dl);
baa257f1 281#endif
40720ce3
MC
282 if (save_dl < dl) {
283 switch (dl - save_dl) {
284 case 1:
285 r[1] = b[1];
286 if (++dl >= 0)
287 break;
288 case 2:
289 r[2] = b[2];
290 if (++dl >= 0)
291 break;
292 case 3:
293 r[3] = b[3];
294 if (++dl >= 0)
295 break;
296 }
297 b += 4;
298 r += 4;
299 }
300 }
301 if (dl < 0) {
baa257f1 302#ifdef BN_COUNT
40720ce3
MC
303 fprintf(stderr, " bn_add_part_words %d + %d (dl < 0, copy)\n",
304 cl, dl);
baa257f1 305#endif
40720ce3
MC
306 for (;;) {
307 r[0] = b[0];
308 if (++dl >= 0)
309 break;
310 r[1] = b[1];
311 if (++dl >= 0)
312 break;
313 r[2] = b[2];
314 if (++dl >= 0)
315 break;
316 r[3] = b[3];
317 if (++dl >= 0)
318 break;
319
320 b += 4;
321 r += 4;
322 }
323 }
324 } else {
325 int save_dl = dl;
baa257f1 326#ifdef BN_COUNT
40720ce3 327 fprintf(stderr, " bn_add_part_words %d + %d (dl > 0)\n", cl, dl);
baa257f1 328#endif
40720ce3
MC
329 while (c) {
330 t = (a[0] + c) & BN_MASK2;
331 c = (t < c);
332 r[0] = t;
333 if (--dl <= 0)
334 break;
335
336 t = (a[1] + c) & BN_MASK2;
337 c = (t < c);
338 r[1] = t;
339 if (--dl <= 0)
340 break;
341
342 t = (a[2] + c) & BN_MASK2;
343 c = (t < c);
344 r[2] = t;
345 if (--dl <= 0)
346 break;
347
348 t = (a[3] + c) & BN_MASK2;
349 c = (t < c);
350 r[3] = t;
351 if (--dl <= 0)
352 break;
353
354 save_dl = dl;
355 a += 4;
356 r += 4;
357 }
baa257f1 358#ifdef BN_COUNT
40720ce3
MC
359 fprintf(stderr, " bn_add_part_words %d + %d (dl > 0, c == 0)\n", cl,
360 dl);
baa257f1 361#endif
40720ce3
MC
362 if (dl > 0) {
363 if (save_dl > dl) {
364 switch (save_dl - dl) {
365 case 1:
366 r[1] = a[1];
367 if (--dl <= 0)
368 break;
369 case 2:
370 r[2] = a[2];
371 if (--dl <= 0)
372 break;
373 case 3:
374 r[3] = a[3];
375 if (--dl <= 0)
376 break;
377 }
378 a += 4;
379 r += 4;
380 }
381 }
382 if (dl > 0) {
baa257f1 383#ifdef BN_COUNT
40720ce3
MC
384 fprintf(stderr, " bn_add_part_words %d + %d (dl > 0, copy)\n",
385 cl, dl);
baa257f1 386#endif
40720ce3
MC
387 for (;;) {
388 r[0] = a[0];
389 if (--dl <= 0)
390 break;
391 r[1] = a[1];
392 if (--dl <= 0)
393 break;
394 r[2] = a[2];
395 if (--dl <= 0)
396 break;
397 r[3] = a[3];
398 if (--dl <= 0)
399 break;
400
401 a += 4;
402 r += 4;
403 }
404 }
405 }
406 return c;
407}
baa257f1 408
dfeab068 409#ifdef BN_RECURSION
40720ce3
MC
410/*
411 * Karatsuba recursive multiplication algorithm (cf. Knuth, The Art of
412 * Computer Programming, Vol. 2)
413 */
8782a426 414
b558c8d5
TH
415/*-
416 * r is 2*n2 words in size,
dfeab068
RE
417 * a and b are both n2 words in size.
418 * n2 must be a power of 2.
419 * We multiply and return the result.
420 * t must be 2*n2 words in size
657e60fa 421 * We calculate
dfeab068
RE
422 * a[0]*b[0]
423 * a[0]*b[0]+a[1]*b[1]+(a[0]-a[1])*(b[1]-b[0])
424 * a[1]*b[1]
425 */
231a737a 426/* dnX may not be positive, but n2/2+dnX has to be */
6b691a5c 427void bn_mul_recursive(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n2,
40720ce3
MC
428 int dna, int dnb, BN_ULONG *t)
429{
430 int n = n2 / 2, c1, c2;
431 int tna = n + dna, tnb = n + dnb;
432 unsigned int neg, zero;
433 BN_ULONG ln, lo, *p;
d02b48c6 434
775c63fc 435# ifdef BN_COUNT
40720ce3 436 fprintf(stderr, " bn_mul_recursive %d%+d * %d%+d\n", n2, dna, n2, dnb);
775c63fc
UM
437# endif
438# ifdef BN_MUL_COMBA
439# if 0
40720ce3
MC
440 if (n2 == 4) {
441 bn_mul_comba4(r, a, b);
442 return;
443 }
775c63fc 444# endif
40720ce3
MC
445 /*
446 * Only call bn_mul_comba 8 if n2 == 8 and the two arrays are complete
447 * [steve]
448 */
449 if (n2 == 8 && dna == 0 && dnb == 0) {
450 bn_mul_comba8(r, a, b);
451 return;
452 }
453# endif /* BN_MUL_COMBA */
454 /* Else do normal multiply */
455 if (n2 < BN_MUL_RECURSIVE_SIZE_NORMAL) {
456 bn_mul_normal(r, a, n2 + dna, b, n2 + dnb);
457 if ((dna + dnb) < 0)
458 memset(&r[2 * n2 + dna + dnb], 0,
459 sizeof(BN_ULONG) * -(dna + dnb));
460 return;
461 }
462 /* r=(a[0]-a[1])*(b[1]-b[0]) */
463 c1 = bn_cmp_part_words(a, &(a[n]), tna, n - tna);
464 c2 = bn_cmp_part_words(&(b[n]), b, tnb, tnb - n);
465 zero = neg = 0;
466 switch (c1 * 3 + c2) {
467 case -4:
468 bn_sub_part_words(t, &(a[n]), a, tna, tna - n); /* - */
469 bn_sub_part_words(&(t[n]), b, &(b[n]), tnb, n - tnb); /* - */
470 break;
471 case -3:
472 zero = 1;
473 break;
474 case -2:
475 bn_sub_part_words(t, &(a[n]), a, tna, tna - n); /* - */
476 bn_sub_part_words(&(t[n]), &(b[n]), b, tnb, tnb - n); /* + */
477 neg = 1;
478 break;
479 case -1:
480 case 0:
481 case 1:
482 zero = 1;
483 break;
484 case 2:
485 bn_sub_part_words(t, a, &(a[n]), tna, n - tna); /* + */
486 bn_sub_part_words(&(t[n]), b, &(b[n]), tnb, n - tnb); /* - */
487 neg = 1;
488 break;
489 case 3:
490 zero = 1;
491 break;
492 case 4:
493 bn_sub_part_words(t, a, &(a[n]), tna, n - tna);
494 bn_sub_part_words(&(t[n]), &(b[n]), b, tnb, tnb - n);
495 break;
496 }
d02b48c6 497
775c63fc 498# ifdef BN_MUL_COMBA
40720ce3
MC
499 if (n == 4 && dna == 0 && dnb == 0) { /* XXX: bn_mul_comba4 could take
500 * extra args to do this well */
501 if (!zero)
502 bn_mul_comba4(&(t[n2]), t, &(t[n]));
503 else
504 memset(&(t[n2]), 0, 8 * sizeof(BN_ULONG));
505
506 bn_mul_comba4(r, a, b);
507 bn_mul_comba4(&(r[n2]), &(a[n]), &(b[n]));
508 } else if (n == 8 && dna == 0 && dnb == 0) { /* XXX: bn_mul_comba8 could
509 * take extra args to do
510 * this well */
511 if (!zero)
512 bn_mul_comba8(&(t[n2]), t, &(t[n]));
513 else
514 memset(&(t[n2]), 0, 16 * sizeof(BN_ULONG));
515
516 bn_mul_comba8(r, a, b);
517 bn_mul_comba8(&(r[n2]), &(a[n]), &(b[n]));
518 } else
519# endif /* BN_MUL_COMBA */
520 {
521 p = &(t[n2 * 2]);
522 if (!zero)
523 bn_mul_recursive(&(t[n2]), t, &(t[n]), n, 0, 0, p);
524 else
525 memset(&(t[n2]), 0, n2 * sizeof(BN_ULONG));
526 bn_mul_recursive(r, a, b, n, 0, 0, p);
527 bn_mul_recursive(&(r[n2]), &(a[n]), &(b[n]), n, dna, dnb, p);
528 }
529
530 /*-
531 * t[32] holds (a[0]-a[1])*(b[1]-b[0]), c1 is the sign
532 * r[10] holds (a[0]*b[0])
533 * r[32] holds (b[1]*b[1])
534 */
535
536 c1 = (int)(bn_add_words(t, r, &(r[n2]), n2));
537
538 if (neg) { /* if t[32] is negative */
539 c1 -= (int)(bn_sub_words(&(t[n2]), t, &(t[n2]), n2));
540 } else {
541 /* Might have a carry */
542 c1 += (int)(bn_add_words(&(t[n2]), &(t[n2]), t, n2));
543 }
544
545 /*-
546 * t[32] holds (a[0]-a[1])*(b[1]-b[0])+(a[0]*b[0])+(a[1]*b[1])
547 * r[10] holds (a[0]*b[0])
548 * r[32] holds (b[1]*b[1])
549 * c1 holds the carry bits
550 */
551 c1 += (int)(bn_add_words(&(r[n]), &(r[n]), &(t[n2]), n2));
552 if (c1) {
553 p = &(r[n + n2]);
554 lo = *p;
555 ln = (lo + c1) & BN_MASK2;
556 *p = ln;
557
558 /*
559 * The overflow will stop before we over write words we should not
560 * overwrite
561 */
562 if (ln < (BN_ULONG)c1) {
563 do {
564 p++;
565 lo = *p;
566 ln = (lo + 1) & BN_MASK2;
567 *p = ln;
568 } while (ln == 0);
569 }
570 }
571}
572
573/*
574 * n+tn is the word length t needs to be n*4 is size, as does r
575 */
231a737a 576/* tnX may not be negative but less than n */
6a2347ee 577void bn_mul_part_recursive(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n,
40720ce3
MC
578 int tna, int tnb, BN_ULONG *t)
579{
580 int i, j, n2 = n * 2;
581 int c1, c2, neg;
582 BN_ULONG ln, lo, *p;
58964a49 583
775c63fc 584# ifdef BN_COUNT
40720ce3
MC
585 fprintf(stderr, " bn_mul_part_recursive (%d%+d) * (%d%+d)\n",
586 n, tna, n, tnb);
775c63fc 587# endif
40720ce3
MC
588 if (n < 8) {
589 bn_mul_normal(r, a, n + tna, b, n + tnb);
590 return;
591 }
592
593 /* r=(a[0]-a[1])*(b[1]-b[0]) */
594 c1 = bn_cmp_part_words(a, &(a[n]), tna, n - tna);
595 c2 = bn_cmp_part_words(&(b[n]), b, tnb, tnb - n);
596 neg = 0;
597 switch (c1 * 3 + c2) {
598 case -4:
599 bn_sub_part_words(t, &(a[n]), a, tna, tna - n); /* - */
600 bn_sub_part_words(&(t[n]), b, &(b[n]), tnb, n - tnb); /* - */
601 break;
602 case -3:
603 /* break; */
604 case -2:
605 bn_sub_part_words(t, &(a[n]), a, tna, tna - n); /* - */
606 bn_sub_part_words(&(t[n]), &(b[n]), b, tnb, tnb - n); /* + */
607 neg = 1;
608 break;
609 case -1:
610 case 0:
611 case 1:
612 /* break; */
613 case 2:
614 bn_sub_part_words(t, a, &(a[n]), tna, n - tna); /* + */
615 bn_sub_part_words(&(t[n]), b, &(b[n]), tnb, n - tnb); /* - */
616 neg = 1;
617 break;
618 case 3:
619 /* break; */
620 case 4:
621 bn_sub_part_words(t, a, &(a[n]), tna, n - tna);
622 bn_sub_part_words(&(t[n]), &(b[n]), b, tnb, tnb - n);
623 break;
624 }
625 /*
626 * The zero case isn't yet implemented here. The speedup would probably
627 * be negligible.
628 */
775c63fc 629# if 0
40720ce3
MC
630 if (n == 4) {
631 bn_mul_comba4(&(t[n2]), t, &(t[n]));
632 bn_mul_comba4(r, a, b);
633 bn_mul_normal(&(r[n2]), &(a[n]), tn, &(b[n]), tn);
634 memset(&(r[n2 + tn * 2]), 0, sizeof(BN_ULONG) * (n2 - tn * 2));
635 } else
775c63fc 636# endif
40720ce3
MC
637 if (n == 8) {
638 bn_mul_comba8(&(t[n2]), t, &(t[n]));
639 bn_mul_comba8(r, a, b);
640 bn_mul_normal(&(r[n2]), &(a[n]), tna, &(b[n]), tnb);
641 memset(&(r[n2 + tna + tnb]), 0, sizeof(BN_ULONG) * (n2 - tna - tnb));
642 } else {
643 p = &(t[n2 * 2]);
644 bn_mul_recursive(&(t[n2]), t, &(t[n]), n, 0, 0, p);
645 bn_mul_recursive(r, a, b, n, 0, 0, p);
646 i = n / 2;
647 /*
648 * If there is only a bottom half to the number, just do it
649 */
650 if (tna > tnb)
651 j = tna - i;
652 else
653 j = tnb - i;
654 if (j == 0) {
655 bn_mul_recursive(&(r[n2]), &(a[n]), &(b[n]),
656 i, tna - i, tnb - i, p);
657 memset(&(r[n2 + i * 2]), 0, sizeof(BN_ULONG) * (n2 - i * 2));
658 } else if (j > 0) { /* eg, n == 16, i == 8 and tn == 11 */
659 bn_mul_part_recursive(&(r[n2]), &(a[n]), &(b[n]),
660 i, tna - i, tnb - i, p);
661 memset(&(r[n2 + tna + tnb]), 0,
662 sizeof(BN_ULONG) * (n2 - tna - tnb));
663 } else { /* (j < 0) eg, n == 16, i == 8 and tn == 5 */
664
665 memset(&(r[n2]), 0, sizeof(BN_ULONG) * n2);
666 if (tna < BN_MUL_RECURSIVE_SIZE_NORMAL
667 && tnb < BN_MUL_RECURSIVE_SIZE_NORMAL) {
668 bn_mul_normal(&(r[n2]), &(a[n]), tna, &(b[n]), tnb);
669 } else {
670 for (;;) {
671 i /= 2;
672 /*
673 * these simplified conditions work exclusively because
674 * difference between tna and tnb is 1 or 0
675 */
676 if (i < tna || i < tnb) {
677 bn_mul_part_recursive(&(r[n2]),
678 &(a[n]), &(b[n]),
679 i, tna - i, tnb - i, p);
680 break;
681 } else if (i == tna || i == tnb) {
682 bn_mul_recursive(&(r[n2]),
683 &(a[n]), &(b[n]),
684 i, tna - i, tnb - i, p);
685 break;
686 }
687 }
688 }
689 }
690 }
691
692 /*-
693 * t[32] holds (a[0]-a[1])*(b[1]-b[0]), c1 is the sign
694 * r[10] holds (a[0]*b[0])
695 * r[32] holds (b[1]*b[1])
696 */
697
698 c1 = (int)(bn_add_words(t, r, &(r[n2]), n2));
699
700 if (neg) { /* if t[32] is negative */
701 c1 -= (int)(bn_sub_words(&(t[n2]), t, &(t[n2]), n2));
702 } else {
703 /* Might have a carry */
704 c1 += (int)(bn_add_words(&(t[n2]), &(t[n2]), t, n2));
705 }
706
707 /*-
708 * t[32] holds (a[0]-a[1])*(b[1]-b[0])+(a[0]*b[0])+(a[1]*b[1])
709 * r[10] holds (a[0]*b[0])
710 * r[32] holds (b[1]*b[1])
711 * c1 holds the carry bits
712 */
713 c1 += (int)(bn_add_words(&(r[n]), &(r[n]), &(t[n2]), n2));
714 if (c1) {
715 p = &(r[n + n2]);
716 lo = *p;
717 ln = (lo + c1) & BN_MASK2;
718 *p = ln;
719
720 /*
721 * The overflow will stop before we over write words we should not
722 * overwrite
723 */
724 if (ln < (BN_ULONG)c1) {
725 do {
726 p++;
727 lo = *p;
728 ln = (lo + 1) & BN_MASK2;
729 *p = ln;
730 } while (ln == 0);
731 }
732 }
733}
58964a49 734
b558c8d5
TH
735/*-
736 * a and b must be the same size, which is n2.
dfeab068
RE
737 * r needs to be n2 words and t needs to be n2*2
738 */
6b691a5c 739void bn_mul_low_recursive(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n2,
40720ce3
MC
740 BN_ULONG *t)
741{
742 int n = n2 / 2;
dfeab068 743
775c63fc 744# ifdef BN_COUNT
40720ce3 745 fprintf(stderr, " bn_mul_low_recursive %d * %d\n", n2, n2);
775c63fc 746# endif
dfeab068 747
40720ce3
MC
748 bn_mul_recursive(r, a, b, n, 0, 0, &(t[0]));
749 if (n >= BN_MUL_LOW_RECURSIVE_SIZE_NORMAL) {
750 bn_mul_low_recursive(&(t[0]), &(a[0]), &(b[n]), n, &(t[n2]));
751 bn_add_words(&(r[n]), &(r[n]), &(t[0]), n);
752 bn_mul_low_recursive(&(t[0]), &(a[n]), &(b[0]), n, &(t[n2]));
753 bn_add_words(&(r[n]), &(r[n]), &(t[0]), n);
754 } else {
755 bn_mul_low_normal(&(t[0]), &(a[0]), &(b[n]), n);
756 bn_mul_low_normal(&(t[n]), &(a[n]), &(b[0]), n);
757 bn_add_words(&(r[n]), &(r[n]), &(t[0]), n);
758 bn_add_words(&(r[n]), &(r[n]), &(t[n]), n);
759 }
760}
58964a49 761
b558c8d5
TH
762/*-
763 * a and b must be the same size, which is n2.
dfeab068
RE
764 * r needs to be n2 words and t needs to be n2*2
765 * l is the low words of the output.
766 * t needs to be n2*3
767 */
6b691a5c 768void bn_mul_high(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, BN_ULONG *l, int n2,
40720ce3
MC
769 BN_ULONG *t)
770{
771 int i, n;
772 int c1, c2;
773 int neg, oneg, zero;
774 BN_ULONG ll, lc, *lp, *mp;
dfeab068 775
775c63fc 776# ifdef BN_COUNT
40720ce3 777 fprintf(stderr, " bn_mul_high %d * %d\n", n2, n2);
775c63fc 778# endif
40720ce3
MC
779 n = n2 / 2;
780
781 /* Calculate (al-ah)*(bh-bl) */
782 neg = zero = 0;
783 c1 = bn_cmp_words(&(a[0]), &(a[n]), n);
784 c2 = bn_cmp_words(&(b[n]), &(b[0]), n);
785 switch (c1 * 3 + c2) {
786 case -4:
787 bn_sub_words(&(r[0]), &(a[n]), &(a[0]), n);
788 bn_sub_words(&(r[n]), &(b[0]), &(b[n]), n);
789 break;
790 case -3:
791 zero = 1;
792 break;
793 case -2:
794 bn_sub_words(&(r[0]), &(a[n]), &(a[0]), n);
795 bn_sub_words(&(r[n]), &(b[n]), &(b[0]), n);
796 neg = 1;
797 break;
798 case -1:
799 case 0:
800 case 1:
801 zero = 1;
802 break;
803 case 2:
804 bn_sub_words(&(r[0]), &(a[0]), &(a[n]), n);
805 bn_sub_words(&(r[n]), &(b[0]), &(b[n]), n);
806 neg = 1;
807 break;
808 case 3:
809 zero = 1;
810 break;
811 case 4:
812 bn_sub_words(&(r[0]), &(a[0]), &(a[n]), n);
813 bn_sub_words(&(r[n]), &(b[n]), &(b[0]), n);
814 break;
815 }
816
817 oneg = neg;
818 /* t[10] = (a[0]-a[1])*(b[1]-b[0]) */
819 /* r[10] = (a[1]*b[1]) */
775c63fc 820# ifdef BN_MUL_COMBA
40720ce3
MC
821 if (n == 8) {
822 bn_mul_comba8(&(t[0]), &(r[0]), &(r[n]));
823 bn_mul_comba8(r, &(a[n]), &(b[n]));
824 } else
775c63fc 825# endif
40720ce3
MC
826 {
827 bn_mul_recursive(&(t[0]), &(r[0]), &(r[n]), n, 0, 0, &(t[n2]));
828 bn_mul_recursive(r, &(a[n]), &(b[n]), n, 0, 0, &(t[n2]));
829 }
830
831 /*-
832 * s0 == low(al*bl)
833 * s1 == low(ah*bh)+low((al-ah)*(bh-bl))+low(al*bl)+high(al*bl)
834 * We know s0 and s1 so the only unknown is high(al*bl)
835 * high(al*bl) == s1 - low(ah*bh+s0+(al-ah)*(bh-bl))
836 * high(al*bl) == s1 - (r[0]+l[0]+t[0])
837 */
838 if (l != NULL) {
839 lp = &(t[n2 + n]);
840 c1 = (int)(bn_add_words(lp, &(r[0]), &(l[0]), n));
841 } else {
842 c1 = 0;
843 lp = &(r[0]);
844 }
845
846 if (neg)
847 neg = (int)(bn_sub_words(&(t[n2]), lp, &(t[0]), n));
848 else {
849 bn_add_words(&(t[n2]), lp, &(t[0]), n);
850 neg = 0;
851 }
852
853 if (l != NULL) {
854 bn_sub_words(&(t[n2 + n]), &(l[n]), &(t[n2]), n);
855 } else {
856 lp = &(t[n2 + n]);
857 mp = &(t[n2]);
858 for (i = 0; i < n; i++)
859 lp[i] = ((~mp[i]) + 1) & BN_MASK2;
860 }
861
862 /*-
863 * s[0] = low(al*bl)
864 * t[3] = high(al*bl)
865 * t[10] = (a[0]-a[1])*(b[1]-b[0]) neg is the sign
866 * r[10] = (a[1]*b[1])
867 */
868 /*-
869 * R[10] = al*bl
870 * R[21] = al*bl + ah*bh + (a[0]-a[1])*(b[1]-b[0])
871 * R[32] = ah*bh
872 */
873 /*-
874 * R[1]=t[3]+l[0]+r[0](+-)t[0] (have carry/borrow)
875 * R[2]=r[0]+t[3]+r[1](+-)t[1] (have carry/borrow)
876 * R[3]=r[1]+(carry/borrow)
877 */
878 if (l != NULL) {
879 lp = &(t[n2]);
880 c1 = (int)(bn_add_words(lp, &(t[n2 + n]), &(l[0]), n));
881 } else {
882 lp = &(t[n2 + n]);
883 c1 = 0;
884 }
885 c1 += (int)(bn_add_words(&(t[n2]), lp, &(r[0]), n));
886 if (oneg)
887 c1 -= (int)(bn_sub_words(&(t[n2]), &(t[n2]), &(t[0]), n));
888 else
889 c1 += (int)(bn_add_words(&(t[n2]), &(t[n2]), &(t[0]), n));
890
891 c2 = (int)(bn_add_words(&(r[0]), &(r[0]), &(t[n2 + n]), n));
892 c2 += (int)(bn_add_words(&(r[0]), &(r[0]), &(r[n]), n));
893 if (oneg)
894 c2 -= (int)(bn_sub_words(&(r[0]), &(r[0]), &(t[n]), n));
895 else
896 c2 += (int)(bn_add_words(&(r[0]), &(r[0]), &(t[n]), n));
897
898 if (c1 != 0) { /* Add starting at r[0], could be +ve or -ve */
899 i = 0;
900 if (c1 > 0) {
901 lc = c1;
902 do {
903 ll = (r[i] + lc) & BN_MASK2;
904 r[i++] = ll;
905 lc = (lc > ll);
906 } while (lc);
907 } else {
908 lc = -c1;
909 do {
910 ll = r[i];
911 r[i++] = (ll - lc) & BN_MASK2;
912 lc = (lc > ll);
913 } while (lc);
914 }
915 }
916 if (c2 != 0) { /* Add starting at r[1] */
917 i = n;
918 if (c2 > 0) {
919 lc = c2;
920 do {
921 ll = (r[i] + lc) & BN_MASK2;
922 r[i++] = ll;
923 lc = (lc > ll);
924 } while (lc);
925 } else {
926 lc = -c2;
927 do {
928 ll = r[i];
929 r[i++] = (ll - lc) & BN_MASK2;
930 lc = (lc > ll);
931 } while (lc);
932 }
933 }
934}
935#endif /* BN_RECURSION */
58964a49 936
6a2347ee 937int BN_mul(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, BN_CTX *ctx)
40720ce3
MC
938{
939 int ret = 0;
940 int top, al, bl;
941 BIGNUM *rr;
775c63fc 942#if defined(BN_MUL_COMBA) || defined(BN_RECURSION)
40720ce3 943 int i;
775c63fc 944#endif
a0a54079 945#ifdef BN_RECURSION
40720ce3
MC
946 BIGNUM *t = NULL;
947 int j = 0, k;
a0a54079 948#endif
dfeab068
RE
949
950#ifdef BN_COUNT
40720ce3 951 fprintf(stderr, "BN_mul %d * %d\n", a->top, b->top);
dfeab068
RE
952#endif
953
40720ce3
MC
954 bn_check_top(a);
955 bn_check_top(b);
956 bn_check_top(r);
957
958 al = a->top;
959 bl = b->top;
960
961 if ((al == 0) || (bl == 0)) {
962 BN_zero(r);
963 return (1);
964 }
965 top = al + bl;
966
967 BN_CTX_start(ctx);
968 if ((r == a) || (r == b)) {
969 if ((rr = BN_CTX_get(ctx)) == NULL)
970 goto err;
971 } else
972 rr = r;
973 rr->neg = a->neg ^ b->neg;
a0a54079 974
dfeab068 975#if defined(BN_MUL_COMBA) || defined(BN_RECURSION)
40720ce3 976 i = al - bl;
775c63fc
UM
977#endif
978#ifdef BN_MUL_COMBA
40720ce3 979 if (i == 0) {
775c63fc 980# if 0
40720ce3
MC
981 if (al == 4) {
982 if (bn_wexpand(rr, 8) == NULL)
983 goto err;
984 rr->top = 8;
985 bn_mul_comba4(rr->d, a->d, b->d);
986 goto end;
987 }
775c63fc 988# endif
40720ce3
MC
989 if (al == 8) {
990 if (bn_wexpand(rr, 16) == NULL)
991 goto err;
992 rr->top = 16;
993 bn_mul_comba8(rr->d, a->d, b->d);
994 goto end;
995 }
996 }
997#endif /* BN_MUL_COMBA */
dfeab068 998#ifdef BN_RECURSION
40720ce3
MC
999 if ((al >= BN_MULL_SIZE_NORMAL) && (bl >= BN_MULL_SIZE_NORMAL)) {
1000 if (i >= -1 && i <= 1) {
1001 /*
1002 * Find out the power of two lower or equal to the longest of the
1003 * two numbers
1004 */
1005 if (i >= 0) {
1006 j = BN_num_bits_word((BN_ULONG)al);
1007 }
1008 if (i == -1) {
1009 j = BN_num_bits_word((BN_ULONG)bl);
1010 }
1011 j = 1 << (j - 1);
1012 assert(j <= al || j <= bl);
1013 k = j + j;
1014 t = BN_CTX_get(ctx);
1015 if (t == NULL)
1016 goto err;
1017 if (al > j || bl > j) {
1018 if (bn_wexpand(t, k * 4) == NULL)
1019 goto err;
1020 if (bn_wexpand(rr, k * 4) == NULL)
1021 goto err;
1022 bn_mul_part_recursive(rr->d, a->d, b->d,
1023 j, al - j, bl - j, t->d);
1024 } else { /* al <= j || bl <= j */
1025
1026 if (bn_wexpand(t, k * 2) == NULL)
1027 goto err;
1028 if (bn_wexpand(rr, k * 2) == NULL)
1029 goto err;
1030 bn_mul_recursive(rr->d, a->d, b->d, j, al - j, bl - j, t->d);
1031 }
1032 rr->top = top;
1033 goto end;
1034 }
1035# if 0
1036 if (i == 1 && !BN_get_flags(b, BN_FLG_STATIC_DATA)) {
1037 BIGNUM *tmp_bn = (BIGNUM *)b;
1038 if (bn_wexpand(tmp_bn, al) == NULL)
1039 goto err;
1040 tmp_bn->d[bl] = 0;
1041 bl++;
1042 i--;
1043 } else if (i == -1 && !BN_get_flags(a, BN_FLG_STATIC_DATA)) {
1044 BIGNUM *tmp_bn = (BIGNUM *)a;
1045 if (bn_wexpand(tmp_bn, bl) == NULL)
1046 goto err;
1047 tmp_bn->d[al] = 0;
1048 al++;
1049 i++;
1050 }
1051 if (i == 0) {
1052 /* symmetric and > 4 */
1053 /* 16 or larger */
1054 j = BN_num_bits_word((BN_ULONG)al);
1055 j = 1 << (j - 1);
1056 k = j + j;
1057 t = BN_CTX_get(ctx);
1058 if (al == j) { /* exact multiple */
1059 if (bn_wexpand(t, k * 2) == NULL)
1060 goto err;
1061 if (bn_wexpand(rr, k * 2) == NULL)
1062 goto err;
1063 bn_mul_recursive(rr->d, a->d, b->d, al, t->d);
1064 } else {
1065 if (bn_wexpand(t, k * 4) == NULL)
1066 goto err;
1067 if (bn_wexpand(rr, k * 4) == NULL)
1068 goto err;
1069 bn_mul_part_recursive(rr->d, a->d, b->d, al - j, j, t->d);
1070 }
1071 rr->top = top;
1072 goto end;
1073 }
1074# endif
1075 }
1076#endif /* BN_RECURSION */
1077 if (bn_wexpand(rr, top) == NULL)
1078 goto err;
1079 rr->top = top;
1080 bn_mul_normal(rr->d, a->d, al, b->d, bl);
58964a49 1081
a0a54079 1082#if defined(BN_MUL_COMBA) || defined(BN_RECURSION)
40720ce3 1083 end:
a0a54079 1084#endif
40720ce3
MC
1085 bn_correct_top(rr);
1086 if (r != rr)
1087 BN_copy(r, rr);
1088 ret = 1;
1089 err:
1090 bn_check_top(r);
1091 BN_CTX_end(ctx);
1092 return (ret);
1093}
58964a49 1094
6b691a5c 1095void bn_mul_normal(BN_ULONG *r, BN_ULONG *a, int na, BN_ULONG *b, int nb)
40720ce3
MC
1096{
1097 BN_ULONG *rr;
58964a49 1098
dfeab068 1099#ifdef BN_COUNT
40720ce3 1100 fprintf(stderr, " bn_mul_normal %d * %d\n", na, nb);
dfeab068 1101#endif
58964a49 1102
40720ce3
MC
1103 if (na < nb) {
1104 int itmp;
1105 BN_ULONG *ltmp;
1106
1107 itmp = na;
1108 na = nb;
1109 nb = itmp;
1110 ltmp = a;
1111 a = b;
1112 b = ltmp;
1113
1114 }
1115 rr = &(r[na]);
1116 if (nb <= 0) {
1117 (void)bn_mul_words(r, a, na, 0);
1118 return;
1119 } else
1120 rr[0] = bn_mul_words(r, a, na, b[0]);
1121
1122 for (;;) {
1123 if (--nb <= 0)
1124 return;
1125 rr[1] = bn_mul_add_words(&(r[1]), a, na, b[1]);
1126 if (--nb <= 0)
1127 return;
1128 rr[2] = bn_mul_add_words(&(r[2]), a, na, b[2]);
1129 if (--nb <= 0)
1130 return;
1131 rr[3] = bn_mul_add_words(&(r[3]), a, na, b[3]);
1132 if (--nb <= 0)
1133 return;
1134 rr[4] = bn_mul_add_words(&(r[4]), a, na, b[4]);
1135 rr += 4;
1136 r += 4;
1137 b += 4;
1138 }
1139}
dfeab068 1140
6b691a5c 1141void bn_mul_low_normal(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n)
40720ce3 1142{
dfeab068 1143#ifdef BN_COUNT
40720ce3 1144 fprintf(stderr, " bn_mul_low_normal %d * %d\n", n, n);
58964a49 1145#endif
40720ce3
MC
1146 bn_mul_words(r, a, n, b[0]);
1147
1148 for (;;) {
1149 if (--n <= 0)
1150 return;
1151 bn_mul_add_words(&(r[1]), a, n, b[1]);
1152 if (--n <= 0)
1153 return;
1154 bn_mul_add_words(&(r[2]), a, n, b[2]);
1155 if (--n <= 0)
1156 return;
1157 bn_mul_add_words(&(r[3]), a, n, b[3]);
1158 if (--n <= 0)
1159 return;
1160 bn_mul_add_words(&(r[4]), a, n, b[4]);
1161 r += 4;
1162 b += 4;
1163 }
1164}