]>
Commit | Line | Data |
---|---|---|
4f22f405 RS |
1 | /* |
2 | * Copyright 1995-2016 The OpenSSL Project Authors. All Rights Reserved. | |
dfeab068 | 3 | * |
367ace68 | 4 | * Licensed under the Apache License 2.0 (the "License"). You may not use |
4f22f405 RS |
5 | * this file except in compliance with the License. You can obtain a copy |
6 | * in the file LICENSE in the source distribution or at | |
7 | * https://www.openssl.org/source/license.html | |
dfeab068 RE |
8 | */ |
9 | ||
775c63fc | 10 | #include <assert.h> |
d749e108 | 11 | #include <openssl/crypto.h> |
b39fc560 | 12 | #include "internal/cryptlib.h" |
dfeab068 RE |
13 | #include "bn_lcl.h" |
14 | ||
fb81ac5e | 15 | #if defined(BN_LLONG) || defined(BN_UMULT_HIGH) |
dfeab068 | 16 | |
0f113f3e MC |
17 | BN_ULONG bn_mul_add_words(BN_ULONG *rp, const BN_ULONG *ap, int num, |
18 | BN_ULONG w) | |
19 | { | |
20 | BN_ULONG c1 = 0; | |
21 | ||
22 | assert(num >= 0); | |
23 | if (num <= 0) | |
26a7d938 | 24 | return c1; |
0f113f3e MC |
25 | |
26 | # ifndef OPENSSL_SMALL_FOOTPRINT | |
27 | while (num & ~3) { | |
28 | mul_add(rp[0], ap[0], w, c1); | |
29 | mul_add(rp[1], ap[1], w, c1); | |
30 | mul_add(rp[2], ap[2], w, c1); | |
31 | mul_add(rp[3], ap[3], w, c1); | |
32 | ap += 4; | |
33 | rp += 4; | |
34 | num -= 4; | |
35 | } | |
36 | # endif | |
37 | while (num) { | |
38 | mul_add(rp[0], ap[0], w, c1); | |
39 | ap++; | |
40 | rp++; | |
41 | num--; | |
42 | } | |
43 | ||
26a7d938 | 44 | return c1; |
0f113f3e | 45 | } |
dfeab068 | 46 | |
2fbc8a2a | 47 | BN_ULONG bn_mul_words(BN_ULONG *rp, const BN_ULONG *ap, int num, BN_ULONG w) |
0f113f3e MC |
48 | { |
49 | BN_ULONG c1 = 0; | |
50 | ||
51 | assert(num >= 0); | |
52 | if (num <= 0) | |
26a7d938 | 53 | return c1; |
0f113f3e MC |
54 | |
55 | # ifndef OPENSSL_SMALL_FOOTPRINT | |
56 | while (num & ~3) { | |
57 | mul(rp[0], ap[0], w, c1); | |
58 | mul(rp[1], ap[1], w, c1); | |
59 | mul(rp[2], ap[2], w, c1); | |
60 | mul(rp[3], ap[3], w, c1); | |
61 | ap += 4; | |
62 | rp += 4; | |
63 | num -= 4; | |
64 | } | |
65 | # endif | |
66 | while (num) { | |
67 | mul(rp[0], ap[0], w, c1); | |
68 | ap++; | |
69 | rp++; | |
70 | num--; | |
71 | } | |
26a7d938 | 72 | return c1; |
0f113f3e | 73 | } |
dfeab068 | 74 | |
2fbc8a2a | 75 | void bn_sqr_words(BN_ULONG *r, const BN_ULONG *a, int n) |
0f113f3e MC |
76 | { |
77 | assert(n >= 0); | |
78 | if (n <= 0) | |
79 | return; | |
80 | ||
81 | # ifndef OPENSSL_SMALL_FOOTPRINT | |
82 | while (n & ~3) { | |
83 | sqr(r[0], r[1], a[0]); | |
84 | sqr(r[2], r[3], a[1]); | |
85 | sqr(r[4], r[5], a[2]); | |
86 | sqr(r[6], r[7], a[3]); | |
87 | a += 4; | |
88 | r += 8; | |
89 | n -= 4; | |
90 | } | |
91 | # endif | |
92 | while (n) { | |
93 | sqr(r[0], r[1], a[0]); | |
94 | a++; | |
95 | r += 2; | |
96 | n--; | |
97 | } | |
98 | } | |
99 | ||
100 | #else /* !(defined(BN_LLONG) || | |
101 | * defined(BN_UMULT_HIGH)) */ | |
102 | ||
103 | BN_ULONG bn_mul_add_words(BN_ULONG *rp, const BN_ULONG *ap, int num, | |
104 | BN_ULONG w) | |
105 | { | |
106 | BN_ULONG c = 0; | |
107 | BN_ULONG bl, bh; | |
108 | ||
109 | assert(num >= 0); | |
110 | if (num <= 0) | |
26a7d938 | 111 | return (BN_ULONG)0; |
0f113f3e MC |
112 | |
113 | bl = LBITS(w); | |
114 | bh = HBITS(w); | |
115 | ||
116 | # ifndef OPENSSL_SMALL_FOOTPRINT | |
117 | while (num & ~3) { | |
118 | mul_add(rp[0], ap[0], bl, bh, c); | |
119 | mul_add(rp[1], ap[1], bl, bh, c); | |
120 | mul_add(rp[2], ap[2], bl, bh, c); | |
121 | mul_add(rp[3], ap[3], bl, bh, c); | |
122 | ap += 4; | |
123 | rp += 4; | |
124 | num -= 4; | |
125 | } | |
126 | # endif | |
127 | while (num) { | |
128 | mul_add(rp[0], ap[0], bl, bh, c); | |
129 | ap++; | |
130 | rp++; | |
131 | num--; | |
132 | } | |
26a7d938 | 133 | return c; |
0f113f3e | 134 | } |
dfeab068 | 135 | |
6343829a | 136 | BN_ULONG bn_mul_words(BN_ULONG *rp, const BN_ULONG *ap, int num, BN_ULONG w) |
0f113f3e MC |
137 | { |
138 | BN_ULONG carry = 0; | |
139 | BN_ULONG bl, bh; | |
140 | ||
141 | assert(num >= 0); | |
142 | if (num <= 0) | |
26a7d938 | 143 | return (BN_ULONG)0; |
0f113f3e MC |
144 | |
145 | bl = LBITS(w); | |
146 | bh = HBITS(w); | |
147 | ||
148 | # ifndef OPENSSL_SMALL_FOOTPRINT | |
149 | while (num & ~3) { | |
150 | mul(rp[0], ap[0], bl, bh, carry); | |
151 | mul(rp[1], ap[1], bl, bh, carry); | |
152 | mul(rp[2], ap[2], bl, bh, carry); | |
153 | mul(rp[3], ap[3], bl, bh, carry); | |
154 | ap += 4; | |
155 | rp += 4; | |
156 | num -= 4; | |
157 | } | |
158 | # endif | |
159 | while (num) { | |
160 | mul(rp[0], ap[0], bl, bh, carry); | |
161 | ap++; | |
162 | rp++; | |
163 | num--; | |
164 | } | |
26a7d938 | 165 | return carry; |
0f113f3e | 166 | } |
dfeab068 | 167 | |
6343829a | 168 | void bn_sqr_words(BN_ULONG *r, const BN_ULONG *a, int n) |
0f113f3e MC |
169 | { |
170 | assert(n >= 0); | |
171 | if (n <= 0) | |
172 | return; | |
173 | ||
174 | # ifndef OPENSSL_SMALL_FOOTPRINT | |
175 | while (n & ~3) { | |
176 | sqr64(r[0], r[1], a[0]); | |
177 | sqr64(r[2], r[3], a[1]); | |
178 | sqr64(r[4], r[5], a[2]); | |
179 | sqr64(r[6], r[7], a[3]); | |
180 | a += 4; | |
181 | r += 8; | |
182 | n -= 4; | |
183 | } | |
184 | # endif | |
185 | while (n) { | |
186 | sqr64(r[0], r[1], a[0]); | |
187 | a++; | |
188 | r += 2; | |
189 | n--; | |
190 | } | |
191 | } | |
192 | ||
193 | #endif /* !(defined(BN_LLONG) || | |
194 | * defined(BN_UMULT_HIGH)) */ | |
dfeab068 RE |
195 | |
196 | #if defined(BN_LLONG) && defined(BN_DIV2W) | |
197 | ||
6b691a5c | 198 | BN_ULONG bn_div_words(BN_ULONG h, BN_ULONG l, BN_ULONG d) |
0f113f3e MC |
199 | { |
200 | return ((BN_ULONG)(((((BN_ULLONG) h) << BN_BITS2) | l) / (BN_ULLONG) d)); | |
201 | } | |
dfeab068 RE |
202 | |
203 | #else | |
204 | ||
bbb8de09 | 205 | /* Divide h,l by d and return the result. */ |
dfeab068 | 206 | /* I need to test this some more :-( */ |
6b691a5c | 207 | BN_ULONG bn_div_words(BN_ULONG h, BN_ULONG l, BN_ULONG d) |
0f113f3e MC |
208 | { |
209 | BN_ULONG dh, dl, q, ret = 0, th, tl, t; | |
210 | int i, count = 2; | |
211 | ||
212 | if (d == 0) | |
26a7d938 | 213 | return BN_MASK2; |
0f113f3e MC |
214 | |
215 | i = BN_num_bits_word(d); | |
216 | assert((i == BN_BITS2) || (h <= (BN_ULONG)1 << i)); | |
217 | ||
218 | i = BN_BITS2 - i; | |
219 | if (h >= d) | |
220 | h -= d; | |
221 | ||
222 | if (i) { | |
223 | d <<= i; | |
224 | h = (h << i) | (l >> (BN_BITS2 - i)); | |
225 | l <<= i; | |
226 | } | |
227 | dh = (d & BN_MASK2h) >> BN_BITS4; | |
228 | dl = (d & BN_MASK2l); | |
229 | for (;;) { | |
230 | if ((h >> BN_BITS4) == dh) | |
231 | q = BN_MASK2l; | |
232 | else | |
233 | q = h / dh; | |
234 | ||
235 | th = q * dh; | |
236 | tl = dl * q; | |
237 | for (;;) { | |
238 | t = h - th; | |
239 | if ((t & BN_MASK2h) || | |
240 | ((tl) <= ((t << BN_BITS4) | ((l & BN_MASK2h) >> BN_BITS4)))) | |
241 | break; | |
242 | q--; | |
243 | th -= dh; | |
244 | tl -= dl; | |
245 | } | |
246 | t = (tl >> BN_BITS4); | |
247 | tl = (tl << BN_BITS4) & BN_MASK2h; | |
248 | th += t; | |
249 | ||
250 | if (l < tl) | |
251 | th++; | |
252 | l -= tl; | |
253 | if (h < th) { | |
254 | h += d; | |
255 | q--; | |
256 | } | |
257 | h -= th; | |
258 | ||
259 | if (--count == 0) | |
260 | break; | |
261 | ||
262 | ret = q << BN_BITS4; | |
263 | h = ((h << BN_BITS4) | (l >> BN_BITS4)) & BN_MASK2; | |
264 | l = (l & BN_MASK2l) << BN_BITS4; | |
265 | } | |
266 | ret |= q; | |
26a7d938 | 267 | return ret; |
0f113f3e MC |
268 | } |
269 | #endif /* !(defined(BN_LLONG) && defined(BN_DIV2W)) */ | |
dfeab068 RE |
270 | |
271 | #ifdef BN_LLONG | |
0f113f3e MC |
272 | BN_ULONG bn_add_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b, |
273 | int n) | |
274 | { | |
275 | BN_ULLONG ll = 0; | |
276 | ||
277 | assert(n >= 0); | |
278 | if (n <= 0) | |
26a7d938 | 279 | return (BN_ULONG)0; |
0f113f3e MC |
280 | |
281 | # ifndef OPENSSL_SMALL_FOOTPRINT | |
282 | while (n & ~3) { | |
283 | ll += (BN_ULLONG) a[0] + b[0]; | |
284 | r[0] = (BN_ULONG)ll & BN_MASK2; | |
285 | ll >>= BN_BITS2; | |
286 | ll += (BN_ULLONG) a[1] + b[1]; | |
287 | r[1] = (BN_ULONG)ll & BN_MASK2; | |
288 | ll >>= BN_BITS2; | |
289 | ll += (BN_ULLONG) a[2] + b[2]; | |
290 | r[2] = (BN_ULONG)ll & BN_MASK2; | |
291 | ll >>= BN_BITS2; | |
292 | ll += (BN_ULLONG) a[3] + b[3]; | |
293 | r[3] = (BN_ULONG)ll & BN_MASK2; | |
294 | ll >>= BN_BITS2; | |
295 | a += 4; | |
296 | b += 4; | |
297 | r += 4; | |
298 | n -= 4; | |
299 | } | |
300 | # endif | |
301 | while (n) { | |
302 | ll += (BN_ULLONG) a[0] + b[0]; | |
303 | r[0] = (BN_ULONG)ll & BN_MASK2; | |
304 | ll >>= BN_BITS2; | |
305 | a++; | |
306 | b++; | |
307 | r++; | |
308 | n--; | |
309 | } | |
26a7d938 | 310 | return (BN_ULONG)ll; |
0f113f3e MC |
311 | } |
312 | #else /* !BN_LLONG */ | |
313 | BN_ULONG bn_add_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b, | |
314 | int n) | |
315 | { | |
316 | BN_ULONG c, l, t; | |
317 | ||
318 | assert(n >= 0); | |
319 | if (n <= 0) | |
26a7d938 | 320 | return (BN_ULONG)0; |
0f113f3e MC |
321 | |
322 | c = 0; | |
323 | # ifndef OPENSSL_SMALL_FOOTPRINT | |
324 | while (n & ~3) { | |
325 | t = a[0]; | |
326 | t = (t + c) & BN_MASK2; | |
327 | c = (t < c); | |
328 | l = (t + b[0]) & BN_MASK2; | |
329 | c += (l < t); | |
330 | r[0] = l; | |
331 | t = a[1]; | |
332 | t = (t + c) & BN_MASK2; | |
333 | c = (t < c); | |
334 | l = (t + b[1]) & BN_MASK2; | |
335 | c += (l < t); | |
336 | r[1] = l; | |
337 | t = a[2]; | |
338 | t = (t + c) & BN_MASK2; | |
339 | c = (t < c); | |
340 | l = (t + b[2]) & BN_MASK2; | |
341 | c += (l < t); | |
342 | r[2] = l; | |
343 | t = a[3]; | |
344 | t = (t + c) & BN_MASK2; | |
345 | c = (t < c); | |
346 | l = (t + b[3]) & BN_MASK2; | |
347 | c += (l < t); | |
348 | r[3] = l; | |
349 | a += 4; | |
350 | b += 4; | |
351 | r += 4; | |
352 | n -= 4; | |
353 | } | |
354 | # endif | |
355 | while (n) { | |
356 | t = a[0]; | |
357 | t = (t + c) & BN_MASK2; | |
358 | c = (t < c); | |
359 | l = (t + b[0]) & BN_MASK2; | |
360 | c += (l < t); | |
361 | r[0] = l; | |
362 | a++; | |
363 | b++; | |
364 | r++; | |
365 | n--; | |
366 | } | |
26a7d938 | 367 | return (BN_ULONG)c; |
0f113f3e MC |
368 | } |
369 | #endif /* !BN_LLONG */ | |
370 | ||
371 | BN_ULONG bn_sub_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b, | |
372 | int n) | |
373 | { | |
374 | BN_ULONG t1, t2; | |
375 | int c = 0; | |
376 | ||
377 | assert(n >= 0); | |
378 | if (n <= 0) | |
26a7d938 | 379 | return (BN_ULONG)0; |
dfeab068 | 380 | |
682b112a | 381 | #ifndef OPENSSL_SMALL_FOOTPRINT |
0f113f3e MC |
382 | while (n & ~3) { |
383 | t1 = a[0]; | |
384 | t2 = b[0]; | |
385 | r[0] = (t1 - t2 - c) & BN_MASK2; | |
386 | if (t1 != t2) | |
387 | c = (t1 < t2); | |
388 | t1 = a[1]; | |
389 | t2 = b[1]; | |
390 | r[1] = (t1 - t2 - c) & BN_MASK2; | |
391 | if (t1 != t2) | |
392 | c = (t1 < t2); | |
393 | t1 = a[2]; | |
394 | t2 = b[2]; | |
395 | r[2] = (t1 - t2 - c) & BN_MASK2; | |
396 | if (t1 != t2) | |
397 | c = (t1 < t2); | |
398 | t1 = a[3]; | |
399 | t2 = b[3]; | |
400 | r[3] = (t1 - t2 - c) & BN_MASK2; | |
401 | if (t1 != t2) | |
402 | c = (t1 < t2); | |
403 | a += 4; | |
404 | b += 4; | |
405 | r += 4; | |
406 | n -= 4; | |
407 | } | |
682b112a | 408 | #endif |
0f113f3e MC |
409 | while (n) { |
410 | t1 = a[0]; | |
411 | t2 = b[0]; | |
412 | r[0] = (t1 - t2 - c) & BN_MASK2; | |
413 | if (t1 != t2) | |
414 | c = (t1 < t2); | |
415 | a++; | |
416 | b++; | |
417 | r++; | |
418 | n--; | |
419 | } | |
26a7d938 | 420 | return c; |
0f113f3e | 421 | } |
dfeab068 | 422 | |
682b112a | 423 | #if defined(BN_MUL_COMBA) && !defined(OPENSSL_SMALL_FOOTPRINT) |
dfeab068 | 424 | |
0f113f3e MC |
425 | # undef bn_mul_comba8 |
426 | # undef bn_mul_comba4 | |
427 | # undef bn_sqr_comba8 | |
428 | # undef bn_sqr_comba4 | |
dfeab068 | 429 | |
775c63fc UM |
430 | /* mul_add_c(a,b,c0,c1,c2) -- c+=a*b for three word number c=(c2,c1,c0) */ |
431 | /* mul_add_c2(a,b,c0,c1,c2) -- c+=2*a*b for three word number c=(c2,c1,c0) */ | |
432 | /* sqr_add_c(a,i,c0,c1,c2) -- c+=a[i]^2 for three word number c=(c2,c1,c0) */ | |
0f113f3e MC |
433 | /* |
434 | * sqr_add_c2(a,i,j,c0,c1,c2) -- c+=2*a[i]*a[j] for three word number | |
435 | * c=(c2,c1,c0) | |
436 | */ | |
775c63fc | 437 | |
0f113f3e | 438 | # ifdef BN_LLONG |
a7a44ba5 AP |
439 | /* |
440 | * Keep in mind that additions to multiplication result can not | |
441 | * overflow, because its high half cannot be all-ones. | |
442 | */ | |
0f113f3e MC |
/* (c2,c1,c0) += a * b, computed through the double-width BN_ULLONG type. */
#  define mul_add_c(a,b,c0,c1,c2)       do {    \
        BN_ULONG hi;                            \
        BN_ULLONG t = (BN_ULLONG)(a)*(b);       \
        t += c0;                /* no carry */  \
        c0 = (BN_ULONG)Lw(t);                   \
        hi = (BN_ULONG)Hw(t);                   \
        c1 = (c1+hi)&BN_MASK2; if (c1<hi) c2++; \
        } while(0)

/* (c2,c1,c0) += 2 * a * b: the same product is accumulated twice. */
#  define mul_add_c2(a,b,c0,c1,c2)      do {    \
        BN_ULONG hi;                            \
        BN_ULLONG t = (BN_ULLONG)(a)*(b);       \
        BN_ULLONG tt = t+c0;    /* no carry */  \
        c0 = (BN_ULONG)Lw(tt);                  \
        hi = (BN_ULONG)Hw(tt);                  \
        c1 = (c1+hi)&BN_MASK2; if (c1<hi) c2++; \
        t += c0;                /* no carry */  \
        c0 = (BN_ULONG)Lw(t);                   \
        hi = (BN_ULONG)Hw(t);                   \
        c1 = (c1+hi)&BN_MASK2; if (c1<hi) c2++; \
        } while(0)

/*
 * (c2,c1,c0) += a[i]^2.  The array argument is parenthesised, as in the
 * other variants of this macro, so that an expression such as `p + 1' can
 * be passed for |a|.
 */
#  define sqr_add_c(a,i,c0,c1,c2)       do {    \
        BN_ULONG hi;                            \
        BN_ULLONG t = (BN_ULLONG)(a)[i]*(a)[i]; \
        t += c0;                /* no carry */  \
        c0 = (BN_ULONG)Lw(t);                   \
        hi = (BN_ULONG)Hw(t);                   \
        c1 = (c1+hi)&BN_MASK2; if (c1<hi) c2++; \
        } while(0)

/* (c2,c1,c0) += 2 * a[i] * a[j] */
#  define sqr_add_c2(a,i,j,c0,c1,c2) \
        mul_add_c2((a)[i],(a)[j],c0,c1,c2)
476 | ||
477 | # elif defined(BN_UMULT_LOHI) | |
a7a44ba5 AP |
478 | /* |
479 | * Keep in mind that additions to hi can not overflow, because | |
480 | * the high word of a multiplication result cannot be all-ones. | |
481 | */ | |
0f113f3e MC |
/* (c2,c1,c0) += a * b, with the product supplied by BN_UMULT_LOHI(). */
#  define mul_add_c(a,b,c0,c1,c2)       do {    \
        BN_ULONG ta = (a), tb = (b);            \
        BN_ULONG lo, hi;                        \
        BN_UMULT_LOHI(lo,hi,ta,tb);             \
        c0 += lo;                               \
        if (c0 < lo)                            \
            hi++;                               \
        c1 += hi;                               \
        if (c1 < hi)                            \
            c2++;                               \
        } while(0)

/*
 * (c2,c1,c0) += 2 * a * b.  The product is added twice; |tt| carries the
 * high word for the first addition so that |hi| is still intact for the
 * second one.
 */
#  define mul_add_c2(a,b,c0,c1,c2)      do {    \
        BN_ULONG ta = (a), tb = (b);            \
        BN_ULONG lo, hi, tt;                    \
        BN_UMULT_LOHI(lo,hi,ta,tb);             \
        c0 += lo;                               \
        tt = hi;                                \
        if (c0 < lo)                            \
            tt++;                               \
        c1 += tt;                               \
        if (c1 < tt)                            \
            c2++;                               \
        c0 += lo;                               \
        if (c0 < lo)                            \
            hi++;                               \
        c1 += hi;                               \
        if (c1 < hi)                            \
            c2++;                               \
        } while(0)

/* (c2,c1,c0) += a[i]^2 */
#  define sqr_add_c(a,i,c0,c1,c2)       do {    \
        BN_ULONG ta = (a)[i];                   \
        BN_ULONG lo, hi;                        \
        BN_UMULT_LOHI(lo,hi,ta,ta);             \
        c0 += lo;                               \
        if (c0 < lo)                            \
            hi++;                               \
        c1 += hi;                               \
        if (c1 < hi)                            \
            c2++;                               \
        } while(0)

/* (c2,c1,c0) += 2 * a[i] * a[j] */
#  define sqr_add_c2(a,i,j,c0,c1,c2) \
        mul_add_c2((a)[i],(a)[j],c0,c1,c2)
510 | ||
511 | # elif defined(BN_UMULT_HIGH) | |
a7a44ba5 AP |
512 | /* |
513 | * Keep in mind that additions to hi can not overflow, because | |
514 | * the high word of a multiplication result cannot be all-ones. | |
515 | */ | |
0f113f3e MC |
/*
 * (c2,c1,c0) += a * b.  The low word comes from an ordinary multiply and
 * the high word from BN_UMULT_HIGH().
 */
#  define mul_add_c(a,b,c0,c1,c2)       do {    \
        BN_ULONG ta = (a), tb = (b);            \
        BN_ULONG lo = ta * tb;                  \
        BN_ULONG hi = BN_UMULT_HIGH(ta,tb);     \
        c0 += lo;                               \
        if (c0 < lo)                            \
            hi++;                               \
        c1 += hi;                               \
        if (c1 < hi)                            \
            c2++;                               \
        } while(0)

/*
 * (c2,c1,c0) += 2 * a * b.  The product is added twice; |tt| carries the
 * high word for the first addition so that |hi| is still intact for the
 * second one.
 */
#  define mul_add_c2(a,b,c0,c1,c2)      do {    \
        BN_ULONG ta = (a), tb = (b), tt;        \
        BN_ULONG lo = ta * tb;                  \
        BN_ULONG hi = BN_UMULT_HIGH(ta,tb);     \
        c0 += lo;                               \
        tt = hi;                                \
        if (c0 < lo)                            \
            tt++;                               \
        c1 += tt;                               \
        if (c1 < tt)                            \
            c2++;                               \
        c0 += lo;                               \
        if (c0 < lo)                            \
            hi++;                               \
        c1 += hi;                               \
        if (c1 < hi)                            \
            c2++;                               \
        } while(0)

/* (c2,c1,c0) += a[i]^2 */
#  define sqr_add_c(a,i,c0,c1,c2)       do {    \
        BN_ULONG ta = (a)[i];                   \
        BN_ULONG lo = ta * ta;                  \
        BN_ULONG hi = BN_UMULT_HIGH(ta,ta);     \
        c0 += lo;                               \
        if (c0 < lo)                            \
            hi++;                               \
        c1 += hi;                               \
        if (c1 < hi)                            \
            c2++;                               \
        } while(0)

/* (c2,c1,c0) += 2 * a[i] * a[j] */
#  define sqr_add_c2(a,i,j,c0,c1,c2)      \
        mul_add_c2((a)[i],(a)[j],c0,c1,c2)
544 | ||
545 | # else /* !BN_LLONG */ | |
a7a44ba5 AP |
546 | /* |
547 | * Keep in mind that additions to hi can not overflow, because | |
548 | * the high word of a multiplication result cannot be all-ones. | |
549 | */ | |
0f113f3e MC |
/*
 * (c2,c1,c0) += a * b, built from half-word pieces: both operands are
 * split with LBITS/HBITS and mul64() leaves the product in (hi,lo).
 */
#  define mul_add_c(a,b,c0,c1,c2)       do {    \
        BN_ULONG lo = LBITS(a), hi = HBITS(a);  \
        BN_ULONG bl = LBITS(b), bh = HBITS(b);  \
        mul64(lo,hi,bl,bh);                     \
        c0 = (c0+lo)&BN_MASK2;                  \
        hi += (c0 < lo);                        \
        c1 = (c1+hi)&BN_MASK2;                  \
        c2 += (c1 < hi);                        \
        } while(0)

/*
 * (c2,c1,c0) += 2 * a * b.  The product is added twice; |tt| carries the
 * high word for the first addition so that |hi| is still intact for the
 * second one.
 */
#  define mul_add_c2(a,b,c0,c1,c2)      do {    \
        BN_ULONG tt;                            \
        BN_ULONG lo = LBITS(a), hi = HBITS(a);  \
        BN_ULONG bl = LBITS(b), bh = HBITS(b);  \
        mul64(lo,hi,bl,bh);                     \
        tt = hi;                                \
        c0 = (c0+lo)&BN_MASK2;                  \
        tt += (c0 < lo);                        \
        c1 = (c1+tt)&BN_MASK2;                  \
        c2 += (c1 < tt);                        \
        c0 = (c0+lo)&BN_MASK2;                  \
        hi += (c0 < lo);                        \
        c1 = (c1+hi)&BN_MASK2;                  \
        c2 += (c1 < hi);                        \
        } while(0)

/* (c2,c1,c0) += a[i]^2, with the square supplied by sqr64(). */
#  define sqr_add_c(a,i,c0,c1,c2)       do {    \
        BN_ULONG lo, hi;                        \
        sqr64(lo,hi,(a)[i]);                    \
        c0 = (c0+lo)&BN_MASK2;                  \
        hi += (c0 < lo);                        \
        c1 = (c1+hi)&BN_MASK2;                  \
        c2 += (c1 < hi);                        \
        } while(0)

/* (c2,c1,c0) += 2 * a[i] * a[j] */
#  define sqr_add_c2(a,i,j,c0,c1,c2) \
        mul_add_c2((a)[i],(a)[j],c0,c1,c2)
579 | # endif /* !BN_LLONG */ | |
dfeab068 | 580 | |
6b691a5c | 581 | void bn_mul_comba8(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b) |
0f113f3e MC |
582 | { |
583 | BN_ULONG c1, c2, c3; | |
584 | ||
585 | c1 = 0; | |
586 | c2 = 0; | |
587 | c3 = 0; | |
588 | mul_add_c(a[0], b[0], c1, c2, c3); | |
589 | r[0] = c1; | |
590 | c1 = 0; | |
591 | mul_add_c(a[0], b[1], c2, c3, c1); | |
592 | mul_add_c(a[1], b[0], c2, c3, c1); | |
593 | r[1] = c2; | |
594 | c2 = 0; | |
595 | mul_add_c(a[2], b[0], c3, c1, c2); | |
596 | mul_add_c(a[1], b[1], c3, c1, c2); | |
597 | mul_add_c(a[0], b[2], c3, c1, c2); | |
598 | r[2] = c3; | |
599 | c3 = 0; | |
600 | mul_add_c(a[0], b[3], c1, c2, c3); | |
601 | mul_add_c(a[1], b[2], c1, c2, c3); | |
602 | mul_add_c(a[2], b[1], c1, c2, c3); | |
603 | mul_add_c(a[3], b[0], c1, c2, c3); | |
604 | r[3] = c1; | |
605 | c1 = 0; | |
606 | mul_add_c(a[4], b[0], c2, c3, c1); | |
607 | mul_add_c(a[3], b[1], c2, c3, c1); | |
608 | mul_add_c(a[2], b[2], c2, c3, c1); | |
609 | mul_add_c(a[1], b[3], c2, c3, c1); | |
610 | mul_add_c(a[0], b[4], c2, c3, c1); | |
611 | r[4] = c2; | |
612 | c2 = 0; | |
613 | mul_add_c(a[0], b[5], c3, c1, c2); | |
614 | mul_add_c(a[1], b[4], c3, c1, c2); | |
615 | mul_add_c(a[2], b[3], c3, c1, c2); | |
616 | mul_add_c(a[3], b[2], c3, c1, c2); | |
617 | mul_add_c(a[4], b[1], c3, c1, c2); | |
618 | mul_add_c(a[5], b[0], c3, c1, c2); | |
619 | r[5] = c3; | |
620 | c3 = 0; | |
621 | mul_add_c(a[6], b[0], c1, c2, c3); | |
622 | mul_add_c(a[5], b[1], c1, c2, c3); | |
623 | mul_add_c(a[4], b[2], c1, c2, c3); | |
624 | mul_add_c(a[3], b[3], c1, c2, c3); | |
625 | mul_add_c(a[2], b[4], c1, c2, c3); | |
626 | mul_add_c(a[1], b[5], c1, c2, c3); | |
627 | mul_add_c(a[0], b[6], c1, c2, c3); | |
628 | r[6] = c1; | |
629 | c1 = 0; | |
630 | mul_add_c(a[0], b[7], c2, c3, c1); | |
631 | mul_add_c(a[1], b[6], c2, c3, c1); | |
632 | mul_add_c(a[2], b[5], c2, c3, c1); | |
633 | mul_add_c(a[3], b[4], c2, c3, c1); | |
634 | mul_add_c(a[4], b[3], c2, c3, c1); | |
635 | mul_add_c(a[5], b[2], c2, c3, c1); | |
636 | mul_add_c(a[6], b[1], c2, c3, c1); | |
637 | mul_add_c(a[7], b[0], c2, c3, c1); | |
638 | r[7] = c2; | |
639 | c2 = 0; | |
640 | mul_add_c(a[7], b[1], c3, c1, c2); | |
641 | mul_add_c(a[6], b[2], c3, c1, c2); | |
642 | mul_add_c(a[5], b[3], c3, c1, c2); | |
643 | mul_add_c(a[4], b[4], c3, c1, c2); | |
644 | mul_add_c(a[3], b[5], c3, c1, c2); | |
645 | mul_add_c(a[2], b[6], c3, c1, c2); | |
646 | mul_add_c(a[1], b[7], c3, c1, c2); | |
647 | r[8] = c3; | |
648 | c3 = 0; | |
649 | mul_add_c(a[2], b[7], c1, c2, c3); | |
650 | mul_add_c(a[3], b[6], c1, c2, c3); | |
651 | mul_add_c(a[4], b[5], c1, c2, c3); | |
652 | mul_add_c(a[5], b[4], c1, c2, c3); | |
653 | mul_add_c(a[6], b[3], c1, c2, c3); | |
654 | mul_add_c(a[7], b[2], c1, c2, c3); | |
655 | r[9] = c1; | |
656 | c1 = 0; | |
657 | mul_add_c(a[7], b[3], c2, c3, c1); | |
658 | mul_add_c(a[6], b[4], c2, c3, c1); | |
659 | mul_add_c(a[5], b[5], c2, c3, c1); | |
660 | mul_add_c(a[4], b[6], c2, c3, c1); | |
661 | mul_add_c(a[3], b[7], c2, c3, c1); | |
662 | r[10] = c2; | |
663 | c2 = 0; | |
664 | mul_add_c(a[4], b[7], c3, c1, c2); | |
665 | mul_add_c(a[5], b[6], c3, c1, c2); | |
666 | mul_add_c(a[6], b[5], c3, c1, c2); | |
667 | mul_add_c(a[7], b[4], c3, c1, c2); | |
668 | r[11] = c3; | |
669 | c3 = 0; | |
670 | mul_add_c(a[7], b[5], c1, c2, c3); | |
671 | mul_add_c(a[6], b[6], c1, c2, c3); | |
672 | mul_add_c(a[5], b[7], c1, c2, c3); | |
673 | r[12] = c1; | |
674 | c1 = 0; | |
675 | mul_add_c(a[6], b[7], c2, c3, c1); | |
676 | mul_add_c(a[7], b[6], c2, c3, c1); | |
677 | r[13] = c2; | |
678 | c2 = 0; | |
679 | mul_add_c(a[7], b[7], c3, c1, c2); | |
680 | r[14] = c3; | |
681 | r[15] = c1; | |
682 | } | |
dfeab068 | 683 | |
6b691a5c | 684 | void bn_mul_comba4(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b) |
0f113f3e MC |
685 | { |
686 | BN_ULONG c1, c2, c3; | |
687 | ||
688 | c1 = 0; | |
689 | c2 = 0; | |
690 | c3 = 0; | |
691 | mul_add_c(a[0], b[0], c1, c2, c3); | |
692 | r[0] = c1; | |
693 | c1 = 0; | |
694 | mul_add_c(a[0], b[1], c2, c3, c1); | |
695 | mul_add_c(a[1], b[0], c2, c3, c1); | |
696 | r[1] = c2; | |
697 | c2 = 0; | |
698 | mul_add_c(a[2], b[0], c3, c1, c2); | |
699 | mul_add_c(a[1], b[1], c3, c1, c2); | |
700 | mul_add_c(a[0], b[2], c3, c1, c2); | |
701 | r[2] = c3; | |
702 | c3 = 0; | |
703 | mul_add_c(a[0], b[3], c1, c2, c3); | |
704 | mul_add_c(a[1], b[2], c1, c2, c3); | |
705 | mul_add_c(a[2], b[1], c1, c2, c3); | |
706 | mul_add_c(a[3], b[0], c1, c2, c3); | |
707 | r[3] = c1; | |
708 | c1 = 0; | |
709 | mul_add_c(a[3], b[1], c2, c3, c1); | |
710 | mul_add_c(a[2], b[2], c2, c3, c1); | |
711 | mul_add_c(a[1], b[3], c2, c3, c1); | |
712 | r[4] = c2; | |
713 | c2 = 0; | |
714 | mul_add_c(a[2], b[3], c3, c1, c2); | |
715 | mul_add_c(a[3], b[2], c3, c1, c2); | |
716 | r[5] = c3; | |
717 | c3 = 0; | |
718 | mul_add_c(a[3], b[3], c1, c2, c3); | |
719 | r[6] = c1; | |
720 | r[7] = c2; | |
721 | } | |
dfeab068 | 722 | |
cbd48ba6 | 723 | void bn_sqr_comba8(BN_ULONG *r, const BN_ULONG *a) |
0f113f3e MC |
724 | { |
725 | BN_ULONG c1, c2, c3; | |
726 | ||
727 | c1 = 0; | |
728 | c2 = 0; | |
729 | c3 = 0; | |
730 | sqr_add_c(a, 0, c1, c2, c3); | |
731 | r[0] = c1; | |
732 | c1 = 0; | |
733 | sqr_add_c2(a, 1, 0, c2, c3, c1); | |
734 | r[1] = c2; | |
735 | c2 = 0; | |
736 | sqr_add_c(a, 1, c3, c1, c2); | |
737 | sqr_add_c2(a, 2, 0, c3, c1, c2); | |
738 | r[2] = c3; | |
739 | c3 = 0; | |
740 | sqr_add_c2(a, 3, 0, c1, c2, c3); | |
741 | sqr_add_c2(a, 2, 1, c1, c2, c3); | |
742 | r[3] = c1; | |
743 | c1 = 0; | |
744 | sqr_add_c(a, 2, c2, c3, c1); | |
745 | sqr_add_c2(a, 3, 1, c2, c3, c1); | |
746 | sqr_add_c2(a, 4, 0, c2, c3, c1); | |
747 | r[4] = c2; | |
748 | c2 = 0; | |
749 | sqr_add_c2(a, 5, 0, c3, c1, c2); | |
750 | sqr_add_c2(a, 4, 1, c3, c1, c2); | |
751 | sqr_add_c2(a, 3, 2, c3, c1, c2); | |
752 | r[5] = c3; | |
753 | c3 = 0; | |
754 | sqr_add_c(a, 3, c1, c2, c3); | |
755 | sqr_add_c2(a, 4, 2, c1, c2, c3); | |
756 | sqr_add_c2(a, 5, 1, c1, c2, c3); | |
757 | sqr_add_c2(a, 6, 0, c1, c2, c3); | |
758 | r[6] = c1; | |
759 | c1 = 0; | |
760 | sqr_add_c2(a, 7, 0, c2, c3, c1); | |
761 | sqr_add_c2(a, 6, 1, c2, c3, c1); | |
762 | sqr_add_c2(a, 5, 2, c2, c3, c1); | |
763 | sqr_add_c2(a, 4, 3, c2, c3, c1); | |
764 | r[7] = c2; | |
765 | c2 = 0; | |
766 | sqr_add_c(a, 4, c3, c1, c2); | |
767 | sqr_add_c2(a, 5, 3, c3, c1, c2); | |
768 | sqr_add_c2(a, 6, 2, c3, c1, c2); | |
769 | sqr_add_c2(a, 7, 1, c3, c1, c2); | |
770 | r[8] = c3; | |
771 | c3 = 0; | |
772 | sqr_add_c2(a, 7, 2, c1, c2, c3); | |
773 | sqr_add_c2(a, 6, 3, c1, c2, c3); | |
774 | sqr_add_c2(a, 5, 4, c1, c2, c3); | |
775 | r[9] = c1; | |
776 | c1 = 0; | |
777 | sqr_add_c(a, 5, c2, c3, c1); | |
778 | sqr_add_c2(a, 6, 4, c2, c3, c1); | |
779 | sqr_add_c2(a, 7, 3, c2, c3, c1); | |
780 | r[10] = c2; | |
781 | c2 = 0; | |
782 | sqr_add_c2(a, 7, 4, c3, c1, c2); | |
783 | sqr_add_c2(a, 6, 5, c3, c1, c2); | |
784 | r[11] = c3; | |
785 | c3 = 0; | |
786 | sqr_add_c(a, 6, c1, c2, c3); | |
787 | sqr_add_c2(a, 7, 5, c1, c2, c3); | |
788 | r[12] = c1; | |
789 | c1 = 0; | |
790 | sqr_add_c2(a, 7, 6, c2, c3, c1); | |
791 | r[13] = c2; | |
792 | c2 = 0; | |
793 | sqr_add_c(a, 7, c3, c1, c2); | |
794 | r[14] = c3; | |
795 | r[15] = c1; | |
796 | } | |
dfeab068 | 797 | |
cbd48ba6 | 798 | void bn_sqr_comba4(BN_ULONG *r, const BN_ULONG *a) |
0f113f3e MC |
799 | { |
800 | BN_ULONG c1, c2, c3; | |
801 | ||
802 | c1 = 0; | |
803 | c2 = 0; | |
804 | c3 = 0; | |
805 | sqr_add_c(a, 0, c1, c2, c3); | |
806 | r[0] = c1; | |
807 | c1 = 0; | |
808 | sqr_add_c2(a, 1, 0, c2, c3, c1); | |
809 | r[1] = c2; | |
810 | c2 = 0; | |
811 | sqr_add_c(a, 1, c3, c1, c2); | |
812 | sqr_add_c2(a, 2, 0, c3, c1, c2); | |
813 | r[2] = c3; | |
814 | c3 = 0; | |
815 | sqr_add_c2(a, 3, 0, c1, c2, c3); | |
816 | sqr_add_c2(a, 2, 1, c1, c2, c3); | |
817 | r[3] = c1; | |
818 | c1 = 0; | |
819 | sqr_add_c(a, 2, c2, c3, c1); | |
820 | sqr_add_c2(a, 3, 1, c2, c3, c1); | |
821 | r[4] = c2; | |
822 | c2 = 0; | |
823 | sqr_add_c2(a, 3, 2, c3, c1, c2); | |
824 | r[5] = c3; | |
825 | c3 = 0; | |
826 | sqr_add_c(a, 3, c1, c2, c3); | |
827 | r[6] = c1; | |
828 | r[7] = c2; | |
829 | } | |
830 | ||
831 | # ifdef OPENSSL_NO_ASM | |
832 | # ifdef OPENSSL_BN_ASM_MONT | |
833 | # include <alloca.h> | |
e7382805 AP |
834 | /* |
835 | * This is essentially a reference implementation, which may or may not | |
ca04d7a2 AP |
836 | * result in performance improvement. E.g. on IA-32 this routine was |
837 | * observed to give 40% faster rsa1024 private key operations and 10% | |
838 | * faster rsa4096 ones, while on AMD64 it improves rsa1024 sign only | |
839 | * by 10% and *worsens* rsa4096 sign by 15%. Once again, it's a | |
df94f187 AP |
840 | * reference implementation, one to be used as starting point for |
841 | * platform-specific assembler. Mentioned numbers apply to compiler | |
842 | * generated code compiled with and without -DOPENSSL_BN_ASM_MONT and | |
843 | * can vary not only from platform to platform, but even for compiler | |
844 | * versions. Assembler vs. assembler improvement coefficients can | |
845 | * [and are known to] differ and are to be documented elsewhere. | |
e7382805 | 846 | */ |
0f113f3e MC |
847 | int bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, |
848 | const BN_ULONG *np, const BN_ULONG *n0p, int num) | |
849 | { | |
850 | BN_ULONG c0, c1, ml, *tp, n0; | |
851 | # ifdef mul64 | |
852 | BN_ULONG mh; | |
853 | # endif | |
854 | volatile BN_ULONG *vp; | |
855 | int i = 0, j; | |
856 | ||
857 | # if 0 /* template for platform-specific | |
858 | * implementation */ | |
859 | if (ap == bp) | |
860 | return bn_sqr_mont(rp, ap, np, n0p, num); | |
861 | # endif | |
862 | vp = tp = alloca((num + 2) * sizeof(BN_ULONG)); | |
863 | ||
864 | n0 = *n0p; | |
865 | ||
866 | c0 = 0; | |
867 | ml = bp[0]; | |
868 | # ifdef mul64 | |
869 | mh = HBITS(ml); | |
870 | ml = LBITS(ml); | |
871 | for (j = 0; j < num; ++j) | |
872 | mul(tp[j], ap[j], ml, mh, c0); | |
873 | # else | |
874 | for (j = 0; j < num; ++j) | |
875 | mul(tp[j], ap[j], ml, c0); | |
876 | # endif | |
877 | ||
878 | tp[num] = c0; | |
879 | tp[num + 1] = 0; | |
880 | goto enter; | |
881 | ||
882 | for (i = 0; i < num; i++) { | |
883 | c0 = 0; | |
884 | ml = bp[i]; | |
885 | # ifdef mul64 | |
886 | mh = HBITS(ml); | |
887 | ml = LBITS(ml); | |
888 | for (j = 0; j < num; ++j) | |
889 | mul_add(tp[j], ap[j], ml, mh, c0); | |
890 | # else | |
891 | for (j = 0; j < num; ++j) | |
892 | mul_add(tp[j], ap[j], ml, c0); | |
893 | # endif | |
894 | c1 = (tp[num] + c0) & BN_MASK2; | |
895 | tp[num] = c1; | |
896 | tp[num + 1] = (c1 < c0 ? 1 : 0); | |
897 | enter: | |
898 | c1 = tp[0]; | |
899 | ml = (c1 * n0) & BN_MASK2; | |
900 | c0 = 0; | |
901 | # ifdef mul64 | |
902 | mh = HBITS(ml); | |
903 | ml = LBITS(ml); | |
904 | mul_add(c1, np[0], ml, mh, c0); | |
905 | # else | |
906 | mul_add(c1, ml, np[0], c0); | |
907 | # endif | |
908 | for (j = 1; j < num; j++) { | |
909 | c1 = tp[j]; | |
910 | # ifdef mul64 | |
911 | mul_add(c1, np[j], ml, mh, c0); | |
912 | # else | |
913 | mul_add(c1, ml, np[j], c0); | |
914 | # endif | |
915 | tp[j - 1] = c1 & BN_MASK2; | |
916 | } | |
917 | c1 = (tp[num] + c0) & BN_MASK2; | |
918 | tp[num - 1] = c1; | |
919 | tp[num] = tp[num + 1] + (c1 < c0 ? 1 : 0); | |
920 | } | |
921 | ||
922 | if (tp[num] != 0 || tp[num - 1] >= np[num - 1]) { | |
923 | c0 = bn_sub_words(rp, tp, np, num); | |
924 | if (tp[num] != 0 || c0 == 0) { | |
925 | for (i = 0; i < num + 2; i++) | |
926 | vp[i] = 0; | |
927 | return 1; | |
928 | } | |
929 | } | |
930 | for (i = 0; i < num; i++) | |
931 | rp[i] = tp[i], vp[i] = 0; | |
932 | vp[num] = 0; | |
933 | vp[num + 1] = 0; | |
934 | return 1; | |
935 | } | |
936 | # else | |
ca04d7a2 AP |
937 | /* |
938 | * Return value of 0 indicates that multiplication/convolution was not | |
939 | * performed to signal the caller to fall down to alternative/original | |
940 | * code-path. | |
941 | */ | |
0f113f3e MC |
942 | int bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, |
943 | const BN_ULONG *np, const BN_ULONG *n0, int num) | |
944 | { | |
945 | return 0; | |
946 | } | |
947 | # endif /* OPENSSL_BN_ASM_MONT */ | |
948 | # endif | |
e7382805 | 949 | |
0f113f3e | 950 | #else /* !BN_MUL_COMBA */ |
dfeab068 RE |
951 | |
952 | /* hmm... is it faster just to do a multiply? */ | |
0f113f3e MC |
953 | # undef bn_sqr_comba4 |
954 | # undef bn_sqr_comba8 | |
e7382805 | 955 | void bn_sqr_comba4(BN_ULONG *r, const BN_ULONG *a) |
0f113f3e MC |
956 | { |
957 | BN_ULONG t[8]; | |
958 | bn_sqr_normal(r, a, 4, t); | |
959 | } | |
dfeab068 | 960 | |
e7382805 | 961 | void bn_sqr_comba8(BN_ULONG *r, const BN_ULONG *a) |
0f113f3e MC |
962 | { |
963 | BN_ULONG t[16]; | |
964 | bn_sqr_normal(r, a, 8, t); | |
965 | } | |
dfeab068 | 966 | |
6b691a5c | 967 | void bn_mul_comba4(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b) |
0f113f3e MC |
968 | { |
969 | r[4] = bn_mul_words(&(r[0]), a, 4, b[0]); | |
970 | r[5] = bn_mul_add_words(&(r[1]), a, 4, b[1]); | |
971 | r[6] = bn_mul_add_words(&(r[2]), a, 4, b[2]); | |
972 | r[7] = bn_mul_add_words(&(r[3]), a, 4, b[3]); | |
973 | } | |
dfeab068 | 974 | |
6b691a5c | 975 | void bn_mul_comba8(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b) |
0f113f3e MC |
976 | { |
977 | r[8] = bn_mul_words(&(r[0]), a, 8, b[0]); | |
978 | r[9] = bn_mul_add_words(&(r[1]), a, 8, b[1]); | |
979 | r[10] = bn_mul_add_words(&(r[2]), a, 8, b[2]); | |
980 | r[11] = bn_mul_add_words(&(r[3]), a, 8, b[3]); | |
981 | r[12] = bn_mul_add_words(&(r[4]), a, 8, b[4]); | |
982 | r[13] = bn_mul_add_words(&(r[5]), a, 8, b[5]); | |
983 | r[14] = bn_mul_add_words(&(r[6]), a, 8, b[6]); | |
984 | r[15] = bn_mul_add_words(&(r[7]), a, 8, b[7]); | |
985 | } | |
986 | ||
987 | # ifdef OPENSSL_NO_ASM | |
988 | # ifdef OPENSSL_BN_ASM_MONT | |
989 | # include <alloca.h> | |
990 | int bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, | |
991 | const BN_ULONG *np, const BN_ULONG *n0p, int num) | |
992 | { | |
993 | BN_ULONG c0, c1, *tp, n0 = *n0p; | |
994 | volatile BN_ULONG *vp; | |
995 | int i = 0, j; | |
996 | ||
997 | vp = tp = alloca((num + 2) * sizeof(BN_ULONG)); | |
998 | ||
999 | for (i = 0; i <= num; i++) | |
1000 | tp[i] = 0; | |
1001 | ||
1002 | for (i = 0; i < num; i++) { | |
1003 | c0 = bn_mul_add_words(tp, ap, num, bp[i]); | |
1004 | c1 = (tp[num] + c0) & BN_MASK2; | |
1005 | tp[num] = c1; | |
1006 | tp[num + 1] = (c1 < c0 ? 1 : 0); | |
1007 | ||
1008 | c0 = bn_mul_add_words(tp, np, num, tp[0] * n0); | |
1009 | c1 = (tp[num] + c0) & BN_MASK2; | |
1010 | tp[num] = c1; | |
1011 | tp[num + 1] += (c1 < c0 ? 1 : 0); | |
1012 | for (j = 0; j <= num; j++) | |
1013 | tp[j] = tp[j + 1]; | |
1014 | } | |
1015 | ||
1016 | if (tp[num] != 0 || tp[num - 1] >= np[num - 1]) { | |
1017 | c0 = bn_sub_words(rp, tp, np, num); | |
1018 | if (tp[num] != 0 || c0 == 0) { | |
1019 | for (i = 0; i < num + 2; i++) | |
1020 | vp[i] = 0; | |
1021 | return 1; | |
1022 | } | |
1023 | } | |
1024 | for (i = 0; i < num; i++) | |
1025 | rp[i] = tp[i], vp[i] = 0; | |
1026 | vp[num] = 0; | |
1027 | vp[num + 1] = 0; | |
1028 | return 1; | |
1029 | } | |
1030 | # else | |
1031 | int bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, | |
1032 | const BN_ULONG *np, const BN_ULONG *n0, int num) | |
1033 | { | |
1034 | return 0; | |
1035 | } | |
1036 | # endif /* OPENSSL_BN_ASM_MONT */ | |
1037 | # endif | |
1038 | ||
1039 | #endif /* !BN_MUL_COMBA */ |