]>
Commit | Line | Data |
---|---|---|
d02b48c6 | 1 | /* crypto/bn/bn_exp.c */ |
58964a49 | 2 | /* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com) |
d02b48c6 RE |
3 | * All rights reserved. |
4 | * | |
5 | * This package is an SSL implementation written | |
6 | * by Eric Young (eay@cryptsoft.com). | |
7 | * The implementation was written so as to conform with Netscapes SSL. | |
8 | * | |
9 | * This library is free for commercial and non-commercial use as long as | |
10 | * the following conditions are aheared to. The following conditions | |
11 | * apply to all code found in this distribution, be it the RC4, RSA, | |
12 | * lhash, DES, etc., code; not just the SSL code. The SSL documentation | |
13 | * included with this distribution is covered by the same copyright terms | |
14 | * except that the holder is Tim Hudson (tjh@cryptsoft.com). | |
15 | * | |
16 | * Copyright remains Eric Young's, and as such any Copyright notices in | |
17 | * the code are not to be removed. | |
18 | * If this package is used in a product, Eric Young should be given attribution | |
19 | * as the author of the parts of the library used. | |
20 | * This can be in the form of a textual message at program startup or | |
21 | * in documentation (online or textual) provided with the package. | |
22 | * | |
23 | * Redistribution and use in source and binary forms, with or without | |
24 | * modification, are permitted provided that the following conditions | |
25 | * are met: | |
26 | * 1. Redistributions of source code must retain the copyright | |
27 | * notice, this list of conditions and the following disclaimer. | |
28 | * 2. Redistributions in binary form must reproduce the above copyright | |
29 | * notice, this list of conditions and the following disclaimer in the | |
30 | * documentation and/or other materials provided with the distribution. | |
31 | * 3. All advertising materials mentioning features or use of this software | |
32 | * must display the following acknowledgement: | |
33 | * "This product includes cryptographic software written by | |
34 | * Eric Young (eay@cryptsoft.com)" | |
35 | * The word 'cryptographic' can be left out if the rouines from the library | |
36 | * being used are not cryptographic related :-). | |
37 | * 4. If you include any Windows specific code (or a derivative thereof) from | |
38 | * the apps directory (application code) you must include an acknowledgement: | |
39 | * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)" | |
40 | * | |
41 | * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND | |
42 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |
43 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |
44 | * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE | |
45 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |
46 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS | |
47 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | |
48 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT | |
49 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY | |
50 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF | |
51 | * SUCH DAMAGE. | |
52 | * | |
53 | * The licence and distribution terms for any publically available version or | |
54 | * derivative of this code cannot be changed. i.e. this code cannot simply be | |
55 | * copied and put under another distribution licence | |
56 | * [including the GNU Public Licence.] | |
57 | */ | |
f8989a21 | 58 | /* ==================================================================== |
46a64376 | 59 | * Copyright (c) 1998-2005 The OpenSSL Project. All rights reserved. |
f8989a21 BM |
60 | * |
61 | * Redistribution and use in source and binary forms, with or without | |
62 | * modification, are permitted provided that the following conditions | |
63 | * are met: | |
64 | * | |
65 | * 1. Redistributions of source code must retain the above copyright | |
66 | * notice, this list of conditions and the following disclaimer. | |
67 | * | |
68 | * 2. Redistributions in binary form must reproduce the above copyright | |
69 | * notice, this list of conditions and the following disclaimer in | |
70 | * the documentation and/or other materials provided with the | |
71 | * distribution. | |
72 | * | |
73 | * 3. All advertising materials mentioning features or use of this | |
74 | * software must display the following acknowledgment: | |
75 | * "This product includes software developed by the OpenSSL Project | |
76 | * for use in the OpenSSL Toolkit. (http://www.openssl.org/)" | |
77 | * | |
78 | * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to | |
79 | * endorse or promote products derived from this software without | |
80 | * prior written permission. For written permission, please contact | |
81 | * openssl-core@openssl.org. | |
82 | * | |
83 | * 5. Products derived from this software may not be called "OpenSSL" | |
84 | * nor may "OpenSSL" appear in their names without prior written | |
85 | * permission of the OpenSSL Project. | |
86 | * | |
87 | * 6. Redistributions of any form whatsoever must retain the following | |
88 | * acknowledgment: | |
89 | * "This product includes software developed by the OpenSSL Project | |
90 | * for use in the OpenSSL Toolkit (http://www.openssl.org/)" | |
91 | * | |
92 | * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY | |
93 | * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |
94 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR | |
95 | * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR | |
96 | * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | |
97 | * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT | |
98 | * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; | |
99 | * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | |
100 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, | |
101 | * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) | |
102 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED | |
103 | * OF THE POSSIBILITY OF SUCH DAMAGE. | |
104 | * ==================================================================== | |
105 | * | |
106 | * This product includes cryptographic software written by Eric Young | |
107 | * (eay@cryptsoft.com). This product includes software written by Tim | |
108 | * Hudson (tjh@cryptsoft.com). | |
109 | * | |
110 | */ | |
111 | ||
7edfe674 | 112 | #define OPENSSL_FIPSAPI |
d02b48c6 | 113 | |
d02b48c6 RE |
114 | #include "cryptlib.h" |
115 | #include "bn_lcl.h" | |
6dad7bd6 | 116 | |
361512da AP |
117 | #include <stdlib.h> |
118 | #ifdef _WIN32 | |
119 | # include <malloc.h> | |
120 | # ifndef alloca | |
121 | # define alloca _alloca | |
122 | # endif | |
123 | #elif defined(__GNUC__) | |
124 | # ifndef alloca | |
125 | # define alloca(s) __builtin_alloca((s)) | |
126 | # endif | |
b74ce8d9 AP |
127 | #elif defined(__sun) |
128 | # include <alloca.h> | |
361512da AP |
129 | #endif |
130 | ||
ca48ace5 AP |
131 | #undef RSAZ_ENABLED |
132 | #if defined(OPENSSL_BN_ASM_MONT) && \ | |
133 | (defined(__x86_64) || defined(__x86_64__) || \ | |
134 | defined(_M_AMD64) || defined(_M_X64)) | |
135 | # include "rsaz_exp.h" | |
136 | # define RSAZ_ENABLED | |
137 | #endif | |
138 | ||
cbce8c46 | 139 | #undef SPARC_T4_MONT |
b69437e1 | 140 | #if defined(OPENSSL_BN_ASM_MONT) && (defined(__sparc__) || defined(__sparc)) |
68c06bf6 AP |
141 | # include "sparc_arch.h" |
142 | extern unsigned int OPENSSL_sparcv9cap_P[]; | |
cbce8c46 | 143 | # define SPARC_T4_MONT |
68c06bf6 AP |
144 | #endif |
145 | ||
46a64376 | 146 | /* maximum precomputation table size for *variable* sliding windows */ |
dc434bbc | 147 | #define TABLE_SIZE 32 |
dfeab068 | 148 | |
58964a49 | 149 | /* this one works - simple but works */ |
020fc820 | 150 | int BN_exp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, BN_CTX *ctx) |
58964a49 | 151 | { |
9b141126 | 152 | int i,bits,ret=0; |
a0a54079 | 153 | BIGNUM *v,*rr; |
58964a49 | 154 | |
bd31fb21 | 155 | if (BN_get_flags(p, BN_FLG_CONSTTIME) != 0) |
46a64376 | 156 | { |
bd31fb21 | 157 | /* BN_FLG_CONSTTIME only supported by BN_mod_exp_mont() */ |
46a64376 BM |
158 | BNerr(BN_F_BN_EXP,ERR_R_SHOULD_NOT_HAVE_BEEN_CALLED); |
159 | return -1; | |
160 | } | |
161 | ||
9b141126 | 162 | BN_CTX_start(ctx); |
a0a54079 | 163 | if ((r == a) || (r == p)) |
9b141126 | 164 | rr = BN_CTX_get(ctx); |
a0a54079 | 165 | else |
9b141126 | 166 | rr = r; |
d70323f1 DSH |
167 | v = BN_CTX_get(ctx); |
168 | if (rr == NULL || v == NULL) goto err; | |
58964a49 RE |
169 | |
170 | if (BN_copy(v,a) == NULL) goto err; | |
171 | bits=BN_num_bits(p); | |
172 | ||
173 | if (BN_is_odd(p)) | |
a0a54079 MC |
174 | { if (BN_copy(rr,a) == NULL) goto err; } |
175 | else { if (!BN_one(rr)) goto err; } | |
58964a49 RE |
176 | |
177 | for (i=1; i<bits; i++) | |
178 | { | |
a0a54079 | 179 | if (!BN_sqr(v,v,ctx)) goto err; |
58964a49 RE |
180 | if (BN_is_bit_set(p,i)) |
181 | { | |
a0a54079 | 182 | if (!BN_mul(rr,rr,v,ctx)) goto err; |
58964a49 RE |
183 | } |
184 | } | |
185 | ret=1; | |
186 | err: | |
a0a54079 | 187 | if (r != rr) BN_copy(r,rr); |
9b141126 | 188 | BN_CTX_end(ctx); |
d870740c | 189 | bn_check_top(r); |
58964a49 RE |
190 | return(ret); |
191 | } | |
192 | ||
6dad7bd6 | 193 | |
020fc820 | 194 | int BN_mod_exp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, const BIGNUM *m, |
84c15db5 | 195 | BN_CTX *ctx) |
d02b48c6 RE |
196 | { |
197 | int ret; | |
198 | ||
dfeab068 RE |
199 | bn_check_top(a); |
200 | bn_check_top(p); | |
201 | bn_check_top(m); | |
202 | ||
78a0c1f1 BM |
203 | /* For even modulus m = 2^k*m_odd, it might make sense to compute |
204 | * a^p mod m_odd and a^p mod 2^k separately (with Montgomery | |
205 | * exponentiation for the odd part), using appropriate exponent | |
206 | * reductions, and combine the results using the CRT. | |
207 | * | |
208 | * For now, we use Montgomery only if the modulus is odd; otherwise, | |
209 | * exponentiation using the reciprocal-based quick remaindering | |
210 | * algorithm is used. | |
211 | * | |
c94b6de0 BM |
212 | * (Timing obtained with expspeed.c [computations a^p mod m |
213 | * where a, p, m are of the same length: 256, 512, 1024, 2048, | |
214 | * 4096, 8192 bits], compared to the running time of the | |
215 | * standard algorithm: | |
216 | * | |
217 | * BN_mod_exp_mont 33 .. 40 % [AMD K6-2, Linux, debug configuration] | |
218 | * 55 .. 77 % [UltraSparc processor, but | |
219 | * debug-solaris-sparcv8-gcc conf.] | |
220 | * | |
221 | * BN_mod_exp_recp 50 .. 70 % [AMD K6-2, Linux, debug configuration] | |
222 | * 62 .. 118 % [UltraSparc, debug-solaris-sparcv8-gcc] | |
223 | * | |
224 | * On the Sparc, BN_mod_exp_recp was faster than BN_mod_exp_mont | |
225 | * at 2048 and more bits, but at 512 and 1024 bits, it was | |
226 | * slower even than the standard algorithm! | |
227 | * | |
228 | * "Real" timings [linux-elf, solaris-sparcv9-gcc configurations] | |
229 | * should be obtained when the new Montgomery reduction code | |
230 | * has been integrated into OpenSSL.) | |
78a0c1f1 BM |
231 | */ |
232 | ||
233 | #define MONT_MUL_MOD | |
25439b76 | 234 | #define MONT_EXP_WORD |
78a0c1f1 BM |
235 | #define RECP_MUL_MOD |
236 | ||
d02b48c6 RE |
237 | #ifdef MONT_MUL_MOD |
238 | /* I have finally been able to take out this pre-condition of | |
239 | * the top bit being set. It was caused by an error in BN_div | |
240 | * with negatives. There was also another problem when for a^b%m | |
241 | * a >= m. eay 07-May-97 */ | |
242 | /* if ((m->d[m->top-1]&BN_TBIT) && BN_is_odd(m)) */ | |
243 | ||
244 | if (BN_is_odd(m)) | |
6dad7bd6 | 245 | { |
25439b76 | 246 | # ifdef MONT_EXP_WORD |
bd31fb21 | 247 | if (a->top == 1 && !a->neg && (BN_get_flags(p, BN_FLG_CONSTTIME) == 0)) |
6dad7bd6 BM |
248 | { |
249 | BN_ULONG A = a->d[0]; | |
250 | ret=BN_mod_exp_mont_word(r,A,p,m,ctx,NULL); | |
251 | } | |
252 | else | |
25439b76 | 253 | # endif |
6dad7bd6 BM |
254 | ret=BN_mod_exp_mont(r,a,p,m,ctx,NULL); |
255 | } | |
d02b48c6 RE |
256 | else |
257 | #endif | |
258 | #ifdef RECP_MUL_MOD | |
259 | { ret=BN_mod_exp_recp(r,a,p,m,ctx); } | |
260 | #else | |
261 | { ret=BN_mod_exp_simple(r,a,p,m,ctx); } | |
262 | #endif | |
263 | ||
d870740c | 264 | bn_check_top(r); |
d02b48c6 RE |
265 | return(ret); |
266 | } | |
267 | ||
6dad7bd6 | 268 | |
84c15db5 BL |
269 | int BN_mod_exp_recp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, |
270 | const BIGNUM *m, BN_CTX *ctx) | |
d02b48c6 | 271 | { |
dfeab068 | 272 | int i,j,bits,ret=0,wstart,wend,window,wvalue; |
c86f2054 | 273 | int start=1; |
dfeab068 | 274 | BIGNUM *aa; |
c86f2054 GT |
275 | /* Table of variables obtained from 'ctx' */ |
276 | BIGNUM *val[TABLE_SIZE]; | |
dfeab068 | 277 | BN_RECP_CTX recp; |
d02b48c6 | 278 | |
bd31fb21 | 279 | if (BN_get_flags(p, BN_FLG_CONSTTIME) != 0) |
46a64376 | 280 | { |
bd31fb21 | 281 | /* BN_FLG_CONSTTIME only supported by BN_mod_exp_mont() */ |
46a64376 BM |
282 | BNerr(BN_F_BN_MOD_EXP_RECP,ERR_R_SHOULD_NOT_HAVE_BEEN_CALLED); |
283 | return -1; | |
284 | } | |
285 | ||
d02b48c6 RE |
286 | bits=BN_num_bits(p); |
287 | ||
288 | if (bits == 0) | |
289 | { | |
cd2eebfd BM |
290 | ret = BN_one(r); |
291 | return ret; | |
292 | } | |
9b141126 UM |
293 | |
294 | BN_CTX_start(ctx); | |
c86f2054 GT |
295 | aa = BN_CTX_get(ctx); |
296 | val[0] = BN_CTX_get(ctx); | |
297 | if(!aa || !val[0]) goto err; | |
9b141126 | 298 | |
dfeab068 | 299 | BN_RECP_CTX_init(&recp); |
8dea52fa BM |
300 | if (m->neg) |
301 | { | |
302 | /* ignore sign of 'm' */ | |
303 | if (!BN_copy(aa, m)) goto err; | |
304 | aa->neg = 0; | |
305 | if (BN_RECP_CTX_set(&recp,aa,ctx) <= 0) goto err; | |
306 | } | |
307 | else | |
308 | { | |
309 | if (BN_RECP_CTX_set(&recp,m,ctx) <= 0) goto err; | |
310 | } | |
dfeab068 | 311 | |
c86f2054 GT |
312 | if (!BN_nnmod(val[0],a,m,ctx)) goto err; /* 1 */ |
313 | if (BN_is_zero(val[0])) | |
73c2522c | 314 | { |
b6358c89 GT |
315 | BN_zero(r); |
316 | ret = 1; | |
73c2522c BM |
317 | goto err; |
318 | } | |
d02b48c6 | 319 | |
dc434bbc BM |
320 | window = BN_window_bits_for_exponent_size(bits); |
321 | if (window > 1) | |
d02b48c6 | 322 | { |
c86f2054 | 323 | if (!BN_mod_mul_reciprocal(aa,val[0],val[0],&recp,ctx)) |
dc434bbc BM |
324 | goto err; /* 2 */ |
325 | j=1<<(window-1); | |
326 | for (i=1; i<j; i++) | |
327 | { | |
c86f2054 GT |
328 | if(((val[i] = BN_CTX_get(ctx)) == NULL) || |
329 | !BN_mod_mul_reciprocal(val[i],val[i-1], | |
330 | aa,&recp,ctx)) | |
dc434bbc BM |
331 | goto err; |
332 | } | |
d02b48c6 | 333 | } |
dc434bbc | 334 | |
d02b48c6 RE |
335 | start=1; /* This is used to avoid multiplication etc |
336 | * when there is only the value '1' in the | |
337 | * buffer. */ | |
338 | wvalue=0; /* The 'value' of the window */ | |
339 | wstart=bits-1; /* The top bit of the window */ | |
340 | wend=0; /* The bottom bit of the window */ | |
341 | ||
342 | if (!BN_one(r)) goto err; | |
343 | ||
344 | for (;;) | |
345 | { | |
346 | if (BN_is_bit_set(p,wstart) == 0) | |
347 | { | |
348 | if (!start) | |
dfeab068 | 349 | if (!BN_mod_mul_reciprocal(r,r,r,&recp,ctx)) |
d02b48c6 RE |
350 | goto err; |
351 | if (wstart == 0) break; | |
352 | wstart--; | |
353 | continue; | |
354 | } | |
355 | /* We now have wstart on a 'set' bit, we now need to work out | |
356 | * how bit a window to do. To do this we need to scan | |
357 | * forward until the last set bit before the end of the | |
358 | * window */ | |
359 | j=wstart; | |
360 | wvalue=1; | |
361 | wend=0; | |
362 | for (i=1; i<window; i++) | |
363 | { | |
364 | if (wstart-i < 0) break; | |
365 | if (BN_is_bit_set(p,wstart-i)) | |
366 | { | |
367 | wvalue<<=(i-wend); | |
368 | wvalue|=1; | |
369 | wend=i; | |
370 | } | |
371 | } | |
372 | ||
373 | /* wend is the size of the current window */ | |
374 | j=wend+1; | |
375 | /* add the 'bytes above' */ | |
376 | if (!start) | |
377 | for (i=0; i<j; i++) | |
378 | { | |
dfeab068 | 379 | if (!BN_mod_mul_reciprocal(r,r,r,&recp,ctx)) |
d02b48c6 RE |
380 | goto err; |
381 | } | |
382 | ||
383 | /* wvalue will be an odd number < 2^window */ | |
c86f2054 | 384 | if (!BN_mod_mul_reciprocal(r,r,val[wvalue>>1],&recp,ctx)) |
d02b48c6 RE |
385 | goto err; |
386 | ||
387 | /* move the 'window' down further */ | |
388 | wstart-=wend+1; | |
389 | wvalue=0; | |
390 | start=0; | |
391 | if (wstart < 0) break; | |
392 | } | |
393 | ret=1; | |
394 | err: | |
9b141126 | 395 | BN_CTX_end(ctx); |
dfeab068 | 396 | BN_RECP_CTX_free(&recp); |
d870740c | 397 | bn_check_top(r); |
d02b48c6 RE |
398 | return(ret); |
399 | } | |
d02b48c6 | 400 | |
6dad7bd6 | 401 | |
020fc820 | 402 | int BN_mod_exp_mont(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p, |
84c15db5 | 403 | const BIGNUM *m, BN_CTX *ctx, BN_MONT_CTX *in_mont) |
d02b48c6 RE |
404 | { |
405 | int i,j,bits,ret=0,wstart,wend,window,wvalue; | |
c86f2054 | 406 | int start=1; |
84c15db5 | 407 | BIGNUM *d,*r; |
020fc820 | 408 | const BIGNUM *aa; |
c86f2054 GT |
409 | /* Table of variables obtained from 'ctx' */ |
410 | BIGNUM *val[TABLE_SIZE]; | |
d02b48c6 RE |
411 | BN_MONT_CTX *mont=NULL; |
412 | ||
bd31fb21 | 413 | if (BN_get_flags(p, BN_FLG_CONSTTIME) != 0) |
46a64376 BM |
414 | { |
415 | return BN_mod_exp_mont_consttime(rr, a, p, m, ctx, in_mont); | |
416 | } | |
417 | ||
dfeab068 RE |
418 | bn_check_top(a); |
419 | bn_check_top(p); | |
420 | bn_check_top(m); | |
421 | ||
82b2f57e | 422 | if (!BN_is_odd(m)) |
d02b48c6 RE |
423 | { |
424 | BNerr(BN_F_BN_MOD_EXP_MONT,BN_R_CALLED_WITH_EVEN_MODULUS); | |
425 | return(0); | |
426 | } | |
d02b48c6 RE |
427 | bits=BN_num_bits(p); |
428 | if (bits == 0) | |
429 | { | |
cd2eebfd BM |
430 | ret = BN_one(rr); |
431 | return ret; | |
432 | } | |
73c2522c | 433 | |
9b141126 UM |
434 | BN_CTX_start(ctx); |
435 | d = BN_CTX_get(ctx); | |
436 | r = BN_CTX_get(ctx); | |
c86f2054 GT |
437 | val[0] = BN_CTX_get(ctx); |
438 | if (!d || !r || !val[0]) goto err; | |
d02b48c6 RE |
439 | |
440 | /* If this is not done, things will break in the montgomery | |
441 | * part */ | |
442 | ||
58964a49 RE |
443 | if (in_mont != NULL) |
444 | mont=in_mont; | |
445 | else | |
58964a49 RE |
446 | { |
447 | if ((mont=BN_MONT_CTX_new()) == NULL) goto err; | |
448 | if (!BN_MONT_CTX_set(mont,m,ctx)) goto err; | |
449 | } | |
d02b48c6 | 450 | |
499e167f | 451 | if (a->neg || BN_ucmp(a,m) >= 0) |
d02b48c6 | 452 | { |
c86f2054 | 453 | if (!BN_nnmod(val[0],a,m,ctx)) |
6dad7bd6 | 454 | goto err; |
c86f2054 | 455 | aa= val[0]; |
d02b48c6 RE |
456 | } |
457 | else | |
458 | aa=a; | |
73c2522c BM |
459 | if (BN_is_zero(aa)) |
460 | { | |
b6358c89 GT |
461 | BN_zero(rr); |
462 | ret = 1; | |
73c2522c BM |
463 | goto err; |
464 | } | |
c86f2054 | 465 | if (!BN_to_montgomery(val[0],aa,mont,ctx)) goto err; /* 1 */ |
d02b48c6 | 466 | |
dc434bbc BM |
467 | window = BN_window_bits_for_exponent_size(bits); |
468 | if (window > 1) | |
d02b48c6 | 469 | { |
c86f2054 | 470 | if (!BN_mod_mul_montgomery(d,val[0],val[0],mont,ctx)) goto err; /* 2 */ |
dc434bbc BM |
471 | j=1<<(window-1); |
472 | for (i=1; i<j; i++) | |
473 | { | |
c86f2054 GT |
474 | if(((val[i] = BN_CTX_get(ctx)) == NULL) || |
475 | !BN_mod_mul_montgomery(val[i],val[i-1], | |
476 | d,mont,ctx)) | |
dc434bbc BM |
477 | goto err; |
478 | } | |
d02b48c6 | 479 | } |
d02b48c6 RE |
480 | |
481 | start=1; /* This is used to avoid multiplication etc | |
482 | * when there is only the value '1' in the | |
483 | * buffer. */ | |
484 | wvalue=0; /* The 'value' of the window */ | |
485 | wstart=bits-1; /* The top bit of the window */ | |
486 | wend=0; /* The bottom bit of the window */ | |
487 | ||
4ddacd99 | 488 | #if 1 /* by Shay Gueron's suggestion */ |
cbce8c46 AP |
489 | j = m->top; /* borrow j */ |
490 | if (m->d[j-1] & (((BN_ULONG)1)<<(BN_BITS2-1))) | |
491 | { | |
492 | if (bn_wexpand(r,j) == NULL) goto err; | |
493 | /* 2^(top*BN_BITS2) - m */ | |
494 | r->d[0] = (0-m->d[0])&BN_MASK2; | |
495 | for(i=1;i<j;i++) r->d[i] = (~m->d[i])&BN_MASK2; | |
496 | r->top = j; | |
497 | } | |
498 | else | |
4ddacd99 | 499 | #endif |
cbce8c46 | 500 | if (!BN_to_montgomery(r,BN_value_one(),mont,ctx)) goto err; |
d02b48c6 RE |
501 | for (;;) |
502 | { | |
503 | if (BN_is_bit_set(p,wstart) == 0) | |
504 | { | |
505 | if (!start) | |
58964a49 | 506 | { |
d02b48c6 RE |
507 | if (!BN_mod_mul_montgomery(r,r,r,mont,ctx)) |
508 | goto err; | |
58964a49 | 509 | } |
d02b48c6 RE |
510 | if (wstart == 0) break; |
511 | wstart--; | |
512 | continue; | |
513 | } | |
514 | /* We now have wstart on a 'set' bit, we now need to work out | |
515 | * how bit a window to do. To do this we need to scan | |
516 | * forward until the last set bit before the end of the | |
517 | * window */ | |
518 | j=wstart; | |
519 | wvalue=1; | |
520 | wend=0; | |
521 | for (i=1; i<window; i++) | |
522 | { | |
523 | if (wstart-i < 0) break; | |
524 | if (BN_is_bit_set(p,wstart-i)) | |
525 | { | |
526 | wvalue<<=(i-wend); | |
527 | wvalue|=1; | |
528 | wend=i; | |
529 | } | |
530 | } | |
531 | ||
532 | /* wend is the size of the current window */ | |
533 | j=wend+1; | |
534 | /* add the 'bytes above' */ | |
535 | if (!start) | |
536 | for (i=0; i<j; i++) | |
537 | { | |
538 | if (!BN_mod_mul_montgomery(r,r,r,mont,ctx)) | |
539 | goto err; | |
540 | } | |
541 | ||
542 | /* wvalue will be an odd number < 2^window */ | |
c86f2054 | 543 | if (!BN_mod_mul_montgomery(r,r,val[wvalue>>1],mont,ctx)) |
d02b48c6 RE |
544 | goto err; |
545 | ||
546 | /* move the 'window' down further */ | |
547 | wstart-=wend+1; | |
548 | wvalue=0; | |
549 | start=0; | |
550 | if (wstart < 0) break; | |
551 | } | |
cbce8c46 | 552 | #if defined(SPARC_T4_MONT) |
4ddacd99 AP |
553 | if (OPENSSL_sparcv9cap_P[0]&(SPARCV9_VIS3|SPARCV9_PREFER_FPU)) |
554 | { | |
555 | j = mont->N.top; /* borrow j */ | |
556 | val[0]->d[0] = 1; /* borrow val[0] */ | |
557 | for (i=1;i<j;i++) val[0]->d[i] = 0; | |
558 | val[0]->top = j; | |
559 | if (!BN_mod_mul_montgomery(rr,r,val[0],mont,ctx)) goto err; | |
560 | } | |
561 | else | |
562 | #endif | |
e958c5af | 563 | if (!BN_from_montgomery(rr,r,mont,ctx)) goto err; |
d02b48c6 RE |
564 | ret=1; |
565 | err: | |
58964a49 | 566 | if ((in_mont == NULL) && (mont != NULL)) BN_MONT_CTX_free(mont); |
9b141126 | 567 | BN_CTX_end(ctx); |
d870740c | 568 | bn_check_top(rr); |
d02b48c6 RE |
569 | return(ret); |
570 | } | |
6dad7bd6 | 571 | |
cbce8c46 | 572 | #if defined(SPARC_T4_MONT) |
4ddacd99 AP |
573 | static BN_ULONG bn_get_bits(const BIGNUM *a, int bitpos) |
574 | { | |
575 | BN_ULONG ret=0; | |
576 | int wordpos; | |
577 | ||
578 | wordpos = bitpos/BN_BITS2; | |
579 | bitpos %= BN_BITS2; | |
580 | if (wordpos>=0 && wordpos < a->top) | |
581 | { | |
582 | ret = a->d[wordpos]&BN_MASK2; | |
583 | if (bitpos) | |
584 | { | |
585 | ret >>= bitpos; | |
586 | if (++wordpos < a->top) | |
587 | ret |= a->d[wordpos]<<(BN_BITS2-bitpos); | |
588 | } | |
589 | } | |
590 | ||
591 | return ret&BN_MASK2; | |
592 | } | |
593 | #endif | |
46a64376 BM |
594 | |
595 | /* BN_mod_exp_mont_consttime() stores the precomputed powers in a specific layout | |
596 | * so that accessing any of these table values shows the same access pattern as far | |
597 | * as cache lines are concerned. The following functions are used to transfer a BIGNUM | |
598 | * from/to that table. */ | |
599 | ||
8329e2e7 | 600 | static int MOD_EXP_CTIME_COPY_TO_PREBUF(const BIGNUM *b, int top, unsigned char *buf, int idx, int width) |
46a64376 BM |
601 | { |
602 | size_t i, j; | |
603 | ||
8329e2e7 AP |
604 | if (top > b->top) |
605 | top = b->top; /* this works because 'buf' is explicitly zeroed */ | |
46a64376 BM |
606 | for (i = 0, j=idx; i < top * sizeof b->d[0]; i++, j+=width) |
607 | { | |
608 | buf[j] = ((unsigned char*)b->d)[i]; | |
609 | } | |
610 | ||
46a64376 BM |
611 | return 1; |
612 | } | |
613 | ||
6343829a | 614 | static int MOD_EXP_CTIME_COPY_FROM_PREBUF(BIGNUM *b, int top, unsigned char *buf, int idx, int width) |
46a64376 BM |
615 | { |
616 | size_t i, j; | |
617 | ||
618 | if (bn_wexpand(b, top) == NULL) | |
619 | return 0; | |
620 | ||
621 | for (i=0, j=idx; i < top * sizeof b->d[0]; i++, j+=width) | |
622 | { | |
623 | ((unsigned char*)b->d)[i] = buf[j]; | |
624 | } | |
625 | ||
626 | b->top = top; | |
627 | bn_correct_top(b); | |
628 | return 1; | |
629 | } | |
630 | ||
631 | /* Given a pointer value, compute the next address that is a cache line multiple. */ | |
632 | #define MOD_EXP_CTIME_ALIGN(x_) \ | |
361512da | 633 | ((unsigned char*)(x_) + (MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH - (((size_t)(x_)) & (MOD_EXP_CTIME_MIN_CACHE_LINE_MASK)))) |
46a64376 BM |
634 | |
635 | /* This variant of BN_mod_exp_mont() uses fixed windows and the special | |
636 | * precomputation memory layout to limit data-dependency to a minimum | |
637 | * to protect secret exponents (cf. the hyper-threading timing attacks | |
638 | * pointed out by Colin Percival, | |
639 | * http://www.daemonology.net/hyperthreading-considered-harmful/) | |
640 | */ | |
641 | int BN_mod_exp_mont_consttime(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p, | |
642 | const BIGNUM *m, BN_CTX *ctx, BN_MONT_CTX *in_mont) | |
643 | { | |
361512da | 644 | int i,bits,ret=0,window,wvalue; |
6343829a | 645 | int top; |
46a64376 BM |
646 | BN_MONT_CTX *mont=NULL; |
647 | ||
648 | int numPowers; | |
649 | unsigned char *powerbufFree=NULL; | |
6343829a | 650 | int powerbufLen = 0; |
46a64376 | 651 | unsigned char *powerbuf=NULL; |
8329e2e7 | 652 | BIGNUM tmp, am; |
cbce8c46 | 653 | #if defined(SPARC_T4_MONT) |
68c06bf6 AP |
654 | unsigned int t4=0; |
655 | #endif | |
46a64376 BM |
656 | |
657 | bn_check_top(a); | |
658 | bn_check_top(p); | |
659 | bn_check_top(m); | |
660 | ||
661 | top = m->top; | |
662 | ||
663 | if (!(m->d[0] & 1)) | |
664 | { | |
665 | BNerr(BN_F_BN_MOD_EXP_MONT_CONSTTIME,BN_R_CALLED_WITH_EVEN_MODULUS); | |
666 | return(0); | |
667 | } | |
668 | bits=BN_num_bits(p); | |
669 | if (bits == 0) | |
670 | { | |
671 | ret = BN_one(rr); | |
672 | return ret; | |
673 | } | |
674 | ||
46a64376 | 675 | BN_CTX_start(ctx); |
46a64376 BM |
676 | |
677 | /* Allocate a montgomery context if it was not supplied by the caller. | |
678 | * If this is not done, things will break in the montgomery part. | |
679 | */ | |
680 | if (in_mont != NULL) | |
681 | mont=in_mont; | |
682 | else | |
683 | { | |
684 | if ((mont=BN_MONT_CTX_new()) == NULL) goto err; | |
685 | if (!BN_MONT_CTX_set(mont,m,ctx)) goto err; | |
686 | } | |
687 | ||
ca48ace5 AP |
688 | #ifdef RSAZ_ENABLED |
689 | /* | |
690 | * If the size of the operands allow it, perform the optimized | |
691 | * RSAZ exponentiation. For further information see | |
692 | * crypto/bn/rsaz_exp.c and accompanying assembly modules. | |
693 | */ | |
694 | if ((16 == a->top) && (16 == p->top) && (BN_num_bits(m) == 1024) | |
695 | && rsaz_avx2_eligible()) | |
696 | { | |
697 | if (NULL == bn_wexpand(rr, 16)) goto err; | |
698 | RSAZ_1024_mod_exp_avx2(rr->d, a->d, p->d, m->d, mont->RR.d, mont->n0[0]); | |
699 | rr->top = 16; | |
700 | rr->neg = 0; | |
701 | bn_correct_top(rr); | |
702 | ret = 1; | |
703 | goto err; | |
704 | } | |
705 | else if ((8 == a->top) && (8 == p->top) && (BN_num_bits(m) == 512)) | |
706 | { | |
707 | if (NULL == bn_wexpand(rr,8)) goto err; | |
708 | RSAZ_512_mod_exp(rr->d, a->d, p->d, m->d, mont->n0[0], mont->RR.d); | |
709 | rr->top = 8; | |
710 | rr->neg = 0; | |
711 | bn_correct_top(rr); | |
712 | ret = 1; | |
713 | goto err; | |
714 | } | |
715 | #endif | |
716 | ||
46a64376 BM |
717 | /* Get the window size to use with size of p. */ |
718 | window = BN_window_bits_for_ctime_exponent_size(bits); | |
cbce8c46 | 719 | #if defined(SPARC_T4_MONT) |
68c06bf6 AP |
720 | if (window>=5 && (top&15)==0 && top<=64 && |
721 | (OPENSSL_sparcv9cap_P[1]&(CFR_MONTMUL|CFR_MONTSQR))== | |
722 | (CFR_MONTMUL|CFR_MONTSQR) && | |
723 | (t4=OPENSSL_sparcv9cap_P[0])) | |
724 | window=5; | |
725 | else | |
726 | #endif | |
361512da AP |
727 | #if defined(OPENSSL_BN_ASM_MONT5) |
728 | if (window==6 && bits<=1024) window=5; /* ~5% improvement of 2048-bit RSA sign */ | |
729 | #endif | |
68c06bf6 | 730 | (void)0; |
46a64376 BM |
731 | |
732 | /* Allocate a buffer large enough to hold all of the pre-computed | |
8329e2e7 | 733 | * powers of am, am itself and tmp. |
46a64376 BM |
734 | */ |
735 | numPowers = 1 << window; | |
361512da | 736 | powerbufLen = sizeof(m->d[0])*(top*numPowers + |
8329e2e7 | 737 | ((2*top)>numPowers?(2*top):numPowers)); |
cfdbff23 | 738 | #ifdef alloca |
361512da AP |
739 | if (powerbufLen < 3072) |
740 | powerbufFree = alloca(powerbufLen+MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH); | |
cfdbff23 AP |
741 | else |
742 | #endif | |
743 | if ((powerbufFree=(unsigned char*)OPENSSL_malloc(powerbufLen+MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH)) == NULL) | |
46a64376 BM |
744 | goto err; |
745 | ||
746 | powerbuf = MOD_EXP_CTIME_ALIGN(powerbufFree); | |
747 | memset(powerbuf, 0, powerbufLen); | |
748 | ||
cfdbff23 | 749 | #ifdef alloca |
361512da AP |
750 | if (powerbufLen < 3072) |
751 | powerbufFree = NULL; | |
cfdbff23 | 752 | #endif |
361512da | 753 | |
8329e2e7 AP |
754 | /* lay down tmp and am right after powers table */ |
755 | tmp.d = (BN_ULONG *)(powerbuf + sizeof(m->d[0])*top*numPowers); | |
756 | am.d = tmp.d + top; | |
757 | tmp.top = am.top = 0; | |
758 | tmp.dmax = am.dmax = top; | |
759 | tmp.neg = am.neg = 0; | |
760 | tmp.flags = am.flags = BN_FLG_STATIC_DATA; | |
761 | ||
762 | /* prepare a^0 in Montgomery domain */ | |
4ddacd99 | 763 | #if 1 /* by Shay Gueron's suggestion */ |
cbce8c46 AP |
764 | if (m->d[top-1] & (((BN_ULONG)1)<<(BN_BITS2-1))) |
765 | { | |
766 | /* 2^(top*BN_BITS2) - m */ | |
767 | tmp.d[0] = (0-m->d[0])&BN_MASK2; | |
768 | for (i=1;i<top;i++) tmp.d[i] = (~m->d[i])&BN_MASK2; | |
769 | tmp.top = top; | |
770 | } | |
771 | else | |
8329e2e7 | 772 | #endif |
cbce8c46 | 773 | if (!BN_to_montgomery(&tmp,BN_value_one(),mont,ctx)) goto err; |
46a64376 | 774 | |
8329e2e7 | 775 | /* prepare a^1 in Montgomery domain */ |
46a64376 BM |
776 | if (a->neg || BN_ucmp(a,m) >= 0) |
777 | { | |
8329e2e7 AP |
778 | if (!BN_mod(&am,a,m,ctx)) goto err; |
779 | if (!BN_to_montgomery(&am,&am,mont,ctx)) goto err; | |
46a64376 | 780 | } |
8329e2e7 | 781 | else if (!BN_to_montgomery(&am,a,mont,ctx)) goto err; |
361512da | 782 | |
cbce8c46 | 783 | #if defined(SPARC_T4_MONT) |
68c06bf6 AP |
784 | if (t4) |
785 | { | |
786 | typedef int (*bn_pwr5_mont_f)(BN_ULONG *tp,const BN_ULONG *np, | |
4ddacd99 | 787 | const BN_ULONG *n0,const void *table,int power,int bits); |
68c06bf6 | 788 | int bn_pwr5_mont_t4_8(BN_ULONG *tp,const BN_ULONG *np, |
4ddacd99 | 789 | const BN_ULONG *n0,const void *table,int power,int bits); |
68c06bf6 | 790 | int bn_pwr5_mont_t4_16(BN_ULONG *tp,const BN_ULONG *np, |
4ddacd99 | 791 | const BN_ULONG *n0,const void *table,int power,int bits); |
68c06bf6 | 792 | int bn_pwr5_mont_t4_24(BN_ULONG *tp,const BN_ULONG *np, |
4ddacd99 | 793 | const BN_ULONG *n0,const void *table,int power,int bits); |
68c06bf6 | 794 | int bn_pwr5_mont_t4_32(BN_ULONG *tp,const BN_ULONG *np, |
4ddacd99 AP |
795 | const BN_ULONG *n0,const void *table,int power,int bits); |
796 | static const bn_pwr5_mont_f pwr5_funcs[4] = { | |
68c06bf6 AP |
797 | bn_pwr5_mont_t4_8, bn_pwr5_mont_t4_16, |
798 | bn_pwr5_mont_t4_24, bn_pwr5_mont_t4_32 }; | |
4ddacd99 AP |
799 | bn_pwr5_mont_f pwr5_worker = pwr5_funcs[top/16-1]; |
800 | ||
801 | typedef int (*bn_mul_mont_f)(BN_ULONG *rp,const BN_ULONG *ap, | |
802 | const void *bp,const BN_ULONG *np,const BN_ULONG *n0); | |
803 | int bn_mul_mont_t4_8(BN_ULONG *rp,const BN_ULONG *ap, | |
804 | const void *bp,const BN_ULONG *np,const BN_ULONG *n0); | |
805 | int bn_mul_mont_t4_16(BN_ULONG *rp,const BN_ULONG *ap, | |
806 | const void *bp,const BN_ULONG *np,const BN_ULONG *n0); | |
807 | int bn_mul_mont_t4_24(BN_ULONG *rp,const BN_ULONG *ap, | |
808 | const void *bp,const BN_ULONG *np,const BN_ULONG *n0); | |
809 | int bn_mul_mont_t4_32(BN_ULONG *rp,const BN_ULONG *ap, | |
810 | const void *bp,const BN_ULONG *np,const BN_ULONG *n0); | |
811 | static const bn_mul_mont_f mul_funcs[4] = { | |
812 | bn_mul_mont_t4_8, bn_mul_mont_t4_16, | |
813 | bn_mul_mont_t4_24, bn_mul_mont_t4_32 }; | |
814 | bn_mul_mont_f mul_worker = mul_funcs[top/16-1]; | |
815 | ||
816 | void bn_mul_mont_vis3(BN_ULONG *rp,const BN_ULONG *ap, | |
817 | const void *bp,const BN_ULONG *np, | |
818 | const BN_ULONG *n0,int num); | |
68c06bf6 AP |
819 | void bn_mul_mont_t4(BN_ULONG *rp,const BN_ULONG *ap, |
820 | const void *bp,const BN_ULONG *np, | |
821 | const BN_ULONG *n0,int num); | |
822 | void bn_mul_mont_gather5_t4(BN_ULONG *rp,const BN_ULONG *ap, | |
823 | const void *table,const BN_ULONG *np, | |
824 | const BN_ULONG *n0,int num,int power); | |
4ddacd99 | 825 | void bn_flip_n_scatter5_t4(const BN_ULONG *inp,size_t num, |
68c06bf6 AP |
826 | void *table,size_t power); |
827 | void bn_gather5_t4(BN_ULONG *out,size_t num, | |
828 | void *table,size_t power); | |
829 | void bn_flip_t4(BN_ULONG *dst,BN_ULONG *src,size_t num); | |
830 | ||
4ddacd99 AP |
831 | BN_ULONG *np=mont->N.d, *n0=mont->n0; |
832 | int stride = 5*(6-(top/16-1)); /* multiple of 5, but less than 32 */ | |
68c06bf6 AP |
833 | |
834 | /* BN_to_montgomery can contaminate words above .top | |
835 | * [in BN_DEBUG[_DEBUG] build]... */ | |
836 | for (i=am.top; i<top; i++) am.d[i]=0; | |
837 | for (i=tmp.top; i<top; i++) tmp.d[i]=0; | |
838 | ||
4ddacd99 AP |
839 | bn_flip_n_scatter5_t4(tmp.d,top,powerbuf,0); |
840 | bn_flip_n_scatter5_t4(am.d,top,powerbuf,1); | |
841 | if (!(*mul_worker)(tmp.d,am.d,am.d,np,n0) && | |
842 | !(*mul_worker)(tmp.d,am.d,am.d,np,n0)) | |
843 | bn_mul_mont_vis3(tmp.d,am.d,am.d,np,n0,top); | |
844 | bn_flip_n_scatter5_t4(tmp.d,top,powerbuf,2); | |
68c06bf6 AP |
845 | |
846 | for (i=3; i<32; i++) | |
847 | { | |
848 | /* Calculate a^i = a^(i-1) * a */ | |
4ddacd99 AP |
849 | if (!(*mul_worker)(tmp.d,tmp.d,am.d,np,n0) && |
850 | !(*mul_worker)(tmp.d,tmp.d,am.d,np,n0)) | |
851 | bn_mul_mont_vis3(tmp.d,tmp.d,am.d,np,n0,top); | |
852 | bn_flip_n_scatter5_t4(tmp.d,top,powerbuf,i); | |
68c06bf6 AP |
853 | } |
854 | ||
4ddacd99 AP |
855 | /* switch to 64-bit domain */ |
856 | np = alloca(top*sizeof(BN_ULONG)); | |
857 | top /= 2; | |
858 | bn_flip_t4(np,mont->N.d,top); | |
859 | ||
68c06bf6 AP |
860 | bits--; |
861 | for (wvalue=0, i=bits%5; i>=0; i--,bits--) | |
862 | wvalue = (wvalue<<1)+BN_is_bit_set(p,bits); | |
863 | bn_gather5_t4(tmp.d,top,powerbuf,wvalue); | |
864 | ||
865 | /* Scan the exponent one window at a time starting from the most | |
866 | * significant bits. | |
867 | */ | |
868 | while (bits >= 0) | |
869 | { | |
4ddacd99 AP |
870 | if (bits < stride) stride = bits+1; |
871 | bits -= stride; | |
872 | wvalue = bn_get_bits(p,bits+1); | |
68c06bf6 | 873 | |
4ddacd99 | 874 | if ((*pwr5_worker)(tmp.d,np,n0,powerbuf,wvalue,stride)) continue; |
68c06bf6 | 875 | /* retry once and fall back */ |
4ddacd99 AP |
876 | if ((*pwr5_worker)(tmp.d,np,n0,powerbuf,wvalue,stride)) continue; |
877 | ||
878 | bits += stride-5; | |
879 | wvalue >>= stride-5; | |
880 | wvalue &= 31; | |
68c06bf6 AP |
881 | bn_mul_mont_t4(tmp.d,tmp.d,tmp.d,np,n0,top); |
882 | bn_mul_mont_t4(tmp.d,tmp.d,tmp.d,np,n0,top); | |
883 | bn_mul_mont_t4(tmp.d,tmp.d,tmp.d,np,n0,top); | |
884 | bn_mul_mont_t4(tmp.d,tmp.d,tmp.d,np,n0,top); | |
885 | bn_mul_mont_t4(tmp.d,tmp.d,tmp.d,np,n0,top); | |
886 | bn_mul_mont_gather5_t4(tmp.d,tmp.d,powerbuf,np,n0,top,wvalue); | |
887 | } | |
888 | ||
889 | bn_flip_t4(tmp.d,tmp.d,top); | |
890 | top *= 2; | |
891 | /* back to 32-bit domain */ | |
892 | tmp.top=top; | |
893 | bn_correct_top(&tmp); | |
894 | OPENSSL_cleanse(np,top*sizeof(BN_ULONG)); | |
895 | } | |
896 | else | |
897 | #endif | |
361512da AP |
898 | #if defined(OPENSSL_BN_ASM_MONT5) |
899 | /* This optimization uses ideas from http://eprint.iacr.org/2011/239, | |
900 | * specifically optimization of cache-timing attack countermeasures | |
901 | * and pre-computation optimization. */ | |
902 | ||
903 | /* Dedicated window==4 case improves 512-bit RSA sign by ~15%, but as | |
904 | * 512-bit RSA is hardly relevant, we omit it to spare size... */ | |
905 | if (window==5) | |
906 | { | |
907 | void bn_mul_mont_gather5(BN_ULONG *rp,const BN_ULONG *ap, | |
908 | const void *table,const BN_ULONG *np, | |
909 | const BN_ULONG *n0,int num,int power); | |
910 | void bn_scatter5(const BN_ULONG *inp,size_t num, | |
911 | void *table,size_t power); | |
8329e2e7 AP |
912 | void bn_gather5(BN_ULONG *out,size_t num, |
913 | void *table,size_t power); | |
361512da | 914 | |
8329e2e7 | 915 | BN_ULONG *np=mont->N.d, *n0=mont->n0; |
361512da | 916 | |
09338871 AP |
917 | /* BN_to_montgomery can contaminate words above .top |
918 | * [in BN_DEBUG[_DEBUG] build]... */ | |
919 | for (i=am.top; i<top; i++) am.d[i]=0; | |
920 | for (i=tmp.top; i<top; i++) tmp.d[i]=0; | |
921 | ||
8329e2e7 AP |
922 | bn_scatter5(tmp.d,top,powerbuf,0); |
923 | bn_scatter5(am.d,am.top,powerbuf,1); | |
924 | bn_mul_mont(tmp.d,am.d,am.d,np,n0,top); | |
925 | bn_scatter5(tmp.d,top,powerbuf,2); | |
361512da | 926 | |
361512da | 927 | #if 0 |
8329e2e7 | 928 | for (i=3; i<32; i++) |
361512da | 929 | { |
8329e2e7 AP |
930 | /* Calculate a^i = a^(i-1) * a */ |
931 | bn_mul_mont_gather5(tmp.d,am.d,powerbuf,np,n0,top,i-1); | |
932 | bn_scatter5(tmp.d,top,powerbuf,i); | |
361512da AP |
933 | } |
934 | #else | |
935 | /* same as above, but uses squaring for 1/2 of operations */ | |
8329e2e7 | 936 | for (i=4; i<32; i*=2) |
361512da | 937 | { |
8329e2e7 AP |
938 | bn_mul_mont(tmp.d,tmp.d,tmp.d,np,n0,top); |
939 | bn_scatter5(tmp.d,top,powerbuf,i); | |
361512da AP |
940 | } |
941 | for (i=3; i<8; i+=2) | |
942 | { | |
943 | int j; | |
8329e2e7 AP |
944 | bn_mul_mont_gather5(tmp.d,am.d,powerbuf,np,n0,top,i-1); |
945 | bn_scatter5(tmp.d,top,powerbuf,i); | |
361512da AP |
946 | for (j=2*i; j<32; j*=2) |
947 | { | |
8329e2e7 AP |
948 | bn_mul_mont(tmp.d,tmp.d,tmp.d,np,n0,top); |
949 | bn_scatter5(tmp.d,top,powerbuf,j); | |
361512da AP |
950 | } |
951 | } | |
952 | for (; i<16; i+=2) | |
953 | { | |
8329e2e7 AP |
954 | bn_mul_mont_gather5(tmp.d,am.d,powerbuf,np,n0,top,i-1); |
955 | bn_scatter5(tmp.d,top,powerbuf,i); | |
956 | bn_mul_mont(tmp.d,tmp.d,tmp.d,np,n0,top); | |
957 | bn_scatter5(tmp.d,top,powerbuf,2*i); | |
361512da AP |
958 | } |
959 | for (; i<32; i+=2) | |
960 | { | |
8329e2e7 AP |
961 | bn_mul_mont_gather5(tmp.d,am.d,powerbuf,np,n0,top,i-1); |
962 | bn_scatter5(tmp.d,top,powerbuf,i); | |
361512da AP |
963 | } |
964 | #endif | |
8329e2e7 AP |
965 | bits--; |
966 | for (wvalue=0, i=bits%5; i>=0; i--,bits--) | |
967 | wvalue = (wvalue<<1)+BN_is_bit_set(p,bits); | |
968 | bn_gather5(tmp.d,top,powerbuf,wvalue); | |
361512da AP |
969 | |
970 | /* Scan the exponent one window at a time starting from the most | |
971 | * significant bits. | |
972 | */ | |
361512da AP |
973 | while (bits >= 0) |
974 | { | |
975 | for (wvalue=0, i=0; i<5; i++,bits--) | |
976 | wvalue = (wvalue<<1)+BN_is_bit_set(p,bits); | |
977 | ||
8329e2e7 AP |
978 | bn_mul_mont(tmp.d,tmp.d,tmp.d,np,n0,top); |
979 | bn_mul_mont(tmp.d,tmp.d,tmp.d,np,n0,top); | |
980 | bn_mul_mont(tmp.d,tmp.d,tmp.d,np,n0,top); | |
981 | bn_mul_mont(tmp.d,tmp.d,tmp.d,np,n0,top); | |
982 | bn_mul_mont(tmp.d,tmp.d,tmp.d,np,n0,top); | |
983 | bn_mul_mont_gather5(tmp.d,tmp.d,powerbuf,np,n0,top,wvalue); | |
361512da AP |
984 | } |
985 | ||
8329e2e7 AP |
986 | tmp.top=top; |
987 | bn_correct_top(&tmp); | |
361512da AP |
988 | } |
989 | else | |
990 | #endif | |
991 | { | |
8329e2e7 AP |
992 | if (!MOD_EXP_CTIME_COPY_TO_PREBUF(&tmp, top, powerbuf, 0, numPowers)) goto err; |
993 | if (!MOD_EXP_CTIME_COPY_TO_PREBUF(&am, top, powerbuf, 1, numPowers)) goto err; | |
46a64376 BM |
994 | |
995 | /* If the window size is greater than 1, then calculate | |
996 | * val[i=2..2^winsize-1]. Powers are computed as a*a^(i-1) | |
997 | * (even powers could instead be computed as (a^(i/2))^2 | |
998 | * to use the slight performance advantage of sqr over mul). | |
999 | */ | |
1000 | if (window > 1) | |
1001 | { | |
8329e2e7 AP |
1002 | if (!BN_mod_mul_montgomery(&tmp,&am,&am,mont,ctx)) goto err; |
1003 | if (!MOD_EXP_CTIME_COPY_TO_PREBUF(&tmp, top, powerbuf, 2, numPowers)) goto err; | |
1004 | for (i=3; i<numPowers; i++) | |
46a64376 BM |
1005 | { |
1006 | /* Calculate a^i = a^(i-1) * a */ | |
8329e2e7 | 1007 | if (!BN_mod_mul_montgomery(&tmp,&am,&tmp,mont,ctx)) |
46a64376 | 1008 | goto err; |
8329e2e7 | 1009 | if (!MOD_EXP_CTIME_COPY_TO_PREBUF(&tmp, top, powerbuf, i, numPowers)) goto err; |
46a64376 BM |
1010 | } |
1011 | } | |
1012 | ||
361512da | 1013 | bits--; |
8329e2e7 AP |
1014 | for (wvalue=0, i=bits%window; i>=0; i--,bits--) |
1015 | wvalue = (wvalue<<1)+BN_is_bit_set(p,bits); | |
1016 | if (!MOD_EXP_CTIME_COPY_FROM_PREBUF(&tmp,top,powerbuf,wvalue,numPowers)) goto err; | |
1017 | ||
1018 | /* Scan the exponent one window at a time starting from the most | |
1019 | * significant bits. | |
1020 | */ | |
361512da | 1021 | while (bits >= 0) |
46a64376 BM |
1022 | { |
1023 | wvalue=0; /* The 'value' of the window */ | |
1024 | ||
1025 | /* Scan the window, squaring the result as we go */ | |
361512da | 1026 | for (i=0; i<window; i++,bits--) |
46a64376 | 1027 | { |
8329e2e7 | 1028 | if (!BN_mod_mul_montgomery(&tmp,&tmp,&tmp,mont,ctx)) goto err; |
361512da | 1029 | wvalue = (wvalue<<1)+BN_is_bit_set(p,bits); |
46a64376 BM |
1030 | } |
1031 | ||
1032 | /* Fetch the appropriate pre-computed value from the pre-buf */ | |
8329e2e7 | 1033 | if (!MOD_EXP_CTIME_COPY_FROM_PREBUF(&am, top, powerbuf, wvalue, numPowers)) goto err; |
46a64376 BM |
1034 | |
1035 | /* Multiply the result into the intermediate result */ | |
8329e2e7 | 1036 | if (!BN_mod_mul_montgomery(&tmp,&tmp,&am,mont,ctx)) goto err; |
46a64376 | 1037 | } |
361512da | 1038 | } |
46a64376 BM |
1039 | |
1040 | /* Convert the final result from montgomery to standard format */ | |
cbce8c46 | 1041 | #if defined(SPARC_T4_MONT) |
4ddacd99 AP |
1042 | if (OPENSSL_sparcv9cap_P[0]&(SPARCV9_VIS3|SPARCV9_PREFER_FPU)) |
1043 | { | |
1044 | am.d[0] = 1; /* borrow am */ | |
1045 | for (i=1;i<top;i++) am.d[i] = 0; | |
1046 | if (!BN_mod_mul_montgomery(rr,&tmp,&am,mont,ctx)) goto err; | |
1047 | } | |
1048 | else | |
1049 | #endif | |
8329e2e7 | 1050 | if (!BN_from_montgomery(rr,&tmp,mont,ctx)) goto err; |
46a64376 BM |
1051 | ret=1; |
1052 | err: | |
1053 | if ((in_mont == NULL) && (mont != NULL)) BN_MONT_CTX_free(mont); | |
1054 | if (powerbuf!=NULL) | |
1055 | { | |
1056 | OPENSSL_cleanse(powerbuf,powerbufLen); | |
361512da | 1057 | if (powerbufFree) OPENSSL_free(powerbufFree); |
46a64376 | 1058 | } |
46a64376 BM |
1059 | BN_CTX_end(ctx); |
1060 | return(ret); | |
1061 | } | |
1062 | ||
6dad7bd6 BM |
1063 | int BN_mod_exp_mont_word(BIGNUM *rr, BN_ULONG a, const BIGNUM *p, |
1064 | const BIGNUM *m, BN_CTX *ctx, BN_MONT_CTX *in_mont) | |
6dad7bd6 | 1065 | { |
f8989a21 | 1066 | BN_MONT_CTX *mont = NULL; |
6dad7bd6 | 1067 | int b, bits, ret=0; |
e958c5af | 1068 | int r_is_one; |
f8989a21 | 1069 | BN_ULONG w, next_w; |
6dad7bd6 | 1070 | BIGNUM *d, *r, *t; |
f8989a21 BM |
1071 | BIGNUM *swap_tmp; |
1072 | #define BN_MOD_MUL_WORD(r, w, m) \ | |
1073 | (BN_mul_word(r, (w)) && \ | |
fc57ebc0 | 1074 | (/* BN_ucmp(r, (m)) < 0 ? 1 :*/ \ |
e958c5af BM |
1075 | (BN_mod(t, r, m, ctx) && (swap_tmp = r, r = t, t = swap_tmp, 1)))) |
1076 | /* BN_MOD_MUL_WORD is only used with 'w' large, | |
8dea52fa BM |
1077 | * so the BN_ucmp test is probably more overhead |
1078 | * than always using BN_mod (which uses BN_copy if | |
1079 | * a similar test returns true). */ | |
1080 | /* We can use BN_mod and do not need BN_nnmod because our | |
1081 | * accumulator is never negative (the result of BN_mod does | |
1082 | * not depend on the sign of the modulus). | |
1083 | */ | |
e958c5af BM |
1084 | #define BN_TO_MONTGOMERY_WORD(r, w, mont) \ |
1085 | (BN_set_word(r, (w)) && BN_to_montgomery(r, r, (mont), ctx)) | |
6dad7bd6 | 1086 | |
bd31fb21 | 1087 | if (BN_get_flags(p, BN_FLG_CONSTTIME) != 0) |
46a64376 | 1088 | { |
bd31fb21 | 1089 | /* BN_FLG_CONSTTIME only supported by BN_mod_exp_mont() */ |
46a64376 BM |
1090 | BNerr(BN_F_BN_MOD_EXP_MONT_WORD,ERR_R_SHOULD_NOT_HAVE_BEEN_CALLED); |
1091 | return -1; | |
1092 | } | |
1093 | ||
6dad7bd6 BM |
1094 | bn_check_top(p); |
1095 | bn_check_top(m); | |
1096 | ||
82b2f57e | 1097 | if (!BN_is_odd(m)) |
6dad7bd6 BM |
1098 | { |
1099 | BNerr(BN_F_BN_MOD_EXP_MONT_WORD,BN_R_CALLED_WITH_EVEN_MODULUS); | |
1100 | return(0); | |
1101 | } | |
25439b76 BM |
1102 | if (m->top == 1) |
1103 | a %= m->d[0]; /* make sure that 'a' is reduced */ | |
1104 | ||
6dad7bd6 BM |
1105 | bits = BN_num_bits(p); |
1106 | if (bits == 0) | |
1107 | { | |
2b0180c3 AL |
1108 | /* x**0 mod 1 is still zero. */ |
1109 | if (BN_is_one(m)) | |
1110 | { | |
1111 | ret = 1; | |
1112 | BN_zero(rr); | |
1113 | } | |
1114 | else | |
1115 | ret = BN_one(rr); | |
cd2eebfd | 1116 | return ret; |
6dad7bd6 | 1117 | } |
cd2eebfd BM |
1118 | if (a == 0) |
1119 | { | |
b6358c89 GT |
1120 | BN_zero(rr); |
1121 | ret = 1; | |
cd2eebfd BM |
1122 | return ret; |
1123 | } | |
1124 | ||
6dad7bd6 BM |
1125 | BN_CTX_start(ctx); |
1126 | d = BN_CTX_get(ctx); | |
1127 | r = BN_CTX_get(ctx); | |
1128 | t = BN_CTX_get(ctx); | |
1129 | if (d == NULL || r == NULL || t == NULL) goto err; | |
1130 | ||
6dad7bd6 BM |
1131 | if (in_mont != NULL) |
1132 | mont=in_mont; | |
1133 | else | |
1134 | { | |
1135 | if ((mont = BN_MONT_CTX_new()) == NULL) goto err; | |
1136 | if (!BN_MONT_CTX_set(mont, m, ctx)) goto err; | |
1137 | } | |
1138 | ||
e958c5af | 1139 | r_is_one = 1; /* except for Montgomery factor */ |
f8989a21 BM |
1140 | |
1141 | /* bits-1 >= 0 */ | |
1142 | ||
1143 | /* The result is accumulated in the product r*w. */ | |
1144 | w = a; /* bit 'bits-1' of 'p' is always set */ | |
1145 | for (b = bits-2; b >= 0; b--) | |
6dad7bd6 | 1146 | { |
f8989a21 BM |
1147 | /* First, square r*w. */ |
1148 | next_w = w*w; | |
1149 | if ((next_w/w) != w) /* overflow */ | |
6dad7bd6 | 1150 | { |
e958c5af BM |
1151 | if (r_is_one) |
1152 | { | |
1153 | if (!BN_TO_MONTGOMERY_WORD(r, w, mont)) goto err; | |
1154 | r_is_one = 0; | |
1155 | } | |
1156 | else | |
1157 | { | |
1158 | if (!BN_MOD_MUL_WORD(r, w, m)) goto err; | |
1159 | } | |
f8989a21 BM |
1160 | next_w = 1; |
1161 | } | |
1162 | w = next_w; | |
e958c5af BM |
1163 | if (!r_is_one) |
1164 | { | |
1165 | if (!BN_mod_mul_montgomery(r, r, r, mont, ctx)) goto err; | |
1166 | } | |
f8989a21 BM |
1167 | |
1168 | /* Second, multiply r*w by 'a' if exponent bit is set. */ | |
1169 | if (BN_is_bit_set(p, b)) | |
1170 | { | |
1171 | next_w = w*a; | |
1172 | if ((next_w/a) != w) /* overflow */ | |
6dad7bd6 | 1173 | { |
e958c5af BM |
1174 | if (r_is_one) |
1175 | { | |
1176 | if (!BN_TO_MONTGOMERY_WORD(r, w, mont)) goto err; | |
1177 | r_is_one = 0; | |
1178 | } | |
1179 | else | |
1180 | { | |
1181 | if (!BN_MOD_MUL_WORD(r, w, m)) goto err; | |
1182 | } | |
f8989a21 | 1183 | next_w = a; |
6dad7bd6 | 1184 | } |
f8989a21 | 1185 | w = next_w; |
6dad7bd6 BM |
1186 | } |
1187 | } | |
e958c5af | 1188 | |
f8989a21 BM |
1189 | /* Finally, set r:=r*w. */ |
1190 | if (w != 1) | |
1191 | { | |
e958c5af BM |
1192 | if (r_is_one) |
1193 | { | |
1194 | if (!BN_TO_MONTGOMERY_WORD(r, w, mont)) goto err; | |
1195 | r_is_one = 0; | |
1196 | } | |
1197 | else | |
1198 | { | |
1199 | if (!BN_MOD_MUL_WORD(r, w, m)) goto err; | |
1200 | } | |
f8989a21 BM |
1201 | } |
1202 | ||
e958c5af BM |
1203 | if (r_is_one) /* can happen only if a == 1*/ |
1204 | { | |
1205 | if (!BN_one(rr)) goto err; | |
1206 | } | |
1207 | else | |
1208 | { | |
1209 | if (!BN_from_montgomery(rr, r, mont, ctx)) goto err; | |
1210 | } | |
6dad7bd6 BM |
1211 | ret = 1; |
1212 | err: | |
1213 | if ((in_mont == NULL) && (mont != NULL)) BN_MONT_CTX_free(mont); | |
1214 | BN_CTX_end(ctx); | |
d870740c | 1215 | bn_check_top(rr); |
6dad7bd6 BM |
1216 | return(ret); |
1217 | } | |
1218 | ||
d02b48c6 RE |
1219 | |
1220 | /* The old fallback, simple version :-) */ | |
82b2f57e GT |
1221 | int BN_mod_exp_simple(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, |
1222 | const BIGNUM *m, BN_CTX *ctx) | |
d02b48c6 | 1223 | { |
c86f2054 | 1224 | int i,j,bits,ret=0,wstart,wend,window,wvalue; |
d02b48c6 RE |
1225 | int start=1; |
1226 | BIGNUM *d; | |
c86f2054 GT |
1227 | /* Table of variables obtained from 'ctx' */ |
1228 | BIGNUM *val[TABLE_SIZE]; | |
d02b48c6 | 1229 | |
bd31fb21 | 1230 | if (BN_get_flags(p, BN_FLG_CONSTTIME) != 0) |
46a64376 | 1231 | { |
bd31fb21 | 1232 | /* BN_FLG_CONSTTIME only supported by BN_mod_exp_mont() */ |
46a64376 BM |
1233 | BNerr(BN_F_BN_MOD_EXP_SIMPLE,ERR_R_SHOULD_NOT_HAVE_BEEN_CALLED); |
1234 | return -1; | |
1235 | } | |
1236 | ||
d02b48c6 RE |
1237 | bits=BN_num_bits(p); |
1238 | ||
1239 | if (bits == 0) | |
1240 | { | |
cd2eebfd BM |
1241 | ret = BN_one(r); |
1242 | return ret; | |
1243 | } | |
d02b48c6 | 1244 | |
9b141126 | 1245 | BN_CTX_start(ctx); |
c86f2054 GT |
1246 | d = BN_CTX_get(ctx); |
1247 | val[0] = BN_CTX_get(ctx); | |
1248 | if(!d || !val[0]) goto err; | |
9b141126 | 1249 | |
c86f2054 GT |
1250 | if (!BN_nnmod(val[0],a,m,ctx)) goto err; /* 1 */ |
1251 | if (BN_is_zero(val[0])) | |
73c2522c | 1252 | { |
b6358c89 GT |
1253 | BN_zero(r); |
1254 | ret = 1; | |
25439b76 | 1255 | goto err; |
73c2522c | 1256 | } |
d02b48c6 | 1257 | |
dc434bbc BM |
1258 | window = BN_window_bits_for_exponent_size(bits); |
1259 | if (window > 1) | |
d02b48c6 | 1260 | { |
c86f2054 | 1261 | if (!BN_mod_mul(d,val[0],val[0],m,ctx)) |
dc434bbc BM |
1262 | goto err; /* 2 */ |
1263 | j=1<<(window-1); | |
1264 | for (i=1; i<j; i++) | |
1265 | { | |
c86f2054 GT |
1266 | if(((val[i] = BN_CTX_get(ctx)) == NULL) || |
1267 | !BN_mod_mul(val[i],val[i-1],d,m,ctx)) | |
dc434bbc BM |
1268 | goto err; |
1269 | } | |
d02b48c6 | 1270 | } |
d02b48c6 RE |
1271 | |
1272 | start=1; /* This is used to avoid multiplication etc | |
1273 | * when there is only the value '1' in the | |
1274 | * buffer. */ | |
1275 | wvalue=0; /* The 'value' of the window */ | |
1276 | wstart=bits-1; /* The top bit of the window */ | |
1277 | wend=0; /* The bottom bit of the window */ | |
1278 | ||
1279 | if (!BN_one(r)) goto err; | |
1280 | ||
1281 | for (;;) | |
1282 | { | |
1283 | if (BN_is_bit_set(p,wstart) == 0) | |
1284 | { | |
1285 | if (!start) | |
1286 | if (!BN_mod_mul(r,r,r,m,ctx)) | |
1287 | goto err; | |
1288 | if (wstart == 0) break; | |
1289 | wstart--; | |
1290 | continue; | |
1291 | } | |
1292 | /* We now have wstart on a 'set' bit, we now need to work out | |
1293 | * how bit a window to do. To do this we need to scan | |
1294 | * forward until the last set bit before the end of the | |
1295 | * window */ | |
1296 | j=wstart; | |
1297 | wvalue=1; | |
1298 | wend=0; | |
1299 | for (i=1; i<window; i++) | |
1300 | { | |
1301 | if (wstart-i < 0) break; | |
1302 | if (BN_is_bit_set(p,wstart-i)) | |
1303 | { | |
1304 | wvalue<<=(i-wend); | |
1305 | wvalue|=1; | |
1306 | wend=i; | |
1307 | } | |
1308 | } | |
1309 | ||
1310 | /* wend is the size of the current window */ | |
1311 | j=wend+1; | |
1312 | /* add the 'bytes above' */ | |
1313 | if (!start) | |
1314 | for (i=0; i<j; i++) | |
1315 | { | |
1316 | if (!BN_mod_mul(r,r,r,m,ctx)) | |
1317 | goto err; | |
1318 | } | |
1319 | ||
1320 | /* wvalue will be an odd number < 2^window */ | |
c86f2054 | 1321 | if (!BN_mod_mul(r,r,val[wvalue>>1],m,ctx)) |
d02b48c6 RE |
1322 | goto err; |
1323 | ||
1324 | /* move the 'window' down further */ | |
1325 | wstart-=wend+1; | |
1326 | wvalue=0; | |
1327 | start=0; | |
1328 | if (wstart < 0) break; | |
1329 | } | |
1330 | ret=1; | |
1331 | err: | |
9b141126 | 1332 | BN_CTX_end(ctx); |
d870740c | 1333 | bn_check_top(r); |
d02b48c6 RE |
1334 | return(ret); |
1335 | } |