]>
Commit | Line | Data |
---|---|---|
4f22f405 | 1 | /* |
fecb3aae | 2 | * Copyright 1995-2022 The OpenSSL Project Authors. All Rights Reserved. |
dc434bbc | 3 | * |
367ace68 | 4 | * Licensed under the Apache License 2.0 (the "License"). You may not use |
4f22f405 RS |
5 | * this file except in compliance with the License. You can obtain a copy |
6 | * in the file LICENSE in the source distribution or at | |
7 | * https://www.openssl.org/source/license.html | |
dc434bbc | 8 | */ |
d02b48c6 | 9 | |
ae4186b0 DMSP |
10 | #ifndef OSSL_CRYPTO_BN_LOCAL_H |
11 | # define OSSL_CRYPTO_BN_LOCAL_H | |
d02b48c6 | 12 | |
78c83078 DW |
13 | /* |
14 | * The EDK2 build doesn't use bn_conf.h; it sets THIRTY_TWO_BIT or | |
15 | * SIXTY_FOUR_BIT in its own environment since it doesn't re-run our | |
16 | * Configure script and needs to support both 32-bit and 64-bit. | |
17 | */ | |
18 | # include <openssl/opensslconf.h> | |
19 | ||
20 | # if !defined(OPENSSL_SYS_UEFI) | |
25f2138b | 21 | # include "crypto/bn_conf.h" |
78c83078 DW |
22 | # endif |
23 | ||
25f2138b | 24 | # include "crypto/bn.h" |
af6c6c21 | 25 | # include "internal/cryptlib.h" |
5de32f22 | 26 | # include "internal/numbers.h" |
d02b48c6 | 27 | |
94af0cd7 RS |
28 | /* |
29 | * These preprocessor symbols control various aspects of the bignum headers | |
30 | * and library code. They're not defined by any "normal" configuration, as | |
a935791d RS |
31 | * they are intended for development and testing purposes. NB: defining |
32 | * them can be useful for debugging application code as well as openssl | |
94af0cd7 | 33 | * itself. BN_DEBUG - turn on various debugging alterations to the bignum |
a935791d RS |
34 | * code BN_RAND_DEBUG - uses random poisoning of unused words to trip up |
35 | * mismanagement of bignum internals. Enable BN_RAND_DEBUG is known to | |
36 | * break some of the OpenSSL tests. | |
94af0cd7 | 37 | */ |
a935791d RS |
38 | # if defined(BN_RAND_DEBUG) && !defined(BN_DEBUG) |
39 | # define BN_DEBUG | |
40 | # endif | |
41 | # if defined(BN_RAND_DEBUG) | |
42 | # include <openssl/rand.h> | |
43 | # endif | |
94af0cd7 | 44 | |
30667f5c BE |
45 | /* |
46 | * This should limit the stack usage due to alloca to about 4K. | |
47 | * BN_SOFT_LIMIT is a soft limit equivalent to 2*OPENSSL_RSA_MAX_MODULUS_BITS. | |
48 | * Beyond that size bn_mul_mont is no longer used, and the constant time | |
49 | * assembler code is disabled, due to the blatant alloca and bn_mul_mont usage. | |
50 | * Note that bn_mul_mont does an alloca that is hidden away in assembly. | |
51 | * It is not recommended to do computations with numbers exceeding this limit, | |
52 | * since the result will be highly version dependent: | |
53 | * While the current OpenSSL version will use non-optimized, but safe code, | |
54 | * previous versions will use optimized code, that may crash due to unexpected | |
55 | * stack overflow, and future versions may very well turn this into a hard | |
56 | * limit. | |
57 | * Note however, that it is possible to override the size limit using | |
58 | * "./config -DBN_SOFT_LIMIT=<limit>" if necessary, and the O/S specific | |
59 | * stack limit is known and taken into consideration. | |
60 | */ | |
61 | # ifndef BN_SOFT_LIMIT | |
62 | # define BN_SOFT_LIMIT (4096 / BN_BYTES) | |
63 | # endif | |
64 | ||
94af0cd7 RS |
65 | # ifndef OPENSSL_SMALL_FOOTPRINT |
66 | # define BN_MUL_COMBA | |
67 | # define BN_SQR_COMBA | |
68 | # define BN_RECURSION | |
69 | # endif | |
70 | ||
71 | /* | |
72 | * This next option uses the C libraries (2 word)/(1 word) function. If it is | |
73 | * not defined, I use my C version (which is slower). The reason for this | |
74 | * flag is that when the particular C compiler library routine is used, and | |
75 | * the library is linked with a different compiler, the library is missing. | |
76 | * This mostly happens when the library is built with gcc and then linked | |
77 | * using normal cc. This would be a common occurrence because gcc normally | |
78 | * produces code that is 2 times faster than system compilers for the big | |
79 | * number stuff. For machines with only one compiler (or shared libraries), | |
80 | * this should be on. Again this in only really a problem on machines using | |
81 | * "long long's", are 32bit, and are not using my assembler code. | |
82 | */ | |
83 | # if defined(OPENSSL_SYS_MSDOS) || defined(OPENSSL_SYS_WINDOWS) || \ | |
84 | defined(OPENSSL_SYS_WIN32) || defined(linux) | |
85 | # define BN_DIV2W | |
86 | # endif | |
87 | ||
88 | /* | |
89 | * 64-bit processor with LP64 ABI | |
90 | */ | |
91 | # ifdef SIXTY_FOUR_BIT_LONG | |
92 | # define BN_ULLONG unsigned long long | |
93 | # define BN_BITS4 32 | |
94 | # define BN_MASK2 (0xffffffffffffffffL) | |
95 | # define BN_MASK2l (0xffffffffL) | |
96 | # define BN_MASK2h (0xffffffff00000000L) | |
97 | # define BN_MASK2h1 (0xffffffff80000000L) | |
98 | # define BN_DEC_CONV (10000000000000000000UL) | |
99 | # define BN_DEC_NUM 19 | |
100 | # define BN_DEC_FMT1 "%lu" | |
101 | # define BN_DEC_FMT2 "%019lu" | |
102 | # endif | |
103 | ||
104 | /* | |
105 | * 64-bit processor other than LP64 ABI | |
106 | */ | |
107 | # ifdef SIXTY_FOUR_BIT | |
108 | # undef BN_LLONG | |
109 | # undef BN_ULLONG | |
110 | # define BN_BITS4 32 | |
111 | # define BN_MASK2 (0xffffffffffffffffLL) | |
112 | # define BN_MASK2l (0xffffffffL) | |
113 | # define BN_MASK2h (0xffffffff00000000LL) | |
114 | # define BN_MASK2h1 (0xffffffff80000000LL) | |
115 | # define BN_DEC_CONV (10000000000000000000ULL) | |
116 | # define BN_DEC_NUM 19 | |
117 | # define BN_DEC_FMT1 "%llu" | |
118 | # define BN_DEC_FMT2 "%019llu" | |
119 | # endif | |
120 | ||
121 | # ifdef THIRTY_TWO_BIT | |
122 | # ifdef BN_LLONG | |
123 | # if defined(_WIN32) && !defined(__GNUC__) | |
124 | # define BN_ULLONG unsigned __int64 | |
125 | # else | |
126 | # define BN_ULLONG unsigned long long | |
127 | # endif | |
128 | # endif | |
129 | # define BN_BITS4 16 | |
130 | # define BN_MASK2 (0xffffffffL) | |
131 | # define BN_MASK2l (0xffff) | |
132 | # define BN_MASK2h1 (0xffff8000L) | |
133 | # define BN_MASK2h (0xffff0000L) | |
134 | # define BN_DEC_CONV (1000000000L) | |
135 | # define BN_DEC_NUM 9 | |
136 | # define BN_DEC_FMT1 "%u" | |
137 | # define BN_DEC_FMT2 "%09u" | |
138 | # endif | |
139 | ||
140 | ||
1d97c843 TH |
141 | /*- |
142 | * Bignum consistency macros | |
02a62d1a MC |
143 | * There is one "API" macro, bn_fix_top(), for stripping leading zeroes from |
144 | * bignum data after direct manipulations on the data. There is also an | |
145 | * "internal" macro, bn_check_top(), for verifying that there are no leading | |
146 | * zeroes. Unfortunately, some auditing is required due to the fact that | |
147 | * bn_fix_top() has become an overabused duct-tape because bignum data is | |
148 | * occasionally passed around in an inconsistent state. So the following | |
149 | * changes have been made to sort this out; | |
150 | * - bn_fix_top()s implementation has been moved to bn_correct_top() | |
151 | * - if BN_DEBUG isn't defined, bn_fix_top() maps to bn_correct_top(), and | |
152 | * bn_check_top() is as before. | |
153 | * - if BN_DEBUG *is* defined; | |
154 | * - bn_check_top() tries to pollute unused words even if the bignum 'top' is | |
a935791d | 155 | * consistent. (ed: only if BN_RAND_DEBUG is defined) |
02a62d1a MC |
156 | * - bn_fix_top() maps to bn_check_top() rather than "fixing" anything. |
157 | * The idea is to have debug builds flag up inconsistent bignums when they | |
158 | * occur. If that occurs in a bn_fix_top(), we examine the code in question; if | |
159 | * the use of bn_fix_top() was appropriate (ie. it follows directly after code | |
160 | * that manipulates the bignum) it is converted to bn_correct_top(), and if it | |
161 | * was not appropriate, we convert it permanently to bn_check_top() and track | |
162 | * down the cause of the bug. Eventually, no internal code should be using the | |
163 | * bn_fix_top() macro. External applications and libraries should try this with | |
164 | * their own code too, both in terms of building against the openssl headers | |
165 | * with BN_DEBUG defined *and* linking with a version of OpenSSL built with it | |
166 | * defined. This not only improves external code, it provides more test | |
167 | * coverage for openssl's own code. | |
168 | */ | |
169 | ||
0f113f3e | 170 | # ifdef BN_DEBUG |
305b68f1 AP |
171 | /* |
172 | * The new BN_FLG_FIXED_TOP flag marks vectors that were not treated with | |
173 | * bn_correct_top, in other words such vectors are permitted to have zeros | |
174 | * in most significant limbs. Such vectors are used internally to achieve | |
175 | * execution time invariance for critical operations with private keys. | |
176 | * It's BN_DEBUG-only flag, because user application is not supposed to | |
177 | * observe it anyway. Moreover, optimizing compiler would actually remove | |
178 | * all operations manipulating the bit in question in non-BN_DEBUG build. | |
179 | */ | |
180 | # define BN_FLG_FIXED_TOP 0x10000 | |
a935791d | 181 | # ifdef BN_RAND_DEBUG |
0f113f3e MC |
182 | # define bn_pollute(a) \ |
183 | do { \ | |
e8aa8b6c F |
184 | const BIGNUM *_bnum1 = (a); \ |
185 | if (_bnum1->top < _bnum1->dmax) { \ | |
186 | unsigned char _tmp_char; \ | |
187 | /* We cast away const without the compiler knowing, any \ | |
188 | * *genuinely* constant variables that aren't mutable \ | |
189 | * wouldn't be constructed with top!=dmax. */ \ | |
190 | BN_ULONG *_not_const; \ | |
191 | memcpy(&_not_const, &_bnum1->d, sizeof(_not_const)); \ | |
a935791d | 192 | (void)RAND_bytes(&_tmp_char, 1); /* Debug only - safe to ignore error return */\ |
e8aa8b6c F |
193 | memset(_not_const + _bnum1->top, _tmp_char, \ |
194 | sizeof(*_not_const) * (_bnum1->dmax - _bnum1->top)); \ | |
195 | } \ | |
0f113f3e | 196 | } while(0) |
0f113f3e MC |
197 | # else |
198 | # define bn_pollute(a) | |
199 | # endif | |
200 | # define bn_check_top(a) \ | |
201 | do { \ | |
202 | const BIGNUM *_bnum2 = (a); \ | |
203 | if (_bnum2 != NULL) { \ | |
5d1c09de AP |
204 | int _top = _bnum2->top; \ |
205 | (void)ossl_assert((_top == 0 && !_bnum2->neg) || \ | |
206 | (_top && ((_bnum2->flags & BN_FLG_FIXED_TOP) \ | |
207 | || _bnum2->d[_top - 1] != 0))); \ | |
0f113f3e MC |
208 | bn_pollute(_bnum2); \ |
209 | } \ | |
210 | } while(0) | |
211 | ||
212 | # define bn_fix_top(a) bn_check_top(a) | |
213 | ||
214 | # define bn_check_size(bn, bits) bn_wcheck_size(bn, ((bits+BN_BITS2-1))/BN_BITS2) | |
215 | # define bn_wcheck_size(bn, words) \ | |
216 | do { \ | |
217 | const BIGNUM *_bnum2 = (bn); \ | |
437e5050 MC |
218 | assert((words) <= (_bnum2)->dmax && \ |
219 | (words) >= (_bnum2)->top); \ | |
0f113f3e MC |
220 | /* avoid unused variable warning with NDEBUG */ \ |
221 | (void)(_bnum2); \ | |
222 | } while(0) | |
223 | ||
224 | # else /* !BN_DEBUG */ | |
225 | ||
305b68f1 | 226 | # define BN_FLG_FIXED_TOP 0 |
0f113f3e MC |
227 | # define bn_pollute(a) |
228 | # define bn_check_top(a) | |
229 | # define bn_fix_top(a) bn_correct_top(a) | |
230 | # define bn_check_size(bn, bits) | |
231 | # define bn_wcheck_size(bn, words) | |
232 | ||
233 | # endif | |
234 | ||
235 | BN_ULONG bn_mul_add_words(BN_ULONG *rp, const BN_ULONG *ap, int num, | |
236 | BN_ULONG w); | |
02a62d1a | 237 | BN_ULONG bn_mul_words(BN_ULONG *rp, const BN_ULONG *ap, int num, BN_ULONG w); |
0f113f3e | 238 | void bn_sqr_words(BN_ULONG *rp, const BN_ULONG *ap, int num); |
02a62d1a | 239 | BN_ULONG bn_div_words(BN_ULONG h, BN_ULONG l, BN_ULONG d); |
0f113f3e MC |
240 | BN_ULONG bn_add_words(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, |
241 | int num); | |
242 | BN_ULONG bn_sub_words(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, | |
243 | int num); | |
244 | ||
245 | struct bignum_st { | |
99d3349d RL |
246 | BN_ULONG *d; /* |
247 | * Pointer to an array of 'BN_BITS2' bit | |
248 | * chunks. These chunks are organised in | |
249 | * a least significant chunk first order. | |
250 | */ | |
0f113f3e MC |
251 | int top; /* Index of last used d +1. */ |
252 | /* The next are internal book keeping for bn_expand. */ | |
253 | int dmax; /* Size of the d array. */ | |
254 | int neg; /* one if the number is negative */ | |
255 | int flags; | |
256 | }; | |
19391879 MC |
257 | |
258 | /* Used for montgomery multiplication */ | |
0f113f3e MC |
259 | struct bn_mont_ctx_st { |
260 | int ri; /* number of bits in R */ | |
71883868 AP |
261 | BIGNUM RR; /* used to convert to montgomery form, |
262 | possibly zero-padded */ | |
0f113f3e MC |
263 | BIGNUM N; /* The modulus */ |
264 | BIGNUM Ni; /* R*(1/R mod N) - N*Ni = 1 (Ni is only | |
265 | * stored for bignum algorithm) */ | |
266 | BN_ULONG n0[2]; /* least significant word(s) of Ni; (type | |
267 | * changed with 0.9.9, was "BN_ULONG n0;" | |
268 | * before) */ | |
269 | int flags; | |
270 | }; | |
271 | ||
272 | /* | |
273 | * Used for reciprocal division/mod functions It cannot be shared between | |
274 | * threads | |
19391879 | 275 | */ |
0f113f3e MC |
276 | struct bn_recp_ctx_st { |
277 | BIGNUM N; /* the divisor */ | |
278 | BIGNUM Nr; /* the reciprocal */ | |
279 | int num_bits; | |
280 | int shift; | |
281 | int flags; | |
282 | }; | |
19391879 MC |
283 | |
284 | /* Used for slow "generation" functions. */ | |
0f113f3e MC |
285 | struct bn_gencb_st { |
286 | unsigned int ver; /* To handle binary (in)compatibility */ | |
287 | void *arg; /* callback-specific data */ | |
288 | union { | |
e8aa8b6c | 289 | /* if (ver==1) - handles old style callbacks */ |
0f113f3e | 290 | void (*cb_1) (int, int, void *); |
e8aa8b6c | 291 | /* if (ver==2) - new callback style */ |
0f113f3e MC |
292 | int (*cb_2) (int, int, BN_GENCB *); |
293 | } cb; | |
294 | }; | |
19391879 | 295 | |
1d97c843 | 296 | /*- |
dc434bbc BM |
297 | * BN_window_bits_for_exponent_size -- macro for sliding window mod_exp functions |
298 | * | |
299 | * | |
300 | * For window size 'w' (w >= 2) and a random 'b' bits exponent, | |
301 | * the number of multiplications is a constant plus on average | |
302 | * | |
303 | * 2^(w-1) + (b-w)/(w+1); | |
304 | * | |
305 | * here 2^(w-1) is for precomputing the table (we actually need | |
306 | * entries only for windows that have the lowest bit set), and | |
307 | * (b-w)/(w+1) is an approximation for the expected number of | |
308 | * w-bit windows, not counting the first one. | |
309 | * | |
310 | * Thus we should use | |
311 | * | |
312 | * w >= 6 if b > 671 | |
313 | * w = 5 if 671 > b > 239 | |
314 | * w = 4 if 239 > b > 79 | |
315 | * w = 3 if 79 > b > 23 | |
316 | * w <= 2 if 23 > b | |
317 | * | |
318 | * (with draws in between). Very small exponents are often selected | |
319 | * with low Hamming weight, so we use w = 1 for b <= 23. | |
320 | */ | |
b0700d2c | 321 | # define BN_window_bits_for_exponent_size(b) \ |
0f113f3e MC |
322 | ((b) > 671 ? 6 : \ |
323 | (b) > 239 ? 5 : \ | |
324 | (b) > 79 ? 4 : \ | |
325 | (b) > 23 ? 3 : 1) | |
dc434bbc | 326 | |
0f113f3e | 327 | /* |
c2969ff6 | 328 | * BN_mod_exp_mont_consttime is based on the assumption that the L1 data cache |
0f113f3e | 329 | * line width of the target processor is at least the following value. |
46a64376 | 330 | */ |
0f113f3e MC |
331 | # define MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH ( 64 ) |
332 | # define MOD_EXP_CTIME_MIN_CACHE_LINE_MASK (MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH - 1) | |
46a64376 | 333 | |
0f113f3e MC |
334 | /* |
335 | * Window sizes optimized for fixed window size modular exponentiation | |
336 | * algorithm (BN_mod_exp_mont_consttime). To achieve the security goals of | |
337 | * BN_mode_exp_mont_consttime, the maximum size of the window must not exceed | |
338 | * log_2(MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH). Window size thresholds are | |
339 | * defined for cache line sizes of 32 and 64, cache line sizes where | |
340 | * log_2(32)=5 and log_2(64)=6 respectively. A window size of 7 should only be | |
341 | * used on processors that have a 128 byte or greater cache line size. | |
46a64376 | 342 | */ |
0f113f3e | 343 | # if MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH == 64 |
46a64376 BM |
344 | |
345 | # define BN_window_bits_for_ctime_exponent_size(b) \ | |
0f113f3e MC |
346 | ((b) > 937 ? 6 : \ |
347 | (b) > 306 ? 5 : \ | |
348 | (b) > 89 ? 4 : \ | |
349 | (b) > 22 ? 3 : 1) | |
350 | # define BN_MAX_WINDOW_BITS_FOR_CTIME_EXPONENT_SIZE (6) | |
46a64376 | 351 | |
0f113f3e | 352 | # elif MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH == 32 |
46a64376 BM |
353 | |
354 | # define BN_window_bits_for_ctime_exponent_size(b) \ | |
0f113f3e MC |
355 | ((b) > 306 ? 5 : \ |
356 | (b) > 89 ? 4 : \ | |
357 | (b) > 22 ? 3 : 1) | |
358 | # define BN_MAX_WINDOW_BITS_FOR_CTIME_EXPONENT_SIZE (5) | |
46a64376 | 359 | |
0f113f3e | 360 | # endif |
46a64376 | 361 | |
dfeab068 RE |
362 | /* Pentium pro 16,16,16,32,64 */ |
363 | /* Alpha 16,16,16,16.64 */ | |
0f113f3e MC |
364 | # define BN_MULL_SIZE_NORMAL (16)/* 32 */ |
365 | # define BN_MUL_RECURSIVE_SIZE_NORMAL (16)/* 32 less than */ | |
366 | # define BN_SQR_RECURSIVE_SIZE_NORMAL (16)/* 32 */ | |
367 | # define BN_MUL_LOW_RECURSIVE_SIZE_NORMAL (32)/* 32 */ | |
368 | # define BN_MONT_CTX_SET_SIZE_WORD (64)/* 32 */ | |
369 | ||
370 | /* | |
371 | * 2011-02-22 SMS. In various places, a size_t variable or a type cast to | |
372 | * size_t was used to perform integer-only operations on pointers. This | |
373 | * failed on VMS with 64-bit pointers (CC /POINTER_SIZE = 64) because size_t | |
374 | * is still only 32 bits. What's needed in these cases is an integer type | |
375 | * with the same size as a pointer, which size_t is not certain to be. The | |
376 | * only fix here is VMS-specific. | |
8d00f342 | 377 | */ |
0f113f3e MC |
378 | # if defined(OPENSSL_SYS_VMS) |
379 | # if __INITIAL_POINTER_SIZE == 64 | |
380 | # define PTR_SIZE_INT long long | |
381 | # else /* __INITIAL_POINTER_SIZE == 64 */ | |
382 | # define PTR_SIZE_INT int | |
383 | # endif /* __INITIAL_POINTER_SIZE == 64 [else] */ | |
384 | # elif !defined(PTR_SIZE_INT) /* defined(OPENSSL_SYS_VMS) */ | |
385 | # define PTR_SIZE_INT size_t | |
386 | # endif /* defined(OPENSSL_SYS_VMS) [else] */ | |
387 | ||
388 | # if !defined(OPENSSL_NO_ASM) && !defined(OPENSSL_NO_INLINE_ASM) && !defined(PEDANTIC) | |
fb81ac5e AP |
389 | /* |
390 | * BN_UMULT_HIGH section. | |
e3713c36 RS |
391 | * If the compiler doesn't support 2*N integer type, then you have to |
392 | * replace every N*N multiplication with 4 (N/2)*(N/2) accompanied by some | |
393 | * shifts and additions which unavoidably results in severe performance | |
394 | * penalties. Of course provided that the hardware is capable of producing | |
395 | * 2*N result... That's when you normally start considering assembler | |
396 | * implementation. However! It should be pointed out that some CPUs (e.g., | |
397 | * PowerPC, Alpha, and IA-64) provide *separate* instruction calculating | |
398 | * the upper half of the product placing the result into a general | |
399 | * purpose register. Now *if* the compiler supports inline assembler, | |
400 | * then it's not impossible to implement the "bignum" routines (and have | |
401 | * the compiler optimize 'em) exhibiting "native" performance in C. That's | |
402 | * what BN_UMULT_HIGH macro is about:-) Note that more recent compilers do | |
403 | * support 2*64 integer type, which is also used here. | |
fb81ac5e | 404 | */ |
7aca3298 | 405 | # if defined(__SIZEOF_INT128__) && __SIZEOF_INT128__==16 && \ |
e3713c36 | 406 | (defined(SIXTY_FOUR_BIT) || defined(SIXTY_FOUR_BIT_LONG)) |
5de32f22 | 407 | # define BN_UMULT_HIGH(a,b) (((uint128_t)(a)*(b))>>64) |
7aca3298 | 408 | # define BN_UMULT_LOHI(low,high,a,b) ({ \ |
5de32f22 | 409 | uint128_t ret=(uint128_t)(a)*(b); \ |
57c835ac | 410 | (high)=ret>>64; (low)=ret; }) |
7aca3298 | 411 | # elif defined(__alpha) && (defined(SIXTY_FOUR_BIT_LONG) || defined(SIXTY_FOUR_BIT)) |
0f113f3e MC |
412 | # if defined(__DECC) |
413 | # include <c_asm.h> | |
414 | # define BN_UMULT_HIGH(a,b) (BN_ULONG)asm("umulh %a0,%a1,%v0",(a),(b)) | |
415 | # elif defined(__GNUC__) && __GNUC__>=2 | |
57c835ac | 416 | # define BN_UMULT_HIGH(a,b) ({ \ |
0f113f3e MC |
417 | register BN_ULONG ret; \ |
418 | asm ("umulh %1,%2,%0" \ | |
419 | : "=r"(ret) \ | |
420 | : "r"(a), "r"(b)); \ | |
57c835ac | 421 | ret; }) |
0f113f3e | 422 | # endif /* compiler */ |
46288370 | 423 | # elif defined(_ARCH_PPC64) && defined(SIXTY_FOUR_BIT_LONG) |
0f113f3e | 424 | # if defined(__GNUC__) && __GNUC__>=2 |
57c835ac | 425 | # define BN_UMULT_HIGH(a,b) ({ \ |
0f113f3e MC |
426 | register BN_ULONG ret; \ |
427 | asm ("mulhdu %0,%1,%2" \ | |
428 | : "=r"(ret) \ | |
429 | : "r"(a), "r"(b)); \ | |
57c835ac | 430 | ret; }) |
0f113f3e MC |
431 | # endif /* compiler */ |
432 | # elif (defined(__x86_64) || defined(__x86_64__)) && \ | |
122396f2 | 433 | (defined(SIXTY_FOUR_BIT_LONG) || defined(SIXTY_FOUR_BIT)) |
0f113f3e | 434 | # if defined(__GNUC__) && __GNUC__>=2 |
57c835ac | 435 | # define BN_UMULT_HIGH(a,b) ({ \ |
0f113f3e MC |
436 | register BN_ULONG ret,discard; \ |
437 | asm ("mulq %3" \ | |
438 | : "=a"(discard),"=d"(ret) \ | |
439 | : "a"(a), "g"(b) \ | |
440 | : "cc"); \ | |
57c835ac AP |
441 | ret; }) |
442 | # define BN_UMULT_LOHI(low,high,a,b) \ | |
0f113f3e MC |
443 | asm ("mulq %3" \ |
444 | : "=a"(low),"=d"(high) \ | |
445 | : "a"(a),"g"(b) \ | |
446 | : "cc"); | |
447 | # endif | |
448 | # elif (defined(_M_AMD64) || defined(_M_X64)) && defined(SIXTY_FOUR_BIT) | |
449 | # if defined(_MSC_VER) && _MSC_VER>=1400 | |
450 | unsigned __int64 __umulh(unsigned __int64 a, unsigned __int64 b); | |
451 | unsigned __int64 _umul128(unsigned __int64 a, unsigned __int64 b, | |
452 | unsigned __int64 *h); | |
453 | # pragma intrinsic(__umulh,_umul128) | |
454 | # define BN_UMULT_HIGH(a,b) __umulh((a),(b)) | |
455 | # define BN_UMULT_LOHI(low,high,a,b) ((low)=_umul128((a),(b),&(high))) | |
456 | # endif | |
457 | # elif defined(__mips) && (defined(SIXTY_FOUR_BIT) || defined(SIXTY_FOUR_BIT_LONG)) | |
458 | # if defined(__GNUC__) && __GNUC__>=2 | |
7aca3298 | 459 | # define BN_UMULT_HIGH(a,b) ({ \ |
0f113f3e MC |
460 | register BN_ULONG ret; \ |
461 | asm ("dmultu %1,%2" \ | |
462 | : "=h"(ret) \ | |
463 | : "r"(a), "r"(b) : "l"); \ | |
464 | ret; }) | |
7aca3298 | 465 | # define BN_UMULT_LOHI(low,high,a,b) \ |
0f113f3e MC |
466 | asm ("dmultu %2,%3" \ |
467 | : "=l"(low),"=h"(high) \ | |
468 | : "r"(a), "r"(b)); | |
0f113f3e MC |
469 | # endif |
470 | # elif defined(__aarch64__) && defined(SIXTY_FOUR_BIT_LONG) | |
471 | # if defined(__GNUC__) && __GNUC__>=2 | |
57c835ac | 472 | # define BN_UMULT_HIGH(a,b) ({ \ |
0f113f3e MC |
473 | register BN_ULONG ret; \ |
474 | asm ("umulh %0,%1,%2" \ | |
475 | : "=r"(ret) \ | |
476 | : "r"(a), "r"(b)); \ | |
57c835ac | 477 | ret; }) |
0f113f3e MC |
478 | # endif |
479 | # endif /* cpu */ | |
480 | # endif /* OPENSSL_NO_ASM */ | |
fb81ac5e | 481 | |
a935791d | 482 | # ifdef BN_RAND_DEBUG |
0f113f3e MC |
483 | # define bn_clear_top2max(a) \ |
484 | { \ | |
485 | int ind = (a)->dmax - (a)->top; \ | |
486 | BN_ULONG *ftl = &(a)->d[(a)->top-1]; \ | |
487 | for (; ind != 0; ind--) \ | |
488 | *(++ftl) = 0x0; \ | |
489 | } | |
490 | # else | |
491 | # define bn_clear_top2max(a) | |
492 | # endif | |
493 | ||
494 | # ifdef BN_LLONG | |
46288370 AP |
495 | /******************************************************************* |
496 | * Using the long long type, has to be twice as wide as BN_ULONG... | |
497 | */ | |
498 | # define Lw(t) (((BN_ULONG)(t))&BN_MASK2) | |
499 | # define Hw(t) (((BN_ULONG)((t)>>BN_BITS2))&BN_MASK2) | |
500 | ||
0f113f3e MC |
501 | # define mul_add(r,a,w,c) { \ |
502 | BN_ULLONG t; \ | |
503 | t=(BN_ULLONG)w * (a) + (r) + (c); \ | |
504 | (r)= Lw(t); \ | |
505 | (c)= Hw(t); \ | |
506 | } | |
507 | ||
508 | # define mul(r,a,w,c) { \ | |
509 | BN_ULLONG t; \ | |
510 | t=(BN_ULLONG)w * (a) + (c); \ | |
511 | (r)= Lw(t); \ | |
512 | (c)= Hw(t); \ | |
513 | } | |
514 | ||
515 | # define sqr(r0,r1,a) { \ | |
516 | BN_ULLONG t; \ | |
517 | t=(BN_ULLONG)(a)*(a); \ | |
518 | (r0)=Lw(t); \ | |
519 | (r1)=Hw(t); \ | |
520 | } | |
521 | ||
522 | # elif defined(BN_UMULT_LOHI) | |
523 | # define mul_add(r,a,w,c) { \ | |
524 | BN_ULONG high,low,ret,tmp=(a); \ | |
525 | ret = (r); \ | |
526 | BN_UMULT_LOHI(low,high,w,tmp); \ | |
527 | ret += (c); \ | |
528 | (c) = (ret<(c))?1:0; \ | |
529 | (c) += high; \ | |
530 | ret += low; \ | |
531 | (c) += (ret<low)?1:0; \ | |
532 | (r) = ret; \ | |
533 | } | |
534 | ||
535 | # define mul(r,a,w,c) { \ | |
536 | BN_ULONG high,low,ret,ta=(a); \ | |
537 | BN_UMULT_LOHI(low,high,w,ta); \ | |
538 | ret = low + (c); \ | |
539 | (c) = high; \ | |
540 | (c) += (ret<low)?1:0; \ | |
541 | (r) = ret; \ | |
542 | } | |
543 | ||
544 | # define sqr(r0,r1,a) { \ | |
545 | BN_ULONG tmp=(a); \ | |
546 | BN_UMULT_LOHI(r0,r1,tmp,tmp); \ | |
547 | } | |
548 | ||
549 | # elif defined(BN_UMULT_HIGH) | |
550 | # define mul_add(r,a,w,c) { \ | |
551 | BN_ULONG high,low,ret,tmp=(a); \ | |
552 | ret = (r); \ | |
553 | high= BN_UMULT_HIGH(w,tmp); \ | |
554 | ret += (c); \ | |
555 | low = (w) * tmp; \ | |
556 | (c) = (ret<(c))?1:0; \ | |
557 | (c) += high; \ | |
558 | ret += low; \ | |
559 | (c) += (ret<low)?1:0; \ | |
560 | (r) = ret; \ | |
561 | } | |
562 | ||
563 | # define mul(r,a,w,c) { \ | |
564 | BN_ULONG high,low,ret,ta=(a); \ | |
565 | low = (w) * ta; \ | |
566 | high= BN_UMULT_HIGH(w,ta); \ | |
567 | ret = low + (c); \ | |
568 | (c) = high; \ | |
569 | (c) += (ret<low)?1:0; \ | |
570 | (r) = ret; \ | |
571 | } | |
572 | ||
573 | # define sqr(r0,r1,a) { \ | |
574 | BN_ULONG tmp=(a); \ | |
575 | (r0) = tmp * tmp; \ | |
576 | (r1) = BN_UMULT_HIGH(tmp,tmp); \ | |
577 | } | |
578 | ||
579 | # else | |
d02b48c6 RE |
580 | /************************************************************* |
581 | * No long long type | |
582 | */ | |
583 | ||
0f113f3e MC |
584 | # define LBITS(a) ((a)&BN_MASK2l) |
585 | # define HBITS(a) (((a)>>BN_BITS4)&BN_MASK2l) | |
586 | # define L2HBITS(a) (((a)<<BN_BITS4)&BN_MASK2) | |
d02b48c6 | 587 | |
0f113f3e MC |
588 | # define LLBITS(a) ((a)&BN_MASKl) |
589 | # define LHBITS(a) (((a)>>BN_BITS2)&BN_MASKl) | |
590 | # define LL2HBITS(a) ((BN_ULLONG)((a)&BN_MASKl)<<BN_BITS2) | |
d02b48c6 | 591 | |
0f113f3e MC |
592 | # define mul64(l,h,bl,bh) \ |
593 | { \ | |
594 | BN_ULONG m,m1,lt,ht; \ | |
d02b48c6 | 595 | \ |
0f113f3e MC |
596 | lt=l; \ |
597 | ht=h; \ | |
598 | m =(bh)*(lt); \ | |
599 | lt=(bl)*(lt); \ | |
600 | m1=(bl)*(ht); \ | |
601 | ht =(bh)*(ht); \ | |
602 | m=(m+m1)&BN_MASK2; if (m < m1) ht+=L2HBITS((BN_ULONG)1); \ | |
603 | ht+=HBITS(m); \ | |
604 | m1=L2HBITS(m); \ | |
605 | lt=(lt+m1)&BN_MASK2; if (lt < m1) ht++; \ | |
606 | (l)=lt; \ | |
607 | (h)=ht; \ | |
608 | } | |
609 | ||
610 | # define sqr64(lo,ho,in) \ | |
611 | { \ | |
612 | BN_ULONG l,h,m; \ | |
d02b48c6 | 613 | \ |
0f113f3e MC |
614 | h=(in); \ |
615 | l=LBITS(h); \ | |
616 | h=HBITS(h); \ | |
617 | m =(l)*(h); \ | |
618 | l*=l; \ | |
619 | h*=h; \ | |
620 | h+=(m&BN_MASK2h1)>>(BN_BITS4-1); \ | |
621 | m =(m&BN_MASK2l)<<(BN_BITS4+1); \ | |
622 | l=(l+m)&BN_MASK2; if (l < m) h++; \ | |
623 | (lo)=l; \ | |
624 | (ho)=h; \ | |
625 | } | |
626 | ||
627 | # define mul_add(r,a,bl,bh,c) { \ | |
628 | BN_ULONG l,h; \ | |
d02b48c6 | 629 | \ |
0f113f3e MC |
630 | h= (a); \ |
631 | l=LBITS(h); \ | |
632 | h=HBITS(h); \ | |
633 | mul64(l,h,(bl),(bh)); \ | |
d02b48c6 | 634 | \ |
0f113f3e MC |
635 | /* non-multiply part */ \ |
636 | l=(l+(c))&BN_MASK2; if (l < (c)) h++; \ | |
637 | (c)=(r); \ | |
638 | l=(l+(c))&BN_MASK2; if (l < (c)) h++; \ | |
639 | (c)=h&BN_MASK2; \ | |
640 | (r)=l; \ | |
641 | } | |
642 | ||
643 | # define mul(r,a,bl,bh,c) { \ | |
644 | BN_ULONG l,h; \ | |
d02b48c6 | 645 | \ |
0f113f3e MC |
646 | h= (a); \ |
647 | l=LBITS(h); \ | |
648 | h=HBITS(h); \ | |
649 | mul64(l,h,(bl),(bh)); \ | |
d02b48c6 | 650 | \ |
0f113f3e MC |
651 | /* non-multiply part */ \ |
652 | l+=(c); if ((l&BN_MASK2) < (c)) h++; \ | |
653 | (c)=h&BN_MASK2; \ | |
654 | (r)=l&BN_MASK2; \ | |
655 | } | |
656 | # endif /* !BN_LLONG */ | |
d02b48c6 | 657 | |
19391879 MC |
658 | void BN_RECP_CTX_init(BN_RECP_CTX *recp); |
659 | void BN_MONT_CTX_init(BN_MONT_CTX *ctx); | |
660 | ||
d59c7c81 | 661 | void bn_init(BIGNUM *a); |
0f113f3e MC |
662 | void bn_mul_normal(BN_ULONG *r, BN_ULONG *a, int na, BN_ULONG *b, int nb); |
663 | void bn_mul_comba8(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b); | |
664 | void bn_mul_comba4(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b); | |
cbd48ba6 | 665 | void bn_sqr_normal(BN_ULONG *r, const BN_ULONG *a, int n, BN_ULONG *tmp); |
0f113f3e MC |
666 | void bn_sqr_comba8(BN_ULONG *r, const BN_ULONG *a); |
667 | void bn_sqr_comba4(BN_ULONG *r, const BN_ULONG *a); | |
668 | int bn_cmp_words(const BN_ULONG *a, const BN_ULONG *b, int n); | |
669 | int bn_cmp_part_words(const BN_ULONG *a, const BN_ULONG *b, int cl, int dl); | |
670 | void bn_mul_recursive(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n2, | |
671 | int dna, int dnb, BN_ULONG *t); | |
672 | void bn_mul_part_recursive(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, | |
673 | int n, int tna, int tnb, BN_ULONG *t); | |
674 | void bn_sqr_recursive(BN_ULONG *r, const BN_ULONG *a, int n2, BN_ULONG *t); | |
675 | void bn_mul_low_normal(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n); | |
676 | void bn_mul_low_recursive(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n2, | |
677 | BN_ULONG *t); | |
6343829a | 678 | BN_ULONG bn_sub_part_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b, |
0f113f3e MC |
679 | int cl, int dl); |
680 | int bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, | |
681 | const BN_ULONG *np, const BN_ULONG *n0, int num); | |
58964a49 | 682 | |
879bd6e3 | 683 | BIGNUM *int_bn_mod_inverse(BIGNUM *in, |
0f113f3e MC |
684 | const BIGNUM *a, const BIGNUM *n, BN_CTX *ctx, |
685 | int *noinv); | |
879bd6e3 | 686 | |
99ba9fd0 MC |
687 | static ossl_inline BIGNUM *bn_expand(BIGNUM *a, int bits) |
688 | { | |
689 | if (bits > (INT_MAX - BN_BITS2 + 1)) | |
690 | return NULL; | |
691 | ||
e8aa8b6c | 692 | if (((bits+BN_BITS2-1)/BN_BITS2) <= (a)->dmax) |
99ba9fd0 MC |
693 | return a; |
694 | ||
695 | return bn_expand2((a),(bits+BN_BITS2-1)/BN_BITS2); | |
696 | } | |
697 | ||
36ec749f P |
698 | int ossl_bn_check_prime(const BIGNUM *w, int checks, BN_CTX *ctx, |
699 | int do_trial_division, BN_GENCB *cb); | |
42619397 | 700 | |
d02b48c6 | 701 | #endif |