1 /*
2 * Copyright 2011-2016 The OpenSSL Project Authors. All Rights Reserved.
3 *
4 * Licensed under the OpenSSL license (the "License"). You may not use
5 * this file except in compliance with the License. You can obtain a copy
6 * in the file LICENSE in the source distribution or at
7 * https://www.openssl.org/source/license.html
8 */
9
10 /* Copyright 2011 Google Inc.
11 *
12 * Licensed under the Apache License, Version 2.0 (the "License");
13 *
14 * you may not use this file except in compliance with the License.
15 * You may obtain a copy of the License at
16 *
17 * http://www.apache.org/licenses/LICENSE-2.0
18 *
19 * Unless required by applicable law or agreed to in writing, software
20 * distributed under the License is distributed on an "AS IS" BASIS,
21 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
22 * See the License for the specific language governing permissions and
23 * limitations under the License.
24 */
25
26 /*
27 * A 64-bit implementation of the NIST P-256 elliptic curve point multiplication
28 *
29 * OpenSSL integration was taken from Emilia Kasper's work in ecp_nistp224.c.
30 * Otherwise based on Emilia's P224 work, which was inspired by my curve25519
31 * work which got its smarts from Daniel J. Bernstein's work on the same.
32 */
33
34 #include <openssl/opensslconf.h>
35 #ifdef OPENSSL_NO_EC_NISTP_64_GCC_128
36 NON_EMPTY_TRANSLATION_UNIT
37 #else
38
39 # include <stdint.h>
40 # include <string.h>
41 # include <openssl/err.h>
42 # include "ec_lcl.h"
43
44 # if defined(__GNUC__) && (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 1))
45 /* even with gcc, the typedef won't work for 32-bit platforms */
46 typedef __uint128_t uint128_t; /* nonstandard; implemented by gcc on 64-bit
47 * platforms */
48 typedef __int128_t int128_t;
49 # else
50 # error "Need GCC 3.1 or later to define type uint128_t"
51 # endif
52
53 typedef uint8_t u8;
54 typedef uint32_t u32;
55 typedef uint64_t u64;
56 typedef int64_t s64;
57
58 /*
59 * The underlying field. P256 operates over GF(2^256-2^224+2^192+2^96-1). We
60 * can serialise an element of this field into 32 bytes. We call this an
61 * felem_bytearray.
62 */
63
64 typedef u8 felem_bytearray[32];
65
66 /*
67 * These are the parameters of P256, taken from FIPS 186-3, page 86. These
68 * values are big-endian.
69 */
70 static const felem_bytearray nistp256_curve_params[5] = {
71 {0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x01, /* p */
72 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
73 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff,
74 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff},
75 {0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x01, /* a = -3 */
76 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
77 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff,
78 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xfc}, /* b */
79 {0x5a, 0xc6, 0x35, 0xd8, 0xaa, 0x3a, 0x93, 0xe7,
80 0xb3, 0xeb, 0xbd, 0x55, 0x76, 0x98, 0x86, 0xbc,
81 0x65, 0x1d, 0x06, 0xb0, 0xcc, 0x53, 0xb0, 0xf6,
82 0x3b, 0xce, 0x3c, 0x3e, 0x27, 0xd2, 0x60, 0x4b},
83 {0x6b, 0x17, 0xd1, 0xf2, 0xe1, 0x2c, 0x42, 0x47, /* x */
84 0xf8, 0xbc, 0xe6, 0xe5, 0x63, 0xa4, 0x40, 0xf2,
85 0x77, 0x03, 0x7d, 0x81, 0x2d, 0xeb, 0x33, 0xa0,
86 0xf4, 0xa1, 0x39, 0x45, 0xd8, 0x98, 0xc2, 0x96},
87 {0x4f, 0xe3, 0x42, 0xe2, 0xfe, 0x1a, 0x7f, 0x9b, /* y */
88 0x8e, 0xe7, 0xeb, 0x4a, 0x7c, 0x0f, 0x9e, 0x16,
89 0x2b, 0xce, 0x33, 0x57, 0x6b, 0x31, 0x5e, 0xce,
90 0xcb, 0xb6, 0x40, 0x68, 0x37, 0xbf, 0x51, 0xf5}
91 };
92
93 /*-
94 * The representation of field elements.
95 * ------------------------------------
96 *
97 * We represent field elements with either four 128-bit values, eight 128-bit
98 * values, or four 64-bit values. The field element represented is:
99 * v[0]*2^0 + v[1]*2^64 + v[2]*2^128 + v[3]*2^192 (mod p)
100 * or:
101 * v[0]*2^0 + v[1]*2^64 + v[2]*2^128 + ... + v[7]*2^448 (mod p)
102 *
103 * 128-bit values are called 'limbs'. Since the limbs are spaced only 64 bits
104 * apart, but are 128 bits wide, the most significant bits of each limb overlap
105 * with the least significant bits of the next.
106 *
107 * A field element with four limbs is an 'felem'. One with eight limbs is a
108 * 'longfelem'.
109 *
110 * A field element with four 64-bit values is called a 'smallfelem'. Small
111 * values are used as intermediate values before multiplication.
112 */
113
114 # define NLIMBS 4
115
116 typedef uint128_t limb;
117 typedef limb felem[NLIMBS];
118 typedef limb longfelem[NLIMBS * 2];
119 typedef u64 smallfelem[NLIMBS];
120
121 /* This is the value of the prime as four 64-bit words, little-endian. */
122 static const u64 kPrime[4] =
123 { 0xfffffffffffffffful, 0xffffffff, 0, 0xffffffff00000001ul };
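/*-
 * For reference, kPrime is just p in the little-endian four-limb form
 * described above:
 *   kPrime[0] + kPrime[1]*2^64 + kPrime[2]*2^128 + kPrime[3]*2^192
 *     = 0xffffffff00000001 0000000000000000 00000000ffffffff ffffffffffffffff
 *     = 2^256 - 2^224 + 2^192 + 2^96 - 1,
 * the same value stored big-endian in nistp256_curve_params[0].
 */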
124 static const u64 bottom63bits = 0x7ffffffffffffffful;
125
126 /*
127 * bin32_to_felem takes a little-endian byte array and converts it into felem
128 * form. This assumes that the CPU is little-endian.
129 */
130 static void bin32_to_felem(felem out, const u8 in[32])
131 {
132 out[0] = *((u64 *)&in[0]);
133 out[1] = *((u64 *)&in[8]);
134 out[2] = *((u64 *)&in[16]);
135 out[3] = *((u64 *)&in[24]);
136 }
137
138 /*
139 * smallfelem_to_bin32 takes a smallfelem and serialises it into a little-
140 * endian, 32 byte array. This assumes that the CPU is little-endian.
141 */
142 static void smallfelem_to_bin32(u8 out[32], const smallfelem in)
143 {
144 *((u64 *)&out[0]) = in[0];
145 *((u64 *)&out[8]) = in[1];
146 *((u64 *)&out[16]) = in[2];
147 *((u64 *)&out[24]) = in[3];
148 }
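# if 0
/*-
 * Illustrative sketch, not compiled and not used anywhere: the converters
 * above rely on unaligned, type-punned 64-bit loads/stores, which is fine
 * for the little-endian 64-bit targets this file is built for. The same
 * conversion can be written portably with memcpy (the helper name below is
 * illustrative only); it still assumes a little-endian CPU for the limb
 * values.
 */
static void bin32_to_felem_sketch(felem out, const u8 in[32])
{
    u64 limbs[4];

    memcpy(limbs, in, sizeof(limbs));
    out[0] = limbs[0];
    out[1] = limbs[1];
    out[2] = limbs[2];
    out[3] = limbs[3];
}
# endif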
149
150 /* To preserve endianness when using BN_bn2bin and BN_bin2bn */
151 static void flip_endian(u8 *out, const u8 *in, unsigned len)
152 {
153 unsigned i;
154 for (i = 0; i < len; ++i)
155 out[i] = in[len - 1 - i];
156 }
157
158 /* BN_to_felem converts an OpenSSL BIGNUM into an felem */
159 static int BN_to_felem(felem out, const BIGNUM *bn)
160 {
161 felem_bytearray b_in;
162 felem_bytearray b_out;
163 unsigned num_bytes;
164
165 /* BN_bn2bin eats leading zeroes */
166 memset(b_out, 0, sizeof(b_out));
167 num_bytes = BN_num_bytes(bn);
168 if (num_bytes > sizeof b_out) {
169 ECerr(EC_F_BN_TO_FELEM, EC_R_BIGNUM_OUT_OF_RANGE);
170 return 0;
171 }
172 if (BN_is_negative(bn)) {
173 ECerr(EC_F_BN_TO_FELEM, EC_R_BIGNUM_OUT_OF_RANGE);
174 return 0;
175 }
176 num_bytes = BN_bn2bin(bn, b_in);
177 flip_endian(b_out, b_in, num_bytes);
178 bin32_to_felem(out, b_out);
179 return 1;
180 }
181
182 /* smallfelem_to_BN converts a smallfelem into an OpenSSL BIGNUM */
183 static BIGNUM *smallfelem_to_BN(BIGNUM *out, const smallfelem in)
184 {
185 felem_bytearray b_in, b_out;
186 smallfelem_to_bin32(b_in, in);
187 flip_endian(b_out, b_in, sizeof b_out);
188 return BN_bin2bn(b_out, sizeof b_out, out);
189 }
190
191 /*-
192 * Field operations
193 * ----------------
194 */
195
196 static void smallfelem_one(smallfelem out)
197 {
198 out[0] = 1;
199 out[1] = 0;
200 out[2] = 0;
201 out[3] = 0;
202 }
203
204 static void smallfelem_assign(smallfelem out, const smallfelem in)
205 {
206 out[0] = in[0];
207 out[1] = in[1];
208 out[2] = in[2];
209 out[3] = in[3];
210 }
211
212 static void felem_assign(felem out, const felem in)
213 {
214 out[0] = in[0];
215 out[1] = in[1];
216 out[2] = in[2];
217 out[3] = in[3];
218 }
219
220 /* felem_sum sets out = out + in. */
221 static void felem_sum(felem out, const felem in)
222 {
223 out[0] += in[0];
224 out[1] += in[1];
225 out[2] += in[2];
226 out[3] += in[3];
227 }
228
229 /* felem_small_sum sets out = out + in. */
230 static void felem_small_sum(felem out, const smallfelem in)
231 {
232 out[0] += in[0];
233 out[1] += in[1];
234 out[2] += in[2];
235 out[3] += in[3];
236 }
237
238 /* felem_scalar sets out = out * scalar */
239 static void felem_scalar(felem out, const u64 scalar)
240 {
241 out[0] *= scalar;
242 out[1] *= scalar;
243 out[2] *= scalar;
244 out[3] *= scalar;
245 }
246
247 /* longfelem_scalar sets out = out * scalar */
248 static void longfelem_scalar(longfelem out, const u64 scalar)
249 {
250 out[0] *= scalar;
251 out[1] *= scalar;
252 out[2] *= scalar;
253 out[3] *= scalar;
254 out[4] *= scalar;
255 out[5] *= scalar;
256 out[6] *= scalar;
257 out[7] *= scalar;
258 }
259
260 # define two105m41m9 (((limb)1) << 105) - (((limb)1) << 41) - (((limb)1) << 9)
261 # define two105 (((limb)1) << 105)
262 # define two105m41p9 (((limb)1) << 105) - (((limb)1) << 41) + (((limb)1) << 9)
263
264 /* zero105 is 0 mod p */
265 static const felem zero105 =
266 { two105m41m9, two105, two105m41p9, two105m41p9 };
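/*-
 * To see that zero105 is indeed 0 mod p: as an integer it equals
 *   (2^105 - 2^41 - 2^9) + 2^105*2^64 + (2^105 - 2^41 + 2^9)*2^128
 *                        + (2^105 - 2^41 + 2^9)*2^192
 *   = 2^297 + 2^201 + 2^137 + 2^105 - 2^41 - 2^9,
 * and folding 2^297 down with 2^256 == 2^224 - 2^192 - 2^96 + 1 (mod p)
 * (then the 2^265 term this produces) cancels every remaining term. Adding
 * zero105 before a subtraction therefore changes nothing mod p while
 * keeping all limbs non-negative.
 */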
267
268 /*-
269 * smallfelem_neg sets |out| to |-small|
270 * On exit:
271 * out[i] <= 2^105
272 */
273 static void smallfelem_neg(felem out, const smallfelem small)
274 {
275 /* In order to prevent underflow, we subtract from 0 mod p. */
276 out[0] = zero105[0] - small[0];
277 out[1] = zero105[1] - small[1];
278 out[2] = zero105[2] - small[2];
279 out[3] = zero105[3] - small[3];
280 }
281
282 /*-
283 * felem_diff subtracts |in| from |out|
284 * On entry:
285 * in[i] < 2^104
286 * On exit:
287 * out[i] < out[i] + 2^105
288 */
289 static void felem_diff(felem out, const felem in)
290 {
291 /*
292 * In order to prevent underflow, we add 0 mod p before subtracting.
293 */
294 out[0] += zero105[0];
295 out[1] += zero105[1];
296 out[2] += zero105[2];
297 out[3] += zero105[3];
298
299 out[0] -= in[0];
300 out[1] -= in[1];
301 out[2] -= in[2];
302 out[3] -= in[3];
303 }
304
305 # define two107m43m11 (((limb)1) << 107) - (((limb)1) << 43) - (((limb)1) << 11)
306 # define two107 (((limb)1) << 107)
307 # define two107m43p11 (((limb)1) << 107) - (((limb)1) << 43) + (((limb)1) << 11)
308
309 /* zero107 is 0 mod p */
310 static const felem zero107 =
311 { two107m43m11, two107, two107m43p11, two107m43p11 };
312
313 /*-
314 * An alternative felem_diff for larger inputs |in|
315 * felem_diff_zero107 subtracts |in| from |out|
316 * On entry:
317 * in[i] < 2^106
318 * On exit:
319 * out[i] < out[i] + 2^107
320 */
321 static void felem_diff_zero107(felem out, const felem in)
322 {
323 /*
324 * In order to prevent underflow, we add 0 mod p before subtracting.
325 */
326 out[0] += zero107[0];
327 out[1] += zero107[1];
328 out[2] += zero107[2];
329 out[3] += zero107[3];
330
331 out[0] -= in[0];
332 out[1] -= in[1];
333 out[2] -= in[2];
334 out[3] -= in[3];
335 }
336
337 /*-
338 * longfelem_diff subtracts |in| from |out|
339 * On entry:
340 * in[i] < 7*2^67
341 * On exit:
342 * out[i] < out[i] + 2^70 + 2^40
343 */
344 static void longfelem_diff(longfelem out, const longfelem in)
345 {
346 static const limb two70m8p6 =
347 (((limb) 1) << 70) - (((limb) 1) << 8) + (((limb) 1) << 6);
348 static const limb two70p40 = (((limb) 1) << 70) + (((limb) 1) << 40);
349 static const limb two70 = (((limb) 1) << 70);
350 static const limb two70m40m38p6 =
351 (((limb) 1) << 70) - (((limb) 1) << 40) - (((limb) 1) << 38) +
352 (((limb) 1) << 6);
353 static const limb two70m6 = (((limb) 1) << 70) - (((limb) 1) << 6);
354
355 /* add 0 mod p to avoid underflow */
356 out[0] += two70m8p6;
357 out[1] += two70p40;
358 out[2] += two70;
359 out[3] += two70m40m38p6;
360 out[4] += two70m6;
361 out[5] += two70m6;
362 out[6] += two70m6;
363 out[7] += two70m6;
364
365 /* in[i] < 7*2^67 < 2^70 - 2^40 - 2^38 + 2^6 */
366 out[0] -= in[0];
367 out[1] -= in[1];
368 out[2] -= in[2];
369 out[3] -= in[3];
370 out[4] -= in[4];
371 out[5] -= in[5];
372 out[6] -= in[6];
373 out[7] -= in[7];
374 }
375
376 # define two64m0 (((limb)1) << 64) - 1
377 # define two110p32m0 (((limb)1) << 110) + (((limb)1) << 32) - 1
378 # define two64m46 (((limb)1) << 64) - (((limb)1) << 46)
379 # define two64m32 (((limb)1) << 64) - (((limb)1) << 32)
380
381 /* zero110 is 0 mod p */
382 static const felem zero110 = { two64m0, two110p32m0, two64m46, two64m32 };
383
384 /*-
385 * felem_shrink converts an felem into a smallfelem. The result isn't quite
386 * minimal as the value may be greater than p.
387 *
388 * On entry:
389 * in[i] < 2^109
390 * On exit:
391 * out[i] < 2^64
392 */
393 static void felem_shrink(smallfelem out, const felem in)
394 {
395 felem tmp;
396 u64 a, b, mask;
397 s64 high, low;
398 static const u64 kPrime3Test = 0x7fffffff00000001ul; /* 2^63 - 2^32 + 1 */
399
400 /* Carry 2->3 */
401 tmp[3] = zero110[3] + in[3] + ((u64)(in[2] >> 64));
402 /* tmp[3] < 2^110 */
403
404 tmp[2] = zero110[2] + (u64)in[2];
405 tmp[0] = zero110[0] + in[0];
406 tmp[1] = zero110[1] + in[1];
407 /* tmp[0] < 2^110, tmp[1] < 2^111, tmp[2] < 2^65 */
408
409 /*
410 * We perform two partial reductions where we eliminate the high-word of
411 * tmp[3]. We don't update the other words till the end.
412 */
413 a = tmp[3] >> 64; /* a < 2^46 */
414 tmp[3] = (u64)tmp[3];
415 tmp[3] -= a;
416 tmp[3] += ((limb) a) << 32;
417 /* tmp[3] < 2^79 */
418
419 b = a;
420 a = tmp[3] >> 64; /* a < 2^15 */
421 b += a; /* b < 2^46 + 2^15 < 2^47 */
422 tmp[3] = (u64)tmp[3];
423 tmp[3] -= a;
424 tmp[3] += ((limb) a) << 32;
425 /* tmp[3] < 2^64 + 2^47 */
426
427 /*
428 * This adjusts the other two words to complete the two partial
429 * reductions.
430 */
431 tmp[0] += b;
432 tmp[1] -= (((limb) b) << 32);
433
434 /*
435 * In order to make space in tmp[3] for the carry from 2 -> 3, we
436 * conditionally subtract kPrime if tmp[3] is large enough.
437 */
438 high = tmp[3] >> 64;
439 /* As tmp[3] < 2^65, high is either 1 or 0 */
440 high <<= 63;
441 high >>= 63;
442 /*-
443 * high is:
444 * all ones if the high word of tmp[3] is 1
445 * all zeros if the high word of tmp[3] is 0 */
446 low = tmp[3];
447 mask = low >> 63;
448 /*-
449 * mask is:
450 * all ones if the MSB of low is 1
451 * all zeros if the MSB of low is 0 */
452 low &= bottom63bits;
453 low -= kPrime3Test;
454 /* if low was greater than kPrime3Test then the MSB is zero */
455 low = ~low;
456 low >>= 63;
457 /*-
458 * low is:
459 * all ones if low was > kPrime3Test
460 * all zeros if low was <= kPrime3Test */
461 mask = (mask & low) | high;
462 tmp[0] -= mask & kPrime[0];
463 tmp[1] -= mask & kPrime[1];
464 /* kPrime[2] is zero, so omitted */
465 tmp[3] -= mask & kPrime[3];
466 /* tmp[3] < 2**64 - 2**32 + 1 */
467
468 tmp[1] += ((u64)(tmp[0] >> 64));
469 tmp[0] = (u64)tmp[0];
470 tmp[2] += ((u64)(tmp[1] >> 64));
471 tmp[1] = (u64)tmp[1];
472 tmp[3] += ((u64)(tmp[2] >> 64));
473 tmp[2] = (u64)tmp[2];
474 /* tmp[i] < 2^64 */
475
476 out[0] = tmp[0];
477 out[1] = tmp[1];
478 out[2] = tmp[2];
479 out[3] = tmp[3];
480 }
481
482 /* smallfelem_expand converts a smallfelem to an felem */
483 static void smallfelem_expand(felem out, const smallfelem in)
484 {
485 out[0] = in[0];
486 out[1] = in[1];
487 out[2] = in[2];
488 out[3] = in[3];
489 }
490
491 /*-
492 * smallfelem_square sets |out| = |small|^2
493 * On entry:
494 * small[i] < 2^64
495 * On exit:
496 * out[i] < 7 * 2^64 < 2^67
497 */
498 static void smallfelem_square(longfelem out, const smallfelem small)
499 {
500 limb a;
501 u64 high, low;
502
503 a = ((uint128_t) small[0]) * small[0];
504 low = a;
505 high = a >> 64;
506 out[0] = low;
507 out[1] = high;
508
509 a = ((uint128_t) small[0]) * small[1];
510 low = a;
511 high = a >> 64;
512 out[1] += low;
513 out[1] += low;
514 out[2] = high;
515
516 a = ((uint128_t) small[0]) * small[2];
517 low = a;
518 high = a >> 64;
519 out[2] += low;
520 out[2] *= 2;
521 out[3] = high;
522
523 a = ((uint128_t) small[0]) * small[3];
524 low = a;
525 high = a >> 64;
526 out[3] += low;
527 out[4] = high;
528
529 a = ((uint128_t) small[1]) * small[2];
530 low = a;
531 high = a >> 64;
532 out[3] += low;
533 out[3] *= 2;
534 out[4] += high;
535
536 a = ((uint128_t) small[1]) * small[1];
537 low = a;
538 high = a >> 64;
539 out[2] += low;
540 out[3] += high;
541
542 a = ((uint128_t) small[1]) * small[3];
543 low = a;
544 high = a >> 64;
545 out[4] += low;
546 out[4] *= 2;
547 out[5] = high;
548
549 a = ((uint128_t) small[2]) * small[3];
550 low = a;
551 high = a >> 64;
552 out[5] += low;
553 out[5] *= 2;
554 out[6] = high;
555 out[6] += high;
556
557 a = ((uint128_t) small[2]) * small[2];
558 low = a;
559 high = a >> 64;
560 out[4] += low;
561 out[5] += high;
562
563 a = ((uint128_t) small[3]) * small[3];
564 low = a;
565 high = a >> 64;
566 out[6] += low;
567 out[7] = high;
568 }
569
570 /*-
571 * felem_square sets |out| = |in|^2
572 * On entry:
573 * in[i] < 2^109
574 * On exit:
575 * out[i] < 7 * 2^64 < 2^67
576 */
577 static void felem_square(longfelem out, const felem in)
578 {
579 u64 small[4];
580 felem_shrink(small, in);
581 smallfelem_square(out, small);
582 }
583
584 /*-
585 * smallfelem_mul sets |out| = |small1| * |small2|
586 * On entry:
587 * small1[i] < 2^64
588 * small2[i] < 2^64
589 * On exit:
590 * out[i] < 7 * 2^64 < 2^67
591 */
592 static void smallfelem_mul(longfelem out, const smallfelem small1,
593 const smallfelem small2)
594 {
595 limb a;
596 u64 high, low;
597
598 a = ((uint128_t) small1[0]) * small2[0];
599 low = a;
600 high = a >> 64;
601 out[0] = low;
602 out[1] = high;
603
604 a = ((uint128_t) small1[0]) * small2[1];
605 low = a;
606 high = a >> 64;
607 out[1] += low;
608 out[2] = high;
609
610 a = ((uint128_t) small1[1]) * small2[0];
611 low = a;
612 high = a >> 64;
613 out[1] += low;
614 out[2] += high;
615
616 a = ((uint128_t) small1[0]) * small2[2];
617 low = a;
618 high = a >> 64;
619 out[2] += low;
620 out[3] = high;
621
622 a = ((uint128_t) small1[1]) * small2[1];
623 low = a;
624 high = a >> 64;
625 out[2] += low;
626 out[3] += high;
627
628 a = ((uint128_t) small1[2]) * small2[0];
629 low = a;
630 high = a >> 64;
631 out[2] += low;
632 out[3] += high;
633
634 a = ((uint128_t) small1[0]) * small2[3];
635 low = a;
636 high = a >> 64;
637 out[3] += low;
638 out[4] = high;
639
640 a = ((uint128_t) small1[1]) * small2[2];
641 low = a;
642 high = a >> 64;
643 out[3] += low;
644 out[4] += high;
645
646 a = ((uint128_t) small1[2]) * small2[1];
647 low = a;
648 high = a >> 64;
649 out[3] += low;
650 out[4] += high;
651
652 a = ((uint128_t) small1[3]) * small2[0];
653 low = a;
654 high = a >> 64;
655 out[3] += low;
656 out[4] += high;
657
658 a = ((uint128_t) small1[1]) * small2[3];
659 low = a;
660 high = a >> 64;
661 out[4] += low;
662 out[5] = high;
663
664 a = ((uint128_t) small1[2]) * small2[2];
665 low = a;
666 high = a >> 64;
667 out[4] += low;
668 out[5] += high;
669
670 a = ((uint128_t) small1[3]) * small2[1];
671 low = a;
672 high = a >> 64;
673 out[4] += low;
674 out[5] += high;
675
676 a = ((uint128_t) small1[2]) * small2[3];
677 low = a;
678 high = a >> 64;
679 out[5] += low;
680 out[6] = high;
681
682 a = ((uint128_t) small1[3]) * small2[2];
683 low = a;
684 high = a >> 64;
685 out[5] += low;
686 out[6] += high;
687
688 a = ((uint128_t) small1[3]) * small2[3];
689 low = a;
690 high = a >> 64;
691 out[6] += low;
692 out[7] = high;
693 }
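# if 0
/*-
 * Illustrative sketch, not compiled and not used anywhere: smallfelem_mul
 * above performs the same low/high accumulation as this looped schoolbook
 * version (the helper name is illustrative only); the real code is fully
 * unrolled so that the exact order of the additions, and hence the limb
 * bounds, are easy to track. smallfelem_square additionally shares the
 * symmetric products by doubling.
 */
static void schoolbook_mul_sketch(longfelem out, const smallfelem a,
                                  const smallfelem b)
{
    unsigned i, j;

    memset(out, 0, sizeof(longfelem));
    for (i = 0; i < NLIMBS; i++) {
        for (j = 0; j < NLIMBS; j++) {
            uint128_t prod = ((uint128_t) a[i]) * b[j];
            out[i + j] += (u64)prod;      /* low 64 bits of the product */
            out[i + j + 1] += prod >> 64; /* high 64 bits of the product */
        }
    }
}
# endif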
694
695 /*-
696 * felem_mul sets |out| = |in1| * |in2|
697 * On entry:
698 * in1[i] < 2^109
699 * in2[i] < 2^109
700 * On exit:
701 * out[i] < 7 * 2^64 < 2^67
702 */
703 static void felem_mul(longfelem out, const felem in1, const felem in2)
704 {
705 smallfelem small1, small2;
706 felem_shrink(small1, in1);
707 felem_shrink(small2, in2);
708 smallfelem_mul(out, small1, small2);
709 }
710
711 /*-
712 * felem_small_mul sets |out| = |small1| * |in2|
713 * On entry:
714 * small1[i] < 2^64
715 * in2[i] < 2^109
716 * On exit:
717 * out[i] < 7 * 2^64 < 2^67
718 */
719 static void felem_small_mul(longfelem out, const smallfelem small1,
720 const felem in2)
721 {
722 smallfelem small2;
723 felem_shrink(small2, in2);
724 smallfelem_mul(out, small1, small2);
725 }
726
727 # define two100m36m4 (((limb)1) << 100) - (((limb)1) << 36) - (((limb)1) << 4)
728 # define two100 (((limb)1) << 100)
729 # define two100m36p4 (((limb)1) << 100) - (((limb)1) << 36) + (((limb)1) << 4)
730 /* zero100 is 0 mod p */
731 static const felem zero100 =
732 { two100m36m4, two100, two100m36p4, two100m36p4 };
733
734 /*-
735 * Internal function for the different flavours of felem_reduce.
736 * felem_reduce_ reduces the higher coefficients in[4]-in[7].
737 * On entry:
738 * out[0] >= in[6] + 2^32*in[6] + in[7] + 2^32*in[7]
739 * out[1] >= in[7] + 2^32*in[4]
740 * out[2] >= in[5] + 2^32*in[5]
741 * out[3] >= in[4] + 2^32*in[5] + 2^32*in[6]
742 * On exit:
743 * out[0] <= out[0] + in[4] + 2^32*in[5]
744 * out[1] <= out[1] + in[5] + 2^33*in[6]
745 * out[2] <= out[2] + in[7] + 2*in[6] + 2^33*in[7]
746 * out[3] <= out[3] + 2^32*in[4] + 3*in[7]
747 */
748 static void felem_reduce_(felem out, const longfelem in)
749 {
750 int128_t c;
751 /* combine common terms from below */
752 c = in[4] + (in[5] << 32);
753 out[0] += c;
754 out[3] -= c;
755
756 c = in[5] - in[7];
757 out[1] += c;
758 out[2] -= c;
759
760 /* the remaining terms */
761 /* 256: [(0,1),(96,-1),(192,-1),(224,1)] */
762 out[1] -= (in[4] << 32);
763 out[3] += (in[4] << 32);
764
765 /* 320: [(32,1),(64,1),(128,-1),(160,-1),(224,-1)] */
766 out[2] -= (in[5] << 32);
767
768 /* 384: [(0,-1),(32,-1),(96,2),(128,2),(224,-1)] */
769 out[0] -= in[6];
770 out[0] -= (in[6] << 32);
771 out[1] += (in[6] << 33);
772 out[2] += (in[6] * 2);
773 out[3] -= (in[6] << 32);
774
775 /* 448: [(0,-1),(32,-1),(64,-1),(128,1),(160,2),(192,3)] */
776 out[0] -= in[7];
777 out[0] -= (in[7] << 32);
778 out[2] += (in[7] << 33);
779 out[3] += (in[7] * 3);
780 }
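/*-
 * The bracketed annotations above encode, for each of the high limbs
 * in[4]..in[7] (weights 2^256..2^448), the residue of that power of two
 * mod p as (bit offset, coefficient) pairs. For example, since
 *   p = 2^256 - 2^224 + 2^192 + 2^96 - 1,
 * we have
 *   2^256 == 2^224 - 2^192 - 2^96 + 1 (mod p),
 * which is the line "256: [(0,1),(96,-1),(192,-1),(224,1)]": in[4] folds
 * back in with coefficient +1 at bit 0, -1 at bit 96, -1 at bit 192 and
 * +1 at bit 224. The 320/384/448 lines follow by shifting and re-reducing.
 */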
781
782 /*-
783 * felem_reduce converts a longfelem into an felem.
784 * To be called directly after felem_square or felem_mul.
785 * On entry:
786 * in[0] < 2^64, in[1] < 3*2^64, in[2] < 5*2^64, in[3] < 7*2^64
787 * in[4] < 7*2^64, in[5] < 5*2^64, in[6] < 3*2^64, in[7] < 2^64
788 * On exit:
789 * out[i] < 2^101
790 */
791 static void felem_reduce(felem out, const longfelem in)
792 {
793 out[0] = zero100[0] + in[0];
794 out[1] = zero100[1] + in[1];
795 out[2] = zero100[2] + in[2];
796 out[3] = zero100[3] + in[3];
797
798 felem_reduce_(out, in);
799
800 /*-
801 * out[0] > 2^100 - 2^36 - 2^4 - 3*2^64 - 3*2^96 - 2^64 - 2^96 > 0
802 * out[1] > 2^100 - 2^64 - 7*2^96 > 0
803 * out[2] > 2^100 - 2^36 + 2^4 - 5*2^64 - 5*2^96 > 0
804 * out[3] > 2^100 - 2^36 + 2^4 - 7*2^64 - 5*2^96 - 3*2^96 > 0
805 *
806 * out[0] < 2^100 + 2^64 + 7*2^64 + 5*2^96 < 2^101
807 * out[1] < 2^100 + 3*2^64 + 5*2^64 + 3*2^97 < 2^101
808 * out[2] < 2^100 + 5*2^64 + 2^64 + 3*2^65 + 2^97 < 2^101
809 * out[3] < 2^100 + 7*2^64 + 7*2^96 + 3*2^64 < 2^101
810 */
811 }
812
813 /*-
814 * felem_reduce_zero105 converts a larger longfelem into an felem.
815 * On entry:
816 * in[0] < 2^71
817 * On exit:
818 * out[i] < 2^106
819 */
820 static void felem_reduce_zero105(felem out, const longfelem in)
821 {
822 out[0] = zero105[0] + in[0];
823 out[1] = zero105[1] + in[1];
824 out[2] = zero105[2] + in[2];
825 out[3] = zero105[3] + in[3];
826
827 felem_reduce_(out, in);
828
829 /*-
830 * out[0] > 2^105 - 2^41 - 2^9 - 2^71 - 2^103 - 2^71 - 2^103 > 0
831 * out[1] > 2^105 - 2^71 - 2^103 > 0
832 * out[2] > 2^105 - 2^41 + 2^9 - 2^71 - 2^103 > 0
833 * out[3] > 2^105 - 2^41 + 2^9 - 2^71 - 2^103 - 2^103 > 0
834 *
835 * out[0] < 2^105 + 2^71 + 2^71 + 2^103 < 2^106
836 * out[1] < 2^105 + 2^71 + 2^71 + 2^103 < 2^106
837 * out[2] < 2^105 + 2^71 + 2^71 + 2^71 + 2^103 < 2^106
838 * out[3] < 2^105 + 2^71 + 2^103 + 2^71 < 2^106
839 */
840 }
841
842 /*
843 * subtract_u64 sets *result = *result - v and *carry to one if the
844 * subtraction underflowed.
845 */
846 static void subtract_u64(u64 *result, u64 *carry, u64 v)
847 {
848 uint128_t r = *result;
849 r -= v;
850 *carry = (r >> 64) & 1;
851 *result = (u64)r;
852 }
853
854 /*
855 * felem_contract converts |in| to its unique, minimal representation. On
856 * entry: in[i] < 2^109
857 */
858 static void felem_contract(smallfelem out, const felem in)
859 {
860 unsigned i;
861 u64 all_equal_so_far = 0, result = 0, carry;
862
863 felem_shrink(out, in);
864 /* out is minimal except that the value might be > p */
865
866 all_equal_so_far--;
867 /*
868 * We are doing a constant time test if out >= kPrime. We need to compare
869 * each u64, from most-significant to least significant. For each one, if
870 * all words so far have been equal (m is all ones) then a non-equal
871 * result is the answer. Otherwise we continue.
872 */
873 for (i = 3; i < 4; i--) {
874 u64 equal;
875 uint128_t a = ((uint128_t) kPrime[i]) - out[i];
876 /*
877 * if out[i] > kPrime[i] then a will underflow and the high 64-bits
878 * will all be set.
879 */
880 result |= all_equal_so_far & ((u64)(a >> 64));
881
882 /*
883 * if kPrime[i] == out[i] then |equal| will be all zeros and the
884 * decrement will make it all ones.
885 */
886 equal = kPrime[i] ^ out[i];
887 equal--;
888 equal &= equal << 32;
889 equal &= equal << 16;
890 equal &= equal << 8;
891 equal &= equal << 4;
892 equal &= equal << 2;
893 equal &= equal << 1;
894 equal = ((s64) equal) >> 63;
895
896 all_equal_so_far &= equal;
897 }
898
899 /*
900 * if all_equal_so_far is still all ones then the two values are equal
901 * and so out >= kPrime is true.
902 */
903 result |= all_equal_so_far;
904
905 /* if out >= kPrime then we subtract kPrime. */
906 subtract_u64(&out[0], &carry, result & kPrime[0]);
907 subtract_u64(&out[1], &carry, carry);
908 subtract_u64(&out[2], &carry, carry);
909 subtract_u64(&out[3], &carry, carry);
910
911 subtract_u64(&out[1], &carry, result & kPrime[1]);
912 subtract_u64(&out[2], &carry, carry);
913 subtract_u64(&out[3], &carry, carry);
914
915 subtract_u64(&out[2], &carry, result & kPrime[2]);
916 subtract_u64(&out[3], &carry, carry);
917
918 subtract_u64(&out[3], &carry, result & kPrime[3]);
919 }
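# if 0
/*-
 * Illustrative sketch, not compiled and not used anywhere: the comparison
 * above relies on a repeated fold that turns the top bit of a word into the
 * AND of all of its 64 bits. Isolated into a helper (the name is
 * illustrative only), the constant-time equality test looks like this; it
 * returns an all-ones mask when a == b and zero otherwise, with no
 * data-dependent branch.
 */
static u64 ct_eq_mask_sketch(u64 a, u64 b)
{
    u64 x = (a ^ b) - 1;    /* all 64 bits set only when a == b */

    x &= x << 32;
    x &= x << 16;
    x &= x << 8;
    x &= x << 4;
    x &= x << 2;
    x &= x << 1;            /* bit 63 is now the AND of all original bits */
    return (u64)(((s64)x) >> 63);
}
# endif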
920
921 static void smallfelem_square_contract(smallfelem out, const smallfelem in)
922 {
923 longfelem longtmp;
924 felem tmp;
925
926 smallfelem_square(longtmp, in);
927 felem_reduce(tmp, longtmp);
928 felem_contract(out, tmp);
929 }
930
931 static void smallfelem_mul_contract(smallfelem out, const smallfelem in1,
932 const smallfelem in2)
933 {
934 longfelem longtmp;
935 felem tmp;
936
937 smallfelem_mul(longtmp, in1, in2);
938 felem_reduce(tmp, longtmp);
939 felem_contract(out, tmp);
940 }
941
942 /*-
943 * smallfelem_is_zero returns a limb with all bits set if |small| == 0 (mod p) and 0
944 * otherwise.
945 * On entry:
946 * small[i] < 2^64
947 */
948 static limb smallfelem_is_zero(const smallfelem small)
949 {
950 limb result;
951 u64 is_p;
952
953 u64 is_zero = small[0] | small[1] | small[2] | small[3];
954 is_zero--;
955 is_zero &= is_zero << 32;
956 is_zero &= is_zero << 16;
957 is_zero &= is_zero << 8;
958 is_zero &= is_zero << 4;
959 is_zero &= is_zero << 2;
960 is_zero &= is_zero << 1;
961 is_zero = ((s64) is_zero) >> 63;
962
963 is_p = (small[0] ^ kPrime[0]) |
964 (small[1] ^ kPrime[1]) |
965 (small[2] ^ kPrime[2]) | (small[3] ^ kPrime[3]);
966 is_p--;
967 is_p &= is_p << 32;
968 is_p &= is_p << 16;
969 is_p &= is_p << 8;
970 is_p &= is_p << 4;
971 is_p &= is_p << 2;
972 is_p &= is_p << 1;
973 is_p = ((s64) is_p) >> 63;
974
975 is_zero |= is_p;
976
977 result = is_zero;
978 result |= ((limb) is_zero) << 64;
979 return result;
980 }
981
982 static int smallfelem_is_zero_int(const smallfelem small)
983 {
984 return (int)(smallfelem_is_zero(small) & ((limb) 1));
985 }
986
987 /*-
988 * felem_inv calculates |out| = |in|^{-1}
989 *
990 * Based on Fermat's Little Theorem:
991 * a^p = a (mod p)
992 * a^{p-1} = 1 (mod p)
993 * a^{p-2} = a^{-1} (mod p)
994 */
995 static void felem_inv(felem out, const felem in)
996 {
997 felem ftmp, ftmp2;
998 /* each e_I will hold |in|^{2^I - 1} */
999 felem e2, e4, e8, e16, e32, e64;
1000 longfelem tmp;
1001 unsigned i;
1002
1003 felem_square(tmp, in);
1004 felem_reduce(ftmp, tmp); /* 2^1 */
1005 felem_mul(tmp, in, ftmp);
1006 felem_reduce(ftmp, tmp); /* 2^2 - 2^0 */
1007 felem_assign(e2, ftmp);
1008 felem_square(tmp, ftmp);
1009 felem_reduce(ftmp, tmp); /* 2^3 - 2^1 */
1010 felem_square(tmp, ftmp);
1011 felem_reduce(ftmp, tmp); /* 2^4 - 2^2 */
1012 felem_mul(tmp, ftmp, e2);
1013 felem_reduce(ftmp, tmp); /* 2^4 - 2^0 */
1014 felem_assign(e4, ftmp);
1015 felem_square(tmp, ftmp);
1016 felem_reduce(ftmp, tmp); /* 2^5 - 2^1 */
1017 felem_square(tmp, ftmp);
1018 felem_reduce(ftmp, tmp); /* 2^6 - 2^2 */
1019 felem_square(tmp, ftmp);
1020 felem_reduce(ftmp, tmp); /* 2^7 - 2^3 */
1021 felem_square(tmp, ftmp);
1022 felem_reduce(ftmp, tmp); /* 2^8 - 2^4 */
1023 felem_mul(tmp, ftmp, e4);
1024 felem_reduce(ftmp, tmp); /* 2^8 - 2^0 */
1025 felem_assign(e8, ftmp);
1026 for (i = 0; i < 8; i++) {
1027 felem_square(tmp, ftmp);
1028 felem_reduce(ftmp, tmp);
1029 } /* 2^16 - 2^8 */
1030 felem_mul(tmp, ftmp, e8);
1031 felem_reduce(ftmp, tmp); /* 2^16 - 2^0 */
1032 felem_assign(e16, ftmp);
1033 for (i = 0; i < 16; i++) {
1034 felem_square(tmp, ftmp);
1035 felem_reduce(ftmp, tmp);
1036 } /* 2^32 - 2^16 */
1037 felem_mul(tmp, ftmp, e16);
1038 felem_reduce(ftmp, tmp); /* 2^32 - 2^0 */
1039 felem_assign(e32, ftmp);
1040 for (i = 0; i < 32; i++) {
1041 felem_square(tmp, ftmp);
1042 felem_reduce(ftmp, tmp);
1043 } /* 2^64 - 2^32 */
1044 felem_assign(e64, ftmp);
1045 felem_mul(tmp, ftmp, in);
1046 felem_reduce(ftmp, tmp); /* 2^64 - 2^32 + 2^0 */
1047 for (i = 0; i < 192; i++) {
1048 felem_square(tmp, ftmp);
1049 felem_reduce(ftmp, tmp);
1050 } /* 2^256 - 2^224 + 2^192 */
1051
1052 felem_mul(tmp, e64, e32);
1053 felem_reduce(ftmp2, tmp); /* 2^64 - 2^0 */
1054 for (i = 0; i < 16; i++) {
1055 felem_square(tmp, ftmp2);
1056 felem_reduce(ftmp2, tmp);
1057 } /* 2^80 - 2^16 */
1058 felem_mul(tmp, ftmp2, e16);
1059 felem_reduce(ftmp2, tmp); /* 2^80 - 2^0 */
1060 for (i = 0; i < 8; i++) {
1061 felem_square(tmp, ftmp2);
1062 felem_reduce(ftmp2, tmp);
1063 } /* 2^88 - 2^8 */
1064 felem_mul(tmp, ftmp2, e8);
1065 felem_reduce(ftmp2, tmp); /* 2^88 - 2^0 */
1066 for (i = 0; i < 4; i++) {
1067 felem_square(tmp, ftmp2);
1068 felem_reduce(ftmp2, tmp);
1069 } /* 2^92 - 2^4 */
1070 felem_mul(tmp, ftmp2, e4);
1071 felem_reduce(ftmp2, tmp); /* 2^92 - 2^0 */
1072 felem_square(tmp, ftmp2);
1073 felem_reduce(ftmp2, tmp); /* 2^93 - 2^1 */
1074 felem_square(tmp, ftmp2);
1075 felem_reduce(ftmp2, tmp); /* 2^94 - 2^2 */
1076 felem_mul(tmp, ftmp2, e2);
1077 felem_reduce(ftmp2, tmp); /* 2^94 - 2^0 */
1078 felem_square(tmp, ftmp2);
1079 felem_reduce(ftmp2, tmp); /* 2^95 - 2^1 */
1080 felem_square(tmp, ftmp2);
1081 felem_reduce(ftmp2, tmp); /* 2^96 - 2^2 */
1082 felem_mul(tmp, ftmp2, in);
1083 felem_reduce(ftmp2, tmp); /* 2^96 - 3 */
1084
1085 felem_mul(tmp, ftmp2, ftmp);
1086 felem_reduce(out, tmp); /* 2^256 - 2^224 + 2^192 + 2^96 - 3 */
1087 }
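# if 0
/*-
 * Illustrative sketch, not compiled and not used anywhere: the addition
 * chain above computes in^(p-2) mod p, per Fermat's little theorem. A
 * simple (variable-time) BIGNUM cross-check of the same exponentiation
 * could look like this; the function name is illustrative only and |mod_p|
 * is assumed to hold the curve prime.
 */
static int felem_inv_crosscheck(BIGNUM *out, const BIGNUM *in,
                                const BIGNUM *mod_p, BN_CTX *ctx)
{
    int ret = 0;
    BIGNUM *e = BN_new();

    if (e == NULL)
        return 0;
    if (BN_copy(e, mod_p) == NULL
        || !BN_sub_word(e, 2)
        || !BN_mod_exp(out, in, e, mod_p, ctx))
        goto err;
    ret = 1;
 err:
    BN_free(e);
    return ret;
}
# endif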
1088
1089 static void smallfelem_inv_contract(smallfelem out, const smallfelem in)
1090 {
1091 felem tmp;
1092
1093 smallfelem_expand(tmp, in);
1094 felem_inv(tmp, tmp);
1095 felem_contract(out, tmp);
1096 }
1097
1098 /*-
1099 * Group operations
1100 * ----------------
1101 *
1102 * Building on top of the field operations we have the operations on the
1103 * elliptic curve group itself. Points on the curve are represented in Jacobian
1104 * coordinates
1105 */
1106
1107 /*-
1108 * point_double calculates 2*(x_in, y_in, z_in)
1109 *
1110 * The method is taken from:
1111 * http://hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-3.html#doubling-dbl-2001-b
1112 *
1113 * Outputs can equal corresponding inputs, i.e., x_out == x_in is allowed,
1114 * while x_out == y_in is not (maybe this works, but it's not tested).
1115 */
1116 static void
1117 point_double(felem x_out, felem y_out, felem z_out,
1118 const felem x_in, const felem y_in, const felem z_in)
1119 {
1120 longfelem tmp, tmp2;
1121 felem delta, gamma, beta, alpha, ftmp, ftmp2;
1122 smallfelem small1, small2;
1123
1124 felem_assign(ftmp, x_in);
1125 /* ftmp[i] < 2^106 */
1126 felem_assign(ftmp2, x_in);
1127 /* ftmp2[i] < 2^106 */
1128
1129 /* delta = z^2 */
1130 felem_square(tmp, z_in);
1131 felem_reduce(delta, tmp);
1132 /* delta[i] < 2^101 */
1133
1134 /* gamma = y^2 */
1135 felem_square(tmp, y_in);
1136 felem_reduce(gamma, tmp);
1137 /* gamma[i] < 2^101 */
1138 felem_shrink(small1, gamma);
1139
1140 /* beta = x*gamma */
1141 felem_small_mul(tmp, small1, x_in);
1142 felem_reduce(beta, tmp);
1143 /* beta[i] < 2^101 */
1144
1145 /* alpha = 3*(x-delta)*(x+delta) */
1146 felem_diff(ftmp, delta);
1147 /* ftmp[i] < 2^105 + 2^106 < 2^107 */
1148 felem_sum(ftmp2, delta);
1149 /* ftmp2[i] < 2^105 + 2^106 < 2^107 */
1150 felem_scalar(ftmp2, 3);
1151 /* ftmp2[i] < 3 * 2^107 < 2^109 */
1152 felem_mul(tmp, ftmp, ftmp2);
1153 felem_reduce(alpha, tmp);
1154 /* alpha[i] < 2^101 */
1155 felem_shrink(small2, alpha);
1156
1157 /* x' = alpha^2 - 8*beta */
1158 smallfelem_square(tmp, small2);
1159 felem_reduce(x_out, tmp);
1160 felem_assign(ftmp, beta);
1161 felem_scalar(ftmp, 8);
1162 /* ftmp[i] < 8 * 2^101 = 2^104 */
1163 felem_diff(x_out, ftmp);
1164 /* x_out[i] < 2^105 + 2^101 < 2^106 */
1165
1166 /* z' = (y + z)^2 - gamma - delta */
1167 felem_sum(delta, gamma);
1168 /* delta[i] < 2^101 + 2^101 = 2^102 */
1169 felem_assign(ftmp, y_in);
1170 felem_sum(ftmp, z_in);
1171 /* ftmp[i] < 2^106 + 2^106 = 2^107 */
1172 felem_square(tmp, ftmp);
1173 felem_reduce(z_out, tmp);
1174 felem_diff(z_out, delta);
1175 /* z_out[i] < 2^105 + 2^101 < 2^106 */
1176
1177 /* y' = alpha*(4*beta - x') - 8*gamma^2 */
1178 felem_scalar(beta, 4);
1179 /* beta[i] < 4 * 2^101 = 2^103 */
1180 felem_diff_zero107(beta, x_out);
1181 /* beta[i] < 2^107 + 2^103 < 2^108 */
1182 felem_small_mul(tmp, small2, beta);
1183 /* tmp[i] < 7 * 2^64 < 2^67 */
1184 smallfelem_square(tmp2, small1);
1185 /* tmp2[i] < 7 * 2^64 */
1186 longfelem_scalar(tmp2, 8);
1187 /* tmp2[i] < 8 * 7 * 2^64 = 7 * 2^67 */
1188 longfelem_diff(tmp, tmp2);
1189 /* tmp[i] < 2^67 + 2^70 + 2^40 < 2^71 */
1190 felem_reduce_zero105(y_out, tmp);
1191 /* y_out[i] < 2^106 */
1192 }
1193
1194 /*
1195 * point_double_small is the same as point_double, except that it operates on
1196 * smallfelems
1197 */
1198 static void
1199 point_double_small(smallfelem x_out, smallfelem y_out, smallfelem z_out,
1200 const smallfelem x_in, const smallfelem y_in,
1201 const smallfelem z_in)
1202 {
1203 felem felem_x_out, felem_y_out, felem_z_out;
1204 felem felem_x_in, felem_y_in, felem_z_in;
1205
1206 smallfelem_expand(felem_x_in, x_in);
1207 smallfelem_expand(felem_y_in, y_in);
1208 smallfelem_expand(felem_z_in, z_in);
1209 point_double(felem_x_out, felem_y_out, felem_z_out,
1210 felem_x_in, felem_y_in, felem_z_in);
1211 felem_shrink(x_out, felem_x_out);
1212 felem_shrink(y_out, felem_y_out);
1213 felem_shrink(z_out, felem_z_out);
1214 }
1215
1216 /* copy_conditional copies in to out iff mask is all ones. */
1217 static void copy_conditional(felem out, const felem in, limb mask)
1218 {
1219 unsigned i;
1220 for (i = 0; i < NLIMBS; ++i) {
1221 const limb tmp = mask & (in[i] ^ out[i]);
1222 out[i] ^= tmp;
1223 }
1224 }
1225
1226 /* copy_small_conditional copies in to out iff mask is all ones. */
1227 static void copy_small_conditional(felem out, const smallfelem in, limb mask)
1228 {
1229 unsigned i;
1230 const u64 mask64 = mask;
1231 for (i = 0; i < NLIMBS; ++i) {
1232 out[i] = ((limb) (in[i] & mask64)) | (out[i] & ~mask);
1233 }
1234 }
1235
1236 /*-
1237 * point_add calculates (x1, y1, z1) + (x2, y2, z2)
1238 *
1239 * The method is taken from:
1240 * http://hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-3.html#addition-add-2007-bl,
1241 * adapted for mixed addition (z2 = 1, or z2 = 0 for the point at infinity).
1242 *
1243 * This function includes a branch for checking whether the two input points
1244 * are equal (while not equal to the point at infinity). This case never
1245 * happens during single point multiplication, so there is no timing leak for
1246 * ECDH or ECDSA signing.
1247 */
1248 static void point_add(felem x3, felem y3, felem z3,
1249 const felem x1, const felem y1, const felem z1,
1250 const int mixed, const smallfelem x2,
1251 const smallfelem y2, const smallfelem z2)
1252 {
1253 felem ftmp, ftmp2, ftmp3, ftmp4, ftmp5, ftmp6, x_out, y_out, z_out;
1254 longfelem tmp, tmp2;
1255 smallfelem small1, small2, small3, small4, small5;
1256 limb x_equal, y_equal, z1_is_zero, z2_is_zero;
1257
1258 felem_shrink(small3, z1);
1259
1260 z1_is_zero = smallfelem_is_zero(small3);
1261 z2_is_zero = smallfelem_is_zero(z2);
1262
1263 /* ftmp = z1z1 = z1**2 */
1264 smallfelem_square(tmp, small3);
1265 felem_reduce(ftmp, tmp);
1266 /* ftmp[i] < 2^101 */
1267 felem_shrink(small1, ftmp);
1268
1269 if (!mixed) {
1270 /* ftmp2 = z2z2 = z2**2 */
1271 smallfelem_square(tmp, z2);
1272 felem_reduce(ftmp2, tmp);
1273 /* ftmp2[i] < 2^101 */
1274 felem_shrink(small2, ftmp2);
1275
1276 felem_shrink(small5, x1);
1277
1278 /* u1 = ftmp3 = x1*z2z2 */
1279 smallfelem_mul(tmp, small5, small2);
1280 felem_reduce(ftmp3, tmp);
1281 /* ftmp3[i] < 2^101 */
1282
1283 /* ftmp5 = z1 + z2 */
1284 felem_assign(ftmp5, z1);
1285 felem_small_sum(ftmp5, z2);
1286 /* ftmp5[i] < 2^107 */
1287
1288 /* ftmp5 = (z1 + z2)**2 - (z1z1 + z2z2) = 2z1z2 */
1289 felem_square(tmp, ftmp5);
1290 felem_reduce(ftmp5, tmp);
1291 /* ftmp2 = z2z2 + z1z1 */
1292 felem_sum(ftmp2, ftmp);
1293 /* ftmp2[i] < 2^101 + 2^101 = 2^102 */
1294 felem_diff(ftmp5, ftmp2);
1295 /* ftmp5[i] < 2^105 + 2^101 < 2^106 */
1296
1297 /* ftmp2 = z2 * z2z2 */
1298 smallfelem_mul(tmp, small2, z2);
1299 felem_reduce(ftmp2, tmp);
1300
1301 /* s1 = ftmp6 = y1 * z2**3 */
1302 felem_mul(tmp, y1, ftmp2);
1303 felem_reduce(ftmp6, tmp);
1304 /* ftmp6[i] < 2^101 */
1305 } else {
1306 /*
1307 * We'll assume z2 = 1 (special case z2 = 0 is handled later)
1308 */
1309
1310 /* u1 = ftmp3 = x1*z2z2 */
1311 felem_assign(ftmp3, x1);
1312 /* ftmp3[i] < 2^106 */
1313
1314 /* ftmp5 = 2z1z2 */
1315 felem_assign(ftmp5, z1);
1316 felem_scalar(ftmp5, 2);
1317 /* ftmp5[i] < 2*2^106 = 2^107 */
1318
1319 /* s1 = ftmp6 = y1 * z2**3 */
1320 felem_assign(ftmp6, y1);
1321 /* ftmp6[i] < 2^106 */
1322 }
1323
1324 /* u2 = x2*z1z1 */
1325 smallfelem_mul(tmp, x2, small1);
1326 felem_reduce(ftmp4, tmp);
1327
1328 /* h = ftmp4 = u2 - u1 */
1329 felem_diff_zero107(ftmp4, ftmp3);
1330 /* ftmp4[i] < 2^107 + 2^101 < 2^108 */
1331 felem_shrink(small4, ftmp4);
1332
1333 x_equal = smallfelem_is_zero(small4);
1334
1335 /* z_out = ftmp5 * h */
1336 felem_small_mul(tmp, small4, ftmp5);
1337 felem_reduce(z_out, tmp);
1338 /* z_out[i] < 2^101 */
1339
1340 /* ftmp = z1 * z1z1 */
1341 smallfelem_mul(tmp, small1, small3);
1342 felem_reduce(ftmp, tmp);
1343
1344 /* s2 = tmp = y2 * z1**3 */
1345 felem_small_mul(tmp, y2, ftmp);
1346 felem_reduce(ftmp5, tmp);
1347
1348 /* r = ftmp5 = (s2 - s1)*2 */
1349 felem_diff_zero107(ftmp5, ftmp6);
1350 /* ftmp5[i] < 2^107 + 2^107 = 2^108 */
1351 felem_scalar(ftmp5, 2);
1352 /* ftmp5[i] < 2^109 */
1353 felem_shrink(small1, ftmp5);
1354 y_equal = smallfelem_is_zero(small1);
1355
1356 if (x_equal && y_equal && !z1_is_zero && !z2_is_zero) {
1357 point_double(x3, y3, z3, x1, y1, z1);
1358 return;
1359 }
1360
1361 /* I = ftmp = (2h)**2 */
1362 felem_assign(ftmp, ftmp4);
1363 felem_scalar(ftmp, 2);
1364 /* ftmp[i] < 2*2^108 = 2^109 */
1365 felem_square(tmp, ftmp);
1366 felem_reduce(ftmp, tmp);
1367
1368 /* J = ftmp2 = h * I */
1369 felem_mul(tmp, ftmp4, ftmp);
1370 felem_reduce(ftmp2, tmp);
1371
1372 /* V = ftmp4 = U1 * I */
1373 felem_mul(tmp, ftmp3, ftmp);
1374 felem_reduce(ftmp4, tmp);
1375
1376 /* x_out = r**2 - J - 2V */
1377 smallfelem_square(tmp, small1);
1378 felem_reduce(x_out, tmp);
1379 felem_assign(ftmp3, ftmp4);
1380 felem_scalar(ftmp4, 2);
1381 felem_sum(ftmp4, ftmp2);
1382 /* ftmp4[i] < 2*2^101 + 2^101 < 2^103 */
1383 felem_diff(x_out, ftmp4);
1384 /* x_out[i] < 2^105 + 2^101 */
1385
1386 /* y_out = r(V-x_out) - 2 * s1 * J */
1387 felem_diff_zero107(ftmp3, x_out);
1388 /* ftmp3[i] < 2^107 + 2^101 < 2^108 */
1389 felem_small_mul(tmp, small1, ftmp3);
1390 felem_mul(tmp2, ftmp6, ftmp2);
1391 longfelem_scalar(tmp2, 2);
1392 /* tmp2[i] < 2*2^67 = 2^68 */
1393 longfelem_diff(tmp, tmp2);
1394 /* tmp[i] < 2^67 + 2^70 + 2^40 < 2^71 */
1395 felem_reduce_zero105(y_out, tmp);
1396 /* y_out[i] < 2^106 */
1397
1398 copy_small_conditional(x_out, x2, z1_is_zero);
1399 copy_conditional(x_out, x1, z2_is_zero);
1400 copy_small_conditional(y_out, y2, z1_is_zero);
1401 copy_conditional(y_out, y1, z2_is_zero);
1402 copy_small_conditional(z_out, z2, z1_is_zero);
1403 copy_conditional(z_out, z1, z2_is_zero);
1404 felem_assign(x3, x_out);
1405 felem_assign(y3, y_out);
1406 felem_assign(z3, z_out);
1407 }
1408
1409 /*
1410 * point_add_small is the same as point_add, except that it operates on
1411 * smallfelems
1412 */
1413 static void point_add_small(smallfelem x3, smallfelem y3, smallfelem z3,
1414 smallfelem x1, smallfelem y1, smallfelem z1,
1415 smallfelem x2, smallfelem y2, smallfelem z2)
1416 {
1417 felem felem_x3, felem_y3, felem_z3;
1418 felem felem_x1, felem_y1, felem_z1;
1419 smallfelem_expand(felem_x1, x1);
1420 smallfelem_expand(felem_y1, y1);
1421 smallfelem_expand(felem_z1, z1);
1422 point_add(felem_x3, felem_y3, felem_z3, felem_x1, felem_y1, felem_z1, 0,
1423 x2, y2, z2);
1424 felem_shrink(x3, felem_x3);
1425 felem_shrink(y3, felem_y3);
1426 felem_shrink(z3, felem_z3);
1427 }
1428
1429 /*-
1430 * Base point pre computation
1431 * --------------------------
1432 *
1433 * Two different sorts of precomputed tables are used in the following code.
1434 * Each contains various points on the curve, where each point is three field
1435 * elements (x, y, z).
1436 *
1437 * For the base point table, z is usually 1 (0 for the point at infinity).
1438 * This table has 2 * 16 elements, starting with the following:
1439 * index | bits | point
1440 * ------+---------+------------------------------
1441 * 0 | 0 0 0 0 | 0G
1442 * 1 | 0 0 0 1 | 1G
1443 * 2 | 0 0 1 0 | 2^64G
1444 * 3 | 0 0 1 1 | (2^64 + 1)G
1445 * 4 | 0 1 0 0 | 2^128G
1446 * 5 | 0 1 0 1 | (2^128 + 1)G
1447 * 6 | 0 1 1 0 | (2^128 + 2^64)G
1448 * 7 | 0 1 1 1 | (2^128 + 2^64 + 1)G
1449 * 8 | 1 0 0 0 | 2^192G
1450 * 9 | 1 0 0 1 | (2^192 + 1)G
1451 * 10 | 1 0 1 0 | (2^192 + 2^64)G
1452 * 11 | 1 0 1 1 | (2^192 + 2^64 + 1)G
1453 * 12 | 1 1 0 0 | (2^192 + 2^128)G
1454 * 13 | 1 1 0 1 | (2^192 + 2^128 + 1)G
1455 * 14 | 1 1 1 0 | (2^192 + 2^128 + 2^64)G
1456 * 15 | 1 1 1 1 | (2^192 + 2^128 + 2^64 + 1)G
1457 * followed by a copy of this with each element multiplied by 2^32.
1458 *
1459 * The reason for this is so that we can clock bits into four different
1460 * locations when doing simple scalar multiplies against the base point,
1461 * and then another four locations using the second 16 elements.
1462 *
1463 * Tables for other points have table[i] = iG for i in 0 .. 16. */
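/*-
 * Concretely, in the generator part of batch_mul() below (i = 31 .. 0),
 * writing the scalar as bits b_255 .. b_0, the two indices formed at step i
 * are
 *   g_pre_comp[1][ b_{i+224} b_{i+160} b_{i+96} b_{i+32} ]  (the 2^32-scaled
 *                                                            half of the table)
 *   g_pre_comp[0][ b_{i+192} b_{i+128} b_{i+64} b_{i} ]
 * so by the time i reaches 0 every scalar bit has been added in with its
 * proper weight (the remaining doublings supply the factor 2^i).
 */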
1464
1465 /* gmul is the table of precomputed base points */
1466 static const smallfelem gmul[2][16][3] = {
1467 {{{0, 0, 0, 0},
1468 {0, 0, 0, 0},
1469 {0, 0, 0, 0}},
1470 {{0xf4a13945d898c296, 0x77037d812deb33a0, 0xf8bce6e563a440f2,
1471 0x6b17d1f2e12c4247},
1472 {0xcbb6406837bf51f5, 0x2bce33576b315ece, 0x8ee7eb4a7c0f9e16,
1473 0x4fe342e2fe1a7f9b},
1474 {1, 0, 0, 0}},
1475 {{0x90e75cb48e14db63, 0x29493baaad651f7e, 0x8492592e326e25de,
1476 0x0fa822bc2811aaa5},
1477 {0xe41124545f462ee7, 0x34b1a65050fe82f5, 0x6f4ad4bcb3df188b,
1478 0xbff44ae8f5dba80d},
1479 {1, 0, 0, 0}},
1480 {{0x93391ce2097992af, 0xe96c98fd0d35f1fa, 0xb257c0de95e02789,
1481 0x300a4bbc89d6726f},
1482 {0xaa54a291c08127a0, 0x5bb1eeada9d806a5, 0x7f1ddb25ff1e3c6f,
1483 0x72aac7e0d09b4644},
1484 {1, 0, 0, 0}},
1485 {{0x57c84fc9d789bd85, 0xfc35ff7dc297eac3, 0xfb982fd588c6766e,
1486 0x447d739beedb5e67},
1487 {0x0c7e33c972e25b32, 0x3d349b95a7fae500, 0xe12e9d953a4aaff7,
1488 0x2d4825ab834131ee},
1489 {1, 0, 0, 0}},
1490 {{0x13949c932a1d367f, 0xef7fbd2b1a0a11b7, 0xddc6068bb91dfc60,
1491 0xef9519328a9c72ff},
1492 {0x196035a77376d8a8, 0x23183b0895ca1740, 0xc1ee9807022c219c,
1493 0x611e9fc37dbb2c9b},
1494 {1, 0, 0, 0}},
1495 {{0xcae2b1920b57f4bc, 0x2936df5ec6c9bc36, 0x7dea6482e11238bf,
1496 0x550663797b51f5d8},
1497 {0x44ffe216348a964c, 0x9fb3d576dbdefbe1, 0x0afa40018d9d50e5,
1498 0x157164848aecb851},
1499 {1, 0, 0, 0}},
1500 {{0xe48ecafffc5cde01, 0x7ccd84e70d715f26, 0xa2e8f483f43e4391,
1501 0xeb5d7745b21141ea},
1502 {0xcac917e2731a3479, 0x85f22cfe2844b645, 0x0990e6a158006cee,
1503 0xeafd72ebdbecc17b},
1504 {1, 0, 0, 0}},
1505 {{0x6cf20ffb313728be, 0x96439591a3c6b94a, 0x2736ff8344315fc5,
1506 0xa6d39677a7849276},
1507 {0xf2bab833c357f5f4, 0x824a920c2284059b, 0x66b8babd2d27ecdf,
1508 0x674f84749b0b8816},
1509 {1, 0, 0, 0}},
1510 {{0x2df48c04677c8a3e, 0x74e02f080203a56b, 0x31855f7db8c7fedb,
1511 0x4e769e7672c9ddad},
1512 {0xa4c36165b824bbb0, 0xfb9ae16f3b9122a5, 0x1ec0057206947281,
1513 0x42b99082de830663},
1514 {1, 0, 0, 0}},
1515 {{0x6ef95150dda868b9, 0xd1f89e799c0ce131, 0x7fdc1ca008a1c478,
1516 0x78878ef61c6ce04d},
1517 {0x9c62b9121fe0d976, 0x6ace570ebde08d4f, 0xde53142c12309def,
1518 0xb6cb3f5d7b72c321},
1519 {1, 0, 0, 0}},
1520 {{0x7f991ed2c31a3573, 0x5b82dd5bd54fb496, 0x595c5220812ffcae,
1521 0x0c88bc4d716b1287},
1522 {0x3a57bf635f48aca8, 0x7c8181f4df2564f3, 0x18d1b5b39c04e6aa,
1523 0xdd5ddea3f3901dc6},
1524 {1, 0, 0, 0}},
1525 {{0xe96a79fb3e72ad0c, 0x43a0a28c42ba792f, 0xefe0a423083e49f3,
1526 0x68f344af6b317466},
1527 {0xcdfe17db3fb24d4a, 0x668bfc2271f5c626, 0x604ed93c24d67ff3,
1528 0x31b9c405f8540a20},
1529 {1, 0, 0, 0}},
1530 {{0xd36b4789a2582e7f, 0x0d1a10144ec39c28, 0x663c62c3edbad7a0,
1531 0x4052bf4b6f461db9},
1532 {0x235a27c3188d25eb, 0xe724f33999bfcc5b, 0x862be6bd71d70cc8,
1533 0xfecf4d5190b0fc61},
1534 {1, 0, 0, 0}},
1535 {{0x74346c10a1d4cfac, 0xafdf5cc08526a7a4, 0x123202a8f62bff7a,
1536 0x1eddbae2c802e41a},
1537 {0x8fa0af2dd603f844, 0x36e06b7e4c701917, 0x0c45f45273db33a0,
1538 0x43104d86560ebcfc},
1539 {1, 0, 0, 0}},
1540 {{0x9615b5110d1d78e5, 0x66b0de3225c4744b, 0x0a4a46fb6aaf363a,
1541 0xb48e26b484f7a21c},
1542 {0x06ebb0f621a01b2d, 0xc004e4048b7b0f98, 0x64131bcdfed6f668,
1543 0xfac015404d4d3dab},
1544 {1, 0, 0, 0}}},
1545 {{{0, 0, 0, 0},
1546 {0, 0, 0, 0},
1547 {0, 0, 0, 0}},
1548 {{0x3a5a9e22185a5943, 0x1ab919365c65dfb6, 0x21656b32262c71da,
1549 0x7fe36b40af22af89},
1550 {0xd50d152c699ca101, 0x74b3d5867b8af212, 0x9f09f40407dca6f1,
1551 0xe697d45825b63624},
1552 {1, 0, 0, 0}},
1553 {{0xa84aa9397512218e, 0xe9a521b074ca0141, 0x57880b3a18a2e902,
1554 0x4a5b506612a677a6},
1555 {0x0beada7a4c4f3840, 0x626db15419e26d9d, 0xc42604fbe1627d40,
1556 0xeb13461ceac089f1},
1557 {1, 0, 0, 0}},
1558 {{0xf9faed0927a43281, 0x5e52c4144103ecbc, 0xc342967aa815c857,
1559 0x0781b8291c6a220a},
1560 {0x5a8343ceeac55f80, 0x88f80eeee54a05e3, 0x97b2a14f12916434,
1561 0x690cde8df0151593},
1562 {1, 0, 0, 0}},
1563 {{0xaee9c75df7f82f2a, 0x9e4c35874afdf43a, 0xf5622df437371326,
1564 0x8a535f566ec73617},
1565 {0xc5f9a0ac223094b7, 0xcde533864c8c7669, 0x37e02819085a92bf,
1566 0x0455c08468b08bd7},
1567 {1, 0, 0, 0}},
1568 {{0x0c0a6e2c9477b5d9, 0xf9a4bf62876dc444, 0x5050a949b6cdc279,
1569 0x06bada7ab77f8276},
1570 {0xc8b4aed1ea48dac9, 0xdebd8a4b7ea1070f, 0x427d49101366eb70,
1571 0x5b476dfd0e6cb18a},
1572 {1, 0, 0, 0}},
1573 {{0x7c5c3e44278c340a, 0x4d54606812d66f3b, 0x29a751b1ae23c5d8,
1574 0x3e29864e8a2ec908},
1575 {0x142d2a6626dbb850, 0xad1744c4765bd780, 0x1f150e68e322d1ed,
1576 0x239b90ea3dc31e7e},
1577 {1, 0, 0, 0}},
1578 {{0x78c416527a53322a, 0x305dde6709776f8e, 0xdbcab759f8862ed4,
1579 0x820f4dd949f72ff7},
1580 {0x6cc544a62b5debd4, 0x75be5d937b4e8cc4, 0x1b481b1b215c14d3,
1581 0x140406ec783a05ec},
1582 {1, 0, 0, 0}},
1583 {{0x6a703f10e895df07, 0xfd75f3fa01876bd8, 0xeb5b06e70ce08ffe,
1584 0x68f6b8542783dfee},
1585 {0x90c76f8a78712655, 0xcf5293d2f310bf7f, 0xfbc8044dfda45028,
1586 0xcbe1feba92e40ce6},
1587 {1, 0, 0, 0}},
1588 {{0xe998ceea4396e4c1, 0xfc82ef0b6acea274, 0x230f729f2250e927,
1589 0xd0b2f94d2f420109},
1590 {0x4305adddb38d4966, 0x10b838f8624c3b45, 0x7db2636658954e7a,
1591 0x971459828b0719e5},
1592 {1, 0, 0, 0}},
1593 {{0x4bd6b72623369fc9, 0x57f2929e53d0b876, 0xc2d5cba4f2340687,
1594 0x961610004a866aba},
1595 {0x49997bcd2e407a5e, 0x69ab197d92ddcb24, 0x2cf1f2438fe5131c,
1596 0x7acb9fadcee75e44},
1597 {1, 0, 0, 0}},
1598 {{0x254e839423d2d4c0, 0xf57f0c917aea685b, 0xa60d880f6f75aaea,
1599 0x24eb9acca333bf5b},
1600 {0xe3de4ccb1cda5dea, 0xfeef9341c51a6b4f, 0x743125f88bac4c4d,
1601 0x69f891c5acd079cc},
1602 {1, 0, 0, 0}},
1603 {{0xeee44b35702476b5, 0x7ed031a0e45c2258, 0xb422d1e7bd6f8514,
1604 0xe51f547c5972a107},
1605 {0xa25bcd6fc9cf343d, 0x8ca922ee097c184e, 0xa62f98b3a9fe9a06,
1606 0x1c309a2b25bb1387},
1607 {1, 0, 0, 0}},
1608 {{0x9295dbeb1967c459, 0xb00148833472c98e, 0xc504977708011828,
1609 0x20b87b8aa2c4e503},
1610 {0x3063175de057c277, 0x1bd539338fe582dd, 0x0d11adef5f69a044,
1611 0xf5c6fa49919776be},
1612 {1, 0, 0, 0}},
1613 {{0x8c944e760fd59e11, 0x3876cba1102fad5f, 0xa454c3fad83faa56,
1614 0x1ed7d1b9332010b9},
1615 {0xa1011a270024b889, 0x05e4d0dcac0cd344, 0x52b520f0eb6a2a24,
1616 0x3a2b03f03217257a},
1617 {1, 0, 0, 0}},
1618 {{0xf20fc2afdf1d043d, 0xf330240db58d5a62, 0xfc7d229ca0058c3b,
1619 0x15fee545c78dd9f6},
1620 {0x501e82885bc98cda, 0x41ef80e5d046ac04, 0x557d9f49461210fb,
1621 0x4ab5b6b2b8753f81},
1622 {1, 0, 0, 0}}}
1623 };
1624
1625 /*
1626 * select_point selects the |idx|th point from a precomputation table and
1627 * copies it to out.
1628 */
1629 static void select_point(const u64 idx, unsigned int size,
1630 const smallfelem pre_comp[16][3], smallfelem out[3])
1631 {
1632 unsigned i, j;
1633 u64 *outlimbs = &out[0][0];
1634
1635 memset(out, 0, sizeof(*out) * 3);
1636
1637 for (i = 0; i < size; i++) {
1638 const u64 *inlimbs = (u64 *)&pre_comp[i][0][0];
1639 u64 mask = i ^ idx;
1640 mask |= mask >> 4;
1641 mask |= mask >> 2;
1642 mask |= mask >> 1;
1643 mask &= 1;
1644 mask--;
1645 for (j = 0; j < NLIMBS * 3; j++)
1646 outlimbs[j] |= inlimbs[j] & mask;
1647 }
1648 }
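# if 0
/*-
 * Illustrative sketch, not compiled and not used anywhere: the mask trick
 * above, reduced to a toy table of four u64 values (the helper name is
 * illustrative only). Every entry is read, but only the one whose index
 * equals |idx| contributes, so the memory access pattern does not depend
 * on the secret index.
 */
static u64 select_u64_sketch(u64 idx, const u64 table[4])
{
    u64 i, mask, result = 0;

    for (i = 0; i < 4; i++) {
        mask = i ^ idx;  /* zero exactly when i == idx */
        mask |= mask >> 2;
        mask |= mask >> 1;
        mask &= 1;       /* 0 when i == idx, 1 otherwise */
        mask--;          /* all ones when i == idx, 0 otherwise */
        result |= table[i] & mask;
    }
    return result;
}
# endif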
1649
1650 /* get_bit returns the |i|th bit in |in| */
1651 static char get_bit(const felem_bytearray in, int i)
1652 {
1653 if ((i < 0) || (i >= 256))
1654 return 0;
1655 return (in[i >> 3] >> (i & 7)) & 1;
1656 }
1657
1658 /*
1659 * Interleaved point multiplication using precomputed point multiples: The
1660 * small point multiples 0*P, 1*P, ..., 16*P are in pre_comp[], the scalars
1661 * in scalars[]. If g_scalar is non-NULL, we also add this multiple of the
1662 * generator, using certain (large) precomputed multiples in g_pre_comp.
1663 * Output point (X, Y, Z) is stored in x_out, y_out, z_out
1664 */
1665 static void batch_mul(felem x_out, felem y_out, felem z_out,
1666 const felem_bytearray scalars[],
1667 const unsigned num_points, const u8 *g_scalar,
1668 const int mixed, const smallfelem pre_comp[][17][3],
1669 const smallfelem g_pre_comp[2][16][3])
1670 {
1671 int i, skip;
1672 unsigned num, gen_mul = (g_scalar != NULL);
1673 felem nq[3], ftmp;
1674 smallfelem tmp[3];
1675 u64 bits;
1676 u8 sign, digit;
1677
1678 /* set nq to the point at infinity */
1679 memset(nq, 0, sizeof(nq));
1680
1681 /*
1682 * Loop over all scalars msb-to-lsb, interleaving additions of multiples
1683 * of the generator (two in each of the last 32 rounds) and additions of
1684 * other points multiples (every 5th round).
1685 */
1686 skip = 1; /* save two point operations in the first
1687 * round */
1688 for (i = (num_points ? 255 : 31); i >= 0; --i) {
1689 /* double */
1690 if (!skip)
1691 point_double(nq[0], nq[1], nq[2], nq[0], nq[1], nq[2]);
1692
1693 /* add multiples of the generator */
1694 if (gen_mul && (i <= 31)) {
1695 /* first, look 32 bits upwards */
1696 bits = get_bit(g_scalar, i + 224) << 3;
1697 bits |= get_bit(g_scalar, i + 160) << 2;
1698 bits |= get_bit(g_scalar, i + 96) << 1;
1699 bits |= get_bit(g_scalar, i + 32);
1700 /* select the point to add, in constant time */
1701 select_point(bits, 16, g_pre_comp[1], tmp);
1702
1703 if (!skip) {
1704 /* Arg 1 below is for "mixed" */
1705 point_add(nq[0], nq[1], nq[2],
1706 nq[0], nq[1], nq[2], 1, tmp[0], tmp[1], tmp[2]);
1707 } else {
1708 smallfelem_expand(nq[0], tmp[0]);
1709 smallfelem_expand(nq[1], tmp[1]);
1710 smallfelem_expand(nq[2], tmp[2]);
1711 skip = 0;
1712 }
1713
1714 /* second, look at the current position */
1715 bits = get_bit(g_scalar, i + 192) << 3;
1716 bits |= get_bit(g_scalar, i + 128) << 2;
1717 bits |= get_bit(g_scalar, i + 64) << 1;
1718 bits |= get_bit(g_scalar, i);
1719 /* select the point to add, in constant time */
1720 select_point(bits, 16, g_pre_comp[0], tmp);
1721 /* Arg 1 below is for "mixed" */
1722 point_add(nq[0], nq[1], nq[2],
1723 nq[0], nq[1], nq[2], 1, tmp[0], tmp[1], tmp[2]);
1724 }
1725
1726 /* do other additions every 5 doublings */
1727 if (num_points && (i % 5 == 0)) {
1728 /* loop over all scalars */
1729 for (num = 0; num < num_points; ++num) {
1730 bits = get_bit(scalars[num], i + 4) << 5;
1731 bits |= get_bit(scalars[num], i + 3) << 4;
1732 bits |= get_bit(scalars[num], i + 2) << 3;
1733 bits |= get_bit(scalars[num], i + 1) << 2;
1734 bits |= get_bit(scalars[num], i) << 1;
1735 bits |= get_bit(scalars[num], i - 1);
1736 ec_GFp_nistp_recode_scalar_bits(&sign, &digit, bits);
1737
1738 /*
1739 * select the point to add or subtract, in constant time
1740 */
1741 select_point(digit, 17, pre_comp[num], tmp);
1742 smallfelem_neg(ftmp, tmp[1]); /* (X, -Y, Z) is the negative
1743 * point */
1744 copy_small_conditional(ftmp, tmp[1], (((limb) sign) - 1));
1745 felem_contract(tmp[1], ftmp);
1746
1747 if (!skip) {
1748 point_add(nq[0], nq[1], nq[2],
1749 nq[0], nq[1], nq[2],
1750 mixed, tmp[0], tmp[1], tmp[2]);
1751 } else {
1752 smallfelem_expand(nq[0], tmp[0]);
1753 smallfelem_expand(nq[1], tmp[1]);
1754 smallfelem_expand(nq[2], tmp[2]);
1755 skip = 0;
1756 }
1757 }
1758 }
1759 }
1760 felem_assign(x_out, nq[0]);
1761 felem_assign(y_out, nq[1]);
1762 felem_assign(z_out, nq[2]);
1763 }
1764
1765 /* Precomputation for the group generator. */
1766 struct nistp256_pre_comp_st {
1767 smallfelem g_pre_comp[2][16][3];
1768 CRYPTO_REF_COUNT references;
1769 CRYPTO_RWLOCK *lock;
1770 };
1771
1772 const EC_METHOD *EC_GFp_nistp256_method(void)
1773 {
1774 static const EC_METHOD ret = {
1775 EC_FLAGS_DEFAULT_OCT,
1776 NID_X9_62_prime_field,
1777 ec_GFp_nistp256_group_init,
1778 ec_GFp_simple_group_finish,
1779 ec_GFp_simple_group_clear_finish,
1780 ec_GFp_nist_group_copy,
1781 ec_GFp_nistp256_group_set_curve,
1782 ec_GFp_simple_group_get_curve,
1783 ec_GFp_simple_group_get_degree,
1784 ec_group_simple_order_bits,
1785 ec_GFp_simple_group_check_discriminant,
1786 ec_GFp_simple_point_init,
1787 ec_GFp_simple_point_finish,
1788 ec_GFp_simple_point_clear_finish,
1789 ec_GFp_simple_point_copy,
1790 ec_GFp_simple_point_set_to_infinity,
1791 ec_GFp_simple_set_Jprojective_coordinates_GFp,
1792 ec_GFp_simple_get_Jprojective_coordinates_GFp,
1793 ec_GFp_simple_point_set_affine_coordinates,
1794 ec_GFp_nistp256_point_get_affine_coordinates,
1795 0 /* point_set_compressed_coordinates */ ,
1796 0 /* point2oct */ ,
1797 0 /* oct2point */ ,
1798 ec_GFp_simple_add,
1799 ec_GFp_simple_dbl,
1800 ec_GFp_simple_invert,
1801 ec_GFp_simple_is_at_infinity,
1802 ec_GFp_simple_is_on_curve,
1803 ec_GFp_simple_cmp,
1804 ec_GFp_simple_make_affine,
1805 ec_GFp_simple_points_make_affine,
1806 ec_GFp_nistp256_points_mul,
1807 ec_GFp_nistp256_precompute_mult,
1808 ec_GFp_nistp256_have_precompute_mult,
1809 ec_GFp_nist_field_mul,
1810 ec_GFp_nist_field_sqr,
1811 0 /* field_div */ ,
1812 0 /* field_encode */ ,
1813 0 /* field_decode */ ,
1814 0, /* field_set_to_one */
1815 ec_key_simple_priv2oct,
1816 ec_key_simple_oct2priv,
1817 0, /* set private */
1818 ec_key_simple_generate_key,
1819 ec_key_simple_check_key,
1820 ec_key_simple_generate_public_key,
1821 0, /* keycopy */
1822 0, /* keyfinish */
1823 ecdh_simple_compute_key
1824 };
1825
1826 return &ret;
1827 }
1828
1829 /******************************************************************************/
1830 /*
1831 * FUNCTIONS TO MANAGE PRECOMPUTATION
1832 */
1833
1834 static NISTP256_PRE_COMP *nistp256_pre_comp_new(void)
1835 {
1836 NISTP256_PRE_COMP *ret = OPENSSL_zalloc(sizeof(*ret));
1837
1838 if (ret == NULL) {
1839 ECerr(EC_F_NISTP256_PRE_COMP_NEW, ERR_R_MALLOC_FAILURE);
1840 return ret;
1841 }
1842
1843 ret->references = 1;
1844
1845 ret->lock = CRYPTO_THREAD_lock_new();
1846 if (ret->lock == NULL) {
1847 ECerr(EC_F_NISTP256_PRE_COMP_NEW, ERR_R_MALLOC_FAILURE);
1848 OPENSSL_free(ret);
1849 return NULL;
1850 }
1851 return ret;
1852 }
1853
1854 NISTP256_PRE_COMP *EC_nistp256_pre_comp_dup(NISTP256_PRE_COMP *p)
1855 {
1856 int i;
1857 if (p != NULL)
1858 CRYPTO_UP_REF(&p->references, &i, p->lock);
1859 return p;
1860 }
1861
1862 void EC_nistp256_pre_comp_free(NISTP256_PRE_COMP *pre)
1863 {
1864 int i;
1865
1866 if (pre == NULL)
1867 return;
1868
1869 CRYPTO_DOWN_REF(&pre->references, &i, pre->lock);
1870 REF_PRINT_COUNT("EC_nistp256", pre);
1871 if (i > 0)
1872 return;
1873 REF_ASSERT_ISNT(i < 0);
1874
1875 CRYPTO_THREAD_lock_free(pre->lock);
1876 OPENSSL_free(pre);
1877 }
1878
1879 /******************************************************************************/
1880 /*
1881 * OPENSSL EC_METHOD FUNCTIONS
1882 */
1883
1884 int ec_GFp_nistp256_group_init(EC_GROUP *group)
1885 {
1886 int ret;
1887 ret = ec_GFp_simple_group_init(group);
1888 group->a_is_minus3 = 1;
1889 return ret;
1890 }
1891
1892 int ec_GFp_nistp256_group_set_curve(EC_GROUP *group, const BIGNUM *p,
1893 const BIGNUM *a, const BIGNUM *b,
1894 BN_CTX *ctx)
1895 {
1896 int ret = 0;
1897 BN_CTX *new_ctx = NULL;
1898 BIGNUM *curve_p, *curve_a, *curve_b;
1899
1900 if (ctx == NULL)
1901 if ((ctx = new_ctx = BN_CTX_new()) == NULL)
1902 return 0;
1903 BN_CTX_start(ctx);
1904 curve_p = BN_CTX_get(ctx);
1905 curve_a = BN_CTX_get(ctx);
1906 curve_b = BN_CTX_get(ctx);
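    /*
     * Checking the result of the last BN_CTX_get() is sufficient: once one
     * call fails, all subsequent BN_CTX_get() calls on the same context
     * return NULL as well.
     */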
1907 if (curve_b == NULL)
1908 goto err;
1909 BN_bin2bn(nistp256_curve_params[0], sizeof(felem_bytearray), curve_p);
1910 BN_bin2bn(nistp256_curve_params[1], sizeof(felem_bytearray), curve_a);
1911 BN_bin2bn(nistp256_curve_params[2], sizeof(felem_bytearray), curve_b);
1912 if ((BN_cmp(curve_p, p)) || (BN_cmp(curve_a, a)) || (BN_cmp(curve_b, b))) {
1913 ECerr(EC_F_EC_GFP_NISTP256_GROUP_SET_CURVE,
1914 EC_R_WRONG_CURVE_PARAMETERS);
1915 goto err;
1916 }
1917 group->field_mod_func = BN_nist_mod_256;
1918 ret = ec_GFp_simple_group_set_curve(group, p, a, b, ctx);
1919 err:
1920 BN_CTX_end(ctx);
1921 BN_CTX_free(new_ctx);
1922 return ret;
1923 }
1924
1925 /*
1926 * Takes the Jacobian coordinates (X, Y, Z) of a point and returns (X', Y') =
1927 * (X/Z^2, Y/Z^3)
1928 */
1929 int ec_GFp_nistp256_point_get_affine_coordinates(const EC_GROUP *group,
1930 const EC_POINT *point,
1931 BIGNUM *x, BIGNUM *y,
1932 BN_CTX *ctx)
1933 {
1934 felem z1, z2, x_in, y_in;
1935 smallfelem x_out, y_out;
1936 longfelem tmp;
1937
1938 if (EC_POINT_is_at_infinity(group, point)) {
1939 ECerr(EC_F_EC_GFP_NISTP256_POINT_GET_AFFINE_COORDINATES,
1940 EC_R_POINT_AT_INFINITY);
1941 return 0;
1942 }
1943 if ((!BN_to_felem(x_in, point->X)) || (!BN_to_felem(y_in, point->Y)) ||
1944 (!BN_to_felem(z1, point->Z)))
1945 return 0;
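    /*
     * Only one field inversion is needed: z2 = Z^-1, then z1 = Z^-2 is used
     * to recover X, and z1 * z2 = Z^-3 to recover Y.
     */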
1946 felem_inv(z2, z1);
1947 felem_square(tmp, z2);
1948 felem_reduce(z1, tmp);
1949 felem_mul(tmp, x_in, z1);
1950 felem_reduce(x_in, tmp);
1951 felem_contract(x_out, x_in);
1952 if (x != NULL) {
1953 if (!smallfelem_to_BN(x, x_out)) {
1954 ECerr(EC_F_EC_GFP_NISTP256_POINT_GET_AFFINE_COORDINATES,
1955 ERR_R_BN_LIB);
1956 return 0;
1957 }
1958 }
1959 felem_mul(tmp, z1, z2);
1960 felem_reduce(z1, tmp);
1961 felem_mul(tmp, y_in, z1);
1962 felem_reduce(y_in, tmp);
1963 felem_contract(y_out, y_in);
1964 if (y != NULL) {
1965 if (!smallfelem_to_BN(y, y_out)) {
1966 ECerr(EC_F_EC_GFP_NISTP256_POINT_GET_AFFINE_COORDINATES,
1967 ERR_R_BN_LIB);
1968 return 0;
1969 }
1970 }
1971 return 1;
1972 }
1973
1974 /* |points| below has size |num|, and |tmp_smallfelems| has size |num+1| */
1975 static void make_points_affine(size_t num, smallfelem points[][3],
1976 smallfelem tmp_smallfelems[])
1977 {
1978 /*
1979 * Runs in constant time, unless an input is the point at infinity (which
1980 * normally shouldn't happen).
1981 */
1982 ec_GFp_nistp_points_make_affine_internal(num,
1983 points,
1984 sizeof(smallfelem),
1985 tmp_smallfelems,
1986 (void (*)(void *))smallfelem_one,
1987 (int (*)(const void *))
1988 smallfelem_is_zero_int,
1989 (void (*)(void *, const void *))
1990 smallfelem_assign,
1991 (void (*)(void *, const void *))
1992 smallfelem_square_contract,
1993 (void (*)
1994 (void *, const void *,
1995 const void *))
1996 smallfelem_mul_contract,
1997 (void (*)(void *, const void *))
1998 smallfelem_inv_contract,
1999 /* nothing to contract */
2000 (void (*)(void *, const void *))
2001 smallfelem_assign);
2002 }
2003
2004 /*
2005 * Computes scalar*generator + \sum scalars[i]*points[i], ignoring NULL
2006 * values. The result is stored in r (r can equal one of the inputs).
2007 */
2008 int ec_GFp_nistp256_points_mul(const EC_GROUP *group, EC_POINT *r,
2009 const BIGNUM *scalar, size_t num,
2010 const EC_POINT *points[],
2011 const BIGNUM *scalars[], BN_CTX *ctx)
2012 {
2013 int ret = 0;
2014 int j;
2015 int mixed = 0;
2016 BN_CTX *new_ctx = NULL;
2017 BIGNUM *x, *y, *z, *tmp_scalar;
2018 felem_bytearray g_secret;
2019 felem_bytearray *secrets = NULL;
2020 smallfelem (*pre_comp)[17][3] = NULL;
2021 smallfelem *tmp_smallfelems = NULL;
2022 felem_bytearray tmp;
2023 unsigned i, num_bytes;
2024 int have_pre_comp = 0;
2025 size_t num_points = num;
2026 smallfelem x_in, y_in, z_in;
2027 felem x_out, y_out, z_out;
2028 NISTP256_PRE_COMP *pre = NULL;
2029 const smallfelem(*g_pre_comp)[16][3] = NULL;
2030 EC_POINT *generator = NULL;
2031 const EC_POINT *p = NULL;
2032 const BIGNUM *p_scalar = NULL;
2033
2034 if (ctx == NULL)
2035 if ((ctx = new_ctx = BN_CTX_new()) == NULL)
2036 return 0;
2037 BN_CTX_start(ctx);
2038 x = BN_CTX_get(ctx);
2039 y = BN_CTX_get(ctx);
2040 z = BN_CTX_get(ctx);
2041 tmp_scalar = BN_CTX_get(ctx);
2042 if (tmp_scalar == NULL)
2043 goto err;
2044
2045 if (scalar != NULL) {
2046 pre = group->pre_comp.nistp256;
2047 if (pre)
2048 /* we have precomputation, try to use it */
2049 g_pre_comp = (const smallfelem(*)[16][3])pre->g_pre_comp;
2050 else
2051 /* try to use the standard precomputation */
2052 g_pre_comp = &gmul[0];
2053 generator = EC_POINT_new(group);
2054 if (generator == NULL)
2055 goto err;
2056 /* get the generator from precomputation */
2057 if (!smallfelem_to_BN(x, g_pre_comp[0][1][0]) ||
2058 !smallfelem_to_BN(y, g_pre_comp[0][1][1]) ||
2059 !smallfelem_to_BN(z, g_pre_comp[0][1][2])) {
2060 ECerr(EC_F_EC_GFP_NISTP256_POINTS_MUL, ERR_R_BN_LIB);
2061 goto err;
2062 }
2063 if (!EC_POINT_set_Jprojective_coordinates_GFp(group,
2064 generator, x, y, z,
2065 ctx))
2066 goto err;
2067 if (0 == EC_POINT_cmp(group, generator, group->generator, ctx))
2068 /* precomputation matches generator */
2069 have_pre_comp = 1;
2070 else
2071 /*
2072 * we don't have valid precomputation: treat the generator as a
2073 * random point
2074 */
2075 num_points++;
2076 }
2077 if (num_points > 0) {
2078 if (num_points >= 3) {
2079 /*
2080 * unless we precompute multiples for just one or two points,
2081 * converting those into affine form is time well spent
2082 */
2083 mixed = 1;
2084 }
2085 secrets = OPENSSL_malloc(sizeof(*secrets) * num_points);
2086 pre_comp = OPENSSL_malloc(sizeof(*pre_comp) * num_points);
2087 if (mixed)
2088 tmp_smallfelems =
2089 OPENSSL_malloc(sizeof(*tmp_smallfelems) * (num_points * 17 + 1));
2090 if ((secrets == NULL) || (pre_comp == NULL)
2091 || (mixed && (tmp_smallfelems == NULL))) {
2092 ECerr(EC_F_EC_GFP_NISTP256_POINTS_MUL, ERR_R_MALLOC_FAILURE);
2093 goto err;
2094 }
2095
2096 /*
2097 * we treat NULL scalars as 0, and NULL points as points at infinity,
2098 * i.e., they contribute nothing to the linear combination
2099 */
2100 memset(secrets, 0, sizeof(*secrets) * num_points);
2101 memset(pre_comp, 0, sizeof(*pre_comp) * num_points);
2102 for (i = 0; i < num_points; ++i) {
2103 if (i == num) {
2104 /*
2105 * we didn't have a valid precomputation, so we pick the
2106 * generator
2107 */
2108 p = EC_GROUP_get0_generator(group);
2109 p_scalar = scalar;
2110 } else {
2111 /*
2112 * the i^th point
2113 */
2114 p = points[i];
2115 p_scalar = scalars[i];
2116 }
2117 if ((p_scalar != NULL) && (p != NULL)) {
2118 /* reduce scalar to 0 <= scalar < 2^256 */
2119 if ((BN_num_bits(p_scalar) > 256)
2120 || (BN_is_negative(p_scalar))) {
2121 /*
2122 * this is an unusual input, and we don't guarantee
2123 * constant-timeness
2124 */
2125 if (!BN_nnmod(tmp_scalar, p_scalar, group->order, ctx)) {
2126 ECerr(EC_F_EC_GFP_NISTP256_POINTS_MUL, ERR_R_BN_LIB);
2127 goto err;
2128 }
2129 num_bytes = BN_bn2bin(tmp_scalar, tmp);
2130 } else
2131 num_bytes = BN_bn2bin(p_scalar, tmp);
2132 flip_endian(secrets[i], tmp, num_bytes);
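                /*
                 * secrets[i] now holds the scalar as a zero-padded,
                 * byte-reversed array, the form consumed by batch_mul().
                 */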
2133 /* precompute multiples */
2134 if ((!BN_to_felem(x_out, p->X)) ||
2135 (!BN_to_felem(y_out, p->Y)) ||
2136 (!BN_to_felem(z_out, p->Z)))
2137 goto err;
2138 felem_shrink(pre_comp[i][1][0], x_out);
2139 felem_shrink(pre_comp[i][1][1], y_out);
2140 felem_shrink(pre_comp[i][1][2], z_out);
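                /*
                 * pre_comp[i][j] = j * p for j = 1, ..., 16; index 0 was
                 * left as the point at infinity by the memset() above.
                 */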
2141 for (j = 2; j <= 16; ++j) {
2142 if (j & 1) {
2143 point_add_small(pre_comp[i][j][0], pre_comp[i][j][1],
2144 pre_comp[i][j][2], pre_comp[i][1][0],
2145 pre_comp[i][1][1], pre_comp[i][1][2],
2146 pre_comp[i][j - 1][0],
2147 pre_comp[i][j - 1][1],
2148 pre_comp[i][j - 1][2]);
2149 } else {
2150 point_double_small(pre_comp[i][j][0],
2151 pre_comp[i][j][1],
2152 pre_comp[i][j][2],
2153 pre_comp[i][j / 2][0],
2154 pre_comp[i][j / 2][1],
2155 pre_comp[i][j / 2][2]);
2156 }
2157 }
2158 }
2159 }
2160 if (mixed)
2161 make_points_affine(num_points * 17, pre_comp[0], tmp_smallfelems);
2162 }
2163
2164 /* the scalar for the generator */
2165 if ((scalar != NULL) && (have_pre_comp)) {
2166 memset(g_secret, 0, sizeof(g_secret));
2167 /* reduce scalar to 0 <= scalar < 2^256 */
2168 if ((BN_num_bits(scalar) > 256) || (BN_is_negative(scalar))) {
2169 /*
2170 * this is an unusual input, and we don't guarantee
2171 * constant-timeness
2172 */
2173 if (!BN_nnmod(tmp_scalar, scalar, group->order, ctx)) {
2174 ECerr(EC_F_EC_GFP_NISTP256_POINTS_MUL, ERR_R_BN_LIB);
2175 goto err;
2176 }
2177 num_bytes = BN_bn2bin(tmp_scalar, tmp);
2178 } else
2179 num_bytes = BN_bn2bin(scalar, tmp);
2180 flip_endian(g_secret, tmp, num_bytes);
2181 /* do the multiplication with generator precomputation */
2182 batch_mul(x_out, y_out, z_out,
2183 (const felem_bytearray(*))secrets, num_points,
2184 g_secret,
2185 mixed, (const smallfelem(*)[17][3])pre_comp, g_pre_comp);
2186 } else
2187 /* do the multiplication without generator precomputation */
2188 batch_mul(x_out, y_out, z_out,
2189 (const felem_bytearray(*))secrets, num_points,
2190 NULL, mixed, (const smallfelem(*)[17][3])pre_comp, NULL);
2191 /* reduce the output to its unique minimal representation */
2192 felem_contract(x_in, x_out);
2193 felem_contract(y_in, y_out);
2194 felem_contract(z_in, z_out);
2195 if ((!smallfelem_to_BN(x, x_in)) || (!smallfelem_to_BN(y, y_in)) ||
2196 (!smallfelem_to_BN(z, z_in))) {
2197 ECerr(EC_F_EC_GFP_NISTP256_POINTS_MUL, ERR_R_BN_LIB);
2198 goto err;
2199 }
2200 ret = EC_POINT_set_Jprojective_coordinates_GFp(group, r, x, y, z, ctx);
2201
2202 err:
2203 BN_CTX_end(ctx);
2204 EC_POINT_free(generator);
2205 BN_CTX_free(new_ctx);
2206 OPENSSL_free(secrets);
2207 OPENSSL_free(pre_comp);
2208 OPENSSL_free(tmp_smallfelems);
2209 return ret;
2210 }
2211
2212 int ec_GFp_nistp256_precompute_mult(EC_GROUP *group, BN_CTX *ctx)
2213 {
2214 int ret = 0;
2215 NISTP256_PRE_COMP *pre = NULL;
2216 int i, j;
2217 BN_CTX *new_ctx = NULL;
2218 BIGNUM *x, *y;
2219 EC_POINT *generator = NULL;
2220 smallfelem tmp_smallfelems[32];
2221 felem x_tmp, y_tmp, z_tmp;
2222
2223 /* throw away old precomputation */
2224 EC_pre_comp_free(group);
2225 if (ctx == NULL)
2226 if ((ctx = new_ctx = BN_CTX_new()) == NULL)
2227 return 0;
2228 BN_CTX_start(ctx);
2229 x = BN_CTX_get(ctx);
2230 y = BN_CTX_get(ctx);
2231 if (y == NULL)
2232 goto err;
2233 /* get the generator */
2234 if (group->generator == NULL)
2235 goto err;
2236 generator = EC_POINT_new(group);
2237 if (generator == NULL)
2238 goto err;
2239 BN_bin2bn(nistp256_curve_params[3], sizeof(felem_bytearray), x);
2240 BN_bin2bn(nistp256_curve_params[4], sizeof(felem_bytearray), y);
2241 if (!EC_POINT_set_affine_coordinates_GFp(group, generator, x, y, ctx))
2242 goto err;
2243 if ((pre = nistp256_pre_comp_new()) == NULL)
2244 goto err;
2245 /*
2246 * if the generator is the standard one, use built-in precomputation
2247 */
2248 if (0 == EC_POINT_cmp(group, generator, group->generator, ctx)) {
2249 memcpy(pre->g_pre_comp, gmul, sizeof(pre->g_pre_comp));
2250 goto done;
2251 }
2252 if ((!BN_to_felem(x_tmp, group->generator->X)) ||
2253 (!BN_to_felem(y_tmp, group->generator->Y)) ||
2254 (!BN_to_felem(z_tmp, group->generator->Z)))
2255 goto err;
2256 felem_shrink(pre->g_pre_comp[0][1][0], x_tmp);
2257 felem_shrink(pre->g_pre_comp[0][1][1], y_tmp);
2258 felem_shrink(pre->g_pre_comp[0][1][2], z_tmp);
2259 /*
2260 * compute 2^64*G, 2^128*G, 2^192*G for the first table, 2^32*G, 2^96*G,
2261 * 2^160*G, 2^224*G for the second one
2262 */
2263 for (i = 1; i <= 8; i <<= 1) {
2264 point_double_small(pre->g_pre_comp[1][i][0], pre->g_pre_comp[1][i][1],
2265 pre->g_pre_comp[1][i][2], pre->g_pre_comp[0][i][0],
2266 pre->g_pre_comp[0][i][1],
2267 pre->g_pre_comp[0][i][2]);
2268 for (j = 0; j < 31; ++j) {
2269 point_double_small(pre->g_pre_comp[1][i][0],
2270 pre->g_pre_comp[1][i][1],
2271 pre->g_pre_comp[1][i][2],
2272 pre->g_pre_comp[1][i][0],
2273 pre->g_pre_comp[1][i][1],
2274 pre->g_pre_comp[1][i][2]);
2275 }
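        /* now g_pre_comp[1][i] = 2^32 * g_pre_comp[0][i] */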
2276 if (i == 8)
2277 break;
2278 point_double_small(pre->g_pre_comp[0][2 * i][0],
2279 pre->g_pre_comp[0][2 * i][1],
2280 pre->g_pre_comp[0][2 * i][2],
2281 pre->g_pre_comp[1][i][0], pre->g_pre_comp[1][i][1],
2282 pre->g_pre_comp[1][i][2]);
2283 for (j = 0; j < 31; ++j) {
2284 point_double_small(pre->g_pre_comp[0][2 * i][0],
2285 pre->g_pre_comp[0][2 * i][1],
2286 pre->g_pre_comp[0][2 * i][2],
2287 pre->g_pre_comp[0][2 * i][0],
2288 pre->g_pre_comp[0][2 * i][1],
2289 pre->g_pre_comp[0][2 * i][2]);
2290 }
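        /* and g_pre_comp[0][2*i] = 2^64 * g_pre_comp[0][i] */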
2291 }
2292 for (i = 0; i < 2; i++) {
2293 /* g_pre_comp[i][0] is the point at infinity */
2294 memset(pre->g_pre_comp[i][0], 0, sizeof(pre->g_pre_comp[i][0]));
2295 /* the remaining multiples */
2296 /* 2^64*G + 2^128*G resp. 2^96*G + 2^160*G */
2297 point_add_small(pre->g_pre_comp[i][6][0], pre->g_pre_comp[i][6][1],
2298 pre->g_pre_comp[i][6][2], pre->g_pre_comp[i][4][0],
2299 pre->g_pre_comp[i][4][1], pre->g_pre_comp[i][4][2],
2300 pre->g_pre_comp[i][2][0], pre->g_pre_comp[i][2][1],
2301 pre->g_pre_comp[i][2][2]);
2302 /* 2^64*G + 2^192*G resp. 2^96*G + 2^224*G */
2303 point_add_small(pre->g_pre_comp[i][10][0], pre->g_pre_comp[i][10][1],
2304 pre->g_pre_comp[i][10][2], pre->g_pre_comp[i][8][0],
2305 pre->g_pre_comp[i][8][1], pre->g_pre_comp[i][8][2],
2306 pre->g_pre_comp[i][2][0], pre->g_pre_comp[i][2][1],
2307 pre->g_pre_comp[i][2][2]);
2308 /* 2^128*G + 2^192*G resp. 2^160*G + 2^224*G */
2309 point_add_small(pre->g_pre_comp[i][12][0], pre->g_pre_comp[i][12][1],
2310 pre->g_pre_comp[i][12][2], pre->g_pre_comp[i][8][0],
2311 pre->g_pre_comp[i][8][1], pre->g_pre_comp[i][8][2],
2312 pre->g_pre_comp[i][4][0], pre->g_pre_comp[i][4][1],
2313 pre->g_pre_comp[i][4][2]);
2314 /*
2315 * 2^64*G + 2^128*G + 2^192*G resp. 2^96*G + 2^160*G + 2^224*G
2316 */
2317 point_add_small(pre->g_pre_comp[i][14][0], pre->g_pre_comp[i][14][1],
2318 pre->g_pre_comp[i][14][2], pre->g_pre_comp[i][12][0],
2319 pre->g_pre_comp[i][12][1], pre->g_pre_comp[i][12][2],
2320 pre->g_pre_comp[i][2][0], pre->g_pre_comp[i][2][1],
2321 pre->g_pre_comp[i][2][2]);
2322 for (j = 1; j < 8; ++j) {
2323 /* odd multiples: add G resp. 2^32*G */
2324 point_add_small(pre->g_pre_comp[i][2 * j + 1][0],
2325 pre->g_pre_comp[i][2 * j + 1][1],
2326 pre->g_pre_comp[i][2 * j + 1][2],
2327 pre->g_pre_comp[i][2 * j][0],
2328 pre->g_pre_comp[i][2 * j][1],
2329 pre->g_pre_comp[i][2 * j][2],
2330 pre->g_pre_comp[i][1][0],
2331 pre->g_pre_comp[i][1][1],
2332 pre->g_pre_comp[i][1][2]);
2333 }
2334 }
2335 make_points_affine(31, &(pre->g_pre_comp[0][1]), tmp_smallfelems);
2336
2337 done:
2338 SETPRECOMP(group, nistp256, pre);
2339 pre = NULL;
2340 ret = 1;
2341
2342 err:
2343 BN_CTX_end(ctx);
2344 EC_POINT_free(generator);
2345 BN_CTX_free(new_ctx);
2346 EC_nistp256_pre_comp_free(pre);
2347 return ret;
2348 }
2349
2350 int ec_GFp_nistp256_have_precompute_mult(const EC_GROUP *group)
2351 {
2352 return HAVEPRECOMP(group, nistp256);
2353 }
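
/*
 * Illustrative sketch, not part of the library (hence "#if 0"): the two
 * functions above are normally reached through the public wrappers, for a
 * group that uses this method:
 */
# if 0
static int example_p256_precompute(EC_GROUP *group, BN_CTX *ctx)
{
    /* builds the per-group generator table used by ec_GFp_nistp256_points_mul() */
    if (!EC_GROUP_precompute_mult(group, ctx))
        return 0;
    /* reports whether a nistp256 precomputation is attached to the group */
    return EC_GROUP_have_precompute_mult(group);
}
# endif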
2354 #endif