1 /*
2 * Copyright 2011-2021 The OpenSSL Project Authors. All Rights Reserved.
3 *
4 * Licensed under the Apache License 2.0 (the "License"). You may not use
5 * this file except in compliance with the License. You can obtain a copy
6 * in the file LICENSE in the source distribution or at
7 * https://www.openssl.org/source/license.html
8 */
9
10 /* Copyright 2011 Google Inc.
11 *
12 * Licensed under the Apache License, Version 2.0 (the "License");
13 *
14 * you may not use this file except in compliance with the License.
15 * You may obtain a copy of the License at
16 *
17 * http://www.apache.org/licenses/LICENSE-2.0
18 *
19 * Unless required by applicable law or agreed to in writing, software
20 * distributed under the License is distributed on an "AS IS" BASIS,
21 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
22 * See the License for the specific language governing permissions and
23 * limitations under the License.
24 */
25
26 /*
27 * ECDSA low level APIs are deprecated for public use, but still ok for
28 * internal use.
29 */
30 #include "internal/deprecated.h"
31
32 /*
33 * A 64-bit implementation of the NIST P-256 elliptic curve point multiplication
34 *
35 * OpenSSL integration was taken from Emilia Kasper's work in ecp_nistp224.c.
36 * Otherwise based on Emilia's P224 work, which was inspired by my curve25519
37 * work, which got its smarts from Daniel J. Bernstein's work on the same.
38 */
39
40 #include <openssl/opensslconf.h>
41
42 #include <stdint.h>
43 #include <string.h>
44 #include <openssl/err.h>
45 #include "ec_local.h"
46
47 #if defined(__SIZEOF_INT128__) && __SIZEOF_INT128__==16
48 /* even with gcc, the typedef won't work for 32-bit platforms */
49 typedef __uint128_t uint128_t; /* nonstandard; implemented by gcc on 64-bit
50 * platforms */
51 typedef __int128_t int128_t;
52 #else
53 # error "Your compiler doesn't appear to support 128-bit integer types"
54 #endif
55
56 typedef uint8_t u8;
57 typedef uint32_t u32;
58 typedef uint64_t u64;
59
60 /*
61 * The underlying field. P256 operates over GF(2^256-2^224+2^192+2^96-1). We
62 * can serialize an element of this field into 32 bytes. We call this an
63 * felem_bytearray.
64 */
65
66 typedef u8 felem_bytearray[32];
67
68 /*
69 * These are the parameters of P256, taken from FIPS 186-3, page 86. These
70 * values are big-endian.
71 */
72 static const felem_bytearray nistp256_curve_params[5] = {
73 {0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x01, /* p */
74 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
75 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff,
76 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff},
77 {0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x01, /* a = -3 */
78 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
79 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff,
80 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xfc},
81 {0x5a, 0xc6, 0x35, 0xd8, 0xaa, 0x3a, 0x93, 0xe7, /* b */
82 0xb3, 0xeb, 0xbd, 0x55, 0x76, 0x98, 0x86, 0xbc,
83 0x65, 0x1d, 0x06, 0xb0, 0xcc, 0x53, 0xb0, 0xf6,
84 0x3b, 0xce, 0x3c, 0x3e, 0x27, 0xd2, 0x60, 0x4b},
85 {0x6b, 0x17, 0xd1, 0xf2, 0xe1, 0x2c, 0x42, 0x47, /* x */
86 0xf8, 0xbc, 0xe6, 0xe5, 0x63, 0xa4, 0x40, 0xf2,
87 0x77, 0x03, 0x7d, 0x81, 0x2d, 0xeb, 0x33, 0xa0,
88 0xf4, 0xa1, 0x39, 0x45, 0xd8, 0x98, 0xc2, 0x96},
89 {0x4f, 0xe3, 0x42, 0xe2, 0xfe, 0x1a, 0x7f, 0x9b, /* y */
90 0x8e, 0xe7, 0xeb, 0x4a, 0x7c, 0x0f, 0x9e, 0x16,
91 0x2b, 0xce, 0x33, 0x57, 0x6b, 0x31, 0x5e, 0xce,
92 0xcb, 0xb6, 0x40, 0x68, 0x37, 0xbf, 0x51, 0xf5}
93 };
94
95 /*-
96 * The representation of field elements.
97 * ------------------------------------
98 *
99 * We represent field elements with either four 128-bit values, eight 128-bit
100 * values, or four 64-bit values. The field element represented is:
101 * v[0]*2^0 + v[1]*2^64 + v[2]*2^128 + v[3]*2^192 (mod p)
102 * or:
103 * v[0]*2^0 + v[1]*2^64 + v[2]*2^128 + ... + v[7]*2^448 (mod p)
104 *
105 * 128-bit values are called 'limbs'. Since the limbs are spaced only 64 bits
106 * apart, but are 128-bits wide, the most significant bits of each limb overlap
107 * with the least significant bits of the next.
108 *
109 * A field element with four limbs is an 'felem'. One with eight limbs is a
110 * 'longfelem'
111 *
112 * A field element with four 64-bit values is called a 'smallfelem'. Small
113 * values are used as intermediate values before multiplication.
114 */
115
116 #define NLIMBS 4
117
118 typedef uint128_t limb;
119 typedef limb felem[NLIMBS];
120 typedef limb longfelem[NLIMBS * 2];
121 typedef u64 smallfelem[NLIMBS];
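/*-
 * For illustration: because the limbs are 128 bits wide but spaced only 2^64
 * apart, the felem representation is redundant. The field element 2^64 + 5,
 * for example, can be stored either as {5, 1, 0, 0} or, using the headroom of
 * the first limb, as {2^64 + 5, 0, 0, 0}. The routines below exploit this
 * slack to delay carry propagation; the bounds quoted in their comments track
 * how much of that headroom each limb is allowed to use.
 */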
122
123 /* This is the value of the prime as four 64-bit words, little-endian. */
124 static const u64 kPrime[4] =
125 { 0xfffffffffffffffful, 0xffffffff, 0, 0xffffffff00000001ul };
126 static const u64 bottom63bits = 0x7ffffffffffffffful;
127
128 /*
129 * bin32_to_felem takes a little-endian byte array and converts it into felem
130 * form. This assumes that the CPU is little-endian.
131 */
132 static void bin32_to_felem(felem out, const u8 in[32])
133 {
134 out[0] = *((u64 *)&in[0]);
135 out[1] = *((u64 *)&in[8]);
136 out[2] = *((u64 *)&in[16]);
137 out[3] = *((u64 *)&in[24]);
138 }
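/*-
 * The casts above assume a little-endian CPU that tolerates unaligned 64-bit
 * loads. Purely for illustration (this sketch is not part of the file and the
 * name is made up), an equivalent that avoids the unaligned accesses could
 * copy the bytes instead, using the <string.h> already included above:
 *
 *     static void bin32_to_felem_copy(felem out, const u8 in[32])
 *     {
 *         u64 limbs[4];
 *
 *         memcpy(limbs, in, sizeof(limbs));  // value is still little-endian
 *         out[0] = limbs[0];
 *         out[1] = limbs[1];
 *         out[2] = limbs[2];
 *         out[3] = limbs[3];
 *     }
 */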
139
140 /*
141 * smallfelem_to_bin32 takes a smallfelem and serializes it into a
142 * little-endian, 32-byte array. This assumes that the CPU is little-endian.
143 */
144 static void smallfelem_to_bin32(u8 out[32], const smallfelem in)
145 {
146 *((u64 *)&out[0]) = in[0];
147 *((u64 *)&out[8]) = in[1];
148 *((u64 *)&out[16]) = in[2];
149 *((u64 *)&out[24]) = in[3];
150 }
151
152 /* BN_to_felem converts an OpenSSL BIGNUM into an felem */
153 static int BN_to_felem(felem out, const BIGNUM *bn)
154 {
155 felem_bytearray b_out;
156 int num_bytes;
157
158 if (BN_is_negative(bn)) {
159 ERR_raise(ERR_LIB_EC, EC_R_BIGNUM_OUT_OF_RANGE);
160 return 0;
161 }
162 num_bytes = BN_bn2lebinpad(bn, b_out, sizeof(b_out));
163 if (num_bytes < 0) {
164 ERR_raise(ERR_LIB_EC, EC_R_BIGNUM_OUT_OF_RANGE);
165 return 0;
166 }
167 bin32_to_felem(out, b_out);
168 return 1;
169 }
170
171 /* smallfelem_to_BN converts a smallfelem into an OpenSSL BIGNUM */
172 static BIGNUM *smallfelem_to_BN(BIGNUM *out, const smallfelem in)
173 {
174 felem_bytearray b_out;
175 smallfelem_to_bin32(b_out, in);
176 return BN_lebin2bn(b_out, sizeof(b_out), out);
177 }
178
179 /*-
180 * Field operations
181 * ----------------
182 */
183
184 static void smallfelem_one(smallfelem out)
185 {
186 out[0] = 1;
187 out[1] = 0;
188 out[2] = 0;
189 out[3] = 0;
190 }
191
192 static void smallfelem_assign(smallfelem out, const smallfelem in)
193 {
194 out[0] = in[0];
195 out[1] = in[1];
196 out[2] = in[2];
197 out[3] = in[3];
198 }
199
200 static void felem_assign(felem out, const felem in)
201 {
202 out[0] = in[0];
203 out[1] = in[1];
204 out[2] = in[2];
205 out[3] = in[3];
206 }
207
208 /* felem_sum sets out = out + in. */
209 static void felem_sum(felem out, const felem in)
210 {
211 out[0] += in[0];
212 out[1] += in[1];
213 out[2] += in[2];
214 out[3] += in[3];
215 }
216
217 /* felem_small_sum sets out = out + in. */
218 static void felem_small_sum(felem out, const smallfelem in)
219 {
220 out[0] += in[0];
221 out[1] += in[1];
222 out[2] += in[2];
223 out[3] += in[3];
224 }
225
226 /* felem_scalar sets out = out * scalar */
227 static void felem_scalar(felem out, const u64 scalar)
228 {
229 out[0] *= scalar;
230 out[1] *= scalar;
231 out[2] *= scalar;
232 out[3] *= scalar;
233 }
234
235 /* longfelem_scalar sets out = out * scalar */
236 static void longfelem_scalar(longfelem out, const u64 scalar)
237 {
238 out[0] *= scalar;
239 out[1] *= scalar;
240 out[2] *= scalar;
241 out[3] *= scalar;
242 out[4] *= scalar;
243 out[5] *= scalar;
244 out[6] *= scalar;
245 out[7] *= scalar;
246 }
247
248 #define two105m41m9 (((limb)1) << 105) - (((limb)1) << 41) - (((limb)1) << 9)
249 #define two105 (((limb)1) << 105)
250 #define two105m41p9 (((limb)1) << 105) - (((limb)1) << 41) + (((limb)1) << 9)
251
252 /* zero105 is 0 mod p */
253 static const felem zero105 =
254 { two105m41m9, two105, two105m41p9, two105m41p9 };
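/*-
 * Concretely, reading the limbs at weights 2^0, 2^64, 2^128 and 2^192,
 * zero105 evaluates to (2^41 + 2^9) * p: a multiple of p whose limbs all sit
 * near 2^105, which is what lets the subtractions below borrow from it
 * without underflowing.
 */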
255
256 /*-
257 * smallfelem_neg sets |out| to |-small|
258 * On exit:
259 * out[i] < out[i] + 2^105
260 */
261 static void smallfelem_neg(felem out, const smallfelem small)
262 {
263 /* In order to prevent underflow, we subtract from 0 mod p. */
264 out[0] = zero105[0] - small[0];
265 out[1] = zero105[1] - small[1];
266 out[2] = zero105[2] - small[2];
267 out[3] = zero105[3] - small[3];
268 }
269
270 /*-
271 * felem_diff subtracts |in| from |out|
272 * On entry:
273 * in[i] < 2^104
274 * On exit:
275 * out[i] < out[i] + 2^105
276 */
277 static void felem_diff(felem out, const felem in)
278 {
279 /*
280 * In order to prevent underflow, we add 0 mod p before subtracting.
281 */
282 out[0] += zero105[0];
283 out[1] += zero105[1];
284 out[2] += zero105[2];
285 out[3] += zero105[3];
286
287 out[0] -= in[0];
288 out[1] -= in[1];
289 out[2] -= in[2];
290 out[3] -= in[3];
291 }
292
293 #define two107m43m11 (((limb)1) << 107) - (((limb)1) << 43) - (((limb)1) << 11)
294 #define two107 (((limb)1) << 107)
295 #define two107m43p11 (((limb)1) << 107) - (((limb)1) << 43) + (((limb)1) << 11)
296
297 /* zero107 is 0 mod p */
298 static const felem zero107 =
299 { two107m43m11, two107, two107m43p11, two107m43p11 };
300
301 /*-
302 * An alternative felem_diff for larger inputs |in|
303 * felem_diff_zero107 subtracts |in| from |out|
304 * On entry:
305 * in[i] < 2^106
306 * On exit:
307 * out[i] < out[i] + 2^107
308 */
309 static void felem_diff_zero107(felem out, const felem in)
310 {
311 /*
312 * In order to prevent underflow, we add 0 mod p before subtracting.
313 */
314 out[0] += zero107[0];
315 out[1] += zero107[1];
316 out[2] += zero107[2];
317 out[3] += zero107[3];
318
319 out[0] -= in[0];
320 out[1] -= in[1];
321 out[2] -= in[2];
322 out[3] -= in[3];
323 }
324
325 /*-
326 * longfelem_diff subtracts |in| from |out|
327 * On entry:
328 * in[i] < 7*2^67
329 * On exit:
330 * out[i] < out[i] + 2^70 + 2^40
331 */
332 static void longfelem_diff(longfelem out, const longfelem in)
333 {
334 static const limb two70m8p6 =
335 (((limb) 1) << 70) - (((limb) 1) << 8) + (((limb) 1) << 6);
336 static const limb two70p40 = (((limb) 1) << 70) + (((limb) 1) << 40);
337 static const limb two70 = (((limb) 1) << 70);
338 static const limb two70m40m38p6 =
339 (((limb) 1) << 70) - (((limb) 1) << 40) - (((limb) 1) << 38) +
340 (((limb) 1) << 6);
341 static const limb two70m6 = (((limb) 1) << 70) - (((limb) 1) << 6);
342
343 /* add 0 mod p to avoid underflow */
344 out[0] += two70m8p6;
345 out[1] += two70p40;
346 out[2] += two70;
347 out[3] += two70m40m38p6;
348 out[4] += two70m6;
349 out[5] += two70m6;
350 out[6] += two70m6;
351 out[7] += two70m6;
352
353 /* in[i] < 7*2^67 < 2^70 - 2^40 - 2^38 + 2^6 */
354 out[0] -= in[0];
355 out[1] -= in[1];
356 out[2] -= in[2];
357 out[3] -= in[3];
358 out[4] -= in[4];
359 out[5] -= in[5];
360 out[6] -= in[6];
361 out[7] -= in[7];
362 }
363
364 #define two64m0 (((limb)1) << 64) - 1
365 #define two110p32m0 (((limb)1) << 110) + (((limb)1) << 32) - 1
366 #define two64m46 (((limb)1) << 64) - (((limb)1) << 46)
367 #define two64m32 (((limb)1) << 64) - (((limb)1) << 32)
368
369 /* zero110 is 0 mod p */
370 static const felem zero110 = { two64m0, two110p32m0, two64m46, two64m32 };
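/*-
 * Here the four limbs evaluate to p itself:
 *     (2^64 - 1) + (2^110 + 2^32 - 1)*2^64 + (2^64 - 2^46)*2^128
 *                + (2^64 - 2^32)*2^192 = 2^256 - 2^224 + 2^192 + 2^96 - 1.
 */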
371
372 /*-
373 * felem_shrink converts an felem into a smallfelem. The result isn't quite
374 * minimal as the value may be greater than p.
375 *
376 * On entry:
377 * in[i] < 2^109
378 * On exit:
379 * out[i] < 2^64
380 */
381 static void felem_shrink(smallfelem out, const felem in)
382 {
383 felem tmp;
384 u64 a, b, mask;
385 u64 high, low;
386 static const u64 kPrime3Test = 0x7fffffff00000001ul; /* 2^63 - 2^32 + 1 */
387
388 /* Carry 2->3 */
389 tmp[3] = zero110[3] + in[3] + ((u64)(in[2] >> 64));
390 /* tmp[3] < 2^110 */
391
392 tmp[2] = zero110[2] + (u64)in[2];
393 tmp[0] = zero110[0] + in[0];
394 tmp[1] = zero110[1] + in[1];
395 /* tmp[0] < 2^110, tmp[1] < 2^111, tmp[2] < 2^65 */
396
397 /*
398 * We perform two partial reductions where we eliminate the high-word of
399 * tmp[3]. We don't update the other words till the end.
400 */
401 a = tmp[3] >> 64; /* a < 2^46 */
402 tmp[3] = (u64)tmp[3];
403 tmp[3] -= a;
404 tmp[3] += ((limb) a) << 32;
405 /* tmp[3] < 2^79 */
406
407 b = a;
408 a = tmp[3] >> 64; /* a < 2^15 */
409 b += a; /* b < 2^46 + 2^15 < 2^47 */
410 tmp[3] = (u64)tmp[3];
411 tmp[3] -= a;
412 tmp[3] += ((limb) a) << 32;
413 /* tmp[3] < 2^64 + 2^47 */
414
415 /*
416 * This adjusts the other two words to complete the two partial
417 * reductions.
418 */
419 tmp[0] += b;
420 tmp[1] -= (((limb) b) << 32);
421
422 /*
423 * In order to make space in tmp[3] for the carry from 2 -> 3, we
424 * conditionally subtract kPrime if tmp[3] is large enough.
425 */
426 high = (u64)(tmp[3] >> 64);
427 /* As tmp[3] < 2^65, high is either 1 or 0 */
428 high = 0 - high;
429 /*-
430 * high is:
431 * all ones if the high word of tmp[3] is 1
432 * all zeros if the high word of tmp[3] is 0
433 */
434 low = (u64)tmp[3];
435 mask = 0 - (low >> 63);
436 /*-
437 * mask is:
438 * all ones if the MSB of low is 1
439 * all zeros if the MSB of low is 0
440 */
441 low &= bottom63bits;
442 low -= kPrime3Test;
443 /* if low was greater than kPrime3Test then the MSB is zero */
444 low = ~low;
445 low = 0 - (low >> 63);
446 /*-
447 * low is:
448 * all ones if low was > kPrime3Test
449 * all zeros if low was <= kPrime3Test
450 */
451 mask = (mask & low) | high;
452 tmp[0] -= mask & kPrime[0];
453 tmp[1] -= mask & kPrime[1];
454 /* kPrime[2] is zero, so omitted */
455 tmp[3] -= mask & kPrime[3];
456 /* tmp[3] < 2**64 - 2**32 + 1 */
457
458 tmp[1] += ((u64)(tmp[0] >> 64));
459 tmp[0] = (u64)tmp[0];
460 tmp[2] += ((u64)(tmp[1] >> 64));
461 tmp[1] = (u64)tmp[1];
462 tmp[3] += ((u64)(tmp[2] >> 64));
463 tmp[2] = (u64)tmp[2];
464 /* tmp[i] < 2^64 */
465
466 out[0] = tmp[0];
467 out[1] = tmp[1];
468 out[2] = tmp[2];
469 out[3] = tmp[3];
470 }
471
472 /* smallfelem_expand converts a smallfelem to an felem */
473 static void smallfelem_expand(felem out, const smallfelem in)
474 {
475 out[0] = in[0];
476 out[1] = in[1];
477 out[2] = in[2];
478 out[3] = in[3];
479 }
480
481 /*-
482 * smallfelem_square sets |out| = |small|^2
483 * On entry:
484 * small[i] < 2^64
485 * On exit:
486 * out[i] < 7 * 2^64 < 2^67
487 */
488 static void smallfelem_square(longfelem out, const smallfelem small)
489 {
490 limb a;
491 u64 high, low;
492
493 a = ((uint128_t) small[0]) * small[0];
494 low = a;
495 high = a >> 64;
496 out[0] = low;
497 out[1] = high;
498
499 a = ((uint128_t) small[0]) * small[1];
500 low = a;
501 high = a >> 64;
502 out[1] += low;
503 out[1] += low;
504 out[2] = high;
505
506 a = ((uint128_t) small[0]) * small[2];
507 low = a;
508 high = a >> 64;
509 out[2] += low;
510 out[2] *= 2;
511 out[3] = high;
512
513 a = ((uint128_t) small[0]) * small[3];
514 low = a;
515 high = a >> 64;
516 out[3] += low;
517 out[4] = high;
518
519 a = ((uint128_t) small[1]) * small[2];
520 low = a;
521 high = a >> 64;
522 out[3] += low;
523 out[3] *= 2;
524 out[4] += high;
525
526 a = ((uint128_t) small[1]) * small[1];
527 low = a;
528 high = a >> 64;
529 out[2] += low;
530 out[3] += high;
531
532 a = ((uint128_t) small[1]) * small[3];
533 low = a;
534 high = a >> 64;
535 out[4] += low;
536 out[4] *= 2;
537 out[5] = high;
538
539 a = ((uint128_t) small[2]) * small[3];
540 low = a;
541 high = a >> 64;
542 out[5] += low;
543 out[5] *= 2;
544 out[6] = high;
545 out[6] += high;
546
547 a = ((uint128_t) small[2]) * small[2];
548 low = a;
549 high = a >> 64;
550 out[4] += low;
551 out[5] += high;
552
553 a = ((uint128_t) small[3]) * small[3];
554 low = a;
555 high = a >> 64;
556 out[6] += low;
557 out[7] = high;
558 }
559
560 /*-
561 * felem_square sets |out| = |in|^2
562 * On entry:
563 * in[i] < 2^109
564 * On exit:
565 * out[i] < 7 * 2^64 < 2^67
566 */
567 static void felem_square(longfelem out, const felem in)
568 {
569 u64 small[4];
570 felem_shrink(small, in);
571 smallfelem_square(out, small);
572 }
573
574 /*-
575 * smallfelem_mul sets |out| = |small1| * |small2|
576 * On entry:
577 * small1[i] < 2^64
578 * small2[i] < 2^64
579 * On exit:
580 * out[i] < 7 * 2^64 < 2^67
581 */
582 static void smallfelem_mul(longfelem out, const smallfelem small1,
583 const smallfelem small2)
584 {
585 limb a;
586 u64 high, low;
587
588 a = ((uint128_t) small1[0]) * small2[0];
589 low = a;
590 high = a >> 64;
591 out[0] = low;
592 out[1] = high;
593
594 a = ((uint128_t) small1[0]) * small2[1];
595 low = a;
596 high = a >> 64;
597 out[1] += low;
598 out[2] = high;
599
600 a = ((uint128_t) small1[1]) * small2[0];
601 low = a;
602 high = a >> 64;
603 out[1] += low;
604 out[2] += high;
605
606 a = ((uint128_t) small1[0]) * small2[2];
607 low = a;
608 high = a >> 64;
609 out[2] += low;
610 out[3] = high;
611
612 a = ((uint128_t) small1[1]) * small2[1];
613 low = a;
614 high = a >> 64;
615 out[2] += low;
616 out[3] += high;
617
618 a = ((uint128_t) small1[2]) * small2[0];
619 low = a;
620 high = a >> 64;
621 out[2] += low;
622 out[3] += high;
623
624 a = ((uint128_t) small1[0]) * small2[3];
625 low = a;
626 high = a >> 64;
627 out[3] += low;
628 out[4] = high;
629
630 a = ((uint128_t) small1[1]) * small2[2];
631 low = a;
632 high = a >> 64;
633 out[3] += low;
634 out[4] += high;
635
636 a = ((uint128_t) small1[2]) * small2[1];
637 low = a;
638 high = a >> 64;
639 out[3] += low;
640 out[4] += high;
641
642 a = ((uint128_t) small1[3]) * small2[0];
643 low = a;
644 high = a >> 64;
645 out[3] += low;
646 out[4] += high;
647
648 a = ((uint128_t) small1[1]) * small2[3];
649 low = a;
650 high = a >> 64;
651 out[4] += low;
652 out[5] = high;
653
654 a = ((uint128_t) small1[2]) * small2[2];
655 low = a;
656 high = a >> 64;
657 out[4] += low;
658 out[5] += high;
659
660 a = ((uint128_t) small1[3]) * small2[1];
661 low = a;
662 high = a >> 64;
663 out[4] += low;
664 out[5] += high;
665
666 a = ((uint128_t) small1[2]) * small2[3];
667 low = a;
668 high = a >> 64;
669 out[5] += low;
670 out[6] = high;
671
672 a = ((uint128_t) small1[3]) * small2[2];
673 low = a;
674 high = a >> 64;
675 out[5] += low;
676 out[6] += high;
677
678 a = ((uint128_t) small1[3]) * small2[3];
679 low = a;
680 high = a >> 64;
681 out[6] += low;
682 out[7] = high;
683 }
684
685 /*-
686 * felem_mul sets |out| = |in1| * |in2|
687 * On entry:
688 * in1[i] < 2^109
689 * in2[i] < 2^109
690 * On exit:
691 * out[i] < 7 * 2^64 < 2^67
692 */
693 static void felem_mul(longfelem out, const felem in1, const felem in2)
694 {
695 smallfelem small1, small2;
696 felem_shrink(small1, in1);
697 felem_shrink(small2, in2);
698 smallfelem_mul(out, small1, small2);
699 }
700
701 /*-
702 * felem_small_mul sets |out| = |small1| * |in2|
703 * On entry:
704 * small1[i] < 2^64
705 * in2[i] < 2^109
706 * On exit:
707 * out[i] < 7 * 2^64 < 2^67
708 */
709 static void felem_small_mul(longfelem out, const smallfelem small1,
710 const felem in2)
711 {
712 smallfelem small2;
713 felem_shrink(small2, in2);
714 smallfelem_mul(out, small1, small2);
715 }
716
717 #define two100m36m4 (((limb)1) << 100) - (((limb)1) << 36) - (((limb)1) << 4)
718 #define two100 (((limb)1) << 100)
719 #define two100m36p4 (((limb)1) << 100) - (((limb)1) << 36) + (((limb)1) << 4)
720 /* zero100 is 0 mod p */
721 static const felem zero100 =
722 { two100m36m4, two100, two100m36p4, two100m36p4 };
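/*-
 * As with zero105 and zero107, these limbs add up (at weights 2^0, 2^64,
 * 2^128, 2^192) to a small multiple of p, in this case (2^36 + 2^4) * p, so
 * adding zero100 leaves a value unchanged mod p.
 */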
723
724 /*-
725 * Internal function for the different flavours of felem_reduce.
726 * felem_reduce_ reduces the higher coefficients in[4]-in[7].
727 * On entry:
728 * out[0] >= in[6] + 2^32*in[6] + in[7] + 2^32*in[7]
729 * out[1] >= in[7] + 2^32*in[4]
730 * out[2] >= in[5] + 2^32*in[5]
731 * out[3] >= in[4] + 2^32*in[5] + 2^32*in[6]
732 * On exit:
733 * out[0] <= out[0] + in[4] + 2^32*in[5]
734 * out[1] <= out[1] + in[5] + 2^33*in[6]
735 * out[2] <= out[2] + in[7] + 2*in[6] + 2^33*in[7]
736 * out[3] <= out[3] + 2^32*in[4] + 3*in[7]
737 */
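/*-
 * The annotations "N: [(e_1,c_1),(e_2,c_2),...]" in the body below record the
 * congruence 2^N = c_1*2^e_1 + c_2*2^e_2 + ... (mod p) used to fold the limb
 * of weight 2^N back into the four low limbs. For example, since
 * p = 2^256 - 2^224 + 2^192 + 2^96 - 1, we have
 *     2^256 = 2^224 - 2^192 - 2^96 + 1 (mod p),
 * which is the line "256: [(0,1),(96,-1),(192,-1),(224,1)]".
 */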
738 static void felem_reduce_(felem out, const longfelem in)
739 {
740 int128_t c;
741 /* combine common terms from below */
742 c = in[4] + (in[5] << 32);
743 out[0] += c;
744 out[3] -= c;
745
746 c = in[5] - in[7];
747 out[1] += c;
748 out[2] -= c;
749
750 /* the remaining terms */
751 /* 256: [(0,1),(96,-1),(192,-1),(224,1)] */
752 out[1] -= (in[4] << 32);
753 out[3] += (in[4] << 32);
754
755 /* 320: [(32,1),(64,1),(128,-1),(160,-1),(224,-1)] */
756 out[2] -= (in[5] << 32);
757
758 /* 384: [(0,-1),(32,-1),(96,2),(128,2),(224,-1)] */
759 out[0] -= in[6];
760 out[0] -= (in[6] << 32);
761 out[1] += (in[6] << 33);
762 out[2] += (in[6] * 2);
763 out[3] -= (in[6] << 32);
764
765 /* 448: [(0,-1),(32,-1),(64,-1),(128,1),(160,2),(192,3)] */
766 out[0] -= in[7];
767 out[0] -= (in[7] << 32);
768 out[2] += (in[7] << 33);
769 out[3] += (in[7] * 3);
770 }
771
772 /*-
773 * felem_reduce converts a longfelem into an felem.
774 * To be called directly after felem_square or felem_mul.
775 * On entry:
776 * in[0] < 2^64, in[1] < 3*2^64, in[2] < 5*2^64, in[3] < 7*2^64
777 * in[4] < 7*2^64, in[5] < 5*2^64, in[6] < 3*2^64, in[7] < 2^64
778 * On exit:
779 * out[i] < 2^101
780 */
781 static void felem_reduce(felem out, const longfelem in)
782 {
783 out[0] = zero100[0] + in[0];
784 out[1] = zero100[1] + in[1];
785 out[2] = zero100[2] + in[2];
786 out[3] = zero100[3] + in[3];
787
788 felem_reduce_(out, in);
789
790 /*-
791 * out[0] > 2^100 - 2^36 - 2^4 - 3*2^64 - 3*2^96 - 2^64 - 2^96 > 0
792 * out[1] > 2^100 - 2^64 - 7*2^96 > 0
793 * out[2] > 2^100 - 2^36 + 2^4 - 5*2^64 - 5*2^96 > 0
794 * out[3] > 2^100 - 2^36 + 2^4 - 7*2^64 - 5*2^96 - 3*2^96 > 0
795 *
796 * out[0] < 2^100 + 2^64 + 7*2^64 + 5*2^96 < 2^101
797 * out[1] < 2^100 + 3*2^64 + 5*2^64 + 3*2^97 < 2^101
798 * out[2] < 2^100 + 5*2^64 + 2^64 + 3*2^65 + 2^97 < 2^101
799 * out[3] < 2^100 + 7*2^64 + 7*2^96 + 3*2^64 < 2^101
800 */
801 }
802
803 /*-
804 * felem_reduce_zero105 converts a larger longfelem into an felem.
805 * On entry:
806 * in[0] < 2^71
807 * On exit:
808 * out[i] < 2^106
809 */
810 static void felem_reduce_zero105(felem out, const longfelem in)
811 {
812 out[0] = zero105[0] + in[0];
813 out[1] = zero105[1] + in[1];
814 out[2] = zero105[2] + in[2];
815 out[3] = zero105[3] + in[3];
816
817 felem_reduce_(out, in);
818
819 /*-
820 * out[0] > 2^105 - 2^41 - 2^9 - 2^71 - 2^103 - 2^71 - 2^103 > 0
821 * out[1] > 2^105 - 2^71 - 2^103 > 0
822 * out[2] > 2^105 - 2^41 + 2^9 - 2^71 - 2^103 > 0
823 * out[3] > 2^105 - 2^41 + 2^9 - 2^71 - 2^103 - 2^103 > 0
824 *
825 * out[0] < 2^105 + 2^71 + 2^71 + 2^103 < 2^106
826 * out[1] < 2^105 + 2^71 + 2^71 + 2^103 < 2^106
827 * out[2] < 2^105 + 2^71 + 2^71 + 2^71 + 2^103 < 2^106
828 * out[3] < 2^105 + 2^71 + 2^103 + 2^71 < 2^106
829 */
830 }
831
832 /*
833 * subtract_u64 sets *result = *result - v and *carry to one if the
834 * subtraction underflowed.
835 */
836 static void subtract_u64(u64 *result, u64 *carry, u64 v)
837 {
838 uint128_t r = *result;
839 r -= v;
840 *carry = (r >> 64) & 1;
841 *result = (u64)r;
842 }
843
844 /*
845 * felem_contract converts |in| to its unique, minimal representation. On
846 * entry: in[i] < 2^109
847 */
848 static void felem_contract(smallfelem out, const felem in)
849 {
850 unsigned i;
851 u64 all_equal_so_far = 0, result = 0, carry;
852
853 felem_shrink(out, in);
854 /* small is minimal except that the value might be > p */
855
856 all_equal_so_far--;
857 /*
858 * We are doing a constant-time test of whether out >= kPrime. We need to
859 * compare each u64, from most significant to least significant. For each one,
860 * if all words so far have been equal (all_equal_so_far is all ones) then a non-equal
861 * result is the answer. Otherwise we continue.
862 */
863 for (i = 3; i < 4; i--) {
864 u64 equal;
865 uint128_t a = ((uint128_t) kPrime[i]) - out[i];
866 /*
867 * if out[i] > kPrime[i] then a will underflow and the high 64-bits
868 * will all be set.
869 */
870 result |= all_equal_so_far & ((u64)(a >> 64));
871
872 /*
873 * if kPrime[i] == out[i] then |equal| will be all zeros and the
874 * decrement will make it all ones.
875 */
876 equal = kPrime[i] ^ out[i];
877 equal--;
878 equal &= equal << 32;
879 equal &= equal << 16;
880 equal &= equal << 8;
881 equal &= equal << 4;
882 equal &= equal << 2;
883 equal &= equal << 1;
884 equal = 0 - (equal >> 63);
885
886 all_equal_so_far &= equal;
887 }
888
889 /*
890 * if all_equal_so_far is still all ones then the two values are equal
891 * and so out >= kPrime is true.
892 */
893 result |= all_equal_so_far;
894
895 /* if out >= kPrime then we subtract kPrime. */
896 subtract_u64(&out[0], &carry, result & kPrime[0]);
897 subtract_u64(&out[1], &carry, carry);
898 subtract_u64(&out[2], &carry, carry);
899 subtract_u64(&out[3], &carry, carry);
900
901 subtract_u64(&out[1], &carry, result & kPrime[1]);
902 subtract_u64(&out[2], &carry, carry);
903 subtract_u64(&out[3], &carry, carry);
904
905 subtract_u64(&out[2], &carry, result & kPrime[2]);
906 subtract_u64(&out[3], &carry, carry);
907
908 subtract_u64(&out[3], &carry, result & kPrime[3]);
909 }
910
911 static void smallfelem_square_contract(smallfelem out, const smallfelem in)
912 {
913 longfelem longtmp;
914 felem tmp;
915
916 smallfelem_square(longtmp, in);
917 felem_reduce(tmp, longtmp);
918 felem_contract(out, tmp);
919 }
920
921 static void smallfelem_mul_contract(smallfelem out, const smallfelem in1,
922 const smallfelem in2)
923 {
924 longfelem longtmp;
925 felem tmp;
926
927 smallfelem_mul(longtmp, in1, in2);
928 felem_reduce(tmp, longtmp);
929 felem_contract(out, tmp);
930 }
931
932 /*-
933 * smallfelem_is_zero returns a limb with all bits set if |small| == 0 (mod p) and 0
934 * otherwise.
935 * On entry:
936 * small[i] < 2^64
937 */
938 static limb smallfelem_is_zero(const smallfelem small)
939 {
940 limb result;
941 u64 is_p;
942
943 u64 is_zero = small[0] | small[1] | small[2] | small[3];
944 is_zero--;
945 is_zero &= is_zero << 32;
946 is_zero &= is_zero << 16;
947 is_zero &= is_zero << 8;
948 is_zero &= is_zero << 4;
949 is_zero &= is_zero << 2;
950 is_zero &= is_zero << 1;
951 is_zero = 0 - (is_zero >> 63);
952
953 is_p = (small[0] ^ kPrime[0]) |
954 (small[1] ^ kPrime[1]) |
955 (small[2] ^ kPrime[2]) | (small[3] ^ kPrime[3]);
956 is_p--;
957 is_p &= is_p << 32;
958 is_p &= is_p << 16;
959 is_p &= is_p << 8;
960 is_p &= is_p << 4;
961 is_p &= is_p << 2;
962 is_p &= is_p << 1;
963 is_p = 0 - (is_p >> 63);
964
965 is_zero |= is_p;
966
967 result = is_zero;
968 result |= ((limb) is_zero) << 64;
969 return result;
970 }
971
972 static int smallfelem_is_zero_int(const void *small)
973 {
974 return (int)(smallfelem_is_zero(small) & ((limb) 1));
975 }
976
977 /*-
978 * felem_inv calculates |out| = |in|^{-1}
979 *
980 * Based on Fermat's Little Theorem:
981 * a^p = a (mod p)
982 * a^{p-1} = 1 (mod p)
983 * a^{p-2} = a^{-1} (mod p)
984 */
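/*-
 * Here p - 2 = 2^256 - 2^224 + 2^192 + 2^96 - 3. The addition chain below
 * builds |in|^(p-2) from the intermediate powers e_I = |in|^{2^I - 1}: the
 * running value ftmp ends up with exponent 2^256 - 2^224 + 2^192 and ftmp2
 * with exponent 2^96 - 3, and the final multiplication combines the two.
 */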
985 static void felem_inv(felem out, const felem in)
986 {
987 felem ftmp, ftmp2;
988 /* each e_I will hold |in|^{2^I - 1} */
989 felem e2, e4, e8, e16, e32, e64;
990 longfelem tmp;
991 unsigned i;
992
993 felem_square(tmp, in);
994 felem_reduce(ftmp, tmp); /* 2^1 */
995 felem_mul(tmp, in, ftmp);
996 felem_reduce(ftmp, tmp); /* 2^2 - 2^0 */
997 felem_assign(e2, ftmp);
998 felem_square(tmp, ftmp);
999 felem_reduce(ftmp, tmp); /* 2^3 - 2^1 */
1000 felem_square(tmp, ftmp);
1001 felem_reduce(ftmp, tmp); /* 2^4 - 2^2 */
1002 felem_mul(tmp, ftmp, e2);
1003 felem_reduce(ftmp, tmp); /* 2^4 - 2^0 */
1004 felem_assign(e4, ftmp);
1005 felem_square(tmp, ftmp);
1006 felem_reduce(ftmp, tmp); /* 2^5 - 2^1 */
1007 felem_square(tmp, ftmp);
1008 felem_reduce(ftmp, tmp); /* 2^6 - 2^2 */
1009 felem_square(tmp, ftmp);
1010 felem_reduce(ftmp, tmp); /* 2^7 - 2^3 */
1011 felem_square(tmp, ftmp);
1012 felem_reduce(ftmp, tmp); /* 2^8 - 2^4 */
1013 felem_mul(tmp, ftmp, e4);
1014 felem_reduce(ftmp, tmp); /* 2^8 - 2^0 */
1015 felem_assign(e8, ftmp);
1016 for (i = 0; i < 8; i++) {
1017 felem_square(tmp, ftmp);
1018 felem_reduce(ftmp, tmp);
1019 } /* 2^16 - 2^8 */
1020 felem_mul(tmp, ftmp, e8);
1021 felem_reduce(ftmp, tmp); /* 2^16 - 2^0 */
1022 felem_assign(e16, ftmp);
1023 for (i = 0; i < 16; i++) {
1024 felem_square(tmp, ftmp);
1025 felem_reduce(ftmp, tmp);
1026 } /* 2^32 - 2^16 */
1027 felem_mul(tmp, ftmp, e16);
1028 felem_reduce(ftmp, tmp); /* 2^32 - 2^0 */
1029 felem_assign(e32, ftmp);
1030 for (i = 0; i < 32; i++) {
1031 felem_square(tmp, ftmp);
1032 felem_reduce(ftmp, tmp);
1033 } /* 2^64 - 2^32 */
1034 felem_assign(e64, ftmp);
1035 felem_mul(tmp, ftmp, in);
1036 felem_reduce(ftmp, tmp); /* 2^64 - 2^32 + 2^0 */
1037 for (i = 0; i < 192; i++) {
1038 felem_square(tmp, ftmp);
1039 felem_reduce(ftmp, tmp);
1040 } /* 2^256 - 2^224 + 2^192 */
1041
1042 felem_mul(tmp, e64, e32);
1043 felem_reduce(ftmp2, tmp); /* 2^64 - 2^0 */
1044 for (i = 0; i < 16; i++) {
1045 felem_square(tmp, ftmp2);
1046 felem_reduce(ftmp2, tmp);
1047 } /* 2^80 - 2^16 */
1048 felem_mul(tmp, ftmp2, e16);
1049 felem_reduce(ftmp2, tmp); /* 2^80 - 2^0 */
1050 for (i = 0; i < 8; i++) {
1051 felem_square(tmp, ftmp2);
1052 felem_reduce(ftmp2, tmp);
1053 } /* 2^88 - 2^8 */
1054 felem_mul(tmp, ftmp2, e8);
1055 felem_reduce(ftmp2, tmp); /* 2^88 - 2^0 */
1056 for (i = 0; i < 4; i++) {
1057 felem_square(tmp, ftmp2);
1058 felem_reduce(ftmp2, tmp);
1059 } /* 2^92 - 2^4 */
1060 felem_mul(tmp, ftmp2, e4);
1061 felem_reduce(ftmp2, tmp); /* 2^92 - 2^0 */
1062 felem_square(tmp, ftmp2);
1063 felem_reduce(ftmp2, tmp); /* 2^93 - 2^1 */
1064 felem_square(tmp, ftmp2);
1065 felem_reduce(ftmp2, tmp); /* 2^94 - 2^2 */
1066 felem_mul(tmp, ftmp2, e2);
1067 felem_reduce(ftmp2, tmp); /* 2^94 - 2^0 */
1068 felem_square(tmp, ftmp2);
1069 felem_reduce(ftmp2, tmp); /* 2^95 - 2^1 */
1070 felem_square(tmp, ftmp2);
1071 felem_reduce(ftmp2, tmp); /* 2^96 - 2^2 */
1072 felem_mul(tmp, ftmp2, in);
1073 felem_reduce(ftmp2, tmp); /* 2^96 - 3 */
1074
1075 felem_mul(tmp, ftmp2, ftmp);
1076 felem_reduce(out, tmp); /* 2^256 - 2^224 + 2^192 + 2^96 - 3 */
1077 }
1078
1079 static void smallfelem_inv_contract(smallfelem out, const smallfelem in)
1080 {
1081 felem tmp;
1082
1083 smallfelem_expand(tmp, in);
1084 felem_inv(tmp, tmp);
1085 felem_contract(out, tmp);
1086 }
1087
1088 /*-
1089 * Group operations
1090 * ----------------
1091 *
1092 * Building on top of the field operations we have the operations on the
1093 * elliptic curve group itself. Points on the curve are represented in Jacobian
1094 * coordinates
1095 */
1096
1097 /*-
1098 * point_double calculates 2*(x_in, y_in, z_in)
1099 *
1100 * The method is taken from:
1101 * http://hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-3.html#doubling-dbl-2001-b
1102 *
1103 * Outputs can equal corresponding inputs, i.e., x_out == x_in is allowed,
1104 * while x_out == y_in is not (maybe this works, but it's not tested).
1105 */
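/*-
 * In the notation of that reference, with (x, y, z) the Jacobian input:
 *     delta = z^2, gamma = y^2, beta = x*gamma,
 *     alpha = 3*(x - delta)*(x + delta),
 *     x' = alpha^2 - 8*beta,
 *     z' = (y + z)^2 - gamma - delta,
 *     y' = alpha*(4*beta - x') - 8*gamma^2,
 * which are exactly the steps labelled in the body below.
 */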
1106 static void
1107 point_double(felem x_out, felem y_out, felem z_out,
1108 const felem x_in, const felem y_in, const felem z_in)
1109 {
1110 longfelem tmp, tmp2;
1111 felem delta, gamma, beta, alpha, ftmp, ftmp2;
1112 smallfelem small1, small2;
1113
1114 felem_assign(ftmp, x_in);
1115 /* ftmp[i] < 2^106 */
1116 felem_assign(ftmp2, x_in);
1117 /* ftmp2[i] < 2^106 */
1118
1119 /* delta = z^2 */
1120 felem_square(tmp, z_in);
1121 felem_reduce(delta, tmp);
1122 /* delta[i] < 2^101 */
1123
1124 /* gamma = y^2 */
1125 felem_square(tmp, y_in);
1126 felem_reduce(gamma, tmp);
1127 /* gamma[i] < 2^101 */
1128 felem_shrink(small1, gamma);
1129
1130 /* beta = x*gamma */
1131 felem_small_mul(tmp, small1, x_in);
1132 felem_reduce(beta, tmp);
1133 /* beta[i] < 2^101 */
1134
1135 /* alpha = 3*(x-delta)*(x+delta) */
1136 felem_diff(ftmp, delta);
1137 /* ftmp[i] < 2^105 + 2^106 < 2^107 */
1138 felem_sum(ftmp2, delta);
1139 /* ftmp2[i] < 2^105 + 2^106 < 2^107 */
1140 felem_scalar(ftmp2, 3);
1141 /* ftmp2[i] < 3 * 2^107 < 2^109 */
1142 felem_mul(tmp, ftmp, ftmp2);
1143 felem_reduce(alpha, tmp);
1144 /* alpha[i] < 2^101 */
1145 felem_shrink(small2, alpha);
1146
1147 /* x' = alpha^2 - 8*beta */
1148 smallfelem_square(tmp, small2);
1149 felem_reduce(x_out, tmp);
1150 felem_assign(ftmp, beta);
1151 felem_scalar(ftmp, 8);
1152 /* ftmp[i] < 8 * 2^101 = 2^104 */
1153 felem_diff(x_out, ftmp);
1154 /* x_out[i] < 2^105 + 2^101 < 2^106 */
1155
1156 /* z' = (y + z)^2 - gamma - delta */
1157 felem_sum(delta, gamma);
1158 /* delta[i] < 2^101 + 2^101 = 2^102 */
1159 felem_assign(ftmp, y_in);
1160 felem_sum(ftmp, z_in);
1161 /* ftmp[i] < 2^106 + 2^106 = 2^107 */
1162 felem_square(tmp, ftmp);
1163 felem_reduce(z_out, tmp);
1164 felem_diff(z_out, delta);
1165 /* z_out[i] < 2^105 + 2^101 < 2^106 */
1166
1167 /* y' = alpha*(4*beta - x') - 8*gamma^2 */
1168 felem_scalar(beta, 4);
1169 /* beta[i] < 4 * 2^101 = 2^103 */
1170 felem_diff_zero107(beta, x_out);
1171 /* beta[i] < 2^107 + 2^103 < 2^108 */
1172 felem_small_mul(tmp, small2, beta);
1173 /* tmp[i] < 7 * 2^64 < 2^67 */
1174 smallfelem_square(tmp2, small1);
1175 /* tmp2[i] < 7 * 2^64 */
1176 longfelem_scalar(tmp2, 8);
1177 /* tmp2[i] < 8 * 7 * 2^64 = 7 * 2^67 */
1178 longfelem_diff(tmp, tmp2);
1179 /* tmp[i] < 2^67 + 2^70 + 2^40 < 2^71 */
1180 felem_reduce_zero105(y_out, tmp);
1181 /* y_out[i] < 2^106 */
1182 }
1183
1184 /*
1185 * point_double_small is the same as point_double, except that it operates on
1186 * smallfelems
1187 */
1188 static void
1189 point_double_small(smallfelem x_out, smallfelem y_out, smallfelem z_out,
1190 const smallfelem x_in, const smallfelem y_in,
1191 const smallfelem z_in)
1192 {
1193 felem felem_x_out, felem_y_out, felem_z_out;
1194 felem felem_x_in, felem_y_in, felem_z_in;
1195
1196 smallfelem_expand(felem_x_in, x_in);
1197 smallfelem_expand(felem_y_in, y_in);
1198 smallfelem_expand(felem_z_in, z_in);
1199 point_double(felem_x_out, felem_y_out, felem_z_out,
1200 felem_x_in, felem_y_in, felem_z_in);
1201 felem_shrink(x_out, felem_x_out);
1202 felem_shrink(y_out, felem_y_out);
1203 felem_shrink(z_out, felem_z_out);
1204 }
1205
1206 /* copy_conditional copies in to out iff mask is all ones. */
1207 static void copy_conditional(felem out, const felem in, limb mask)
1208 {
1209 unsigned i;
1210 for (i = 0; i < NLIMBS; ++i) {
1211 const limb tmp = mask & (in[i] ^ out[i]);
1212 out[i] ^= tmp;
1213 }
1214 }
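/*-
 * The XOR-with-mask idiom above is constant time: when mask is all ones,
 * tmp equals in[i] ^ out[i], so the final XOR leaves out[i] == in[i]; when
 * mask is zero, tmp is zero and out[i] is untouched. No branch or memory
 * access depends on the value of mask.
 */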
1215
1216 /* copy_small_conditional copies in to out iff mask is all ones. */
1217 static void copy_small_conditional(felem out, const smallfelem in, limb mask)
1218 {
1219 unsigned i;
1220 const u64 mask64 = mask;
1221 for (i = 0; i < NLIMBS; ++i) {
1222 out[i] = ((limb) (in[i] & mask64)) | (out[i] & ~mask);
1223 }
1224 }
1225
1226 /*-
1227 * point_add calculates (x1, y1, z1) + (x2, y2, z2)
1228 *
1229 * The method is taken from:
1230 * http://hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-3.html#addition-add-2007-bl,
1231 * adapted for mixed addition (z2 = 1, or z2 = 0 for the point at infinity).
1232 *
1233 * This function includes a branch for checking whether the two input points
1234 * are equal (while not equal to the point at infinity). This case never
1235 * happens during single point multiplication, so there is no timing leak for
1236 * ECDH or ECDSA signing.
1237 */
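/*-
 * In the notation of that reference the body below computes
 *     Z1Z1 = z1^2, Z2Z2 = z2^2, U1 = x1*Z2Z2, U2 = x2*Z1Z1,
 *     S1 = y1*z2^3, S2 = y2*z1^3, H = U2 - U1, r = 2*(S2 - S1),
 *     I = (2*H)^2, J = H*I, V = U1*I,
 *     x3 = r^2 - J - 2*V,
 *     y3 = r*(V - x3) - 2*S1*J,
 *     z3 = ((z1 + z2)^2 - Z1Z1 - Z2Z2)*H = 2*z1*z2*H,
 * with the z2 = 1 shortcuts taken in the "mixed" branch.
 */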
1238 static void point_add(felem x3, felem y3, felem z3,
1239 const felem x1, const felem y1, const felem z1,
1240 const int mixed, const smallfelem x2,
1241 const smallfelem y2, const smallfelem z2)
1242 {
1243 felem ftmp, ftmp2, ftmp3, ftmp4, ftmp5, ftmp6, x_out, y_out, z_out;
1244 longfelem tmp, tmp2;
1245 smallfelem small1, small2, small3, small4, small5;
1246 limb x_equal, y_equal, z1_is_zero, z2_is_zero;
1247 limb points_equal;
1248
1249 felem_shrink(small3, z1);
1250
1251 z1_is_zero = smallfelem_is_zero(small3);
1252 z2_is_zero = smallfelem_is_zero(z2);
1253
1254 /* ftmp = z1z1 = z1**2 */
1255 smallfelem_square(tmp, small3);
1256 felem_reduce(ftmp, tmp);
1257 /* ftmp[i] < 2^101 */
1258 felem_shrink(small1, ftmp);
1259
1260 if (!mixed) {
1261 /* ftmp2 = z2z2 = z2**2 */
1262 smallfelem_square(tmp, z2);
1263 felem_reduce(ftmp2, tmp);
1264 /* ftmp2[i] < 2^101 */
1265 felem_shrink(small2, ftmp2);
1266
1267 felem_shrink(small5, x1);
1268
1269 /* u1 = ftmp3 = x1*z2z2 */
1270 smallfelem_mul(tmp, small5, small2);
1271 felem_reduce(ftmp3, tmp);
1272 /* ftmp3[i] < 2^101 */
1273
1274 /* ftmp5 = z1 + z2 */
1275 felem_assign(ftmp5, z1);
1276 felem_small_sum(ftmp5, z2);
1277 /* ftmp5[i] < 2^107 */
1278
1279 /* ftmp5 = (z1 + z2)**2 - (z1z1 + z2z2) = 2z1z2 */
1280 felem_square(tmp, ftmp5);
1281 felem_reduce(ftmp5, tmp);
1282 /* ftmp2 = z2z2 + z1z1 */
1283 felem_sum(ftmp2, ftmp);
1284 /* ftmp2[i] < 2^101 + 2^101 = 2^102 */
1285 felem_diff(ftmp5, ftmp2);
1286 /* ftmp5[i] < 2^105 + 2^101 < 2^106 */
1287
1288 /* ftmp2 = z2 * z2z2 */
1289 smallfelem_mul(tmp, small2, z2);
1290 felem_reduce(ftmp2, tmp);
1291
1292 /* s1 = ftmp2 = y1 * z2**3 */
1293 felem_mul(tmp, y1, ftmp2);
1294 felem_reduce(ftmp6, tmp);
1295 /* ftmp6[i] < 2^101 */
1296 } else {
1297 /*
1298 * We'll assume z2 = 1 (special case z2 = 0 is handled later)
1299 */
1300
1301 /* u1 = ftmp3 = x1*z2z2 */
1302 felem_assign(ftmp3, x1);
1303 /* ftmp3[i] < 2^106 */
1304
1305 /* ftmp5 = 2z1z2 */
1306 felem_assign(ftmp5, z1);
1307 felem_scalar(ftmp5, 2);
1308 /* ftmp5[i] < 2*2^106 = 2^107 */
1309
1310 /* s1 = ftmp2 = y1 * z2**3 */
1311 felem_assign(ftmp6, y1);
1312 /* ftmp6[i] < 2^106 */
1313 }
1314
1315 /* u2 = x2*z1z1 */
1316 smallfelem_mul(tmp, x2, small1);
1317 felem_reduce(ftmp4, tmp);
1318
1319 /* h = ftmp4 = u2 - u1 */
1320 felem_diff_zero107(ftmp4, ftmp3);
1321 /* ftmp4[i] < 2^107 + 2^101 < 2^108 */
1322 felem_shrink(small4, ftmp4);
1323
1324 x_equal = smallfelem_is_zero(small4);
1325
1326 /* z_out = ftmp5 * h */
1327 felem_small_mul(tmp, small4, ftmp5);
1328 felem_reduce(z_out, tmp);
1329 /* z_out[i] < 2^101 */
1330
1331 /* ftmp = z1 * z1z1 */
1332 smallfelem_mul(tmp, small1, small3);
1333 felem_reduce(ftmp, tmp);
1334
1335 /* s2 = tmp = y2 * z1**3 */
1336 felem_small_mul(tmp, y2, ftmp);
1337 felem_reduce(ftmp5, tmp);
1338
1339 /* r = ftmp5 = (s2 - s1)*2 */
1340 felem_diff_zero107(ftmp5, ftmp6);
1341 /* ftmp5[i] < 2^107 + 2^107 = 2^108 */
1342 felem_scalar(ftmp5, 2);
1343 /* ftmp5[i] < 2^109 */
1344 felem_shrink(small1, ftmp5);
1345 y_equal = smallfelem_is_zero(small1);
1346
1347 /*
1348 * The formulae are incorrect if the points are equal, in affine coordinates
1349 * (X_1, Y_1) == (X_2, Y_2), so we check for this and do doubling if this
1350 * happens.
1351 *
1352 * We use bitwise operations to avoid potential side-channels introduced by
1353 * the short-circuiting behaviour of boolean operators.
1354 *
1355 * The special case of either point being the point at infinity (z1 and/or
1356 * z2 are zero), is handled separately later on in this function, so we
1357 * avoid jumping to point_double here in those special cases.
1358 */
1359 points_equal = (x_equal & y_equal & (~z1_is_zero) & (~z2_is_zero));
1360
1361 if (points_equal) {
1362 /*
1363 * This is obviously not constant-time but, as mentioned before, this
1364 * case never happens during single point multiplication, so there is no
1365 * timing leak for ECDH or ECDSA signing.
1366 */
1367 point_double(x3, y3, z3, x1, y1, z1);
1368 return;
1369 }
1370
1371 /* I = ftmp = (2h)**2 */
1372 felem_assign(ftmp, ftmp4);
1373 felem_scalar(ftmp, 2);
1374 /* ftmp[i] < 2*2^108 = 2^109 */
1375 felem_square(tmp, ftmp);
1376 felem_reduce(ftmp, tmp);
1377
1378 /* J = ftmp2 = h * I */
1379 felem_mul(tmp, ftmp4, ftmp);
1380 felem_reduce(ftmp2, tmp);
1381
1382 /* V = ftmp4 = U1 * I */
1383 felem_mul(tmp, ftmp3, ftmp);
1384 felem_reduce(ftmp4, tmp);
1385
1386 /* x_out = r**2 - J - 2V */
1387 smallfelem_square(tmp, small1);
1388 felem_reduce(x_out, tmp);
1389 felem_assign(ftmp3, ftmp4);
1390 felem_scalar(ftmp4, 2);
1391 felem_sum(ftmp4, ftmp2);
1392 /* ftmp4[i] < 2*2^101 + 2^101 < 2^103 */
1393 felem_diff(x_out, ftmp4);
1394 /* x_out[i] < 2^105 + 2^101 */
1395
1396 /* y_out = r(V-x_out) - 2 * s1 * J */
1397 felem_diff_zero107(ftmp3, x_out);
1398 /* ftmp3[i] < 2^107 + 2^101 < 2^108 */
1399 felem_small_mul(tmp, small1, ftmp3);
1400 felem_mul(tmp2, ftmp6, ftmp2);
1401 longfelem_scalar(tmp2, 2);
1402 /* tmp2[i] < 2*2^67 = 2^68 */
1403 longfelem_diff(tmp, tmp2);
1404 /* tmp[i] < 2^67 + 2^70 + 2^40 < 2^71 */
1405 felem_reduce_zero105(y_out, tmp);
1406 /* y_out[i] < 2^106 */
1407
1408 copy_small_conditional(x_out, x2, z1_is_zero);
1409 copy_conditional(x_out, x1, z2_is_zero);
1410 copy_small_conditional(y_out, y2, z1_is_zero);
1411 copy_conditional(y_out, y1, z2_is_zero);
1412 copy_small_conditional(z_out, z2, z1_is_zero);
1413 copy_conditional(z_out, z1, z2_is_zero);
1414 felem_assign(x3, x_out);
1415 felem_assign(y3, y_out);
1416 felem_assign(z3, z_out);
1417 }
1418
1419 /*
1420 * point_add_small is the same as point_add, except that it operates on
1421 * smallfelems
1422 */
1423 static void point_add_small(smallfelem x3, smallfelem y3, smallfelem z3,
1424 smallfelem x1, smallfelem y1, smallfelem z1,
1425 smallfelem x2, smallfelem y2, smallfelem z2)
1426 {
1427 felem felem_x3, felem_y3, felem_z3;
1428 felem felem_x1, felem_y1, felem_z1;
1429 smallfelem_expand(felem_x1, x1);
1430 smallfelem_expand(felem_y1, y1);
1431 smallfelem_expand(felem_z1, z1);
1432 point_add(felem_x3, felem_y3, felem_z3, felem_x1, felem_y1, felem_z1, 0,
1433 x2, y2, z2);
1434 felem_shrink(x3, felem_x3);
1435 felem_shrink(y3, felem_y3);
1436 felem_shrink(z3, felem_z3);
1437 }
1438
1439 /*-
1440 * Base point pre computation
1441 * --------------------------
1442 *
1443 * Two different sorts of precomputed tables are used in the following code.
1444 * Each contain various points on the curve, where each point is three field
1445 * elements (x, y, z).
1446 *
1447 * For the base point table, z is usually 1 (0 for the point at infinity).
1448 * This table has 2 * 16 elements, starting with the following:
1449 * index | bits | point
1450 * ------+---------+------------------------------
1451 * 0 | 0 0 0 0 | 0G
1452 * 1 | 0 0 0 1 | 1G
1453 * 2 | 0 0 1 0 | 2^64G
1454 * 3 | 0 0 1 1 | (2^64 + 1)G
1455 * 4 | 0 1 0 0 | 2^128G
1456 * 5 | 0 1 0 1 | (2^128 + 1)G
1457 * 6 | 0 1 1 0 | (2^128 + 2^64)G
1458 * 7 | 0 1 1 1 | (2^128 + 2^64 + 1)G
1459 * 8 | 1 0 0 0 | 2^192G
1460 * 9 | 1 0 0 1 | (2^192 + 1)G
1461 * 10 | 1 0 1 0 | (2^192 + 2^64)G
1462 * 11 | 1 0 1 1 | (2^192 + 2^64 + 1)G
1463 * 12 | 1 1 0 0 | (2^192 + 2^128)G
1464 * 13 | 1 1 0 1 | (2^192 + 2^128 + 1)G
1465 * 14 | 1 1 1 0 | (2^192 + 2^128 + 2^64)G
1466 * 15 | 1 1 1 1 | (2^192 + 2^128 + 2^64 + 1)G
1467 * followed by a copy of this with each element multiplied by 2^32.
1468 *
1469 * The reason for this is so that we can clock bits into four different
1470 * locations when doing simple scalar multiplies against the base point,
1471 * and then another four locations using the second 16 elements.
1472 *
1473 * Tables for other points have table[i] = iG for i in 0 .. 16. */
1474
1475 /* gmul is the table of precomputed base points */
1476 static const smallfelem gmul[2][16][3] = {
1477 {{{0, 0, 0, 0},
1478 {0, 0, 0, 0},
1479 {0, 0, 0, 0}},
1480 {{0xf4a13945d898c296, 0x77037d812deb33a0, 0xf8bce6e563a440f2,
1481 0x6b17d1f2e12c4247},
1482 {0xcbb6406837bf51f5, 0x2bce33576b315ece, 0x8ee7eb4a7c0f9e16,
1483 0x4fe342e2fe1a7f9b},
1484 {1, 0, 0, 0}},
1485 {{0x90e75cb48e14db63, 0x29493baaad651f7e, 0x8492592e326e25de,
1486 0x0fa822bc2811aaa5},
1487 {0xe41124545f462ee7, 0x34b1a65050fe82f5, 0x6f4ad4bcb3df188b,
1488 0xbff44ae8f5dba80d},
1489 {1, 0, 0, 0}},
1490 {{0x93391ce2097992af, 0xe96c98fd0d35f1fa, 0xb257c0de95e02789,
1491 0x300a4bbc89d6726f},
1492 {0xaa54a291c08127a0, 0x5bb1eeada9d806a5, 0x7f1ddb25ff1e3c6f,
1493 0x72aac7e0d09b4644},
1494 {1, 0, 0, 0}},
1495 {{0x57c84fc9d789bd85, 0xfc35ff7dc297eac3, 0xfb982fd588c6766e,
1496 0x447d739beedb5e67},
1497 {0x0c7e33c972e25b32, 0x3d349b95a7fae500, 0xe12e9d953a4aaff7,
1498 0x2d4825ab834131ee},
1499 {1, 0, 0, 0}},
1500 {{0x13949c932a1d367f, 0xef7fbd2b1a0a11b7, 0xddc6068bb91dfc60,
1501 0xef9519328a9c72ff},
1502 {0x196035a77376d8a8, 0x23183b0895ca1740, 0xc1ee9807022c219c,
1503 0x611e9fc37dbb2c9b},
1504 {1, 0, 0, 0}},
1505 {{0xcae2b1920b57f4bc, 0x2936df5ec6c9bc36, 0x7dea6482e11238bf,
1506 0x550663797b51f5d8},
1507 {0x44ffe216348a964c, 0x9fb3d576dbdefbe1, 0x0afa40018d9d50e5,
1508 0x157164848aecb851},
1509 {1, 0, 0, 0}},
1510 {{0xe48ecafffc5cde01, 0x7ccd84e70d715f26, 0xa2e8f483f43e4391,
1511 0xeb5d7745b21141ea},
1512 {0xcac917e2731a3479, 0x85f22cfe2844b645, 0x0990e6a158006cee,
1513 0xeafd72ebdbecc17b},
1514 {1, 0, 0, 0}},
1515 {{0x6cf20ffb313728be, 0x96439591a3c6b94a, 0x2736ff8344315fc5,
1516 0xa6d39677a7849276},
1517 {0xf2bab833c357f5f4, 0x824a920c2284059b, 0x66b8babd2d27ecdf,
1518 0x674f84749b0b8816},
1519 {1, 0, 0, 0}},
1520 {{0x2df48c04677c8a3e, 0x74e02f080203a56b, 0x31855f7db8c7fedb,
1521 0x4e769e7672c9ddad},
1522 {0xa4c36165b824bbb0, 0xfb9ae16f3b9122a5, 0x1ec0057206947281,
1523 0x42b99082de830663},
1524 {1, 0, 0, 0}},
1525 {{0x6ef95150dda868b9, 0xd1f89e799c0ce131, 0x7fdc1ca008a1c478,
1526 0x78878ef61c6ce04d},
1527 {0x9c62b9121fe0d976, 0x6ace570ebde08d4f, 0xde53142c12309def,
1528 0xb6cb3f5d7b72c321},
1529 {1, 0, 0, 0}},
1530 {{0x7f991ed2c31a3573, 0x5b82dd5bd54fb496, 0x595c5220812ffcae,
1531 0x0c88bc4d716b1287},
1532 {0x3a57bf635f48aca8, 0x7c8181f4df2564f3, 0x18d1b5b39c04e6aa,
1533 0xdd5ddea3f3901dc6},
1534 {1, 0, 0, 0}},
1535 {{0xe96a79fb3e72ad0c, 0x43a0a28c42ba792f, 0xefe0a423083e49f3,
1536 0x68f344af6b317466},
1537 {0xcdfe17db3fb24d4a, 0x668bfc2271f5c626, 0x604ed93c24d67ff3,
1538 0x31b9c405f8540a20},
1539 {1, 0, 0, 0}},
1540 {{0xd36b4789a2582e7f, 0x0d1a10144ec39c28, 0x663c62c3edbad7a0,
1541 0x4052bf4b6f461db9},
1542 {0x235a27c3188d25eb, 0xe724f33999bfcc5b, 0x862be6bd71d70cc8,
1543 0xfecf4d5190b0fc61},
1544 {1, 0, 0, 0}},
1545 {{0x74346c10a1d4cfac, 0xafdf5cc08526a7a4, 0x123202a8f62bff7a,
1546 0x1eddbae2c802e41a},
1547 {0x8fa0af2dd603f844, 0x36e06b7e4c701917, 0x0c45f45273db33a0,
1548 0x43104d86560ebcfc},
1549 {1, 0, 0, 0}},
1550 {{0x9615b5110d1d78e5, 0x66b0de3225c4744b, 0x0a4a46fb6aaf363a,
1551 0xb48e26b484f7a21c},
1552 {0x06ebb0f621a01b2d, 0xc004e4048b7b0f98, 0x64131bcdfed6f668,
1553 0xfac015404d4d3dab},
1554 {1, 0, 0, 0}}},
1555 {{{0, 0, 0, 0},
1556 {0, 0, 0, 0},
1557 {0, 0, 0, 0}},
1558 {{0x3a5a9e22185a5943, 0x1ab919365c65dfb6, 0x21656b32262c71da,
1559 0x7fe36b40af22af89},
1560 {0xd50d152c699ca101, 0x74b3d5867b8af212, 0x9f09f40407dca6f1,
1561 0xe697d45825b63624},
1562 {1, 0, 0, 0}},
1563 {{0xa84aa9397512218e, 0xe9a521b074ca0141, 0x57880b3a18a2e902,
1564 0x4a5b506612a677a6},
1565 {0x0beada7a4c4f3840, 0x626db15419e26d9d, 0xc42604fbe1627d40,
1566 0xeb13461ceac089f1},
1567 {1, 0, 0, 0}},
1568 {{0xf9faed0927a43281, 0x5e52c4144103ecbc, 0xc342967aa815c857,
1569 0x0781b8291c6a220a},
1570 {0x5a8343ceeac55f80, 0x88f80eeee54a05e3, 0x97b2a14f12916434,
1571 0x690cde8df0151593},
1572 {1, 0, 0, 0}},
1573 {{0xaee9c75df7f82f2a, 0x9e4c35874afdf43a, 0xf5622df437371326,
1574 0x8a535f566ec73617},
1575 {0xc5f9a0ac223094b7, 0xcde533864c8c7669, 0x37e02819085a92bf,
1576 0x0455c08468b08bd7},
1577 {1, 0, 0, 0}},
1578 {{0x0c0a6e2c9477b5d9, 0xf9a4bf62876dc444, 0x5050a949b6cdc279,
1579 0x06bada7ab77f8276},
1580 {0xc8b4aed1ea48dac9, 0xdebd8a4b7ea1070f, 0x427d49101366eb70,
1581 0x5b476dfd0e6cb18a},
1582 {1, 0, 0, 0}},
1583 {{0x7c5c3e44278c340a, 0x4d54606812d66f3b, 0x29a751b1ae23c5d8,
1584 0x3e29864e8a2ec908},
1585 {0x142d2a6626dbb850, 0xad1744c4765bd780, 0x1f150e68e322d1ed,
1586 0x239b90ea3dc31e7e},
1587 {1, 0, 0, 0}},
1588 {{0x78c416527a53322a, 0x305dde6709776f8e, 0xdbcab759f8862ed4,
1589 0x820f4dd949f72ff7},
1590 {0x6cc544a62b5debd4, 0x75be5d937b4e8cc4, 0x1b481b1b215c14d3,
1591 0x140406ec783a05ec},
1592 {1, 0, 0, 0}},
1593 {{0x6a703f10e895df07, 0xfd75f3fa01876bd8, 0xeb5b06e70ce08ffe,
1594 0x68f6b8542783dfee},
1595 {0x90c76f8a78712655, 0xcf5293d2f310bf7f, 0xfbc8044dfda45028,
1596 0xcbe1feba92e40ce6},
1597 {1, 0, 0, 0}},
1598 {{0xe998ceea4396e4c1, 0xfc82ef0b6acea274, 0x230f729f2250e927,
1599 0xd0b2f94d2f420109},
1600 {0x4305adddb38d4966, 0x10b838f8624c3b45, 0x7db2636658954e7a,
1601 0x971459828b0719e5},
1602 {1, 0, 0, 0}},
1603 {{0x4bd6b72623369fc9, 0x57f2929e53d0b876, 0xc2d5cba4f2340687,
1604 0x961610004a866aba},
1605 {0x49997bcd2e407a5e, 0x69ab197d92ddcb24, 0x2cf1f2438fe5131c,
1606 0x7acb9fadcee75e44},
1607 {1, 0, 0, 0}},
1608 {{0x254e839423d2d4c0, 0xf57f0c917aea685b, 0xa60d880f6f75aaea,
1609 0x24eb9acca333bf5b},
1610 {0xe3de4ccb1cda5dea, 0xfeef9341c51a6b4f, 0x743125f88bac4c4d,
1611 0x69f891c5acd079cc},
1612 {1, 0, 0, 0}},
1613 {{0xeee44b35702476b5, 0x7ed031a0e45c2258, 0xb422d1e7bd6f8514,
1614 0xe51f547c5972a107},
1615 {0xa25bcd6fc9cf343d, 0x8ca922ee097c184e, 0xa62f98b3a9fe9a06,
1616 0x1c309a2b25bb1387},
1617 {1, 0, 0, 0}},
1618 {{0x9295dbeb1967c459, 0xb00148833472c98e, 0xc504977708011828,
1619 0x20b87b8aa2c4e503},
1620 {0x3063175de057c277, 0x1bd539338fe582dd, 0x0d11adef5f69a044,
1621 0xf5c6fa49919776be},
1622 {1, 0, 0, 0}},
1623 {{0x8c944e760fd59e11, 0x3876cba1102fad5f, 0xa454c3fad83faa56,
1624 0x1ed7d1b9332010b9},
1625 {0xa1011a270024b889, 0x05e4d0dcac0cd344, 0x52b520f0eb6a2a24,
1626 0x3a2b03f03217257a},
1627 {1, 0, 0, 0}},
1628 {{0xf20fc2afdf1d043d, 0xf330240db58d5a62, 0xfc7d229ca0058c3b,
1629 0x15fee545c78dd9f6},
1630 {0x501e82885bc98cda, 0x41ef80e5d046ac04, 0x557d9f49461210fb,
1631 0x4ab5b6b2b8753f81},
1632 {1, 0, 0, 0}}}
1633 };
1634
1635 /*
1636 * select_point selects the |idx|th point from a precomputation table and
1637 * copies it to out.
1638 */
1639 static void select_point(const u64 idx, unsigned int size,
1640 const smallfelem pre_comp[16][3], smallfelem out[3])
1641 {
1642 unsigned i, j;
1643 u64 *outlimbs = &out[0][0];
1644
1645 memset(out, 0, sizeof(*out) * 3);
1646
1647 for (i = 0; i < size; i++) {
1648 const u64 *inlimbs = (u64 *)&pre_comp[i][0][0];
1649 u64 mask = i ^ idx;
1650 mask |= mask >> 4;
1651 mask |= mask >> 2;
1652 mask |= mask >> 1;
1653 mask &= 1;
1654 mask--;
1655 for (j = 0; j < NLIMBS * 3; j++)
1656 outlimbs[j] |= inlimbs[j] & mask;
1657 }
1658 }
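/*-
 * For illustration of the constant-time selection above: OR-folding i ^ idx
 * down to its lowest bit yields 0 only when i == idx, so after "mask--" the
 * mask is all ones exactly for the requested entry and all zeros otherwise.
 * Every table entry is read and masked, so the memory access pattern does
 * not depend on |idx|.
 */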
1659
1660 /* get_bit returns the |i|th bit in |in| */
1661 static char get_bit(const felem_bytearray in, int i)
1662 {
1663 if ((i < 0) || (i >= 256))
1664 return 0;
1665 return (in[i >> 3] >> (i & 7)) & 1;
1666 }
1667
1668 /*
1669 * Interleaved point multiplication using precomputed point multiples: The
1670 * small point multiples 0*P, 1*P, ..., 16*P are in pre_comp[], the scalars
1671 * in scalars[]. If g_scalar is non-NULL, we also add this multiple of the
1672 * generator, using certain (large) precomputed multiples in g_pre_comp.
1673 * Output point (X, Y, Z) is stored in x_out, y_out, z_out
1674 */
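/*-
 * For the generator, the 256-bit g_scalar is consumed as eight interleaved
 * 32-bit strands matching the layout of g_pre_comp described above: in each
 * round i <= 31, the bits at positions i+32, i+96, i+160 and i+224 form an
 * index into g_pre_comp[1] (the multiples scaled by 2^32), and the bits at
 * positions i, i+64, i+128 and i+192 form an index into g_pre_comp[0]. For
 * example, index 0101 selects (2^128 + 1)G from g_pre_comp[0].
 */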
1675 static void batch_mul(felem x_out, felem y_out, felem z_out,
1676 const felem_bytearray scalars[],
1677 const unsigned num_points, const u8 *g_scalar,
1678 const int mixed, const smallfelem pre_comp[][17][3],
1679 const smallfelem g_pre_comp[2][16][3])
1680 {
1681 int i, skip;
1682 unsigned num, gen_mul = (g_scalar != NULL);
1683 felem nq[3], ftmp;
1684 smallfelem tmp[3];
1685 u64 bits;
1686 u8 sign, digit;
1687
1688 /* set nq to the point at infinity */
1689 memset(nq, 0, sizeof(nq));
1690
1691 /*
1692 * Loop over all scalars msb-to-lsb, interleaving additions of multiples
1693 * of the generator (two in each of the last 32 rounds) and additions of
1694 * other points' multiples (every 5th round).
1695 */
1696 skip = 1; /* save two point operations in the first
1697 * round */
1698 for (i = (num_points ? 255 : 31); i >= 0; --i) {
1699 /* double */
1700 if (!skip)
1701 point_double(nq[0], nq[1], nq[2], nq[0], nq[1], nq[2]);
1702
1703 /* add multiples of the generator */
1704 if (gen_mul && (i <= 31)) {
1705 /* first, look 32 bits upwards */
1706 bits = get_bit(g_scalar, i + 224) << 3;
1707 bits |= get_bit(g_scalar, i + 160) << 2;
1708 bits |= get_bit(g_scalar, i + 96) << 1;
1709 bits |= get_bit(g_scalar, i + 32);
1710 /* select the point to add, in constant time */
1711 select_point(bits, 16, g_pre_comp[1], tmp);
1712
1713 if (!skip) {
1714 /* Arg 1 below is for "mixed" */
1715 point_add(nq[0], nq[1], nq[2],
1716 nq[0], nq[1], nq[2], 1, tmp[0], tmp[1], tmp[2]);
1717 } else {
1718 smallfelem_expand(nq[0], tmp[0]);
1719 smallfelem_expand(nq[1], tmp[1]);
1720 smallfelem_expand(nq[2], tmp[2]);
1721 skip = 0;
1722 }
1723
1724 /* second, look at the current position */
1725 bits = get_bit(g_scalar, i + 192) << 3;
1726 bits |= get_bit(g_scalar, i + 128) << 2;
1727 bits |= get_bit(g_scalar, i + 64) << 1;
1728 bits |= get_bit(g_scalar, i);
1729 /* select the point to add, in constant time */
1730 select_point(bits, 16, g_pre_comp[0], tmp);
1731 /* Arg 1 below is for "mixed" */
1732 point_add(nq[0], nq[1], nq[2],
1733 nq[0], nq[1], nq[2], 1, tmp[0], tmp[1], tmp[2]);
1734 }
1735
1736 /* do other additions every 5 doublings */
1737 if (num_points && (i % 5 == 0)) {
1738 /* loop over all scalars */
1739 for (num = 0; num < num_points; ++num) {
1740 bits = get_bit(scalars[num], i + 4) << 5;
1741 bits |= get_bit(scalars[num], i + 3) << 4;
1742 bits |= get_bit(scalars[num], i + 2) << 3;
1743 bits |= get_bit(scalars[num], i + 1) << 2;
1744 bits |= get_bit(scalars[num], i) << 1;
1745 bits |= get_bit(scalars[num], i - 1);
1746 ossl_ec_GFp_nistp_recode_scalar_bits(&sign, &digit, bits);
1747
1748 /*
1749 * select the point to add or subtract, in constant time
1750 */
1751 select_point(digit, 17, pre_comp[num], tmp);
1752 smallfelem_neg(ftmp, tmp[1]); /* (X, -Y, Z) is the negative
1753 * point */
1754 copy_small_conditional(ftmp, tmp[1], (((limb) sign) - 1));
1755 felem_contract(tmp[1], ftmp);
1756
1757 if (!skip) {
1758 point_add(nq[0], nq[1], nq[2],
1759 nq[0], nq[1], nq[2],
1760 mixed, tmp[0], tmp[1], tmp[2]);
1761 } else {
1762 smallfelem_expand(nq[0], tmp[0]);
1763 smallfelem_expand(nq[1], tmp[1]);
1764 smallfelem_expand(nq[2], tmp[2]);
1765 skip = 0;
1766 }
1767 }
1768 }
1769 }
1770 felem_assign(x_out, nq[0]);
1771 felem_assign(y_out, nq[1]);
1772 felem_assign(z_out, nq[2]);
1773 }
1774
1775 /* Precomputation for the group generator. */
1776 struct nistp256_pre_comp_st {
1777 smallfelem g_pre_comp[2][16][3];
1778 CRYPTO_REF_COUNT references;
1779 CRYPTO_RWLOCK *lock;
1780 };
1781
1782 const EC_METHOD *EC_GFp_nistp256_method(void)
1783 {
1784 static const EC_METHOD ret = {
1785 EC_FLAGS_DEFAULT_OCT,
1786 NID_X9_62_prime_field,
1787 ossl_ec_GFp_nistp256_group_init,
1788 ossl_ec_GFp_simple_group_finish,
1789 ossl_ec_GFp_simple_group_clear_finish,
1790 ossl_ec_GFp_nist_group_copy,
1791 ossl_ec_GFp_nistp256_group_set_curve,
1792 ossl_ec_GFp_simple_group_get_curve,
1793 ossl_ec_GFp_simple_group_get_degree,
1794 ossl_ec_group_simple_order_bits,
1795 ossl_ec_GFp_simple_group_check_discriminant,
1796 ossl_ec_GFp_simple_point_init,
1797 ossl_ec_GFp_simple_point_finish,
1798 ossl_ec_GFp_simple_point_clear_finish,
1799 ossl_ec_GFp_simple_point_copy,
1800 ossl_ec_GFp_simple_point_set_to_infinity,
1801 ossl_ec_GFp_simple_point_set_affine_coordinates,
1802 ossl_ec_GFp_nistp256_point_get_affine_coordinates,
1803 0 /* point_set_compressed_coordinates */ ,
1804 0 /* point2oct */ ,
1805 0 /* oct2point */ ,
1806 ossl_ec_GFp_simple_add,
1807 ossl_ec_GFp_simple_dbl,
1808 ossl_ec_GFp_simple_invert,
1809 ossl_ec_GFp_simple_is_at_infinity,
1810 ossl_ec_GFp_simple_is_on_curve,
1811 ossl_ec_GFp_simple_cmp,
1812 ossl_ec_GFp_simple_make_affine,
1813 ossl_ec_GFp_simple_points_make_affine,
1814 ossl_ec_GFp_nistp256_points_mul,
1815 ossl_ec_GFp_nistp256_precompute_mult,
1816 ossl_ec_GFp_nistp256_have_precompute_mult,
1817 ossl_ec_GFp_nist_field_mul,
1818 ossl_ec_GFp_nist_field_sqr,
1819 0 /* field_div */ ,
1820 ossl_ec_GFp_simple_field_inv,
1821 0 /* field_encode */ ,
1822 0 /* field_decode */ ,
1823 0, /* field_set_to_one */
1824 ossl_ec_key_simple_priv2oct,
1825 ossl_ec_key_simple_oct2priv,
1826 0, /* set private */
1827 ossl_ec_key_simple_generate_key,
1828 ossl_ec_key_simple_check_key,
1829 ossl_ec_key_simple_generate_public_key,
1830 0, /* keycopy */
1831 0, /* keyfinish */
1832 ossl_ecdh_simple_compute_key,
1833 ossl_ecdsa_simple_sign_setup,
1834 ossl_ecdsa_simple_sign_sig,
1835 ossl_ecdsa_simple_verify_sig,
1836 0, /* field_inverse_mod_ord */
1837 0, /* blind_coordinates */
1838 0, /* ladder_pre */
1839 0, /* ladder_step */
1840 0 /* ladder_post */
1841 };
1842
1843 return &ret;
1844 }
1845
1846 /******************************************************************************/
1847 /*
1848 * FUNCTIONS TO MANAGE PRECOMPUTATION
1849 */
1850
1851 static NISTP256_PRE_COMP *nistp256_pre_comp_new(void)
1852 {
1853 NISTP256_PRE_COMP *ret = OPENSSL_zalloc(sizeof(*ret));
1854
1855 if (ret == NULL) {
1856 ERR_raise(ERR_LIB_EC, ERR_R_MALLOC_FAILURE);
1857 return ret;
1858 }
1859
1860 ret->references = 1;
1861
1862 ret->lock = CRYPTO_THREAD_lock_new();
1863 if (ret->lock == NULL) {
1864 ERR_raise(ERR_LIB_EC, ERR_R_MALLOC_FAILURE);
1865 OPENSSL_free(ret);
1866 return NULL;
1867 }
1868 return ret;
1869 }
1870
1871 NISTP256_PRE_COMP *EC_nistp256_pre_comp_dup(NISTP256_PRE_COMP *p)
1872 {
1873 int i;
1874 if (p != NULL)
1875 CRYPTO_UP_REF(&p->references, &i, p->lock);
1876 return p;
1877 }
1878
1879 void EC_nistp256_pre_comp_free(NISTP256_PRE_COMP *pre)
1880 {
1881 int i;
1882
1883 if (pre == NULL)
1884 return;
1885
1886 CRYPTO_DOWN_REF(&pre->references, &i, pre->lock);
1887     REF_PRINT_COUNT("EC_nistp256", pre);
1888 if (i > 0)
1889 return;
1890 REF_ASSERT_ISNT(i < 0);
1891
1892 CRYPTO_THREAD_lock_free(pre->lock);
1893 OPENSSL_free(pre);
1894 }
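/*
 * Ownership sketch (hypothetical caller, illustration only): every
 * EC_nistp256_pre_comp_dup() must be balanced by an
 * EC_nistp256_pre_comp_free(); the structure is released once the reference
 * count reaches zero.
 */
#if 0
static void share_pre_comp(NISTP256_PRE_COMP *pre)
{
    NISTP256_PRE_COMP *ref = EC_nistp256_pre_comp_dup(pre); /* references + 1 */

    /* ... hand |ref| to another owner ... */

    EC_nistp256_pre_comp_free(ref); /* references - 1, freed when it hits 0 */
}
#endif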
1895
1896 /******************************************************************************/
1897 /*
1898 * OPENSSL EC_METHOD FUNCTIONS
1899 */
1900
1901 int ossl_ec_GFp_nistp256_group_init(EC_GROUP *group)
1902 {
1903 int ret;
1904 ret = ossl_ec_GFp_simple_group_init(group);
1905 group->a_is_minus3 = 1;
1906 return ret;
1907 }
1908
1909 int ossl_ec_GFp_nistp256_group_set_curve(EC_GROUP *group, const BIGNUM *p,
1910 const BIGNUM *a, const BIGNUM *b,
1911 BN_CTX *ctx)
1912 {
1913 int ret = 0;
1914 BIGNUM *curve_p, *curve_a, *curve_b;
1915 #ifndef FIPS_MODULE
1916 BN_CTX *new_ctx = NULL;
1917
1918 if (ctx == NULL)
1919 ctx = new_ctx = BN_CTX_new();
1920 #endif
1921 if (ctx == NULL)
1922 return 0;
1923
1924 BN_CTX_start(ctx);
1925 curve_p = BN_CTX_get(ctx);
1926 curve_a = BN_CTX_get(ctx);
1927 curve_b = BN_CTX_get(ctx);
1928 if (curve_b == NULL)
1929 goto err;
1930 BN_bin2bn(nistp256_curve_params[0], sizeof(felem_bytearray), curve_p);
1931 BN_bin2bn(nistp256_curve_params[1], sizeof(felem_bytearray), curve_a);
1932 BN_bin2bn(nistp256_curve_params[2], sizeof(felem_bytearray), curve_b);
1933 if ((BN_cmp(curve_p, p)) || (BN_cmp(curve_a, a)) || (BN_cmp(curve_b, b))) {
1934 ERR_raise(ERR_LIB_EC, EC_R_WRONG_CURVE_PARAMETERS);
1935 goto err;
1936 }
1937 group->field_mod_func = BN_nist_mod_256;
1938 ret = ossl_ec_GFp_simple_group_set_curve(group, p, a, b, ctx);
1939 err:
1940 BN_CTX_end(ctx);
1941 #ifndef FIPS_MODULE
1942 BN_CTX_free(new_ctx);
1943 #endif
1944 return ret;
1945 }
1946
1947 /*
1948 * Takes the Jacobian coordinates (X, Y, Z) of a point and returns (X', Y') =
1949 * (X/Z^2, Y/Z^3)
1950 */
1951 int ossl_ec_GFp_nistp256_point_get_affine_coordinates(const EC_GROUP *group,
1952 const EC_POINT *point,
1953 BIGNUM *x, BIGNUM *y,
1954 BN_CTX *ctx)
1955 {
1956 felem z1, z2, x_in, y_in;
1957 smallfelem x_out, y_out;
1958 longfelem tmp;
1959
1960 if (EC_POINT_is_at_infinity(group, point)) {
1961 ERR_raise(ERR_LIB_EC, EC_R_POINT_AT_INFINITY);
1962 return 0;
1963 }
1964 if ((!BN_to_felem(x_in, point->X)) || (!BN_to_felem(y_in, point->Y)) ||
1965 (!BN_to_felem(z1, point->Z)))
1966 return 0;
1967 felem_inv(z2, z1);
1968 felem_square(tmp, z2);
1969 felem_reduce(z1, tmp);
1970 felem_mul(tmp, x_in, z1);
1971 felem_reduce(x_in, tmp);
1972 felem_contract(x_out, x_in);
1973 if (x != NULL) {
1974 if (!smallfelem_to_BN(x, x_out)) {
1975 ERR_raise(ERR_LIB_EC, ERR_R_BN_LIB);
1976 return 0;
1977 }
1978 }
1979 felem_mul(tmp, z1, z2);
1980 felem_reduce(z1, tmp);
1981 felem_mul(tmp, y_in, z1);
1982 felem_reduce(y_in, tmp);
1983 felem_contract(y_out, y_in);
1984 if (y != NULL) {
1985 if (!smallfelem_to_BN(y, y_out)) {
1986 ERR_raise(ERR_LIB_EC, ERR_R_BN_LIB);
1987 return 0;
1988 }
1989 }
1990 return 1;
1991 }
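/*
 * For clarity, the same conversion written with BIGNUMs (a sketch only; the
 * function above performs it with felem arithmetic and a constant-time
 * inversion): x' = X * Z^-2 mod p and y' = Y * Z^-3 mod p.
 */
#if 0
static int jacobian_to_affine_ref(BIGNUM *x, BIGNUM *y, const BIGNUM *X,
                                  const BIGNUM *Y, const BIGNUM *Z,
                                  const BIGNUM *p, BN_CTX *ctx)
{
    int ok = 0;
    BIGNUM *zinv = BN_new(), *zinv2 = BN_new(), *zinv3 = BN_new();

    if (zinv == NULL || zinv2 == NULL || zinv3 == NULL)
        goto end;
    ok = BN_mod_inverse(zinv, Z, p, ctx) != NULL      /* Z^-1 */
        && BN_mod_sqr(zinv2, zinv, p, ctx)            /* Z^-2 */
        && BN_mod_mul(zinv3, zinv2, zinv, p, ctx)     /* Z^-3 */
        && BN_mod_mul(x, X, zinv2, p, ctx)
        && BN_mod_mul(y, Y, zinv3, p, ctx);
 end:
    BN_free(zinv);
    BN_free(zinv2);
    BN_free(zinv3);
    return ok;
}
#endif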
1992
1993 /* the |points| array below has |num| entries; |tmp_smallfelems| has |num|+1 */
1994 static void make_points_affine(size_t num, smallfelem points[][3],
1995 smallfelem tmp_smallfelems[])
1996 {
1997 /*
1998 * Runs in constant time, unless an input is the point at infinity (which
1999 * normally shouldn't happen).
2000 */
2001 ossl_ec_GFp_nistp_points_make_affine_internal(num,
2002 points,
2003 sizeof(smallfelem),
2004 tmp_smallfelems,
2005 (void (*)(void *))smallfelem_one,
2006 smallfelem_is_zero_int,
2007 (void (*)(void *, const void *))
2008 smallfelem_assign,
2009 (void (*)(void *, const void *))
2010 smallfelem_square_contract,
2011 (void (*)
2012 (void *, const void *,
2013 const void *))
2014 smallfelem_mul_contract,
2015 (void (*)(void *, const void *))
2016 smallfelem_inv_contract,
2017 /* nothing to contract */
2018 (void (*)(void *, const void *))
2019 smallfelem_assign);
2020 }
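/*
 * The shared helper ossl_ec_GFp_nistp_points_make_affine_internal() (in
 * ecp_nistputil.c) batches the conversion so that a single field inversion
 * covers all the points, which is why one inversion callback and |num|+1
 * scratch elements suffice.  A BIGNUM sketch of that simultaneous-inversion
 * idea (no constant-time guarantees, illustration only):
 */
#if 0
static int batch_invert_ref(BIGNUM **z, size_t num, const BIGNUM *p,
                            BN_CTX *ctx)
{
    size_t i;
    int ok = 0;
    BIGNUM **prefix = NULL, *inv, *tmp;

    if (num == 0)
        return 1;
    BN_CTX_start(ctx);
    inv = BN_CTX_get(ctx);
    tmp = BN_CTX_get(ctx);
    prefix = OPENSSL_malloc(sizeof(*prefix) * num);
    if (tmp == NULL || prefix == NULL)
        goto end;
    for (i = 0; i < num; i++)
        if ((prefix[i] = BN_CTX_get(ctx)) == NULL)
            goto end;
    /* prefix[i] = z[0] * ... * z[i] */
    if (BN_copy(prefix[0], z[0]) == NULL)
        goto end;
    for (i = 1; i < num; i++)
        if (!BN_mod_mul(prefix[i], prefix[i - 1], z[i], p, ctx))
            goto end;
    /* one inversion of the whole product ... */
    if (BN_mod_inverse(inv, prefix[num - 1], p, ctx) == NULL)
        goto end;
    /* ... then recover each z[i]^-1 with two multiplications, walking back */
    for (i = num - 1; i > 0; i--)
        if (!BN_mod_mul(tmp, inv, prefix[i - 1], p, ctx)   /* = z[i]^-1 */
            || !BN_mod_mul(inv, inv, z[i], p, ctx)         /* drop z[i] */
            || BN_copy(z[i], tmp) == NULL)
            goto end;
    ok = BN_copy(z[0], inv) != NULL;
 end:
    OPENSSL_free(prefix);
    BN_CTX_end(ctx);
    return ok;
}
#endif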
2021
2022 /*
2023 * Computes scalar*generator + \sum scalars[i]*points[i], ignoring NULL
2024  * values. The result is stored in r (r can equal one of the inputs).
2025 */
2026 int ossl_ec_GFp_nistp256_points_mul(const EC_GROUP *group, EC_POINT *r,
2027 const BIGNUM *scalar, size_t num,
2028 const EC_POINT *points[],
2029 const BIGNUM *scalars[], BN_CTX *ctx)
2030 {
2031 int ret = 0;
2032 int j;
2033 int mixed = 0;
2034 BIGNUM *x, *y, *z, *tmp_scalar;
2035 felem_bytearray g_secret;
2036 felem_bytearray *secrets = NULL;
2037 smallfelem (*pre_comp)[17][3] = NULL;
2038 smallfelem *tmp_smallfelems = NULL;
2039 unsigned i;
2040 int num_bytes;
2041 int have_pre_comp = 0;
2042 size_t num_points = num;
2043 smallfelem x_in, y_in, z_in;
2044 felem x_out, y_out, z_out;
2045 NISTP256_PRE_COMP *pre = NULL;
2046 const smallfelem(*g_pre_comp)[16][3] = NULL;
2047 EC_POINT *generator = NULL;
2048 const EC_POINT *p = NULL;
2049 const BIGNUM *p_scalar = NULL;
2050
2051 BN_CTX_start(ctx);
2052 x = BN_CTX_get(ctx);
2053 y = BN_CTX_get(ctx);
2054 z = BN_CTX_get(ctx);
2055 tmp_scalar = BN_CTX_get(ctx);
2056 if (tmp_scalar == NULL)
2057 goto err;
2058
2059 if (scalar != NULL) {
2060 pre = group->pre_comp.nistp256;
2061 if (pre)
2062 /* we have precomputation, try to use it */
2063 g_pre_comp = (const smallfelem(*)[16][3])pre->g_pre_comp;
2064 else
2065 /* try to use the standard precomputation */
2066 g_pre_comp = &gmul[0];
2067 generator = EC_POINT_new(group);
2068 if (generator == NULL)
2069 goto err;
2070 /* get the generator from precomputation */
2071 if (!smallfelem_to_BN(x, g_pre_comp[0][1][0]) ||
2072 !smallfelem_to_BN(y, g_pre_comp[0][1][1]) ||
2073 !smallfelem_to_BN(z, g_pre_comp[0][1][2])) {
2074 ERR_raise(ERR_LIB_EC, ERR_R_BN_LIB);
2075 goto err;
2076 }
2077 if (!ossl_ec_GFp_simple_set_Jprojective_coordinates_GFp(group,
2078 generator,
2079 x, y, z, ctx))
2080 goto err;
2081 if (0 == EC_POINT_cmp(group, generator, group->generator, ctx))
2082 /* precomputation matches generator */
2083 have_pre_comp = 1;
2084 else
2085 /*
2086 * we don't have valid precomputation: treat the generator as a
2087 * random point
2088 */
2089 num_points++;
2090 }
2091 if (num_points > 0) {
2092 if (num_points >= 3) {
2093 /*
2094 * unless we precompute multiples for just one or two points,
2095 * converting those into affine form is time well spent
2096 */
2097 mixed = 1;
2098 }
2099 secrets = OPENSSL_malloc(sizeof(*secrets) * num_points);
2100 pre_comp = OPENSSL_malloc(sizeof(*pre_comp) * num_points);
2101 if (mixed)
2102 tmp_smallfelems =
2103 OPENSSL_malloc(sizeof(*tmp_smallfelems) * (num_points * 17 + 1));
2104 if ((secrets == NULL) || (pre_comp == NULL)
2105 || (mixed && (tmp_smallfelems == NULL))) {
2106 ERR_raise(ERR_LIB_EC, ERR_R_MALLOC_FAILURE);
2107 goto err;
2108 }
2109
2110 /*
2111 * we treat NULL scalars as 0, and NULL points as points at infinity,
2112 * i.e., they contribute nothing to the linear combination
2113 */
2114 memset(secrets, 0, sizeof(*secrets) * num_points);
2115 memset(pre_comp, 0, sizeof(*pre_comp) * num_points);
2116 for (i = 0; i < num_points; ++i) {
2117 if (i == num) {
2118 /*
2119 * we didn't have a valid precomputation, so we pick the
2120 * generator
2121 */
2122 p = EC_GROUP_get0_generator(group);
2123 p_scalar = scalar;
2124 } else {
2125 /* the i^th point */
2126 p = points[i];
2127 p_scalar = scalars[i];
2128 }
2129 if ((p_scalar != NULL) && (p != NULL)) {
2130 /* reduce scalar to 0 <= scalar < 2^256 */
2131 if ((BN_num_bits(p_scalar) > 256)
2132 || (BN_is_negative(p_scalar))) {
2133 /*
2134 * this is an unusual input, and we don't guarantee
2135 * constant-timeness
2136 */
2137 if (!BN_nnmod(tmp_scalar, p_scalar, group->order, ctx)) {
2138 ERR_raise(ERR_LIB_EC, ERR_R_BN_LIB);
2139 goto err;
2140 }
2141 num_bytes = BN_bn2lebinpad(tmp_scalar,
2142 secrets[i], sizeof(secrets[i]));
2143 } else {
2144 num_bytes = BN_bn2lebinpad(p_scalar,
2145 secrets[i], sizeof(secrets[i]));
2146 }
2147 if (num_bytes < 0) {
2148 ERR_raise(ERR_LIB_EC, ERR_R_BN_LIB);
2149 goto err;
2150 }
2151 /* precompute multiples */
2152 if ((!BN_to_felem(x_out, p->X)) ||
2153 (!BN_to_felem(y_out, p->Y)) ||
2154 (!BN_to_felem(z_out, p->Z)))
2155 goto err;
2156 felem_shrink(pre_comp[i][1][0], x_out);
2157 felem_shrink(pre_comp[i][1][1], y_out);
2158 felem_shrink(pre_comp[i][1][2], z_out);
2159 for (j = 2; j <= 16; ++j) {
2160 if (j & 1) {
2161 point_add_small(pre_comp[i][j][0], pre_comp[i][j][1],
2162 pre_comp[i][j][2], pre_comp[i][1][0],
2163 pre_comp[i][1][1], pre_comp[i][1][2],
2164 pre_comp[i][j - 1][0],
2165 pre_comp[i][j - 1][1],
2166 pre_comp[i][j - 1][2]);
2167 } else {
2168 point_double_small(pre_comp[i][j][0],
2169 pre_comp[i][j][1],
2170 pre_comp[i][j][2],
2171 pre_comp[i][j / 2][0],
2172 pre_comp[i][j / 2][1],
2173 pre_comp[i][j / 2][2]);
2174 }
2175 }
2176 }
2177 }
2178 if (mixed)
2179 make_points_affine(num_points * 17, pre_comp[0], tmp_smallfelems);
2180 }
2181
2182 /* the scalar for the generator */
2183 if ((scalar != NULL) && (have_pre_comp)) {
2184 memset(g_secret, 0, sizeof(g_secret));
2185 /* reduce scalar to 0 <= scalar < 2^256 */
2186 if ((BN_num_bits(scalar) > 256) || (BN_is_negative(scalar))) {
2187 /*
2188 * this is an unusual input, and we don't guarantee
2189 * constant-timeness
2190 */
2191 if (!BN_nnmod(tmp_scalar, scalar, group->order, ctx)) {
2192 ERR_raise(ERR_LIB_EC, ERR_R_BN_LIB);
2193 goto err;
2194 }
2195 num_bytes = BN_bn2lebinpad(tmp_scalar, g_secret, sizeof(g_secret));
2196 } else {
2197 num_bytes = BN_bn2lebinpad(scalar, g_secret, sizeof(g_secret));
2198 }
2199 /* do the multiplication with generator precomputation */
2200 batch_mul(x_out, y_out, z_out,
2201 (const felem_bytearray(*))secrets, num_points,
2202 g_secret,
2203 mixed, (const smallfelem(*)[17][3])pre_comp, g_pre_comp);
2204 } else {
2205 /* do the multiplication without generator precomputation */
2206 batch_mul(x_out, y_out, z_out,
2207 (const felem_bytearray(*))secrets, num_points,
2208 NULL, mixed, (const smallfelem(*)[17][3])pre_comp, NULL);
2209 }
2210 /* reduce the output to its unique minimal representation */
2211 felem_contract(x_in, x_out);
2212 felem_contract(y_in, y_out);
2213 felem_contract(z_in, z_out);
2214 if ((!smallfelem_to_BN(x, x_in)) || (!smallfelem_to_BN(y, y_in)) ||
2215 (!smallfelem_to_BN(z, z_in))) {
2216 ERR_raise(ERR_LIB_EC, ERR_R_BN_LIB);
2217 goto err;
2218 }
2219 ret = ossl_ec_GFp_simple_set_Jprojective_coordinates_GFp(group, r, x, y, z,
2220 ctx);
2221
2222 err:
2223 BN_CTX_end(ctx);
2224 EC_POINT_free(generator);
2225 OPENSSL_free(secrets);
2226 OPENSSL_free(pre_comp);
2227 OPENSSL_free(tmp_smallfelems);
2228 return ret;
2229 }
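/*
 * Usage sketch (hypothetical caller): this entry point is normally reached
 * through the generic EC code, e.g. EC_POINT_mul() on a group that uses this
 * EC_METHOD.  Illustration only, not compiled:
 */
#if 0
static int p256_mul_generator(const EC_GROUP *group, EC_POINT *r,
                              const BIGNUM *k, BN_CTX *ctx)
{
    /* r = k*G; an optional extra point/scalar pair would replace the NULLs */
    return EC_POINT_mul(group, r, k, NULL, NULL, ctx);
}
#endif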
2230
2231 int ossl_ec_GFp_nistp256_precompute_mult(EC_GROUP *group, BN_CTX *ctx)
2232 {
2233 int ret = 0;
2234 NISTP256_PRE_COMP *pre = NULL;
2235 int i, j;
2236 BIGNUM *x, *y;
2237 EC_POINT *generator = NULL;
2238 smallfelem tmp_smallfelems[32];
2239 felem x_tmp, y_tmp, z_tmp;
2240 #ifndef FIPS_MODULE
2241 BN_CTX *new_ctx = NULL;
2242 #endif
2243
2244 /* throw away old precomputation */
2245 EC_pre_comp_free(group);
2246
2247 #ifndef FIPS_MODULE
2248 if (ctx == NULL)
2249 ctx = new_ctx = BN_CTX_new();
2250 #endif
2251 if (ctx == NULL)
2252 return 0;
2253
2254 BN_CTX_start(ctx);
2255 x = BN_CTX_get(ctx);
2256 y = BN_CTX_get(ctx);
2257 if (y == NULL)
2258 goto err;
2259 /* get the generator */
2260 if (group->generator == NULL)
2261 goto err;
2262 generator = EC_POINT_new(group);
2263 if (generator == NULL)
2264 goto err;
2265 BN_bin2bn(nistp256_curve_params[3], sizeof(felem_bytearray), x);
2266 BN_bin2bn(nistp256_curve_params[4], sizeof(felem_bytearray), y);
2267 if (!EC_POINT_set_affine_coordinates(group, generator, x, y, ctx))
2268 goto err;
2269 if ((pre = nistp256_pre_comp_new()) == NULL)
2270 goto err;
2271 /*
2272 * if the generator is the standard one, use built-in precomputation
2273 */
2274 if (0 == EC_POINT_cmp(group, generator, group->generator, ctx)) {
2275 memcpy(pre->g_pre_comp, gmul, sizeof(pre->g_pre_comp));
2276 goto done;
2277 }
2278 if ((!BN_to_felem(x_tmp, group->generator->X)) ||
2279 (!BN_to_felem(y_tmp, group->generator->Y)) ||
2280 (!BN_to_felem(z_tmp, group->generator->Z)))
2281 goto err;
2282 felem_shrink(pre->g_pre_comp[0][1][0], x_tmp);
2283 felem_shrink(pre->g_pre_comp[0][1][1], y_tmp);
2284 felem_shrink(pre->g_pre_comp[0][1][2], z_tmp);
2285 /*
2286 * compute 2^64*G, 2^128*G, 2^192*G for the first table, 2^32*G, 2^96*G,
2287 * 2^160*G, 2^224*G for the second one
2288 */
2289 for (i = 1; i <= 8; i <<= 1) {
2290 point_double_small(pre->g_pre_comp[1][i][0], pre->g_pre_comp[1][i][1],
2291 pre->g_pre_comp[1][i][2], pre->g_pre_comp[0][i][0],
2292 pre->g_pre_comp[0][i][1],
2293 pre->g_pre_comp[0][i][2]);
2294 for (j = 0; j < 31; ++j) {
2295 point_double_small(pre->g_pre_comp[1][i][0],
2296 pre->g_pre_comp[1][i][1],
2297 pre->g_pre_comp[1][i][2],
2298 pre->g_pre_comp[1][i][0],
2299 pre->g_pre_comp[1][i][1],
2300 pre->g_pre_comp[1][i][2]);
2301 }
2302 if (i == 8)
2303 break;
2304 point_double_small(pre->g_pre_comp[0][2 * i][0],
2305 pre->g_pre_comp[0][2 * i][1],
2306 pre->g_pre_comp[0][2 * i][2],
2307 pre->g_pre_comp[1][i][0], pre->g_pre_comp[1][i][1],
2308 pre->g_pre_comp[1][i][2]);
2309 for (j = 0; j < 31; ++j) {
2310 point_double_small(pre->g_pre_comp[0][2 * i][0],
2311 pre->g_pre_comp[0][2 * i][1],
2312 pre->g_pre_comp[0][2 * i][2],
2313 pre->g_pre_comp[0][2 * i][0],
2314 pre->g_pre_comp[0][2 * i][1],
2315 pre->g_pre_comp[0][2 * i][2]);
2316 }
2317 }
2318 for (i = 0; i < 2; i++) {
2319 /* g_pre_comp[i][0] is the point at infinity */
2320 memset(pre->g_pre_comp[i][0], 0, sizeof(pre->g_pre_comp[i][0]));
2321 /* the remaining multiples */
2322 /* 2^64*G + 2^128*G resp. 2^96*G + 2^160*G */
2323 point_add_small(pre->g_pre_comp[i][6][0], pre->g_pre_comp[i][6][1],
2324 pre->g_pre_comp[i][6][2], pre->g_pre_comp[i][4][0],
2325 pre->g_pre_comp[i][4][1], pre->g_pre_comp[i][4][2],
2326 pre->g_pre_comp[i][2][0], pre->g_pre_comp[i][2][1],
2327 pre->g_pre_comp[i][2][2]);
2328 /* 2^64*G + 2^192*G resp. 2^96*G + 2^224*G */
2329 point_add_small(pre->g_pre_comp[i][10][0], pre->g_pre_comp[i][10][1],
2330 pre->g_pre_comp[i][10][2], pre->g_pre_comp[i][8][0],
2331 pre->g_pre_comp[i][8][1], pre->g_pre_comp[i][8][2],
2332 pre->g_pre_comp[i][2][0], pre->g_pre_comp[i][2][1],
2333 pre->g_pre_comp[i][2][2]);
2334 /* 2^128*G + 2^192*G resp. 2^160*G + 2^224*G */
2335 point_add_small(pre->g_pre_comp[i][12][0], pre->g_pre_comp[i][12][1],
2336 pre->g_pre_comp[i][12][2], pre->g_pre_comp[i][8][0],
2337 pre->g_pre_comp[i][8][1], pre->g_pre_comp[i][8][2],
2338 pre->g_pre_comp[i][4][0], pre->g_pre_comp[i][4][1],
2339 pre->g_pre_comp[i][4][2]);
2340 /*
2341 * 2^64*G + 2^128*G + 2^192*G resp. 2^96*G + 2^160*G + 2^224*G
2342 */
2343 point_add_small(pre->g_pre_comp[i][14][0], pre->g_pre_comp[i][14][1],
2344 pre->g_pre_comp[i][14][2], pre->g_pre_comp[i][12][0],
2345 pre->g_pre_comp[i][12][1], pre->g_pre_comp[i][12][2],
2346 pre->g_pre_comp[i][2][0], pre->g_pre_comp[i][2][1],
2347 pre->g_pre_comp[i][2][2]);
2348 for (j = 1; j < 8; ++j) {
2349 /* odd multiples: add G resp. 2^32*G */
2350 point_add_small(pre->g_pre_comp[i][2 * j + 1][0],
2351 pre->g_pre_comp[i][2 * j + 1][1],
2352 pre->g_pre_comp[i][2 * j + 1][2],
2353 pre->g_pre_comp[i][2 * j][0],
2354 pre->g_pre_comp[i][2 * j][1],
2355 pre->g_pre_comp[i][2 * j][2],
2356 pre->g_pre_comp[i][1][0],
2357 pre->g_pre_comp[i][1][1],
2358 pre->g_pre_comp[i][1][2]);
2359 }
2360 }
2361 make_points_affine(31, &(pre->g_pre_comp[0][1]), tmp_smallfelems);
2362
2363 done:
2364 SETPRECOMP(group, nistp256, pre);
2365 pre = NULL;
2366 ret = 1;
2367
2368 err:
2369 BN_CTX_end(ctx);
2370 EC_POINT_free(generator);
2371 #ifndef FIPS_MODULE
2372 BN_CTX_free(new_ctx);
2373 #endif
2374 EC_nistp256_pre_comp_free(pre);
2375 return ret;
2376 }
2377
2378 int ossl_ec_GFp_nistp256_have_precompute_mult(const EC_GROUP *group)
2379 {
2380 return HAVEPRECOMP(group, nistp256);
2381 }
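/*
 * Usage sketch (hypothetical caller): precomputation is normally requested
 * and queried through the public wrappers rather than by calling the two
 * functions above directly.  Illustration only, not compiled:
 */
#if 0
static int p256_warm_up(EC_GROUP *group, BN_CTX *ctx)
{
    if (!EC_GROUP_precompute_mult(group, ctx))    /* dispatches to the code above */
        return 0;
    return EC_GROUP_have_precompute_mult(group);  /* now reports 1 */
}
#endif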