]> git.ipfire.org Git - thirdparty/openssl.git/blame - crypto/ec/ecp_nistp256.c
Add support for reference counting using C11 atomics
[thirdparty/openssl.git] / crypto / ec / ecp_nistp256.c
CommitLineData
3e00b4c9 1/*
4f22f405
RS
2 * Copyright 2011-2016 The OpenSSL Project Authors. All Rights Reserved.
3 *
4 * Licensed under the OpenSSL license (the "License"). You may not use
5 * this file except in compliance with the License. You can obtain a copy
6 * in the file LICENSE in the source distribution or at
7 * https://www.openssl.org/source/license.html
3e00b4c9 8 */
4f22f405 9
3e00b4c9
BM
10/* Copyright 2011 Google Inc.
11 *
12 * Licensed under the Apache License, Version 2.0 (the "License");
13 *
14 * you may not use this file except in compliance with the License.
15 * You may obtain a copy of the License at
16 *
17 * http://www.apache.org/licenses/LICENSE-2.0
18 *
19 * Unless required by applicable law or agreed to in writing, software
20 * distributed under the License is distributed on an "AS IS" BASIS,
21 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
22 * See the License for the specific language governing permissions and
23 * limitations under the License.
24 */
25
26/*
27 * A 64-bit implementation of the NIST P-256 elliptic curve point multiplication
28 *
29 * OpenSSL integration was taken from Emilia Kasper's work in ecp_nistp224.c.
30 * Otherwise based on Emilia's P224 work, which was inspired by my curve25519
31 * work which got its smarts from Daniel J. Bernstein's work on the same.
32 */
33
e0d6132b 34#include <openssl/opensslconf.h>
effaf4de
RS
35#ifdef OPENSSL_NO_EC_NISTP_64_GCC_128
36NON_EMPTY_TRANSLATION_UNIT
37#else
3e00b4c9 38
0f113f3e
MC
39# include <stdint.h>
40# include <string.h>
41# include <openssl/err.h>
42# include "ec_lcl.h"
3e00b4c9 43
0f113f3e 44# if defined(__GNUC__) && (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 1))
3e00b4c9 45 /* even with gcc, the typedef won't work for 32-bit platforms */
0f113f3e
MC
46typedef __uint128_t uint128_t; /* nonstandard; implemented by gcc on 64-bit
47 * platforms */
48typedef __int128_t int128_t;
49# else
50# error "Need GCC 3.1 or later to define type uint128_t"
51# endif
3e00b4c9
BM
52
53typedef uint8_t u8;
54typedef uint32_t u32;
55typedef uint64_t u64;
56typedef int64_t s64;
57
0f113f3e
MC
58/*
59 * The underlying field. P256 operates over GF(2^256-2^224+2^192+2^96-1). We
60 * can serialise an element of this field into 32 bytes. We call this an
61 * felem_bytearray.
62 */
3e00b4c9
BM
63
64typedef u8 felem_bytearray[32];
65
0f113f3e
MC
66/*
67 * These are the parameters of P256, taken from FIPS 186-3, page 86. These
68 * values are big-endian.
69 */
3e00b4c9 70static const felem_bytearray nistp256_curve_params[5] = {
0f113f3e
MC
71 {0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x01, /* p */
72 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
73 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff,
74 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff},
75 {0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x01, /* a = -3 */
76 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
77 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff,
78 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xfc}, /* b */
79 {0x5a, 0xc6, 0x35, 0xd8, 0xaa, 0x3a, 0x93, 0xe7,
80 0xb3, 0xeb, 0xbd, 0x55, 0x76, 0x98, 0x86, 0xbc,
81 0x65, 0x1d, 0x06, 0xb0, 0xcc, 0x53, 0xb0, 0xf6,
82 0x3b, 0xce, 0x3c, 0x3e, 0x27, 0xd2, 0x60, 0x4b},
83 {0x6b, 0x17, 0xd1, 0xf2, 0xe1, 0x2c, 0x42, 0x47, /* x */
84 0xf8, 0xbc, 0xe6, 0xe5, 0x63, 0xa4, 0x40, 0xf2,
85 0x77, 0x03, 0x7d, 0x81, 0x2d, 0xeb, 0x33, 0xa0,
86 0xf4, 0xa1, 0x39, 0x45, 0xd8, 0x98, 0xc2, 0x96},
87 {0x4f, 0xe3, 0x42, 0xe2, 0xfe, 0x1a, 0x7f, 0x9b, /* y */
88 0x8e, 0xe7, 0xeb, 0x4a, 0x7c, 0x0f, 0x9e, 0x16,
89 0x2b, 0xce, 0x33, 0x57, 0x6b, 0x31, 0x5e, 0xce,
90 0xcb, 0xb6, 0x40, 0x68, 0x37, 0xbf, 0x51, 0xf5}
3e00b4c9
BM
91};
92
1d97c843
TH
93/*-
94 * The representation of field elements.
3e00b4c9
BM
95 * ------------------------------------
96 *
97 * We represent field elements with either four 128-bit values, eight 128-bit
98 * values, or four 64-bit values. The field element represented is:
99 * v[0]*2^0 + v[1]*2^64 + v[2]*2^128 + v[3]*2^192 (mod p)
100 * or:
101 * v[0]*2^0 + v[1]*2^64 + v[2]*2^128 + ... + v[8]*2^512 (mod p)
102 *
103 * 128-bit values are called 'limbs'. Since the limbs are spaced only 64 bits
104 * apart, but are 128-bits wide, the most significant bits of each limb overlap
105 * with the least significant bits of the next.
106 *
107 * A field element with four limbs is an 'felem'. One with eight limbs is a
108 * 'longfelem'
109 *
110 * A field element with four, 64-bit values is called a 'smallfelem'. Small
111 * values are used as intermediate values before multiplication.
112 */
113
0f113f3e 114# define NLIMBS 4
3e00b4c9
BM
115
116typedef uint128_t limb;
117typedef limb felem[NLIMBS];
118typedef limb longfelem[NLIMBS * 2];
119typedef u64 smallfelem[NLIMBS];
120
121/* This is the value of the prime as four 64-bit words, little-endian. */
0f113f3e
MC
122static const u64 kPrime[4] =
123 { 0xfffffffffffffffful, 0xffffffff, 0, 0xffffffff00000001ul };
3e00b4c9
BM
124static const u64 bottom63bits = 0x7ffffffffffffffful;
125
0f113f3e
MC
126/*
127 * bin32_to_felem takes a little-endian byte array and converts it into felem
128 * form. This assumes that the CPU is little-endian.
129 */
3e00b4c9 130static void bin32_to_felem(felem out, const u8 in[32])
0f113f3e
MC
131{
132 out[0] = *((u64 *)&in[0]);
133 out[1] = *((u64 *)&in[8]);
134 out[2] = *((u64 *)&in[16]);
135 out[3] = *((u64 *)&in[24]);
136}
137
138/*
139 * smallfelem_to_bin32 takes a smallfelem and serialises into a little
140 * endian, 32 byte array. This assumes that the CPU is little-endian.
141 */
3e00b4c9 142static void smallfelem_to_bin32(u8 out[32], const smallfelem in)
0f113f3e
MC
143{
144 *((u64 *)&out[0]) = in[0];
145 *((u64 *)&out[8]) = in[1];
146 *((u64 *)&out[16]) = in[2];
147 *((u64 *)&out[24]) = in[3];
148}
3e00b4c9
BM
149
150/* To preserve endianness when using BN_bn2bin and BN_bin2bn */
151static void flip_endian(u8 *out, const u8 *in, unsigned len)
0f113f3e
MC
152{
153 unsigned i;
154 for (i = 0; i < len; ++i)
155 out[i] = in[len - 1 - i];
156}
3e00b4c9
BM
157
158/* BN_to_felem converts an OpenSSL BIGNUM into an felem */
159static int BN_to_felem(felem out, const BIGNUM *bn)
0f113f3e
MC
160{
161 felem_bytearray b_in;
162 felem_bytearray b_out;
163 unsigned num_bytes;
164
165 /* BN_bn2bin eats leading zeroes */
16f8d4eb 166 memset(b_out, 0, sizeof(b_out));
0f113f3e
MC
167 num_bytes = BN_num_bytes(bn);
168 if (num_bytes > sizeof b_out) {
169 ECerr(EC_F_BN_TO_FELEM, EC_R_BIGNUM_OUT_OF_RANGE);
170 return 0;
171 }
172 if (BN_is_negative(bn)) {
173 ECerr(EC_F_BN_TO_FELEM, EC_R_BIGNUM_OUT_OF_RANGE);
174 return 0;
175 }
176 num_bytes = BN_bn2bin(bn, b_in);
177 flip_endian(b_out, b_in, num_bytes);
178 bin32_to_felem(out, b_out);
179 return 1;
180}
3e00b4c9
BM
181
182/* felem_to_BN converts an felem into an OpenSSL BIGNUM */
183static BIGNUM *smallfelem_to_BN(BIGNUM *out, const smallfelem in)
0f113f3e
MC
184{
185 felem_bytearray b_in, b_out;
186 smallfelem_to_bin32(b_in, in);
187 flip_endian(b_out, b_in, sizeof b_out);
188 return BN_bin2bn(b_out, sizeof b_out, out);
189}
3e00b4c9 190
3a83462d
MC
191/*-
192 * Field operations
193 * ----------------
194 */
3e00b4c9
BM
195
196static void smallfelem_one(smallfelem out)
0f113f3e
MC
197{
198 out[0] = 1;
199 out[1] = 0;
200 out[2] = 0;
201 out[3] = 0;
202}
3e00b4c9
BM
203
204static void smallfelem_assign(smallfelem out, const smallfelem in)
0f113f3e
MC
205{
206 out[0] = in[0];
207 out[1] = in[1];
208 out[2] = in[2];
209 out[3] = in[3];
210}
3e00b4c9
BM
211
212static void felem_assign(felem out, const felem in)
0f113f3e
MC
213{
214 out[0] = in[0];
215 out[1] = in[1];
216 out[2] = in[2];
217 out[3] = in[3];
218}
3e00b4c9
BM
219
220/* felem_sum sets out = out + in. */
221static void felem_sum(felem out, const felem in)
0f113f3e
MC
222{
223 out[0] += in[0];
224 out[1] += in[1];
225 out[2] += in[2];
226 out[3] += in[3];
227}
3e00b4c9
BM
228
229/* felem_small_sum sets out = out + in. */
230static void felem_small_sum(felem out, const smallfelem in)
0f113f3e
MC
231{
232 out[0] += in[0];
233 out[1] += in[1];
234 out[2] += in[2];
235 out[3] += in[3];
236}
3e00b4c9
BM
237
238/* felem_scalar sets out = out * scalar */
239static void felem_scalar(felem out, const u64 scalar)
0f113f3e
MC
240{
241 out[0] *= scalar;
242 out[1] *= scalar;
243 out[2] *= scalar;
244 out[3] *= scalar;
245}
3e00b4c9
BM
246
247/* longfelem_scalar sets out = out * scalar */
248static void longfelem_scalar(longfelem out, const u64 scalar)
0f113f3e
MC
249{
250 out[0] *= scalar;
251 out[1] *= scalar;
252 out[2] *= scalar;
253 out[3] *= scalar;
254 out[4] *= scalar;
255 out[5] *= scalar;
256 out[6] *= scalar;
257 out[7] *= scalar;
258}
259
260# define two105m41m9 (((limb)1) << 105) - (((limb)1) << 41) - (((limb)1) << 9)
261# define two105 (((limb)1) << 105)
262# define two105m41p9 (((limb)1) << 105) - (((limb)1) << 41) + (((limb)1) << 9)
3e00b4c9
BM
263
264/* zero105 is 0 mod p */
0f113f3e
MC
265static const felem zero105 =
266 { two105m41m9, two105, two105m41p9, two105m41p9 };
3e00b4c9 267
1d97c843
TH
268/*-
269 * smallfelem_neg sets |out| to |-small|
3e00b4c9
BM
270 * On exit:
271 * out[i] < out[i] + 2^105
272 */
273static void smallfelem_neg(felem out, const smallfelem small)
0f113f3e
MC
274{
275 /* In order to prevent underflow, we subtract from 0 mod p. */
276 out[0] = zero105[0] - small[0];
277 out[1] = zero105[1] - small[1];
278 out[2] = zero105[2] - small[2];
279 out[3] = zero105[3] - small[3];
280}
3e00b4c9 281
1d97c843
TH
282/*-
283 * felem_diff subtracts |in| from |out|
3e00b4c9
BM
284 * On entry:
285 * in[i] < 2^104
286 * On exit:
287 * out[i] < out[i] + 2^105
288 */
289static void felem_diff(felem out, const felem in)
0f113f3e
MC
290{
291 /*
292 * In order to prevent underflow, we add 0 mod p before subtracting.
293 */
294 out[0] += zero105[0];
295 out[1] += zero105[1];
296 out[2] += zero105[2];
297 out[3] += zero105[3];
298
299 out[0] -= in[0];
300 out[1] -= in[1];
301 out[2] -= in[2];
302 out[3] -= in[3];
303}
304
305# define two107m43m11 (((limb)1) << 107) - (((limb)1) << 43) - (((limb)1) << 11)
306# define two107 (((limb)1) << 107)
307# define two107m43p11 (((limb)1) << 107) - (((limb)1) << 43) + (((limb)1) << 11)
3e00b4c9
BM
308
309/* zero107 is 0 mod p */
0f113f3e
MC
310static const felem zero107 =
311 { two107m43m11, two107, two107m43p11, two107m43p11 };
3e00b4c9 312
1d97c843
TH
313/*-
314 * An alternative felem_diff for larger inputs |in|
3e00b4c9
BM
315 * felem_diff_zero107 subtracts |in| from |out|
316 * On entry:
317 * in[i] < 2^106
318 * On exit:
319 * out[i] < out[i] + 2^107
320 */
321static void felem_diff_zero107(felem out, const felem in)
0f113f3e
MC
322{
323 /*
324 * In order to prevent underflow, we add 0 mod p before subtracting.
325 */
326 out[0] += zero107[0];
327 out[1] += zero107[1];
328 out[2] += zero107[2];
329 out[3] += zero107[3];
330
331 out[0] -= in[0];
332 out[1] -= in[1];
333 out[2] -= in[2];
334 out[3] -= in[3];
335}
3e00b4c9 336
1d97c843
TH
337/*-
338 * longfelem_diff subtracts |in| from |out|
3e00b4c9
BM
339 * On entry:
340 * in[i] < 7*2^67
341 * On exit:
342 * out[i] < out[i] + 2^70 + 2^40
343 */
344static void longfelem_diff(longfelem out, const longfelem in)
0f113f3e
MC
345{
346 static const limb two70m8p6 =
347 (((limb) 1) << 70) - (((limb) 1) << 8) + (((limb) 1) << 6);
348 static const limb two70p40 = (((limb) 1) << 70) + (((limb) 1) << 40);
349 static const limb two70 = (((limb) 1) << 70);
350 static const limb two70m40m38p6 =
351 (((limb) 1) << 70) - (((limb) 1) << 40) - (((limb) 1) << 38) +
352 (((limb) 1) << 6);
353 static const limb two70m6 = (((limb) 1) << 70) - (((limb) 1) << 6);
354
355 /* add 0 mod p to avoid underflow */
356 out[0] += two70m8p6;
357 out[1] += two70p40;
358 out[2] += two70;
359 out[3] += two70m40m38p6;
360 out[4] += two70m6;
361 out[5] += two70m6;
362 out[6] += two70m6;
363 out[7] += two70m6;
364
365 /* in[i] < 7*2^67 < 2^70 - 2^40 - 2^38 + 2^6 */
366 out[0] -= in[0];
367 out[1] -= in[1];
368 out[2] -= in[2];
369 out[3] -= in[3];
370 out[4] -= in[4];
371 out[5] -= in[5];
372 out[6] -= in[6];
373 out[7] -= in[7];
374}
375
376# define two64m0 (((limb)1) << 64) - 1
377# define two110p32m0 (((limb)1) << 110) + (((limb)1) << 32) - 1
378# define two64m46 (((limb)1) << 64) - (((limb)1) << 46)
379# define two64m32 (((limb)1) << 64) - (((limb)1) << 32)
3e00b4c9
BM
380
381/* zero110 is 0 mod p */
382static const felem zero110 = { two64m0, two110p32m0, two64m46, two64m32 };
383
1d97c843
TH
384/*-
385 * felem_shrink converts an felem into a smallfelem. The result isn't quite
3e00b4c9
BM
386 * minimal as the value may be greater than p.
387 *
388 * On entry:
389 * in[i] < 2^109
390 * On exit:
391 * out[i] < 2^64
392 */
393static void felem_shrink(smallfelem out, const felem in)
0f113f3e
MC
394{
395 felem tmp;
396 u64 a, b, mask;
397 s64 high, low;
398 static const u64 kPrime3Test = 0x7fffffff00000001ul; /* 2^63 - 2^32 + 1 */
399
400 /* Carry 2->3 */
401 tmp[3] = zero110[3] + in[3] + ((u64)(in[2] >> 64));
402 /* tmp[3] < 2^110 */
403
404 tmp[2] = zero110[2] + (u64)in[2];
405 tmp[0] = zero110[0] + in[0];
406 tmp[1] = zero110[1] + in[1];
407 /* tmp[0] < 2**110, tmp[1] < 2^111, tmp[2] < 2**65 */
408
409 /*
410 * We perform two partial reductions where we eliminate the high-word of
411 * tmp[3]. We don't update the other words till the end.
412 */
413 a = tmp[3] >> 64; /* a < 2^46 */
414 tmp[3] = (u64)tmp[3];
415 tmp[3] -= a;
416 tmp[3] += ((limb) a) << 32;
417 /* tmp[3] < 2^79 */
418
419 b = a;
420 a = tmp[3] >> 64; /* a < 2^15 */
421 b += a; /* b < 2^46 + 2^15 < 2^47 */
422 tmp[3] = (u64)tmp[3];
423 tmp[3] -= a;
424 tmp[3] += ((limb) a) << 32;
425 /* tmp[3] < 2^64 + 2^47 */
426
427 /*
428 * This adjusts the other two words to complete the two partial
429 * reductions.
430 */
431 tmp[0] += b;
432 tmp[1] -= (((limb) b) << 32);
433
434 /*
435 * In order to make space in tmp[3] for the carry from 2 -> 3, we
436 * conditionally subtract kPrime if tmp[3] is large enough.
437 */
438 high = tmp[3] >> 64;
439 /* As tmp[3] < 2^65, high is either 1 or 0 */
440 high <<= 63;
441 high >>= 63;
35a1cc90
MC
442 /*-
443 * high is:
444 * all ones if the high word of tmp[3] is 1
445 * all zeros if the high word of tmp[3] if 0 */
0f113f3e
MC
446 low = tmp[3];
447 mask = low >> 63;
35a1cc90
MC
448 /*-
449 * mask is:
450 * all ones if the MSB of low is 1
451 * all zeros if the MSB of low if 0 */
0f113f3e
MC
452 low &= bottom63bits;
453 low -= kPrime3Test;
454 /* if low was greater than kPrime3Test then the MSB is zero */
455 low = ~low;
456 low >>= 63;
35a1cc90
MC
457 /*-
458 * low is:
459 * all ones if low was > kPrime3Test
460 * all zeros if low was <= kPrime3Test */
0f113f3e
MC
461 mask = (mask & low) | high;
462 tmp[0] -= mask & kPrime[0];
463 tmp[1] -= mask & kPrime[1];
464 /* kPrime[2] is zero, so omitted */
465 tmp[3] -= mask & kPrime[3];
466 /* tmp[3] < 2**64 - 2**32 + 1 */
467
468 tmp[1] += ((u64)(tmp[0] >> 64));
469 tmp[0] = (u64)tmp[0];
470 tmp[2] += ((u64)(tmp[1] >> 64));
471 tmp[1] = (u64)tmp[1];
472 tmp[3] += ((u64)(tmp[2] >> 64));
473 tmp[2] = (u64)tmp[2];
474 /* tmp[i] < 2^64 */
475
476 out[0] = tmp[0];
477 out[1] = tmp[1];
478 out[2] = tmp[2];
479 out[3] = tmp[3];
480}
3e00b4c9
BM
481
482/* smallfelem_expand converts a smallfelem to an felem */
483static void smallfelem_expand(felem out, const smallfelem in)
0f113f3e
MC
484{
485 out[0] = in[0];
486 out[1] = in[1];
487 out[2] = in[2];
488 out[3] = in[3];
489}
490
491/*-
1d97c843 492 * smallfelem_square sets |out| = |small|^2
3e00b4c9
BM
493 * On entry:
494 * small[i] < 2^64
495 * On exit:
496 * out[i] < 7 * 2^64 < 2^67
497 */
498static void smallfelem_square(longfelem out, const smallfelem small)
0f113f3e
MC
499{
500 limb a;
501 u64 high, low;
502
503 a = ((uint128_t) small[0]) * small[0];
504 low = a;
505 high = a >> 64;
506 out[0] = low;
507 out[1] = high;
508
509 a = ((uint128_t) small[0]) * small[1];
510 low = a;
511 high = a >> 64;
512 out[1] += low;
513 out[1] += low;
514 out[2] = high;
515
516 a = ((uint128_t) small[0]) * small[2];
517 low = a;
518 high = a >> 64;
519 out[2] += low;
520 out[2] *= 2;
521 out[3] = high;
522
523 a = ((uint128_t) small[0]) * small[3];
524 low = a;
525 high = a >> 64;
526 out[3] += low;
527 out[4] = high;
528
529 a = ((uint128_t) small[1]) * small[2];
530 low = a;
531 high = a >> 64;
532 out[3] += low;
533 out[3] *= 2;
534 out[4] += high;
535
536 a = ((uint128_t) small[1]) * small[1];
537 low = a;
538 high = a >> 64;
539 out[2] += low;
540 out[3] += high;
541
542 a = ((uint128_t) small[1]) * small[3];
543 low = a;
544 high = a >> 64;
545 out[4] += low;
546 out[4] *= 2;
547 out[5] = high;
548
549 a = ((uint128_t) small[2]) * small[3];
550 low = a;
551 high = a >> 64;
552 out[5] += low;
553 out[5] *= 2;
554 out[6] = high;
555 out[6] += high;
556
557 a = ((uint128_t) small[2]) * small[2];
558 low = a;
559 high = a >> 64;
560 out[4] += low;
561 out[5] += high;
562
563 a = ((uint128_t) small[3]) * small[3];
564 low = a;
565 high = a >> 64;
566 out[6] += low;
567 out[7] = high;
568}
3e00b4c9 569
1d97c843
TH
570/*-
571 * felem_square sets |out| = |in|^2
3e00b4c9
BM
572 * On entry:
573 * in[i] < 2^109
574 * On exit:
575 * out[i] < 7 * 2^64 < 2^67
576 */
577static void felem_square(longfelem out, const felem in)
0f113f3e
MC
578{
579 u64 small[4];
580 felem_shrink(small, in);
581 smallfelem_square(out, small);
582}
3e00b4c9 583
1d97c843
TH
584/*-
585 * smallfelem_mul sets |out| = |small1| * |small2|
3e00b4c9
BM
586 * On entry:
587 * small1[i] < 2^64
588 * small2[i] < 2^64
589 * On exit:
590 * out[i] < 7 * 2^64 < 2^67
591 */
0f113f3e
MC
592static void smallfelem_mul(longfelem out, const smallfelem small1,
593 const smallfelem small2)
594{
595 limb a;
596 u64 high, low;
597
598 a = ((uint128_t) small1[0]) * small2[0];
599 low = a;
600 high = a >> 64;
601 out[0] = low;
602 out[1] = high;
603
604 a = ((uint128_t) small1[0]) * small2[1];
605 low = a;
606 high = a >> 64;
607 out[1] += low;
608 out[2] = high;
609
610 a = ((uint128_t) small1[1]) * small2[0];
611 low = a;
612 high = a >> 64;
613 out[1] += low;
614 out[2] += high;
615
616 a = ((uint128_t) small1[0]) * small2[2];
617 low = a;
618 high = a >> 64;
619 out[2] += low;
620 out[3] = high;
621
622 a = ((uint128_t) small1[1]) * small2[1];
623 low = a;
624 high = a >> 64;
625 out[2] += low;
626 out[3] += high;
627
628 a = ((uint128_t) small1[2]) * small2[0];
629 low = a;
630 high = a >> 64;
631 out[2] += low;
632 out[3] += high;
633
634 a = ((uint128_t) small1[0]) * small2[3];
635 low = a;
636 high = a >> 64;
637 out[3] += low;
638 out[4] = high;
639
640 a = ((uint128_t) small1[1]) * small2[2];
641 low = a;
642 high = a >> 64;
643 out[3] += low;
644 out[4] += high;
645
646 a = ((uint128_t) small1[2]) * small2[1];
647 low = a;
648 high = a >> 64;
649 out[3] += low;
650 out[4] += high;
651
652 a = ((uint128_t) small1[3]) * small2[0];
653 low = a;
654 high = a >> 64;
655 out[3] += low;
656 out[4] += high;
657
658 a = ((uint128_t) small1[1]) * small2[3];
659 low = a;
660 high = a >> 64;
661 out[4] += low;
662 out[5] = high;
663
664 a = ((uint128_t) small1[2]) * small2[2];
665 low = a;
666 high = a >> 64;
667 out[4] += low;
668 out[5] += high;
669
670 a = ((uint128_t) small1[3]) * small2[1];
671 low = a;
672 high = a >> 64;
673 out[4] += low;
674 out[5] += high;
675
676 a = ((uint128_t) small1[2]) * small2[3];
677 low = a;
678 high = a >> 64;
679 out[5] += low;
680 out[6] = high;
681
682 a = ((uint128_t) small1[3]) * small2[2];
683 low = a;
684 high = a >> 64;
685 out[5] += low;
686 out[6] += high;
687
688 a = ((uint128_t) small1[3]) * small2[3];
689 low = a;
690 high = a >> 64;
691 out[6] += low;
692 out[7] = high;
693}
3e00b4c9 694
1d97c843
TH
695/*-
696 * felem_mul sets |out| = |in1| * |in2|
3e00b4c9
BM
697 * On entry:
698 * in1[i] < 2^109
699 * in2[i] < 2^109
700 * On exit:
701 * out[i] < 7 * 2^64 < 2^67
702 */
703static void felem_mul(longfelem out, const felem in1, const felem in2)
0f113f3e
MC
704{
705 smallfelem small1, small2;
706 felem_shrink(small1, in1);
707 felem_shrink(small2, in2);
708 smallfelem_mul(out, small1, small2);
709}
3e00b4c9 710
1d97c843
TH
711/*-
712 * felem_small_mul sets |out| = |small1| * |in2|
3e00b4c9
BM
713 * On entry:
714 * small1[i] < 2^64
715 * in2[i] < 2^109
716 * On exit:
717 * out[i] < 7 * 2^64 < 2^67
718 */
0f113f3e
MC
719static void felem_small_mul(longfelem out, const smallfelem small1,
720 const felem in2)
721{
722 smallfelem small2;
723 felem_shrink(small2, in2);
724 smallfelem_mul(out, small1, small2);
725}
726
727# define two100m36m4 (((limb)1) << 100) - (((limb)1) << 36) - (((limb)1) << 4)
728# define two100 (((limb)1) << 100)
729# define two100m36p4 (((limb)1) << 100) - (((limb)1) << 36) + (((limb)1) << 4)
3e00b4c9 730/* zero100 is 0 mod p */
0f113f3e
MC
731static const felem zero100 =
732 { two100m36m4, two100, two100m36p4, two100m36p4 };
3e00b4c9 733
1d97c843
TH
734/*-
735 * Internal function for the different flavours of felem_reduce.
3e00b4c9
BM
736 * felem_reduce_ reduces the higher coefficients in[4]-in[7].
737 * On entry:
0f113f3e 738 * out[0] >= in[6] + 2^32*in[6] + in[7] + 2^32*in[7]
3e00b4c9
BM
739 * out[1] >= in[7] + 2^32*in[4]
740 * out[2] >= in[5] + 2^32*in[5]
741 * out[3] >= in[4] + 2^32*in[5] + 2^32*in[6]
742 * On exit:
743 * out[0] <= out[0] + in[4] + 2^32*in[5]
744 * out[1] <= out[1] + in[5] + 2^33*in[6]
745 * out[2] <= out[2] + in[7] + 2*in[6] + 2^33*in[7]
746 * out[3] <= out[3] + 2^32*in[4] + 3*in[7]
747 */
748static void felem_reduce_(felem out, const longfelem in)
0f113f3e
MC
749{
750 int128_t c;
751 /* combine common terms from below */
752 c = in[4] + (in[5] << 32);
753 out[0] += c;
754 out[3] -= c;
755
756 c = in[5] - in[7];
757 out[1] += c;
758 out[2] -= c;
759
760 /* the remaining terms */
761 /* 256: [(0,1),(96,-1),(192,-1),(224,1)] */
762 out[1] -= (in[4] << 32);
763 out[3] += (in[4] << 32);
764
765 /* 320: [(32,1),(64,1),(128,-1),(160,-1),(224,-1)] */
766 out[2] -= (in[5] << 32);
767
768 /* 384: [(0,-1),(32,-1),(96,2),(128,2),(224,-1)] */
769 out[0] -= in[6];
770 out[0] -= (in[6] << 32);
771 out[1] += (in[6] << 33);
772 out[2] += (in[6] * 2);
773 out[3] -= (in[6] << 32);
774
775 /* 448: [(0,-1),(32,-1),(64,-1),(128,1),(160,2),(192,3)] */
776 out[0] -= in[7];
777 out[0] -= (in[7] << 32);
778 out[2] += (in[7] << 33);
779 out[3] += (in[7] * 3);
780}
3e00b4c9 781
1d97c843
TH
782/*-
783 * felem_reduce converts a longfelem into an felem.
3e00b4c9
BM
784 * To be called directly after felem_square or felem_mul.
785 * On entry:
786 * in[0] < 2^64, in[1] < 3*2^64, in[2] < 5*2^64, in[3] < 7*2^64
787 * in[4] < 7*2^64, in[5] < 5*2^64, in[6] < 3*2^64, in[7] < 2*64
788 * On exit:
789 * out[i] < 2^101
790 */
791static void felem_reduce(felem out, const longfelem in)
0f113f3e
MC
792{
793 out[0] = zero100[0] + in[0];
794 out[1] = zero100[1] + in[1];
795 out[2] = zero100[2] + in[2];
796 out[3] = zero100[3] + in[3];
797
798 felem_reduce_(out, in);
799
35a1cc90
MC
800 /*-
801 * out[0] > 2^100 - 2^36 - 2^4 - 3*2^64 - 3*2^96 - 2^64 - 2^96 > 0
802 * out[1] > 2^100 - 2^64 - 7*2^96 > 0
803 * out[2] > 2^100 - 2^36 + 2^4 - 5*2^64 - 5*2^96 > 0
804 * out[3] > 2^100 - 2^36 + 2^4 - 7*2^64 - 5*2^96 - 3*2^96 > 0
805 *
806 * out[0] < 2^100 + 2^64 + 7*2^64 + 5*2^96 < 2^101
807 * out[1] < 2^100 + 3*2^64 + 5*2^64 + 3*2^97 < 2^101
808 * out[2] < 2^100 + 5*2^64 + 2^64 + 3*2^65 + 2^97 < 2^101
809 * out[3] < 2^100 + 7*2^64 + 7*2^96 + 3*2^64 < 2^101
810 */
0f113f3e 811}
3e00b4c9 812
1d97c843
TH
813/*-
814 * felem_reduce_zero105 converts a larger longfelem into an felem.
3e00b4c9
BM
815 * On entry:
816 * in[0] < 2^71
817 * On exit:
818 * out[i] < 2^106
819 */
820static void felem_reduce_zero105(felem out, const longfelem in)
0f113f3e
MC
821{
822 out[0] = zero105[0] + in[0];
823 out[1] = zero105[1] + in[1];
824 out[2] = zero105[2] + in[2];
825 out[3] = zero105[3] + in[3];
826
827 felem_reduce_(out, in);
828
35a1cc90
MC
829 /*-
830 * out[0] > 2^105 - 2^41 - 2^9 - 2^71 - 2^103 - 2^71 - 2^103 > 0
831 * out[1] > 2^105 - 2^71 - 2^103 > 0
832 * out[2] > 2^105 - 2^41 + 2^9 - 2^71 - 2^103 > 0
833 * out[3] > 2^105 - 2^41 + 2^9 - 2^71 - 2^103 - 2^103 > 0
834 *
835 * out[0] < 2^105 + 2^71 + 2^71 + 2^103 < 2^106
836 * out[1] < 2^105 + 2^71 + 2^71 + 2^103 < 2^106
837 * out[2] < 2^105 + 2^71 + 2^71 + 2^71 + 2^103 < 2^106
838 * out[3] < 2^105 + 2^71 + 2^103 + 2^71 < 2^106
839 */
0f113f3e
MC
840}
841
842/*
843 * subtract_u64 sets *result = *result - v and *carry to one if the
844 * subtraction underflowed.
845 */
846static void subtract_u64(u64 *result, u64 *carry, u64 v)
847{
848 uint128_t r = *result;
849 r -= v;
850 *carry = (r >> 64) & 1;
851 *result = (u64)r;
852}
853
854/*
855 * felem_contract converts |in| to its unique, minimal representation. On
856 * entry: in[i] < 2^109
3e00b4c9
BM
857 */
858static void felem_contract(smallfelem out, const felem in)
0f113f3e
MC
859{
860 unsigned i;
861 u64 all_equal_so_far = 0, result = 0, carry;
862
863 felem_shrink(out, in);
864 /* small is minimal except that the value might be > p */
865
866 all_equal_so_far--;
867 /*
868 * We are doing a constant time test if out >= kPrime. We need to compare
869 * each u64, from most-significant to least significant. For each one, if
870 * all words so far have been equal (m is all ones) then a non-equal
871 * result is the answer. Otherwise we continue.
872 */
873 for (i = 3; i < 4; i--) {
874 u64 equal;
875 uint128_t a = ((uint128_t) kPrime[i]) - out[i];
876 /*
877 * if out[i] > kPrime[i] then a will underflow and the high 64-bits
878 * will all be set.
879 */
880 result |= all_equal_so_far & ((u64)(a >> 64));
881
882 /*
883 * if kPrime[i] == out[i] then |equal| will be all zeros and the
884 * decrement will make it all ones.
885 */
886 equal = kPrime[i] ^ out[i];
887 equal--;
888 equal &= equal << 32;
889 equal &= equal << 16;
890 equal &= equal << 8;
891 equal &= equal << 4;
892 equal &= equal << 2;
893 equal &= equal << 1;
894 equal = ((s64) equal) >> 63;
895
896 all_equal_so_far &= equal;
897 }
898
899 /*
900 * if all_equal_so_far is still all ones then the two values are equal
901 * and so out >= kPrime is true.
902 */
903 result |= all_equal_so_far;
904
905 /* if out >= kPrime then we subtract kPrime. */
906 subtract_u64(&out[0], &carry, result & kPrime[0]);
907 subtract_u64(&out[1], &carry, carry);
908 subtract_u64(&out[2], &carry, carry);
909 subtract_u64(&out[3], &carry, carry);
910
911 subtract_u64(&out[1], &carry, result & kPrime[1]);
912 subtract_u64(&out[2], &carry, carry);
913 subtract_u64(&out[3], &carry, carry);
914
915 subtract_u64(&out[2], &carry, result & kPrime[2]);
916 subtract_u64(&out[3], &carry, carry);
917
918 subtract_u64(&out[3], &carry, result & kPrime[3]);
919}
3e00b4c9
BM
920
921static void smallfelem_square_contract(smallfelem out, const smallfelem in)
0f113f3e
MC
922{
923 longfelem longtmp;
924 felem tmp;
925
926 smallfelem_square(longtmp, in);
927 felem_reduce(tmp, longtmp);
928 felem_contract(out, tmp);
929}
930
931static void smallfelem_mul_contract(smallfelem out, const smallfelem in1,
932 const smallfelem in2)
933{
934 longfelem longtmp;
935 felem tmp;
936
937 smallfelem_mul(longtmp, in1, in2);
938 felem_reduce(tmp, longtmp);
939 felem_contract(out, tmp);
940}
3e00b4c9 941
1d97c843
TH
942/*-
943 * felem_is_zero returns a limb with all bits set if |in| == 0 (mod p) and 0
3e00b4c9
BM
944 * otherwise.
945 * On entry:
946 * small[i] < 2^64
947 */
948static limb smallfelem_is_zero(const smallfelem small)
0f113f3e
MC
949{
950 limb result;
951 u64 is_p;
952
953 u64 is_zero = small[0] | small[1] | small[2] | small[3];
954 is_zero--;
955 is_zero &= is_zero << 32;
956 is_zero &= is_zero << 16;
957 is_zero &= is_zero << 8;
958 is_zero &= is_zero << 4;
959 is_zero &= is_zero << 2;
960 is_zero &= is_zero << 1;
961 is_zero = ((s64) is_zero) >> 63;
962
963 is_p = (small[0] ^ kPrime[0]) |
964 (small[1] ^ kPrime[1]) |
965 (small[2] ^ kPrime[2]) | (small[3] ^ kPrime[3]);
966 is_p--;
967 is_p &= is_p << 32;
968 is_p &= is_p << 16;
969 is_p &= is_p << 8;
970 is_p &= is_p << 4;
971 is_p &= is_p << 2;
972 is_p &= is_p << 1;
973 is_p = ((s64) is_p) >> 63;
974
975 is_zero |= is_p;
976
977 result = is_zero;
978 result |= ((limb) is_zero) << 64;
979 return result;
980}
3e00b4c9
BM
981
982static int smallfelem_is_zero_int(const smallfelem small)
0f113f3e
MC
983{
984 return (int)(smallfelem_is_zero(small) & ((limb) 1));
985}
3e00b4c9 986
1d97c843
TH
987/*-
988 * felem_inv calculates |out| = |in|^{-1}
3e00b4c9
BM
989 *
990 * Based on Fermat's Little Theorem:
991 * a^p = a (mod p)
992 * a^{p-1} = 1 (mod p)
993 * a^{p-2} = a^{-1} (mod p)
994 */
995static void felem_inv(felem out, const felem in)
0f113f3e
MC
996{
997 felem ftmp, ftmp2;
998 /* each e_I will hold |in|^{2^I - 1} */
999 felem e2, e4, e8, e16, e32, e64;
1000 longfelem tmp;
1001 unsigned i;
1002
1003 felem_square(tmp, in);
1004 felem_reduce(ftmp, tmp); /* 2^1 */
1005 felem_mul(tmp, in, ftmp);
1006 felem_reduce(ftmp, tmp); /* 2^2 - 2^0 */
1007 felem_assign(e2, ftmp);
1008 felem_square(tmp, ftmp);
1009 felem_reduce(ftmp, tmp); /* 2^3 - 2^1 */
1010 felem_square(tmp, ftmp);
1011 felem_reduce(ftmp, tmp); /* 2^4 - 2^2 */
1012 felem_mul(tmp, ftmp, e2);
1013 felem_reduce(ftmp, tmp); /* 2^4 - 2^0 */
1014 felem_assign(e4, ftmp);
1015 felem_square(tmp, ftmp);
1016 felem_reduce(ftmp, tmp); /* 2^5 - 2^1 */
1017 felem_square(tmp, ftmp);
1018 felem_reduce(ftmp, tmp); /* 2^6 - 2^2 */
1019 felem_square(tmp, ftmp);
1020 felem_reduce(ftmp, tmp); /* 2^7 - 2^3 */
1021 felem_square(tmp, ftmp);
1022 felem_reduce(ftmp, tmp); /* 2^8 - 2^4 */
1023 felem_mul(tmp, ftmp, e4);
1024 felem_reduce(ftmp, tmp); /* 2^8 - 2^0 */
1025 felem_assign(e8, ftmp);
1026 for (i = 0; i < 8; i++) {
1027 felem_square(tmp, ftmp);
1028 felem_reduce(ftmp, tmp);
1029 } /* 2^16 - 2^8 */
1030 felem_mul(tmp, ftmp, e8);
1031 felem_reduce(ftmp, tmp); /* 2^16 - 2^0 */
1032 felem_assign(e16, ftmp);
1033 for (i = 0; i < 16; i++) {
1034 felem_square(tmp, ftmp);
1035 felem_reduce(ftmp, tmp);
1036 } /* 2^32 - 2^16 */
1037 felem_mul(tmp, ftmp, e16);
1038 felem_reduce(ftmp, tmp); /* 2^32 - 2^0 */
1039 felem_assign(e32, ftmp);
1040 for (i = 0; i < 32; i++) {
1041 felem_square(tmp, ftmp);
1042 felem_reduce(ftmp, tmp);
1043 } /* 2^64 - 2^32 */
1044 felem_assign(e64, ftmp);
1045 felem_mul(tmp, ftmp, in);
1046 felem_reduce(ftmp, tmp); /* 2^64 - 2^32 + 2^0 */
1047 for (i = 0; i < 192; i++) {
1048 felem_square(tmp, ftmp);
1049 felem_reduce(ftmp, tmp);
1050 } /* 2^256 - 2^224 + 2^192 */
1051
1052 felem_mul(tmp, e64, e32);
1053 felem_reduce(ftmp2, tmp); /* 2^64 - 2^0 */
1054 for (i = 0; i < 16; i++) {
1055 felem_square(tmp, ftmp2);
1056 felem_reduce(ftmp2, tmp);
1057 } /* 2^80 - 2^16 */
1058 felem_mul(tmp, ftmp2, e16);
1059 felem_reduce(ftmp2, tmp); /* 2^80 - 2^0 */
1060 for (i = 0; i < 8; i++) {
1061 felem_square(tmp, ftmp2);
1062 felem_reduce(ftmp2, tmp);
1063 } /* 2^88 - 2^8 */
1064 felem_mul(tmp, ftmp2, e8);
1065 felem_reduce(ftmp2, tmp); /* 2^88 - 2^0 */
1066 for (i = 0; i < 4; i++) {
1067 felem_square(tmp, ftmp2);
1068 felem_reduce(ftmp2, tmp);
1069 } /* 2^92 - 2^4 */
1070 felem_mul(tmp, ftmp2, e4);
1071 felem_reduce(ftmp2, tmp); /* 2^92 - 2^0 */
1072 felem_square(tmp, ftmp2);
1073 felem_reduce(ftmp2, tmp); /* 2^93 - 2^1 */
1074 felem_square(tmp, ftmp2);
1075 felem_reduce(ftmp2, tmp); /* 2^94 - 2^2 */
1076 felem_mul(tmp, ftmp2, e2);
1077 felem_reduce(ftmp2, tmp); /* 2^94 - 2^0 */
1078 felem_square(tmp, ftmp2);
1079 felem_reduce(ftmp2, tmp); /* 2^95 - 2^1 */
1080 felem_square(tmp, ftmp2);
1081 felem_reduce(ftmp2, tmp); /* 2^96 - 2^2 */
1082 felem_mul(tmp, ftmp2, in);
1083 felem_reduce(ftmp2, tmp); /* 2^96 - 3 */
1084
1085 felem_mul(tmp, ftmp2, ftmp);
1086 felem_reduce(out, tmp); /* 2^256 - 2^224 + 2^192 + 2^96 - 3 */
1087}
3e00b4c9
BM
1088
1089static void smallfelem_inv_contract(smallfelem out, const smallfelem in)
0f113f3e
MC
1090{
1091 felem tmp;
3e00b4c9 1092
0f113f3e
MC
1093 smallfelem_expand(tmp, in);
1094 felem_inv(tmp, tmp);
1095 felem_contract(out, tmp);
1096}
3e00b4c9 1097
1d97c843
TH
1098/*-
1099 * Group operations
3e00b4c9
BM
1100 * ----------------
1101 *
1102 * Building on top of the field operations we have the operations on the
1103 * elliptic curve group itself. Points on the curve are represented in Jacobian
35a1cc90
MC
1104 * coordinates
1105 */
3e00b4c9 1106
1d97c843
TH
1107/*-
1108 * point_double calculates 2*(x_in, y_in, z_in)
3e00b4c9
BM
1109 *
1110 * The method is taken from:
1111 * http://hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-3.html#doubling-dbl-2001-b
1112 *
1113 * Outputs can equal corresponding inputs, i.e., x_out == x_in is allowed.
35a1cc90
MC
1114 * while x_out == y_in is not (maybe this works, but it's not tested).
1115 */
3e00b4c9
BM
1116static void
1117point_double(felem x_out, felem y_out, felem z_out,
0f113f3e
MC
1118 const felem x_in, const felem y_in, const felem z_in)
1119{
1120 longfelem tmp, tmp2;
1121 felem delta, gamma, beta, alpha, ftmp, ftmp2;
1122 smallfelem small1, small2;
1123
1124 felem_assign(ftmp, x_in);
1125 /* ftmp[i] < 2^106 */
1126 felem_assign(ftmp2, x_in);
1127 /* ftmp2[i] < 2^106 */
1128
1129 /* delta = z^2 */
1130 felem_square(tmp, z_in);
1131 felem_reduce(delta, tmp);
1132 /* delta[i] < 2^101 */
1133
1134 /* gamma = y^2 */
1135 felem_square(tmp, y_in);
1136 felem_reduce(gamma, tmp);
1137 /* gamma[i] < 2^101 */
1138 felem_shrink(small1, gamma);
1139
1140 /* beta = x*gamma */
1141 felem_small_mul(tmp, small1, x_in);
1142 felem_reduce(beta, tmp);
1143 /* beta[i] < 2^101 */
1144
1145 /* alpha = 3*(x-delta)*(x+delta) */
1146 felem_diff(ftmp, delta);
1147 /* ftmp[i] < 2^105 + 2^106 < 2^107 */
1148 felem_sum(ftmp2, delta);
1149 /* ftmp2[i] < 2^105 + 2^106 < 2^107 */
1150 felem_scalar(ftmp2, 3);
1151 /* ftmp2[i] < 3 * 2^107 < 2^109 */
1152 felem_mul(tmp, ftmp, ftmp2);
1153 felem_reduce(alpha, tmp);
1154 /* alpha[i] < 2^101 */
1155 felem_shrink(small2, alpha);
1156
1157 /* x' = alpha^2 - 8*beta */
1158 smallfelem_square(tmp, small2);
1159 felem_reduce(x_out, tmp);
1160 felem_assign(ftmp, beta);
1161 felem_scalar(ftmp, 8);
1162 /* ftmp[i] < 8 * 2^101 = 2^104 */
1163 felem_diff(x_out, ftmp);
1164 /* x_out[i] < 2^105 + 2^101 < 2^106 */
1165
1166 /* z' = (y + z)^2 - gamma - delta */
1167 felem_sum(delta, gamma);
1168 /* delta[i] < 2^101 + 2^101 = 2^102 */
1169 felem_assign(ftmp, y_in);
1170 felem_sum(ftmp, z_in);
1171 /* ftmp[i] < 2^106 + 2^106 = 2^107 */
1172 felem_square(tmp, ftmp);
1173 felem_reduce(z_out, tmp);
1174 felem_diff(z_out, delta);
1175 /* z_out[i] < 2^105 + 2^101 < 2^106 */
1176
1177 /* y' = alpha*(4*beta - x') - 8*gamma^2 */
1178 felem_scalar(beta, 4);
1179 /* beta[i] < 4 * 2^101 = 2^103 */
1180 felem_diff_zero107(beta, x_out);
1181 /* beta[i] < 2^107 + 2^103 < 2^108 */
1182 felem_small_mul(tmp, small2, beta);
1183 /* tmp[i] < 7 * 2^64 < 2^67 */
1184 smallfelem_square(tmp2, small1);
1185 /* tmp2[i] < 7 * 2^64 */
1186 longfelem_scalar(tmp2, 8);
1187 /* tmp2[i] < 8 * 7 * 2^64 = 7 * 2^67 */
1188 longfelem_diff(tmp, tmp2);
1189 /* tmp[i] < 2^67 + 2^70 + 2^40 < 2^71 */
1190 felem_reduce_zero105(y_out, tmp);
1191 /* y_out[i] < 2^106 */
1192}
1193
1194/*
1195 * point_double_small is the same as point_double, except that it operates on
1196 * smallfelems
1197 */
3e00b4c9
BM
1198static void
1199point_double_small(smallfelem x_out, smallfelem y_out, smallfelem z_out,
0f113f3e
MC
1200 const smallfelem x_in, const smallfelem y_in,
1201 const smallfelem z_in)
1202{
1203 felem felem_x_out, felem_y_out, felem_z_out;
1204 felem felem_x_in, felem_y_in, felem_z_in;
1205
1206 smallfelem_expand(felem_x_in, x_in);
1207 smallfelem_expand(felem_y_in, y_in);
1208 smallfelem_expand(felem_z_in, z_in);
1209 point_double(felem_x_out, felem_y_out, felem_z_out,
1210 felem_x_in, felem_y_in, felem_z_in);
1211 felem_shrink(x_out, felem_x_out);
1212 felem_shrink(y_out, felem_y_out);
1213 felem_shrink(z_out, felem_z_out);
1214}
3e00b4c9
BM
1215
1216/* copy_conditional copies in to out iff mask is all ones. */
0f113f3e
MC
1217static void copy_conditional(felem out, const felem in, limb mask)
1218{
1219 unsigned i;
1220 for (i = 0; i < NLIMBS; ++i) {
1221 const limb tmp = mask & (in[i] ^ out[i]);
1222 out[i] ^= tmp;
1223 }
1224}
3e00b4c9
BM
1225
1226/* copy_small_conditional copies in to out iff mask is all ones. */
0f113f3e
MC
1227static void copy_small_conditional(felem out, const smallfelem in, limb mask)
1228{
1229 unsigned i;
1230 const u64 mask64 = mask;
1231 for (i = 0; i < NLIMBS; ++i) {
1232 out[i] = ((limb) (in[i] & mask64)) | (out[i] & ~mask);
1233 }
1234}
3e00b4c9 1235
1d97c843 1236/*-
0d4fb843 1237 * point_add calculates (x1, y1, z1) + (x2, y2, z2)
3e00b4c9
BM
1238 *
1239 * The method is taken from:
1240 * http://hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-3.html#addition-add-2007-bl,
1241 * adapted for mixed addition (z2 = 1, or z2 = 0 for the point at infinity).
1242 *
1243 * This function includes a branch for checking whether the two input points
1244 * are equal, (while not equal to the point at infinity). This case never
1245 * happens during single point multiplication, so there is no timing leak for
35a1cc90
MC
1246 * ECDH or ECDSA signing.
1247 */
3e00b4c9 1248static void point_add(felem x3, felem y3, felem z3,
0f113f3e
MC
1249 const felem x1, const felem y1, const felem z1,
1250 const int mixed, const smallfelem x2,
1251 const smallfelem y2, const smallfelem z2)
1252{
1253 felem ftmp, ftmp2, ftmp3, ftmp4, ftmp5, ftmp6, x_out, y_out, z_out;
1254 longfelem tmp, tmp2;
1255 smallfelem small1, small2, small3, small4, small5;
1256 limb x_equal, y_equal, z1_is_zero, z2_is_zero;
1257
1258 felem_shrink(small3, z1);
1259
1260 z1_is_zero = smallfelem_is_zero(small3);
1261 z2_is_zero = smallfelem_is_zero(z2);
1262
1263 /* ftmp = z1z1 = z1**2 */
1264 smallfelem_square(tmp, small3);
1265 felem_reduce(ftmp, tmp);
1266 /* ftmp[i] < 2^101 */
1267 felem_shrink(small1, ftmp);
1268
1269 if (!mixed) {
1270 /* ftmp2 = z2z2 = z2**2 */
1271 smallfelem_square(tmp, z2);
1272 felem_reduce(ftmp2, tmp);
1273 /* ftmp2[i] < 2^101 */
1274 felem_shrink(small2, ftmp2);
1275
1276 felem_shrink(small5, x1);
1277
1278 /* u1 = ftmp3 = x1*z2z2 */
1279 smallfelem_mul(tmp, small5, small2);
1280 felem_reduce(ftmp3, tmp);
1281 /* ftmp3[i] < 2^101 */
1282
1283 /* ftmp5 = z1 + z2 */
1284 felem_assign(ftmp5, z1);
1285 felem_small_sum(ftmp5, z2);
1286 /* ftmp5[i] < 2^107 */
1287
1288 /* ftmp5 = (z1 + z2)**2 - (z1z1 + z2z2) = 2z1z2 */
1289 felem_square(tmp, ftmp5);
1290 felem_reduce(ftmp5, tmp);
1291 /* ftmp2 = z2z2 + z1z1 */
1292 felem_sum(ftmp2, ftmp);
1293 /* ftmp2[i] < 2^101 + 2^101 = 2^102 */
1294 felem_diff(ftmp5, ftmp2);
1295 /* ftmp5[i] < 2^105 + 2^101 < 2^106 */
1296
1297 /* ftmp2 = z2 * z2z2 */
1298 smallfelem_mul(tmp, small2, z2);
1299 felem_reduce(ftmp2, tmp);
1300
1301 /* s1 = ftmp2 = y1 * z2**3 */
1302 felem_mul(tmp, y1, ftmp2);
1303 felem_reduce(ftmp6, tmp);
1304 /* ftmp6[i] < 2^101 */
1305 } else {
1306 /*
1307 * We'll assume z2 = 1 (special case z2 = 0 is handled later)
1308 */
1309
1310 /* u1 = ftmp3 = x1*z2z2 */
1311 felem_assign(ftmp3, x1);
1312 /* ftmp3[i] < 2^106 */
1313
1314 /* ftmp5 = 2z1z2 */
1315 felem_assign(ftmp5, z1);
1316 felem_scalar(ftmp5, 2);
1317 /* ftmp5[i] < 2*2^106 = 2^107 */
1318
1319 /* s1 = ftmp2 = y1 * z2**3 */
1320 felem_assign(ftmp6, y1);
1321 /* ftmp6[i] < 2^106 */
1322 }
1323
1324 /* u2 = x2*z1z1 */
1325 smallfelem_mul(tmp, x2, small1);
1326 felem_reduce(ftmp4, tmp);
1327
1328 /* h = ftmp4 = u2 - u1 */
1329 felem_diff_zero107(ftmp4, ftmp3);
1330 /* ftmp4[i] < 2^107 + 2^101 < 2^108 */
1331 felem_shrink(small4, ftmp4);
1332
1333 x_equal = smallfelem_is_zero(small4);
1334
1335 /* z_out = ftmp5 * h */
1336 felem_small_mul(tmp, small4, ftmp5);
1337 felem_reduce(z_out, tmp);
1338 /* z_out[i] < 2^101 */
1339
1340 /* ftmp = z1 * z1z1 */
1341 smallfelem_mul(tmp, small1, small3);
1342 felem_reduce(ftmp, tmp);
1343
1344 /* s2 = tmp = y2 * z1**3 */
1345 felem_small_mul(tmp, y2, ftmp);
1346 felem_reduce(ftmp5, tmp);
1347
1348 /* r = ftmp5 = (s2 - s1)*2 */
1349 felem_diff_zero107(ftmp5, ftmp6);
1350 /* ftmp5[i] < 2^107 + 2^107 = 2^108 */
1351 felem_scalar(ftmp5, 2);
1352 /* ftmp5[i] < 2^109 */
1353 felem_shrink(small1, ftmp5);
1354 y_equal = smallfelem_is_zero(small1);
1355
1356 if (x_equal && y_equal && !z1_is_zero && !z2_is_zero) {
1357 point_double(x3, y3, z3, x1, y1, z1);
1358 return;
1359 }
1360
1361 /* I = ftmp = (2h)**2 */
1362 felem_assign(ftmp, ftmp4);
1363 felem_scalar(ftmp, 2);
1364 /* ftmp[i] < 2*2^108 = 2^109 */
1365 felem_square(tmp, ftmp);
1366 felem_reduce(ftmp, tmp);
1367
1368 /* J = ftmp2 = h * I */
1369 felem_mul(tmp, ftmp4, ftmp);
1370 felem_reduce(ftmp2, tmp);
1371
1372 /* V = ftmp4 = U1 * I */
1373 felem_mul(tmp, ftmp3, ftmp);
1374 felem_reduce(ftmp4, tmp);
1375
1376 /* x_out = r**2 - J - 2V */
1377 smallfelem_square(tmp, small1);
1378 felem_reduce(x_out, tmp);
1379 felem_assign(ftmp3, ftmp4);
1380 felem_scalar(ftmp4, 2);
1381 felem_sum(ftmp4, ftmp2);
1382 /* ftmp4[i] < 2*2^101 + 2^101 < 2^103 */
1383 felem_diff(x_out, ftmp4);
1384 /* x_out[i] < 2^105 + 2^101 */
1385
1386 /* y_out = r(V-x_out) - 2 * s1 * J */
1387 felem_diff_zero107(ftmp3, x_out);
1388 /* ftmp3[i] < 2^107 + 2^101 < 2^108 */
1389 felem_small_mul(tmp, small1, ftmp3);
1390 felem_mul(tmp2, ftmp6, ftmp2);
1391 longfelem_scalar(tmp2, 2);
1392 /* tmp2[i] < 2*2^67 = 2^68 */
1393 longfelem_diff(tmp, tmp2);
1394 /* tmp[i] < 2^67 + 2^70 + 2^40 < 2^71 */
1395 felem_reduce_zero105(y_out, tmp);
1396 /* y_out[i] < 2^106 */
1397
1398 copy_small_conditional(x_out, x2, z1_is_zero);
1399 copy_conditional(x_out, x1, z2_is_zero);
1400 copy_small_conditional(y_out, y2, z1_is_zero);
1401 copy_conditional(y_out, y1, z2_is_zero);
1402 copy_small_conditional(z_out, z2, z1_is_zero);
1403 copy_conditional(z_out, z1, z2_is_zero);
1404 felem_assign(x3, x_out);
1405 felem_assign(y3, y_out);
1406 felem_assign(z3, z_out);
1407}
1408
1409/*
1410 * point_add_small is the same as point_add, except that it operates on
1411 * smallfelems
1412 */
3e00b4c9 1413static void point_add_small(smallfelem x3, smallfelem y3, smallfelem z3,
0f113f3e
MC
1414 smallfelem x1, smallfelem y1, smallfelem z1,
1415 smallfelem x2, smallfelem y2, smallfelem z2)
1416{
1417 felem felem_x3, felem_y3, felem_z3;
1418 felem felem_x1, felem_y1, felem_z1;
1419 smallfelem_expand(felem_x1, x1);
1420 smallfelem_expand(felem_y1, y1);
1421 smallfelem_expand(felem_z1, z1);
1422 point_add(felem_x3, felem_y3, felem_z3, felem_x1, felem_y1, felem_z1, 0,
1423 x2, y2, z2);
1424 felem_shrink(x3, felem_x3);
1425 felem_shrink(y3, felem_y3);
1426 felem_shrink(z3, felem_z3);
1427}
3e00b4c9 1428
1d97c843
TH
1429/*-
1430 * Base point pre computation
3e00b4c9
BM
1431 * --------------------------
1432 *
1433 * Two different sorts of precomputed tables are used in the following code.
1434 * Each contain various points on the curve, where each point is three field
1435 * elements (x, y, z).
1436 *
1437 * For the base point table, z is usually 1 (0 for the point at infinity).
1438 * This table has 2 * 16 elements, starting with the following:
1439 * index | bits | point
1440 * ------+---------+------------------------------
1441 * 0 | 0 0 0 0 | 0G
1442 * 1 | 0 0 0 1 | 1G
1443 * 2 | 0 0 1 0 | 2^64G
1444 * 3 | 0 0 1 1 | (2^64 + 1)G
1445 * 4 | 0 1 0 0 | 2^128G
1446 * 5 | 0 1 0 1 | (2^128 + 1)G
1447 * 6 | 0 1 1 0 | (2^128 + 2^64)G
1448 * 7 | 0 1 1 1 | (2^128 + 2^64 + 1)G
1449 * 8 | 1 0 0 0 | 2^192G
1450 * 9 | 1 0 0 1 | (2^192 + 1)G
1451 * 10 | 1 0 1 0 | (2^192 + 2^64)G
1452 * 11 | 1 0 1 1 | (2^192 + 2^64 + 1)G
1453 * 12 | 1 1 0 0 | (2^192 + 2^128)G
1454 * 13 | 1 1 0 1 | (2^192 + 2^128 + 1)G
1455 * 14 | 1 1 1 0 | (2^192 + 2^128 + 2^64)G
1456 * 15 | 1 1 1 1 | (2^192 + 2^128 + 2^64 + 1)G
1457 * followed by a copy of this with each element multiplied by 2^32.
1458 *
1459 * The reason for this is so that we can clock bits into four different
1460 * locations when doing simple scalar multiplies against the base point,
1461 * and then another four locations using the second 16 elements.
1462 *
1463 * Tables for other points have table[i] = iG for i in 0 .. 16. */
1464
1465/* gmul is the table of precomputed base points */
b853717f 1466static const smallfelem gmul[2][16][3] = {
0f113f3e
MC
1467 {{{0, 0, 0, 0},
1468 {0, 0, 0, 0},
1469 {0, 0, 0, 0}},
1470 {{0xf4a13945d898c296, 0x77037d812deb33a0, 0xf8bce6e563a440f2,
1471 0x6b17d1f2e12c4247},
1472 {0xcbb6406837bf51f5, 0x2bce33576b315ece, 0x8ee7eb4a7c0f9e16,
1473 0x4fe342e2fe1a7f9b},
1474 {1, 0, 0, 0}},
1475 {{0x90e75cb48e14db63, 0x29493baaad651f7e, 0x8492592e326e25de,
1476 0x0fa822bc2811aaa5},
1477 {0xe41124545f462ee7, 0x34b1a65050fe82f5, 0x6f4ad4bcb3df188b,
1478 0xbff44ae8f5dba80d},
1479 {1, 0, 0, 0}},
1480 {{0x93391ce2097992af, 0xe96c98fd0d35f1fa, 0xb257c0de95e02789,
1481 0x300a4bbc89d6726f},
1482 {0xaa54a291c08127a0, 0x5bb1eeada9d806a5, 0x7f1ddb25ff1e3c6f,
1483 0x72aac7e0d09b4644},
1484 {1, 0, 0, 0}},
1485 {{0x57c84fc9d789bd85, 0xfc35ff7dc297eac3, 0xfb982fd588c6766e,
1486 0x447d739beedb5e67},
1487 {0x0c7e33c972e25b32, 0x3d349b95a7fae500, 0xe12e9d953a4aaff7,
1488 0x2d4825ab834131ee},
1489 {1, 0, 0, 0}},
1490 {{0x13949c932a1d367f, 0xef7fbd2b1a0a11b7, 0xddc6068bb91dfc60,
1491 0xef9519328a9c72ff},
1492 {0x196035a77376d8a8, 0x23183b0895ca1740, 0xc1ee9807022c219c,
1493 0x611e9fc37dbb2c9b},
1494 {1, 0, 0, 0}},
1495 {{0xcae2b1920b57f4bc, 0x2936df5ec6c9bc36, 0x7dea6482e11238bf,
1496 0x550663797b51f5d8},
1497 {0x44ffe216348a964c, 0x9fb3d576dbdefbe1, 0x0afa40018d9d50e5,
1498 0x157164848aecb851},
1499 {1, 0, 0, 0}},
1500 {{0xe48ecafffc5cde01, 0x7ccd84e70d715f26, 0xa2e8f483f43e4391,
1501 0xeb5d7745b21141ea},
1502 {0xcac917e2731a3479, 0x85f22cfe2844b645, 0x0990e6a158006cee,
1503 0xeafd72ebdbecc17b},
1504 {1, 0, 0, 0}},
1505 {{0x6cf20ffb313728be, 0x96439591a3c6b94a, 0x2736ff8344315fc5,
1506 0xa6d39677a7849276},
1507 {0xf2bab833c357f5f4, 0x824a920c2284059b, 0x66b8babd2d27ecdf,
1508 0x674f84749b0b8816},
1509 {1, 0, 0, 0}},
1510 {{0x2df48c04677c8a3e, 0x74e02f080203a56b, 0x31855f7db8c7fedb,
1511 0x4e769e7672c9ddad},
1512 {0xa4c36165b824bbb0, 0xfb9ae16f3b9122a5, 0x1ec0057206947281,
1513 0x42b99082de830663},
1514 {1, 0, 0, 0}},
1515 {{0x6ef95150dda868b9, 0xd1f89e799c0ce131, 0x7fdc1ca008a1c478,
1516 0x78878ef61c6ce04d},
1517 {0x9c62b9121fe0d976, 0x6ace570ebde08d4f, 0xde53142c12309def,
1518 0xb6cb3f5d7b72c321},
1519 {1, 0, 0, 0}},
1520 {{0x7f991ed2c31a3573, 0x5b82dd5bd54fb496, 0x595c5220812ffcae,
1521 0x0c88bc4d716b1287},
1522 {0x3a57bf635f48aca8, 0x7c8181f4df2564f3, 0x18d1b5b39c04e6aa,
1523 0xdd5ddea3f3901dc6},
1524 {1, 0, 0, 0}},
1525 {{0xe96a79fb3e72ad0c, 0x43a0a28c42ba792f, 0xefe0a423083e49f3,
1526 0x68f344af6b317466},
1527 {0xcdfe17db3fb24d4a, 0x668bfc2271f5c626, 0x604ed93c24d67ff3,
1528 0x31b9c405f8540a20},
1529 {1, 0, 0, 0}},
1530 {{0xd36b4789a2582e7f, 0x0d1a10144ec39c28, 0x663c62c3edbad7a0,
1531 0x4052bf4b6f461db9},
1532 {0x235a27c3188d25eb, 0xe724f33999bfcc5b, 0x862be6bd71d70cc8,
1533 0xfecf4d5190b0fc61},
1534 {1, 0, 0, 0}},
1535 {{0x74346c10a1d4cfac, 0xafdf5cc08526a7a4, 0x123202a8f62bff7a,
1536 0x1eddbae2c802e41a},
1537 {0x8fa0af2dd603f844, 0x36e06b7e4c701917, 0x0c45f45273db33a0,
1538 0x43104d86560ebcfc},
1539 {1, 0, 0, 0}},
1540 {{0x9615b5110d1d78e5, 0x66b0de3225c4744b, 0x0a4a46fb6aaf363a,
1541 0xb48e26b484f7a21c},
1542 {0x06ebb0f621a01b2d, 0xc004e4048b7b0f98, 0x64131bcdfed6f668,
1543 0xfac015404d4d3dab},
1544 {1, 0, 0, 0}}},
1545 {{{0, 0, 0, 0},
1546 {0, 0, 0, 0},
1547 {0, 0, 0, 0}},
1548 {{0x3a5a9e22185a5943, 0x1ab919365c65dfb6, 0x21656b32262c71da,
1549 0x7fe36b40af22af89},
1550 {0xd50d152c699ca101, 0x74b3d5867b8af212, 0x9f09f40407dca6f1,
1551 0xe697d45825b63624},
1552 {1, 0, 0, 0}},
1553 {{0xa84aa9397512218e, 0xe9a521b074ca0141, 0x57880b3a18a2e902,
1554 0x4a5b506612a677a6},
1555 {0x0beada7a4c4f3840, 0x626db15419e26d9d, 0xc42604fbe1627d40,
1556 0xeb13461ceac089f1},
1557 {1, 0, 0, 0}},
1558 {{0xf9faed0927a43281, 0x5e52c4144103ecbc, 0xc342967aa815c857,
1559 0x0781b8291c6a220a},
1560 {0x5a8343ceeac55f80, 0x88f80eeee54a05e3, 0x97b2a14f12916434,
1561 0x690cde8df0151593},
1562 {1, 0, 0, 0}},
1563 {{0xaee9c75df7f82f2a, 0x9e4c35874afdf43a, 0xf5622df437371326,
1564 0x8a535f566ec73617},
1565 {0xc5f9a0ac223094b7, 0xcde533864c8c7669, 0x37e02819085a92bf,
1566 0x0455c08468b08bd7},
1567 {1, 0, 0, 0}},
1568 {{0x0c0a6e2c9477b5d9, 0xf9a4bf62876dc444, 0x5050a949b6cdc279,
1569 0x06bada7ab77f8276},
1570 {0xc8b4aed1ea48dac9, 0xdebd8a4b7ea1070f, 0x427d49101366eb70,
1571 0x5b476dfd0e6cb18a},
1572 {1, 0, 0, 0}},
1573 {{0x7c5c3e44278c340a, 0x4d54606812d66f3b, 0x29a751b1ae23c5d8,
1574 0x3e29864e8a2ec908},
1575 {0x142d2a6626dbb850, 0xad1744c4765bd780, 0x1f150e68e322d1ed,
1576 0x239b90ea3dc31e7e},
1577 {1, 0, 0, 0}},
1578 {{0x78c416527a53322a, 0x305dde6709776f8e, 0xdbcab759f8862ed4,
1579 0x820f4dd949f72ff7},
1580 {0x6cc544a62b5debd4, 0x75be5d937b4e8cc4, 0x1b481b1b215c14d3,
1581 0x140406ec783a05ec},
1582 {1, 0, 0, 0}},
1583 {{0x6a703f10e895df07, 0xfd75f3fa01876bd8, 0xeb5b06e70ce08ffe,
1584 0x68f6b8542783dfee},
1585 {0x90c76f8a78712655, 0xcf5293d2f310bf7f, 0xfbc8044dfda45028,
1586 0xcbe1feba92e40ce6},
1587 {1, 0, 0, 0}},
1588 {{0xe998ceea4396e4c1, 0xfc82ef0b6acea274, 0x230f729f2250e927,
1589 0xd0b2f94d2f420109},
1590 {0x4305adddb38d4966, 0x10b838f8624c3b45, 0x7db2636658954e7a,
1591 0x971459828b0719e5},
1592 {1, 0, 0, 0}},
1593 {{0x4bd6b72623369fc9, 0x57f2929e53d0b876, 0xc2d5cba4f2340687,
1594 0x961610004a866aba},
1595 {0x49997bcd2e407a5e, 0x69ab197d92ddcb24, 0x2cf1f2438fe5131c,
1596 0x7acb9fadcee75e44},
1597 {1, 0, 0, 0}},
1598 {{0x254e839423d2d4c0, 0xf57f0c917aea685b, 0xa60d880f6f75aaea,
1599 0x24eb9acca333bf5b},
1600 {0xe3de4ccb1cda5dea, 0xfeef9341c51a6b4f, 0x743125f88bac4c4d,
1601 0x69f891c5acd079cc},
1602 {1, 0, 0, 0}},
1603 {{0xeee44b35702476b5, 0x7ed031a0e45c2258, 0xb422d1e7bd6f8514,
1604 0xe51f547c5972a107},
1605 {0xa25bcd6fc9cf343d, 0x8ca922ee097c184e, 0xa62f98b3a9fe9a06,
1606 0x1c309a2b25bb1387},
1607 {1, 0, 0, 0}},
1608 {{0x9295dbeb1967c459, 0xb00148833472c98e, 0xc504977708011828,
1609 0x20b87b8aa2c4e503},
1610 {0x3063175de057c277, 0x1bd539338fe582dd, 0x0d11adef5f69a044,
1611 0xf5c6fa49919776be},
1612 {1, 0, 0, 0}},
1613 {{0x8c944e760fd59e11, 0x3876cba1102fad5f, 0xa454c3fad83faa56,
1614 0x1ed7d1b9332010b9},
1615 {0xa1011a270024b889, 0x05e4d0dcac0cd344, 0x52b520f0eb6a2a24,
1616 0x3a2b03f03217257a},
1617 {1, 0, 0, 0}},
1618 {{0xf20fc2afdf1d043d, 0xf330240db58d5a62, 0xfc7d229ca0058c3b,
1619 0x15fee545c78dd9f6},
1620 {0x501e82885bc98cda, 0x41ef80e5d046ac04, 0x557d9f49461210fb,
1621 0x4ab5b6b2b8753f81},
1622 {1, 0, 0, 0}}}
1623};
1624
1625/*
1626 * select_point selects the |idx|th point from a precomputation table and
1627 * copies it to out.
1628 */
1629static void select_point(const u64 idx, unsigned int size,
1630 const smallfelem pre_comp[16][3], smallfelem out[3])
1631{
1632 unsigned i, j;
1633 u64 *outlimbs = &out[0][0];
16f8d4eb 1634
88f4c6f3 1635 memset(out, 0, sizeof(*out) * 3);
0f113f3e
MC
1636
1637 for (i = 0; i < size; i++) {
1638 const u64 *inlimbs = (u64 *)&pre_comp[i][0][0];
1639 u64 mask = i ^ idx;
1640 mask |= mask >> 4;
1641 mask |= mask >> 2;
1642 mask |= mask >> 1;
1643 mask &= 1;
1644 mask--;
1645 for (j = 0; j < NLIMBS * 3; j++)
1646 outlimbs[j] |= inlimbs[j] & mask;
1647 }
1648}
3e00b4c9
BM
1649
1650/* get_bit returns the |i|th bit in |in| */
1651static char get_bit(const felem_bytearray in, int i)
0f113f3e
MC
1652{
1653 if ((i < 0) || (i >= 256))
1654 return 0;
1655 return (in[i >> 3] >> (i & 7)) & 1;
1656}
1657
1658/*
1659 * Interleaved point multiplication using precomputed point multiples: The
1660 * small point multiples 0*P, 1*P, ..., 17*P are in pre_comp[], the scalars
1661 * in scalars[]. If g_scalar is non-NULL, we also add this multiple of the
1662 * generator, using certain (large) precomputed multiples in g_pre_comp.
1663 * Output point (X, Y, Z) is stored in x_out, y_out, z_out
1664 */
3e00b4c9 1665static void batch_mul(felem x_out, felem y_out, felem z_out,
0f113f3e
MC
1666 const felem_bytearray scalars[],
1667 const unsigned num_points, const u8 *g_scalar,
1668 const int mixed, const smallfelem pre_comp[][17][3],
1669 const smallfelem g_pre_comp[2][16][3])
1670{
1671 int i, skip;
1672 unsigned num, gen_mul = (g_scalar != NULL);
1673 felem nq[3], ftmp;
1674 smallfelem tmp[3];
1675 u64 bits;
1676 u8 sign, digit;
1677
1678 /* set nq to the point at infinity */
16f8d4eb 1679 memset(nq, 0, sizeof(nq));
0f113f3e
MC
1680
1681 /*
1682 * Loop over all scalars msb-to-lsb, interleaving additions of multiples
1683 * of the generator (two in each of the last 32 rounds) and additions of
1684 * other points multiples (every 5th round).
1685 */
1686 skip = 1; /* save two point operations in the first
1687 * round */
1688 for (i = (num_points ? 255 : 31); i >= 0; --i) {
1689 /* double */
1690 if (!skip)
1691 point_double(nq[0], nq[1], nq[2], nq[0], nq[1], nq[2]);
1692
1693 /* add multiples of the generator */
1694 if (gen_mul && (i <= 31)) {
1695 /* first, look 32 bits upwards */
1696 bits = get_bit(g_scalar, i + 224) << 3;
1697 bits |= get_bit(g_scalar, i + 160) << 2;
1698 bits |= get_bit(g_scalar, i + 96) << 1;
1699 bits |= get_bit(g_scalar, i + 32);
1700 /* select the point to add, in constant time */
1701 select_point(bits, 16, g_pre_comp[1], tmp);
1702
1703 if (!skip) {
1704 /* Arg 1 below is for "mixed" */
1705 point_add(nq[0], nq[1], nq[2],
1706 nq[0], nq[1], nq[2], 1, tmp[0], tmp[1], tmp[2]);
1707 } else {
1708 smallfelem_expand(nq[0], tmp[0]);
1709 smallfelem_expand(nq[1], tmp[1]);
1710 smallfelem_expand(nq[2], tmp[2]);
1711 skip = 0;
1712 }
1713
1714 /* second, look at the current position */
1715 bits = get_bit(g_scalar, i + 192) << 3;
1716 bits |= get_bit(g_scalar, i + 128) << 2;
1717 bits |= get_bit(g_scalar, i + 64) << 1;
1718 bits |= get_bit(g_scalar, i);
1719 /* select the point to add, in constant time */
1720 select_point(bits, 16, g_pre_comp[0], tmp);
1721 /* Arg 1 below is for "mixed" */
1722 point_add(nq[0], nq[1], nq[2],
1723 nq[0], nq[1], nq[2], 1, tmp[0], tmp[1], tmp[2]);
1724 }
1725
1726 /* do other additions every 5 doublings */
1727 if (num_points && (i % 5 == 0)) {
1728 /* loop over all scalars */
1729 for (num = 0; num < num_points; ++num) {
1730 bits = get_bit(scalars[num], i + 4) << 5;
1731 bits |= get_bit(scalars[num], i + 3) << 4;
1732 bits |= get_bit(scalars[num], i + 2) << 3;
1733 bits |= get_bit(scalars[num], i + 1) << 2;
1734 bits |= get_bit(scalars[num], i) << 1;
1735 bits |= get_bit(scalars[num], i - 1);
1736 ec_GFp_nistp_recode_scalar_bits(&sign, &digit, bits);
1737
1738 /*
1739 * select the point to add or subtract, in constant time
1740 */
1741 select_point(digit, 17, pre_comp[num], tmp);
1742 smallfelem_neg(ftmp, tmp[1]); /* (X, -Y, Z) is the negative
1743 * point */
1744 copy_small_conditional(ftmp, tmp[1], (((limb) sign) - 1));
1745 felem_contract(tmp[1], ftmp);
1746
1747 if (!skip) {
1748 point_add(nq[0], nq[1], nq[2],
1749 nq[0], nq[1], nq[2],
1750 mixed, tmp[0], tmp[1], tmp[2]);
1751 } else {
1752 smallfelem_expand(nq[0], tmp[0]);
1753 smallfelem_expand(nq[1], tmp[1]);
1754 smallfelem_expand(nq[2], tmp[2]);
1755 skip = 0;
1756 }
1757 }
1758 }
1759 }
1760 felem_assign(x_out, nq[0]);
1761 felem_assign(y_out, nq[1]);
1762 felem_assign(z_out, nq[2]);
1763}
3e00b4c9
BM
1764
1765/* Precomputation for the group generator. */
3aef36ff 1766struct nistp256_pre_comp_st {
0f113f3e 1767 smallfelem g_pre_comp[2][16][3];
2f545ae4 1768 CRYPTO_REF_COUNT references;
9b398ef2 1769 CRYPTO_RWLOCK *lock;
3aef36ff 1770};
3e00b4c9
BM
1771
1772const EC_METHOD *EC_GFp_nistp256_method(void)
0f113f3e
MC
1773{
1774 static const EC_METHOD ret = {
1775 EC_FLAGS_DEFAULT_OCT,
1776 NID_X9_62_prime_field,
1777 ec_GFp_nistp256_group_init,
1778 ec_GFp_simple_group_finish,
1779 ec_GFp_simple_group_clear_finish,
1780 ec_GFp_nist_group_copy,
1781 ec_GFp_nistp256_group_set_curve,
1782 ec_GFp_simple_group_get_curve,
1783 ec_GFp_simple_group_get_degree,
9ff9bccc 1784 ec_group_simple_order_bits,
0f113f3e
MC
1785 ec_GFp_simple_group_check_discriminant,
1786 ec_GFp_simple_point_init,
1787 ec_GFp_simple_point_finish,
1788 ec_GFp_simple_point_clear_finish,
1789 ec_GFp_simple_point_copy,
1790 ec_GFp_simple_point_set_to_infinity,
1791 ec_GFp_simple_set_Jprojective_coordinates_GFp,
1792 ec_GFp_simple_get_Jprojective_coordinates_GFp,
1793 ec_GFp_simple_point_set_affine_coordinates,
1794 ec_GFp_nistp256_point_get_affine_coordinates,
1795 0 /* point_set_compressed_coordinates */ ,
1796 0 /* point2oct */ ,
1797 0 /* oct2point */ ,
1798 ec_GFp_simple_add,
1799 ec_GFp_simple_dbl,
1800 ec_GFp_simple_invert,
1801 ec_GFp_simple_is_at_infinity,
1802 ec_GFp_simple_is_on_curve,
1803 ec_GFp_simple_cmp,
1804 ec_GFp_simple_make_affine,
1805 ec_GFp_simple_points_make_affine,
1806 ec_GFp_nistp256_points_mul,
1807 ec_GFp_nistp256_precompute_mult,
1808 ec_GFp_nistp256_have_precompute_mult,
1809 ec_GFp_nist_field_mul,
1810 ec_GFp_nist_field_sqr,
1811 0 /* field_div */ ,
1812 0 /* field_encode */ ,
1813 0 /* field_decode */ ,
9ff9bccc
DSH
1814 0, /* field_set_to_one */
1815 ec_key_simple_priv2oct,
1816 ec_key_simple_oct2priv,
1817 0, /* set private */
1818 ec_key_simple_generate_key,
1819 ec_key_simple_check_key,
1820 ec_key_simple_generate_public_key,
1821 0, /* keycopy */
1822 0, /* keyfinish */
1823 ecdh_simple_compute_key
0f113f3e
MC
1824 };
1825
1826 return &ret;
1827}
3e00b4c9
BM
1828
1829/******************************************************************************/
0f113f3e
MC
1830/*
1831 * FUNCTIONS TO MANAGE PRECOMPUTATION
3e00b4c9
BM
1832 */
1833
1834static NISTP256_PRE_COMP *nistp256_pre_comp_new()
0f113f3e 1835{
a2d0baa2
F
1836 NISTP256_PRE_COMP *ret = OPENSSL_zalloc(sizeof(*ret));
1837
90945fa3 1838 if (ret == NULL) {
0f113f3e
MC
1839 ECerr(EC_F_NISTP256_PRE_COMP_NEW, ERR_R_MALLOC_FAILURE);
1840 return ret;
1841 }
a2d0baa2 1842
0f113f3e 1843 ret->references = 1;
9b398ef2
AG
1844
1845 ret->lock = CRYPTO_THREAD_lock_new();
1846 if (ret->lock == NULL) {
1847 ECerr(EC_F_NISTP256_PRE_COMP_NEW, ERR_R_MALLOC_FAILURE);
1848 OPENSSL_free(ret);
1849 return NULL;
1850 }
0f113f3e
MC
1851 return ret;
1852}
3e00b4c9 1853
3aef36ff 1854NISTP256_PRE_COMP *EC_nistp256_pre_comp_dup(NISTP256_PRE_COMP *p)
0f113f3e 1855{
9b398ef2 1856 int i;
3aef36ff 1857 if (p != NULL)
2f545ae4 1858 CRYPTO_UP_REF(&p->references, &i, p->lock);
3aef36ff 1859 return p;
0f113f3e 1860}
3e00b4c9 1861
3aef36ff 1862void EC_nistp256_pre_comp_free(NISTP256_PRE_COMP *pre)
0f113f3e 1863{
9b398ef2
AG
1864 int i;
1865
1866 if (pre == NULL)
0f113f3e 1867 return;
9b398ef2 1868
2f545ae4 1869 CRYPTO_DOWN_REF(&pre->references, &i, pre->lock);
9b398ef2
AG
1870 REF_PRINT_COUNT("EC_nistp256", x);
1871 if (i > 0)
1872 return;
1873 REF_ASSERT_ISNT(i < 0);
1874
1875 CRYPTO_THREAD_lock_free(pre->lock);
0f113f3e
MC
1876 OPENSSL_free(pre);
1877}
3e00b4c9 1878
3e00b4c9 1879/******************************************************************************/
0f113f3e
MC
1880/*
1881 * OPENSSL EC_METHOD FUNCTIONS
3e00b4c9
BM
1882 */
1883
1884int ec_GFp_nistp256_group_init(EC_GROUP *group)
0f113f3e
MC
1885{
1886 int ret;
1887 ret = ec_GFp_simple_group_init(group);
1888 group->a_is_minus3 = 1;
1889 return ret;
1890}
3e00b4c9
BM
1891
1892int ec_GFp_nistp256_group_set_curve(EC_GROUP *group, const BIGNUM *p,
0f113f3e
MC
1893 const BIGNUM *a, const BIGNUM *b,
1894 BN_CTX *ctx)
1895{
1896 int ret = 0;
1897 BN_CTX *new_ctx = NULL;
1898 BIGNUM *curve_p, *curve_a, *curve_b;
1899
1900 if (ctx == NULL)
1901 if ((ctx = new_ctx = BN_CTX_new()) == NULL)
1902 return 0;
1903 BN_CTX_start(ctx);
1904 if (((curve_p = BN_CTX_get(ctx)) == NULL) ||
1905 ((curve_a = BN_CTX_get(ctx)) == NULL) ||
1906 ((curve_b = BN_CTX_get(ctx)) == NULL))
1907 goto err;
1908 BN_bin2bn(nistp256_curve_params[0], sizeof(felem_bytearray), curve_p);
1909 BN_bin2bn(nistp256_curve_params[1], sizeof(felem_bytearray), curve_a);
1910 BN_bin2bn(nistp256_curve_params[2], sizeof(felem_bytearray), curve_b);
1911 if ((BN_cmp(curve_p, p)) || (BN_cmp(curve_a, a)) || (BN_cmp(curve_b, b))) {
1912 ECerr(EC_F_EC_GFP_NISTP256_GROUP_SET_CURVE,
1913 EC_R_WRONG_CURVE_PARAMETERS);
1914 goto err;
1915 }
1916 group->field_mod_func = BN_nist_mod_256;
1917 ret = ec_GFp_simple_group_set_curve(group, p, a, b, ctx);
1918 err:
1919 BN_CTX_end(ctx);
23a1d5e9 1920 BN_CTX_free(new_ctx);
0f113f3e
MC
1921 return ret;
1922}
1923
1924/*
1925 * Takes the Jacobian coordinates (X, Y, Z) of a point and returns (X', Y') =
1926 * (X/Z^2, Y/Z^3)
1927 */
3e00b4c9 1928int ec_GFp_nistp256_point_get_affine_coordinates(const EC_GROUP *group,
0f113f3e
MC
1929 const EC_POINT *point,
1930 BIGNUM *x, BIGNUM *y,
1931 BN_CTX *ctx)
1932{
1933 felem z1, z2, x_in, y_in;
1934 smallfelem x_out, y_out;
1935 longfelem tmp;
1936
1937 if (EC_POINT_is_at_infinity(group, point)) {
1938 ECerr(EC_F_EC_GFP_NISTP256_POINT_GET_AFFINE_COORDINATES,
1939 EC_R_POINT_AT_INFINITY);
1940 return 0;
1941 }
ace8f546
AP
1942 if ((!BN_to_felem(x_in, point->X)) || (!BN_to_felem(y_in, point->Y)) ||
1943 (!BN_to_felem(z1, point->Z)))
0f113f3e
MC
1944 return 0;
1945 felem_inv(z2, z1);
1946 felem_square(tmp, z2);
1947 felem_reduce(z1, tmp);
1948 felem_mul(tmp, x_in, z1);
1949 felem_reduce(x_in, tmp);
1950 felem_contract(x_out, x_in);
1951 if (x != NULL) {
1952 if (!smallfelem_to_BN(x, x_out)) {
1953 ECerr(EC_F_EC_GFP_NISTP256_POINT_GET_AFFINE_COORDINATES,
1954 ERR_R_BN_LIB);
1955 return 0;
1956 }
1957 }
1958 felem_mul(tmp, z1, z2);
1959 felem_reduce(z1, tmp);
1960 felem_mul(tmp, y_in, z1);
1961 felem_reduce(y_in, tmp);
1962 felem_contract(y_out, y_in);
1963 if (y != NULL) {
1964 if (!smallfelem_to_BN(y, y_out)) {
1965 ECerr(EC_F_EC_GFP_NISTP256_POINT_GET_AFFINE_COORDINATES,
1966 ERR_R_BN_LIB);
1967 return 0;
1968 }
1969 }
1970 return 1;
1971}
3e00b4c9 1972
b853717f 1973/* points below is of size |num|, and tmp_smallfelems is of size |num+1| */
0f113f3e
MC
1974static void make_points_affine(size_t num, smallfelem points[][3],
1975 smallfelem tmp_smallfelems[])
1976{
1977 /*
1978 * Runs in constant time, unless an input is the point at infinity (which
1979 * normally shouldn't happen).
1980 */
1981 ec_GFp_nistp_points_make_affine_internal(num,
1982 points,
1983 sizeof(smallfelem),
1984 tmp_smallfelems,
1985 (void (*)(void *))smallfelem_one,
1986 (int (*)(const void *))
1987 smallfelem_is_zero_int,
1988 (void (*)(void *, const void *))
1989 smallfelem_assign,
1990 (void (*)(void *, const void *))
1991 smallfelem_square_contract,
1992 (void (*)
1993 (void *, const void *,
1994 const void *))
1995 smallfelem_mul_contract,
1996 (void (*)(void *, const void *))
1997 smallfelem_inv_contract,
1998 /* nothing to contract */
1999 (void (*)(void *, const void *))
2000 smallfelem_assign);
2001}
2002
2003/*
2004 * Computes scalar*generator + \sum scalars[i]*points[i], ignoring NULL
2005 * values Result is stored in r (r can equal one of the inputs).
2006 */
3e00b4c9 2007int ec_GFp_nistp256_points_mul(const EC_GROUP *group, EC_POINT *r,
0f113f3e
MC
2008 const BIGNUM *scalar, size_t num,
2009 const EC_POINT *points[],
2010 const BIGNUM *scalars[], BN_CTX *ctx)
2011{
2012 int ret = 0;
2013 int j;
2014 int mixed = 0;
2015 BN_CTX *new_ctx = NULL;
2016 BIGNUM *x, *y, *z, *tmp_scalar;
2017 felem_bytearray g_secret;
2018 felem_bytearray *secrets = NULL;
16f8d4eb 2019 smallfelem (*pre_comp)[17][3] = NULL;
0f113f3e
MC
2020 smallfelem *tmp_smallfelems = NULL;
2021 felem_bytearray tmp;
2022 unsigned i, num_bytes;
2023 int have_pre_comp = 0;
2024 size_t num_points = num;
2025 smallfelem x_in, y_in, z_in;
2026 felem x_out, y_out, z_out;
2027 NISTP256_PRE_COMP *pre = NULL;
2028 const smallfelem(*g_pre_comp)[16][3] = NULL;
2029 EC_POINT *generator = NULL;
2030 const EC_POINT *p = NULL;
2031 const BIGNUM *p_scalar = NULL;
2032
2033 if (ctx == NULL)
2034 if ((ctx = new_ctx = BN_CTX_new()) == NULL)
2035 return 0;
2036 BN_CTX_start(ctx);
2037 if (((x = BN_CTX_get(ctx)) == NULL) ||
2038 ((y = BN_CTX_get(ctx)) == NULL) ||
2039 ((z = BN_CTX_get(ctx)) == NULL) ||
2040 ((tmp_scalar = BN_CTX_get(ctx)) == NULL))
2041 goto err;
2042
2043 if (scalar != NULL) {
3aef36ff 2044 pre = group->pre_comp.nistp256;
0f113f3e
MC
2045 if (pre)
2046 /* we have precomputation, try to use it */
2047 g_pre_comp = (const smallfelem(*)[16][3])pre->g_pre_comp;
2048 else
2049 /* try to use the standard precomputation */
2050 g_pre_comp = &gmul[0];
2051 generator = EC_POINT_new(group);
2052 if (generator == NULL)
2053 goto err;
2054 /* get the generator from precomputation */
2055 if (!smallfelem_to_BN(x, g_pre_comp[0][1][0]) ||
2056 !smallfelem_to_BN(y, g_pre_comp[0][1][1]) ||
2057 !smallfelem_to_BN(z, g_pre_comp[0][1][2])) {
2058 ECerr(EC_F_EC_GFP_NISTP256_POINTS_MUL, ERR_R_BN_LIB);
2059 goto err;
2060 }
2061 if (!EC_POINT_set_Jprojective_coordinates_GFp(group,
2062 generator, x, y, z,
2063 ctx))
2064 goto err;
2065 if (0 == EC_POINT_cmp(group, generator, group->generator, ctx))
2066 /* precomputation matches generator */
2067 have_pre_comp = 1;
2068 else
2069 /*
2070 * we don't have valid precomputation: treat the generator as a
2071 * random point
2072 */
2073 num_points++;
2074 }
2075 if (num_points > 0) {
2076 if (num_points >= 3) {
2077 /*
2078 * unless we precompute multiples for just one or two points,
2079 * converting those into affine form is time well spent
2080 */
2081 mixed = 1;
2082 }
16f8d4eb
RS
2083 secrets = OPENSSL_malloc(sizeof(*secrets) * num_points);
2084 pre_comp = OPENSSL_malloc(sizeof(*pre_comp) * num_points);
0f113f3e
MC
2085 if (mixed)
2086 tmp_smallfelems =
16f8d4eb 2087 OPENSSL_malloc(sizeof(*tmp_smallfelems) * (num_points * 17 + 1));
0f113f3e
MC
2088 if ((secrets == NULL) || (pre_comp == NULL)
2089 || (mixed && (tmp_smallfelems == NULL))) {
2090 ECerr(EC_F_EC_GFP_NISTP256_POINTS_MUL, ERR_R_MALLOC_FAILURE);
2091 goto err;
2092 }
2093
2094 /*
2095 * we treat NULL scalars as 0, and NULL points as points at infinity,
2096 * i.e., they contribute nothing to the linear combination
2097 */
16f8d4eb
RS
2098 memset(secrets, 0, sizeof(*secrets) * num_points);
2099 memset(pre_comp, 0, sizeof(*pre_comp) * num_points);
0f113f3e
MC
2100 for (i = 0; i < num_points; ++i) {
2101 if (i == num)
2102 /*
2103 * we didn't have a valid precomputation, so we pick the
2104 * generator
2105 */
2106 {
2107 p = EC_GROUP_get0_generator(group);
2108 p_scalar = scalar;
2109 } else
2110 /* the i^th point */
2111 {
2112 p = points[i];
2113 p_scalar = scalars[i];
2114 }
2115 if ((p_scalar != NULL) && (p != NULL)) {
2116 /* reduce scalar to 0 <= scalar < 2^256 */
2117 if ((BN_num_bits(p_scalar) > 256)
2118 || (BN_is_negative(p_scalar))) {
2119 /*
2120 * this is an unusual input, and we don't guarantee
2121 * constant-timeness
2122 */
ace8f546 2123 if (!BN_nnmod(tmp_scalar, p_scalar, group->order, ctx)) {
0f113f3e
MC
2124 ECerr(EC_F_EC_GFP_NISTP256_POINTS_MUL, ERR_R_BN_LIB);
2125 goto err;
2126 }
2127 num_bytes = BN_bn2bin(tmp_scalar, tmp);
2128 } else
2129 num_bytes = BN_bn2bin(p_scalar, tmp);
2130 flip_endian(secrets[i], tmp, num_bytes);
2131 /* precompute multiples */
ace8f546
AP
2132 if ((!BN_to_felem(x_out, p->X)) ||
2133 (!BN_to_felem(y_out, p->Y)) ||
2134 (!BN_to_felem(z_out, p->Z)))
0f113f3e
MC
2135 goto err;
2136 felem_shrink(pre_comp[i][1][0], x_out);
2137 felem_shrink(pre_comp[i][1][1], y_out);
2138 felem_shrink(pre_comp[i][1][2], z_out);
2139 for (j = 2; j <= 16; ++j) {
2140 if (j & 1) {
2141 point_add_small(pre_comp[i][j][0], pre_comp[i][j][1],
2142 pre_comp[i][j][2], pre_comp[i][1][0],
2143 pre_comp[i][1][1], pre_comp[i][1][2],
2144 pre_comp[i][j - 1][0],
2145 pre_comp[i][j - 1][1],
2146 pre_comp[i][j - 1][2]);
2147 } else {
2148 point_double_small(pre_comp[i][j][0],
2149 pre_comp[i][j][1],
2150 pre_comp[i][j][2],
2151 pre_comp[i][j / 2][0],
2152 pre_comp[i][j / 2][1],
2153 pre_comp[i][j / 2][2]);
2154 }
2155 }
2156 }
2157 }
2158 if (mixed)
2159 make_points_affine(num_points * 17, pre_comp[0], tmp_smallfelems);
2160 }
2161
2162 /* the scalar for the generator */
2163 if ((scalar != NULL) && (have_pre_comp)) {
2164 memset(g_secret, 0, sizeof(g_secret));
2165 /* reduce scalar to 0 <= scalar < 2^256 */
2166 if ((BN_num_bits(scalar) > 256) || (BN_is_negative(scalar))) {
2167 /*
2168 * this is an unusual input, and we don't guarantee
2169 * constant-timeness
2170 */
ace8f546 2171 if (!BN_nnmod(tmp_scalar, scalar, group->order, ctx)) {
0f113f3e
MC
2172 ECerr(EC_F_EC_GFP_NISTP256_POINTS_MUL, ERR_R_BN_LIB);
2173 goto err;
2174 }
2175 num_bytes = BN_bn2bin(tmp_scalar, tmp);
2176 } else
2177 num_bytes = BN_bn2bin(scalar, tmp);
2178 flip_endian(g_secret, tmp, num_bytes);
2179 /* do the multiplication with generator precomputation */
2180 batch_mul(x_out, y_out, z_out,
2181 (const felem_bytearray(*))secrets, num_points,
2182 g_secret,
2183 mixed, (const smallfelem(*)[17][3])pre_comp, g_pre_comp);
2184 } else
2185 /* do the multiplication without generator precomputation */
2186 batch_mul(x_out, y_out, z_out,
2187 (const felem_bytearray(*))secrets, num_points,
2188 NULL, mixed, (const smallfelem(*)[17][3])pre_comp, NULL);
2189 /* reduce the output to its unique minimal representation */
2190 felem_contract(x_in, x_out);
2191 felem_contract(y_in, y_out);
2192 felem_contract(z_in, z_out);
2193 if ((!smallfelem_to_BN(x, x_in)) || (!smallfelem_to_BN(y, y_in)) ||
2194 (!smallfelem_to_BN(z, z_in))) {
2195 ECerr(EC_F_EC_GFP_NISTP256_POINTS_MUL, ERR_R_BN_LIB);
2196 goto err;
2197 }
2198 ret = EC_POINT_set_Jprojective_coordinates_GFp(group, r, x, y, z, ctx);
2199
2200 err:
2201 BN_CTX_end(ctx);
8fdc3734 2202 EC_POINT_free(generator);
23a1d5e9 2203 BN_CTX_free(new_ctx);
b548a1f1
RS
2204 OPENSSL_free(secrets);
2205 OPENSSL_free(pre_comp);
2206 OPENSSL_free(tmp_smallfelems);
0f113f3e
MC
2207 return ret;
2208}
3e00b4c9
BM
2209
2210int ec_GFp_nistp256_precompute_mult(EC_GROUP *group, BN_CTX *ctx)
0f113f3e
MC
2211{
2212 int ret = 0;
2213 NISTP256_PRE_COMP *pre = NULL;
2214 int i, j;
2215 BN_CTX *new_ctx = NULL;
2216 BIGNUM *x, *y;
2217 EC_POINT *generator = NULL;
2218 smallfelem tmp_smallfelems[32];
2219 felem x_tmp, y_tmp, z_tmp;
2220
2221 /* throw away old precomputation */
2c52ac9b 2222 EC_pre_comp_free(group);
0f113f3e
MC
2223 if (ctx == NULL)
2224 if ((ctx = new_ctx = BN_CTX_new()) == NULL)
2225 return 0;
2226 BN_CTX_start(ctx);
2227 if (((x = BN_CTX_get(ctx)) == NULL) || ((y = BN_CTX_get(ctx)) == NULL))
2228 goto err;
2229 /* get the generator */
2230 if (group->generator == NULL)
2231 goto err;
2232 generator = EC_POINT_new(group);
2233 if (generator == NULL)
2234 goto err;
2235 BN_bin2bn(nistp256_curve_params[3], sizeof(felem_bytearray), x);
2236 BN_bin2bn(nistp256_curve_params[4], sizeof(felem_bytearray), y);
2237 if (!EC_POINT_set_affine_coordinates_GFp(group, generator, x, y, ctx))
2238 goto err;
2239 if ((pre = nistp256_pre_comp_new()) == NULL)
2240 goto err;
2241 /*
2242 * if the generator is the standard one, use built-in precomputation
2243 */
2244 if (0 == EC_POINT_cmp(group, generator, group->generator, ctx)) {
2245 memcpy(pre->g_pre_comp, gmul, sizeof(pre->g_pre_comp));
615614c8 2246 goto done;
0f113f3e 2247 }
ace8f546
AP
2248 if ((!BN_to_felem(x_tmp, group->generator->X)) ||
2249 (!BN_to_felem(y_tmp, group->generator->Y)) ||
2250 (!BN_to_felem(z_tmp, group->generator->Z)))
0f113f3e
MC
2251 goto err;
2252 felem_shrink(pre->g_pre_comp[0][1][0], x_tmp);
2253 felem_shrink(pre->g_pre_comp[0][1][1], y_tmp);
2254 felem_shrink(pre->g_pre_comp[0][1][2], z_tmp);
2255 /*
2256 * compute 2^64*G, 2^128*G, 2^192*G for the first table, 2^32*G, 2^96*G,
2257 * 2^160*G, 2^224*G for the second one
2258 */
2259 for (i = 1; i <= 8; i <<= 1) {
2260 point_double_small(pre->g_pre_comp[1][i][0], pre->g_pre_comp[1][i][1],
2261 pre->g_pre_comp[1][i][2], pre->g_pre_comp[0][i][0],
2262 pre->g_pre_comp[0][i][1],
2263 pre->g_pre_comp[0][i][2]);
2264 for (j = 0; j < 31; ++j) {
2265 point_double_small(pre->g_pre_comp[1][i][0],
2266 pre->g_pre_comp[1][i][1],
2267 pre->g_pre_comp[1][i][2],
2268 pre->g_pre_comp[1][i][0],
2269 pre->g_pre_comp[1][i][1],
2270 pre->g_pre_comp[1][i][2]);
2271 }
2272 if (i == 8)
2273 break;
2274 point_double_small(pre->g_pre_comp[0][2 * i][0],
2275 pre->g_pre_comp[0][2 * i][1],
2276 pre->g_pre_comp[0][2 * i][2],
2277 pre->g_pre_comp[1][i][0], pre->g_pre_comp[1][i][1],
2278 pre->g_pre_comp[1][i][2]);
2279 for (j = 0; j < 31; ++j) {
2280 point_double_small(pre->g_pre_comp[0][2 * i][0],
2281 pre->g_pre_comp[0][2 * i][1],
2282 pre->g_pre_comp[0][2 * i][2],
2283 pre->g_pre_comp[0][2 * i][0],
2284 pre->g_pre_comp[0][2 * i][1],
2285 pre->g_pre_comp[0][2 * i][2]);
2286 }
2287 }
2288 for (i = 0; i < 2; i++) {
2289 /* g_pre_comp[i][0] is the point at infinity */
2290 memset(pre->g_pre_comp[i][0], 0, sizeof(pre->g_pre_comp[i][0]));
2291 /* the remaining multiples */
2292 /* 2^64*G + 2^128*G resp. 2^96*G + 2^160*G */
2293 point_add_small(pre->g_pre_comp[i][6][0], pre->g_pre_comp[i][6][1],
2294 pre->g_pre_comp[i][6][2], pre->g_pre_comp[i][4][0],
2295 pre->g_pre_comp[i][4][1], pre->g_pre_comp[i][4][2],
2296 pre->g_pre_comp[i][2][0], pre->g_pre_comp[i][2][1],
2297 pre->g_pre_comp[i][2][2]);
2298 /* 2^64*G + 2^192*G resp. 2^96*G + 2^224*G */
2299 point_add_small(pre->g_pre_comp[i][10][0], pre->g_pre_comp[i][10][1],
2300 pre->g_pre_comp[i][10][2], pre->g_pre_comp[i][8][0],
2301 pre->g_pre_comp[i][8][1], pre->g_pre_comp[i][8][2],
2302 pre->g_pre_comp[i][2][0], pre->g_pre_comp[i][2][1],
2303 pre->g_pre_comp[i][2][2]);
2304 /* 2^128*G + 2^192*G resp. 2^160*G + 2^224*G */
2305 point_add_small(pre->g_pre_comp[i][12][0], pre->g_pre_comp[i][12][1],
2306 pre->g_pre_comp[i][12][2], pre->g_pre_comp[i][8][0],
2307 pre->g_pre_comp[i][8][1], pre->g_pre_comp[i][8][2],
2308 pre->g_pre_comp[i][4][0], pre->g_pre_comp[i][4][1],
2309 pre->g_pre_comp[i][4][2]);
2310 /*
2311 * 2^64*G + 2^128*G + 2^192*G resp. 2^96*G + 2^160*G + 2^224*G
2312 */
2313 point_add_small(pre->g_pre_comp[i][14][0], pre->g_pre_comp[i][14][1],
2314 pre->g_pre_comp[i][14][2], pre->g_pre_comp[i][12][0],
2315 pre->g_pre_comp[i][12][1], pre->g_pre_comp[i][12][2],
2316 pre->g_pre_comp[i][2][0], pre->g_pre_comp[i][2][1],
2317 pre->g_pre_comp[i][2][2]);
2318 for (j = 1; j < 8; ++j) {
2319 /* odd multiples: add G resp. 2^32*G */
2320 point_add_small(pre->g_pre_comp[i][2 * j + 1][0],
2321 pre->g_pre_comp[i][2 * j + 1][1],
2322 pre->g_pre_comp[i][2 * j + 1][2],
2323 pre->g_pre_comp[i][2 * j][0],
2324 pre->g_pre_comp[i][2 * j][1],
2325 pre->g_pre_comp[i][2 * j][2],
2326 pre->g_pre_comp[i][1][0],
2327 pre->g_pre_comp[i][1][1],
2328 pre->g_pre_comp[i][1][2]);
2329 }
2330 }
2331 make_points_affine(31, &(pre->g_pre_comp[0][1]), tmp_smallfelems);
2332
615614c8 2333 done:
3aef36ff 2334 SETPRECOMP(group, nistp256, pre);
0f113f3e 2335 pre = NULL;
3aef36ff
RS
2336 ret = 1;
2337
3e00b4c9 2338 err:
0f113f3e 2339 BN_CTX_end(ctx);
8fdc3734 2340 EC_POINT_free(generator);
23a1d5e9 2341 BN_CTX_free(new_ctx);
3aef36ff 2342 EC_nistp256_pre_comp_free(pre);
0f113f3e
MC
2343 return ret;
2344}
3e00b4c9
BM
2345
2346int ec_GFp_nistp256_have_precompute_mult(const EC_GROUP *group)
0f113f3e 2347{
3aef36ff 2348 return HAVEPRECOMP(group, nistp256);
0f113f3e 2349}
3e00b4c9 2350#endif