]> git.ipfire.org Git - thirdparty/openssl.git/blame - crypto/ec/ecp_nistp521.c
ec/ecp_nistp*.c: sanitize for undefined/implmentation-specific behaviour.
[thirdparty/openssl.git] / crypto / ec / ecp_nistp521.c
CommitLineData
9c37519b
BM
1/* crypto/ec/ecp_nistp521.c */
2/*
3 * Written by Adam Langley (Google) for the OpenSSL project
4 */
5/* Copyright 2011 Google Inc.
6 *
7 * Licensed under the Apache License, Version 2.0 (the "License");
8 *
9 * you may not use this file except in compliance with the License.
10 * You may obtain a copy of the License at
11 *
12 * http://www.apache.org/licenses/LICENSE-2.0
13 *
14 * Unless required by applicable law or agreed to in writing, software
15 * distributed under the License is distributed on an "AS IS" BASIS,
16 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 * See the License for the specific language governing permissions and
18 * limitations under the License.
19 */
20
21/*
22 * A 64-bit implementation of the NIST P-521 elliptic curve point multiplication
23 *
24 * OpenSSL integration was taken from Emilia Kasper's work in ecp_nistp224.c.
25 * Otherwise based on Emilia's P224 work, which was inspired by my curve25519
26 * work which got its smarts from Daniel J. Bernstein's work on the same.
27 */
28
3d520f7c
BM
29#include <openssl/opensslconf.h>
30#ifndef OPENSSL_NO_EC_NISTP_64_GCC_128
9c37519b 31
ae5c8664
MC
32# ifndef OPENSSL_SYS_VMS
33# include <stdint.h>
34# else
35# include <inttypes.h>
36# endif
70505bc3 37
ae5c8664
MC
38# include <string.h>
39# include <openssl/err.h>
40# include "ec_lcl.h"
9c37519b 41
ae5c8664 42# if defined(__GNUC__) && (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 1))
9c37519b 43 /* even with gcc, the typedef won't work for 32-bit platforms */
ae5c8664
MC
44typedef __uint128_t uint128_t; /* nonstandard; implemented by gcc on 64-bit
45 * platforms */
46# else
47# error "Need GCC 3.1 or later to define type uint128_t"
48# endif
9c37519b
BM
49
50typedef uint8_t u8;
51typedef uint64_t u64;
9c37519b 52
ae5c8664
MC
53/*
54 * The underlying field. P521 operates over GF(2^521-1). We can serialise an
55 * element of this field into 66 bytes where the most significant byte
56 * contains only a single bit. We call this an felem_bytearray.
57 */
9c37519b
BM
58
59typedef u8 felem_bytearray[66];
60
ae5c8664
MC
61/*
62 * These are the parameters of P521, taken from FIPS 186-3, section D.1.2.5.
63 * These values are big-endian.
64 */
65static const felem_bytearray nistp521_curve_params[5] = {
66 {0x01, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* p */
67 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
68 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
69 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
70 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
71 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
72 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
73 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
74 0xff, 0xff},
75 {0x01, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* a = -3 */
76 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
77 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
78 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
79 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
80 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
81 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
82 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
83 0xff, 0xfc},
84 {0x00, 0x51, 0x95, 0x3e, 0xb9, 0x61, 0x8e, 0x1c, /* b */
85 0x9a, 0x1f, 0x92, 0x9a, 0x21, 0xa0, 0xb6, 0x85,
86 0x40, 0xee, 0xa2, 0xda, 0x72, 0x5b, 0x99, 0xb3,
87 0x15, 0xf3, 0xb8, 0xb4, 0x89, 0x91, 0x8e, 0xf1,
88 0x09, 0xe1, 0x56, 0x19, 0x39, 0x51, 0xec, 0x7e,
89 0x93, 0x7b, 0x16, 0x52, 0xc0, 0xbd, 0x3b, 0xb1,
90 0xbf, 0x07, 0x35, 0x73, 0xdf, 0x88, 0x3d, 0x2c,
91 0x34, 0xf1, 0xef, 0x45, 0x1f, 0xd4, 0x6b, 0x50,
92 0x3f, 0x00},
93 {0x00, 0xc6, 0x85, 0x8e, 0x06, 0xb7, 0x04, 0x04, /* x */
94 0xe9, 0xcd, 0x9e, 0x3e, 0xcb, 0x66, 0x23, 0x95,
95 0xb4, 0x42, 0x9c, 0x64, 0x81, 0x39, 0x05, 0x3f,
96 0xb5, 0x21, 0xf8, 0x28, 0xaf, 0x60, 0x6b, 0x4d,
97 0x3d, 0xba, 0xa1, 0x4b, 0x5e, 0x77, 0xef, 0xe7,
98 0x59, 0x28, 0xfe, 0x1d, 0xc1, 0x27, 0xa2, 0xff,
99 0xa8, 0xde, 0x33, 0x48, 0xb3, 0xc1, 0x85, 0x6a,
100 0x42, 0x9b, 0xf9, 0x7e, 0x7e, 0x31, 0xc2, 0xe5,
101 0xbd, 0x66},
102 {0x01, 0x18, 0x39, 0x29, 0x6a, 0x78, 0x9a, 0x3b, /* y */
103 0xc0, 0x04, 0x5c, 0x8a, 0x5f, 0xb4, 0x2c, 0x7d,
104 0x1b, 0xd9, 0x98, 0xf5, 0x44, 0x49, 0x57, 0x9b,
105 0x44, 0x68, 0x17, 0xaf, 0xbd, 0x17, 0x27, 0x3e,
106 0x66, 0x2c, 0x97, 0xee, 0x72, 0x99, 0x5e, 0xf4,
107 0x26, 0x40, 0xc5, 0x50, 0xb9, 0x01, 0x3f, 0xad,
108 0x07, 0x61, 0x35, 0x3c, 0x70, 0x86, 0xa2, 0x72,
109 0xc2, 0x40, 0x88, 0xbe, 0x94, 0x76, 0x9f, 0xd1,
110 0x66, 0x50}
111};
9c37519b 112
6977c7e2
TH
113/*-
114 * The representation of field elements.
9c37519b
BM
115 * ------------------------------------
116 *
117 * We represent field elements with nine values. These values are either 64 or
118 * 128 bits and the field element represented is:
119 * v[0]*2^0 + v[1]*2^58 + v[2]*2^116 + ... + v[8]*2^464 (mod p)
120 * Each of the nine values is called a 'limb'. Since the limbs are spaced only
121 * 58 bits apart, but are greater than 58 bits in length, the most significant
122 * bits of each limb overlap with the least significant bits of the next.
123 *
124 * A field element with 64-bit limbs is an 'felem'. One with 128-bit limbs is a
125 * 'largefelem' */
126
ae5c8664 127# define NLIMBS 9
9c37519b
BM
128
129typedef uint64_t limb;
130typedef limb felem[NLIMBS];
131typedef uint128_t largefelem[NLIMBS];
132
133static const limb bottom57bits = 0x1ffffffffffffff;
134static const limb bottom58bits = 0x3ffffffffffffff;
135
ae5c8664
MC
136/*
137 * bin66_to_felem takes a little-endian byte array and converts it into felem
138 * form. This assumes that the CPU is little-endian.
139 */
9c37519b 140static void bin66_to_felem(felem out, const u8 in[66])
ae5c8664
MC
141{
142 out[0] = (*((limb *) & in[0])) & bottom58bits;
143 out[1] = (*((limb *) & in[7]) >> 2) & bottom58bits;
144 out[2] = (*((limb *) & in[14]) >> 4) & bottom58bits;
145 out[3] = (*((limb *) & in[21]) >> 6) & bottom58bits;
146 out[4] = (*((limb *) & in[29])) & bottom58bits;
147 out[5] = (*((limb *) & in[36]) >> 2) & bottom58bits;
148 out[6] = (*((limb *) & in[43]) >> 4) & bottom58bits;
149 out[7] = (*((limb *) & in[50]) >> 6) & bottom58bits;
150 out[8] = (*((limb *) & in[58])) & bottom57bits;
151}
152
153/*
154 * felem_to_bin66 takes an felem and serialises into a little endian, 66 byte
155 * array. This assumes that the CPU is little-endian.
156 */
9c37519b 157static void felem_to_bin66(u8 out[66], const felem in)
ae5c8664
MC
158{
159 memset(out, 0, 66);
160 (*((limb *) & out[0])) = in[0];
161 (*((limb *) & out[7])) |= in[1] << 2;
162 (*((limb *) & out[14])) |= in[2] << 4;
163 (*((limb *) & out[21])) |= in[3] << 6;
164 (*((limb *) & out[29])) = in[4];
165 (*((limb *) & out[36])) |= in[5] << 2;
166 (*((limb *) & out[43])) |= in[6] << 4;
167 (*((limb *) & out[50])) |= in[7] << 6;
168 (*((limb *) & out[58])) = in[8];
169}
9c37519b
BM
170
171/* To preserve endianness when using BN_bn2bin and BN_bin2bn */
172static void flip_endian(u8 *out, const u8 *in, unsigned len)
ae5c8664
MC
173{
174 unsigned i;
175 for (i = 0; i < len; ++i)
176 out[i] = in[len - 1 - i];
177}
9c37519b
BM
178
179/* BN_to_felem converts an OpenSSL BIGNUM into an felem */
180static int BN_to_felem(felem out, const BIGNUM *bn)
ae5c8664
MC
181{
182 felem_bytearray b_in;
183 felem_bytearray b_out;
184 unsigned num_bytes;
185
186 /* BN_bn2bin eats leading zeroes */
c6738fd2 187 memset(b_out, 0, sizeof(b_out));
ae5c8664 188 num_bytes = BN_num_bytes(bn);
c6738fd2 189 if (num_bytes > sizeof(b_out)) {
ae5c8664
MC
190 ECerr(EC_F_BN_TO_FELEM, EC_R_BIGNUM_OUT_OF_RANGE);
191 return 0;
192 }
193 if (BN_is_negative(bn)) {
194 ECerr(EC_F_BN_TO_FELEM, EC_R_BIGNUM_OUT_OF_RANGE);
195 return 0;
196 }
197 num_bytes = BN_bn2bin(bn, b_in);
198 flip_endian(b_out, b_in, num_bytes);
199 bin66_to_felem(out, b_out);
200 return 1;
201}
9c37519b
BM
202
203/* felem_to_BN converts an felem into an OpenSSL BIGNUM */
204static BIGNUM *felem_to_BN(BIGNUM *out, const felem in)
ae5c8664
MC
205{
206 felem_bytearray b_in, b_out;
207 felem_to_bin66(b_in, in);
c6738fd2
RS
208 flip_endian(b_out, b_in, sizeof(b_out));
209 return BN_bin2bn(b_out, sizeof(b_out), out);
ae5c8664 210}
9c37519b 211
e19d4a99
MC
212/*-
213 * Field operations
214 * ----------------
215 */
9c37519b
BM
216
217static void felem_one(felem out)
ae5c8664
MC
218{
219 out[0] = 1;
220 out[1] = 0;
221 out[2] = 0;
222 out[3] = 0;
223 out[4] = 0;
224 out[5] = 0;
225 out[6] = 0;
226 out[7] = 0;
227 out[8] = 0;
228}
9c37519b
BM
229
230static void felem_assign(felem out, const felem in)
ae5c8664
MC
231{
232 out[0] = in[0];
233 out[1] = in[1];
234 out[2] = in[2];
235 out[3] = in[3];
236 out[4] = in[4];
237 out[5] = in[5];
238 out[6] = in[6];
239 out[7] = in[7];
240 out[8] = in[8];
241}
9c37519b
BM
242
243/* felem_sum64 sets out = out + in. */
244static void felem_sum64(felem out, const felem in)
ae5c8664
MC
245{
246 out[0] += in[0];
247 out[1] += in[1];
248 out[2] += in[2];
249 out[3] += in[3];
250 out[4] += in[4];
251 out[5] += in[5];
252 out[6] += in[6];
253 out[7] += in[7];
254 out[8] += in[8];
255}
9c37519b
BM
256
257/* felem_scalar sets out = in * scalar */
258static void felem_scalar(felem out, const felem in, limb scalar)
ae5c8664
MC
259{
260 out[0] = in[0] * scalar;
261 out[1] = in[1] * scalar;
262 out[2] = in[2] * scalar;
263 out[3] = in[3] * scalar;
264 out[4] = in[4] * scalar;
265 out[5] = in[5] * scalar;
266 out[6] = in[6] * scalar;
267 out[7] = in[7] * scalar;
268 out[8] = in[8] * scalar;
269}
9c37519b
BM
270
271/* felem_scalar64 sets out = out * scalar */
272static void felem_scalar64(felem out, limb scalar)
ae5c8664
MC
273{
274 out[0] *= scalar;
275 out[1] *= scalar;
276 out[2] *= scalar;
277 out[3] *= scalar;
278 out[4] *= scalar;
279 out[5] *= scalar;
280 out[6] *= scalar;
281 out[7] *= scalar;
282 out[8] *= scalar;
283}
9c37519b
BM
284
285/* felem_scalar128 sets out = out * scalar */
286static void felem_scalar128(largefelem out, limb scalar)
ae5c8664
MC
287{
288 out[0] *= scalar;
289 out[1] *= scalar;
290 out[2] *= scalar;
291 out[3] *= scalar;
292 out[4] *= scalar;
293 out[5] *= scalar;
294 out[6] *= scalar;
295 out[7] *= scalar;
296 out[8] *= scalar;
297}
9c37519b 298
6977c7e2
TH
299/*-
300 * felem_neg sets |out| to |-in|
9c37519b
BM
301 * On entry:
302 * in[i] < 2^59 + 2^14
303 * On exit:
304 * out[i] < 2^62
305 */
306static void felem_neg(felem out, const felem in)
ae5c8664
MC
307{
308 /* In order to prevent underflow, we subtract from 0 mod p. */
309 static const limb two62m3 = (((limb) 1) << 62) - (((limb) 1) << 5);
310 static const limb two62m2 = (((limb) 1) << 62) - (((limb) 1) << 4);
311
312 out[0] = two62m3 - in[0];
313 out[1] = two62m2 - in[1];
314 out[2] = two62m2 - in[2];
315 out[3] = two62m2 - in[3];
316 out[4] = two62m2 - in[4];
317 out[5] = two62m2 - in[5];
318 out[6] = two62m2 - in[6];
319 out[7] = two62m2 - in[7];
320 out[8] = two62m2 - in[8];
321}
9c37519b 322
6977c7e2
TH
323/*-
324 * felem_diff64 subtracts |in| from |out|
9c37519b
BM
325 * On entry:
326 * in[i] < 2^59 + 2^14
327 * On exit:
328 * out[i] < out[i] + 2^62
329 */
330static void felem_diff64(felem out, const felem in)
ae5c8664
MC
331{
332 /*
333 * In order to prevent underflow, we add 0 mod p before subtracting.
334 */
335 static const limb two62m3 = (((limb) 1) << 62) - (((limb) 1) << 5);
336 static const limb two62m2 = (((limb) 1) << 62) - (((limb) 1) << 4);
337
338 out[0] += two62m3 - in[0];
339 out[1] += two62m2 - in[1];
340 out[2] += two62m2 - in[2];
341 out[3] += two62m2 - in[3];
342 out[4] += two62m2 - in[4];
343 out[5] += two62m2 - in[5];
344 out[6] += two62m2 - in[6];
345 out[7] += two62m2 - in[7];
346 out[8] += two62m2 - in[8];
347}
9c37519b 348
6977c7e2
TH
349/*-
350 * felem_diff_128_64 subtracts |in| from |out|
9c37519b
BM
351 * On entry:
352 * in[i] < 2^62 + 2^17
353 * On exit:
354 * out[i] < out[i] + 2^63
355 */
356static void felem_diff_128_64(largefelem out, const felem in)
ae5c8664
MC
357{
358 /*
359 * In order to prevent underflow, we add 0 mod p before subtracting.
360 */
361 static const limb two63m6 = (((limb) 1) << 62) - (((limb) 1) << 5);
362 static const limb two63m5 = (((limb) 1) << 62) - (((limb) 1) << 4);
363
364 out[0] += two63m6 - in[0];
365 out[1] += two63m5 - in[1];
366 out[2] += two63m5 - in[2];
367 out[3] += two63m5 - in[3];
368 out[4] += two63m5 - in[4];
369 out[5] += two63m5 - in[5];
370 out[6] += two63m5 - in[6];
371 out[7] += two63m5 - in[7];
372 out[8] += two63m5 - in[8];
373}
9c37519b 374
6977c7e2
TH
375/*-
376 * felem_diff_128_64 subtracts |in| from |out|
9c37519b
BM
377 * On entry:
378 * in[i] < 2^126
379 * On exit:
380 * out[i] < out[i] + 2^127 - 2^69
381 */
382static void felem_diff128(largefelem out, const largefelem in)
ae5c8664
MC
383{
384 /*
385 * In order to prevent underflow, we add 0 mod p before subtracting.
386 */
387 static const uint128_t two127m70 =
388 (((uint128_t) 1) << 127) - (((uint128_t) 1) << 70);
389 static const uint128_t two127m69 =
390 (((uint128_t) 1) << 127) - (((uint128_t) 1) << 69);
391
392 out[0] += (two127m70 - in[0]);
393 out[1] += (two127m69 - in[1]);
394 out[2] += (two127m69 - in[2]);
395 out[3] += (two127m69 - in[3]);
396 out[4] += (two127m69 - in[4]);
397 out[5] += (two127m69 - in[5]);
398 out[6] += (two127m69 - in[6]);
399 out[7] += (two127m69 - in[7]);
400 out[8] += (two127m69 - in[8]);
401}
9c37519b 402
6977c7e2
TH
403/*-
404 * felem_square sets |out| = |in|^2
9c37519b
BM
405 * On entry:
406 * in[i] < 2^62
407 * On exit:
408 * out[i] < 17 * max(in[i]) * max(in[i])
409 */
410static void felem_square(largefelem out, const felem in)
ae5c8664
MC
411{
412 felem inx2, inx4;
413 felem_scalar(inx2, in, 2);
414 felem_scalar(inx4, in, 4);
415
83975c80
MC
416 /*-
417 * We have many cases were we want to do
418 * in[x] * in[y] +
419 * in[y] * in[x]
420 * This is obviously just
421 * 2 * in[x] * in[y]
422 * However, rather than do the doubling on the 128 bit result, we
423 * double one of the inputs to the multiplication by reading from
424 * |inx2|
425 */
ae5c8664
MC
426
427 out[0] = ((uint128_t) in[0]) * in[0];
428 out[1] = ((uint128_t) in[0]) * inx2[1];
429 out[2] = ((uint128_t) in[0]) * inx2[2] + ((uint128_t) in[1]) * in[1];
430 out[3] = ((uint128_t) in[0]) * inx2[3] + ((uint128_t) in[1]) * inx2[2];
431 out[4] = ((uint128_t) in[0]) * inx2[4] +
432 ((uint128_t) in[1]) * inx2[3] + ((uint128_t) in[2]) * in[2];
433 out[5] = ((uint128_t) in[0]) * inx2[5] +
434 ((uint128_t) in[1]) * inx2[4] + ((uint128_t) in[2]) * inx2[3];
435 out[6] = ((uint128_t) in[0]) * inx2[6] +
436 ((uint128_t) in[1]) * inx2[5] +
437 ((uint128_t) in[2]) * inx2[4] + ((uint128_t) in[3]) * in[3];
438 out[7] = ((uint128_t) in[0]) * inx2[7] +
439 ((uint128_t) in[1]) * inx2[6] +
440 ((uint128_t) in[2]) * inx2[5] + ((uint128_t) in[3]) * inx2[4];
441 out[8] = ((uint128_t) in[0]) * inx2[8] +
442 ((uint128_t) in[1]) * inx2[7] +
443 ((uint128_t) in[2]) * inx2[6] +
444 ((uint128_t) in[3]) * inx2[5] + ((uint128_t) in[4]) * in[4];
445
446 /*
447 * The remaining limbs fall above 2^521, with the first falling at 2^522.
448 * They correspond to locations one bit up from the limbs produced above
449 * so we would have to multiply by two to align them. Again, rather than
450 * operate on the 128-bit result, we double one of the inputs to the
451 * multiplication. If we want to double for both this reason, and the
452 * reason above, then we end up multiplying by four.
453 */
454
455 /* 9 */
456 out[0] += ((uint128_t) in[1]) * inx4[8] +
457 ((uint128_t) in[2]) * inx4[7] +
458 ((uint128_t) in[3]) * inx4[6] + ((uint128_t) in[4]) * inx4[5];
459
460 /* 10 */
461 out[1] += ((uint128_t) in[2]) * inx4[8] +
462 ((uint128_t) in[3]) * inx4[7] +
463 ((uint128_t) in[4]) * inx4[6] + ((uint128_t) in[5]) * inx2[5];
464
465 /* 11 */
466 out[2] += ((uint128_t) in[3]) * inx4[8] +
467 ((uint128_t) in[4]) * inx4[7] + ((uint128_t) in[5]) * inx4[6];
468
469 /* 12 */
470 out[3] += ((uint128_t) in[4]) * inx4[8] +
471 ((uint128_t) in[5]) * inx4[7] + ((uint128_t) in[6]) * inx2[6];
472
473 /* 13 */
474 out[4] += ((uint128_t) in[5]) * inx4[8] + ((uint128_t) in[6]) * inx4[7];
475
476 /* 14 */
477 out[5] += ((uint128_t) in[6]) * inx4[8] + ((uint128_t) in[7]) * inx2[7];
478
479 /* 15 */
480 out[6] += ((uint128_t) in[7]) * inx4[8];
481
482 /* 16 */
483 out[7] += ((uint128_t) in[8]) * inx2[8];
484}
9c37519b 485
6977c7e2
TH
486/*-
487 * felem_mul sets |out| = |in1| * |in2|
9c37519b
BM
488 * On entry:
489 * in1[i] < 2^64
490 * in2[i] < 2^63
491 * On exit:
492 * out[i] < 17 * max(in1[i]) * max(in2[i])
493 */
494static void felem_mul(largefelem out, const felem in1, const felem in2)
ae5c8664
MC
495{
496 felem in2x2;
497 felem_scalar(in2x2, in2, 2);
498
499 out[0] = ((uint128_t) in1[0]) * in2[0];
500
501 out[1] = ((uint128_t) in1[0]) * in2[1] + ((uint128_t) in1[1]) * in2[0];
502
503 out[2] = ((uint128_t) in1[0]) * in2[2] +
504 ((uint128_t) in1[1]) * in2[1] + ((uint128_t) in1[2]) * in2[0];
505
506 out[3] = ((uint128_t) in1[0]) * in2[3] +
507 ((uint128_t) in1[1]) * in2[2] +
508 ((uint128_t) in1[2]) * in2[1] + ((uint128_t) in1[3]) * in2[0];
509
510 out[4] = ((uint128_t) in1[0]) * in2[4] +
511 ((uint128_t) in1[1]) * in2[3] +
512 ((uint128_t) in1[2]) * in2[2] +
513 ((uint128_t) in1[3]) * in2[1] + ((uint128_t) in1[4]) * in2[0];
514
515 out[5] = ((uint128_t) in1[0]) * in2[5] +
516 ((uint128_t) in1[1]) * in2[4] +
517 ((uint128_t) in1[2]) * in2[3] +
518 ((uint128_t) in1[3]) * in2[2] +
519 ((uint128_t) in1[4]) * in2[1] + ((uint128_t) in1[5]) * in2[0];
520
521 out[6] = ((uint128_t) in1[0]) * in2[6] +
522 ((uint128_t) in1[1]) * in2[5] +
523 ((uint128_t) in1[2]) * in2[4] +
524 ((uint128_t) in1[3]) * in2[3] +
525 ((uint128_t) in1[4]) * in2[2] +
526 ((uint128_t) in1[5]) * in2[1] + ((uint128_t) in1[6]) * in2[0];
527
528 out[7] = ((uint128_t) in1[0]) * in2[7] +
529 ((uint128_t) in1[1]) * in2[6] +
530 ((uint128_t) in1[2]) * in2[5] +
531 ((uint128_t) in1[3]) * in2[4] +
532 ((uint128_t) in1[4]) * in2[3] +
533 ((uint128_t) in1[5]) * in2[2] +
534 ((uint128_t) in1[6]) * in2[1] + ((uint128_t) in1[7]) * in2[0];
535
536 out[8] = ((uint128_t) in1[0]) * in2[8] +
537 ((uint128_t) in1[1]) * in2[7] +
538 ((uint128_t) in1[2]) * in2[6] +
539 ((uint128_t) in1[3]) * in2[5] +
540 ((uint128_t) in1[4]) * in2[4] +
541 ((uint128_t) in1[5]) * in2[3] +
542 ((uint128_t) in1[6]) * in2[2] +
543 ((uint128_t) in1[7]) * in2[1] + ((uint128_t) in1[8]) * in2[0];
544
545 /* See comment in felem_square about the use of in2x2 here */
546
547 out[0] += ((uint128_t) in1[1]) * in2x2[8] +
548 ((uint128_t) in1[2]) * in2x2[7] +
549 ((uint128_t) in1[3]) * in2x2[6] +
550 ((uint128_t) in1[4]) * in2x2[5] +
551 ((uint128_t) in1[5]) * in2x2[4] +
552 ((uint128_t) in1[6]) * in2x2[3] +
553 ((uint128_t) in1[7]) * in2x2[2] + ((uint128_t) in1[8]) * in2x2[1];
554
555 out[1] += ((uint128_t) in1[2]) * in2x2[8] +
556 ((uint128_t) in1[3]) * in2x2[7] +
557 ((uint128_t) in1[4]) * in2x2[6] +
558 ((uint128_t) in1[5]) * in2x2[5] +
559 ((uint128_t) in1[6]) * in2x2[4] +
560 ((uint128_t) in1[7]) * in2x2[3] + ((uint128_t) in1[8]) * in2x2[2];
561
562 out[2] += ((uint128_t) in1[3]) * in2x2[8] +
563 ((uint128_t) in1[4]) * in2x2[7] +
564 ((uint128_t) in1[5]) * in2x2[6] +
565 ((uint128_t) in1[6]) * in2x2[5] +
566 ((uint128_t) in1[7]) * in2x2[4] + ((uint128_t) in1[8]) * in2x2[3];
567
568 out[3] += ((uint128_t) in1[4]) * in2x2[8] +
569 ((uint128_t) in1[5]) * in2x2[7] +
570 ((uint128_t) in1[6]) * in2x2[6] +
571 ((uint128_t) in1[7]) * in2x2[5] + ((uint128_t) in1[8]) * in2x2[4];
572
573 out[4] += ((uint128_t) in1[5]) * in2x2[8] +
574 ((uint128_t) in1[6]) * in2x2[7] +
575 ((uint128_t) in1[7]) * in2x2[6] + ((uint128_t) in1[8]) * in2x2[5];
576
577 out[5] += ((uint128_t) in1[6]) * in2x2[8] +
578 ((uint128_t) in1[7]) * in2x2[7] + ((uint128_t) in1[8]) * in2x2[6];
579
580 out[6] += ((uint128_t) in1[7]) * in2x2[8] +
581 ((uint128_t) in1[8]) * in2x2[7];
582
583 out[7] += ((uint128_t) in1[8]) * in2x2[8];
584}
9c37519b
BM
585
586static const limb bottom52bits = 0xfffffffffffff;
587
6977c7e2
TH
588/*-
589 * felem_reduce converts a largefelem to an felem.
9c37519b
BM
590 * On entry:
591 * in[i] < 2^128
592 * On exit:
593 * out[i] < 2^59 + 2^14
594 */
595static void felem_reduce(felem out, const largefelem in)
ae5c8664
MC
596{
597 u64 overflow1, overflow2;
598
599 out[0] = ((limb) in[0]) & bottom58bits;
600 out[1] = ((limb) in[1]) & bottom58bits;
601 out[2] = ((limb) in[2]) & bottom58bits;
602 out[3] = ((limb) in[3]) & bottom58bits;
603 out[4] = ((limb) in[4]) & bottom58bits;
604 out[5] = ((limb) in[5]) & bottom58bits;
605 out[6] = ((limb) in[6]) & bottom58bits;
606 out[7] = ((limb) in[7]) & bottom58bits;
607 out[8] = ((limb) in[8]) & bottom58bits;
608
609 /* out[i] < 2^58 */
610
611 out[1] += ((limb) in[0]) >> 58;
612 out[1] += (((limb) (in[0] >> 64)) & bottom52bits) << 6;
83975c80
MC
613 /*-
614 * out[1] < 2^58 + 2^6 + 2^58
615 * = 2^59 + 2^6
616 */
ae5c8664
MC
617 out[2] += ((limb) (in[0] >> 64)) >> 52;
618
619 out[2] += ((limb) in[1]) >> 58;
620 out[2] += (((limb) (in[1] >> 64)) & bottom52bits) << 6;
621 out[3] += ((limb) (in[1] >> 64)) >> 52;
622
623 out[3] += ((limb) in[2]) >> 58;
624 out[3] += (((limb) (in[2] >> 64)) & bottom52bits) << 6;
625 out[4] += ((limb) (in[2] >> 64)) >> 52;
626
627 out[4] += ((limb) in[3]) >> 58;
628 out[4] += (((limb) (in[3] >> 64)) & bottom52bits) << 6;
629 out[5] += ((limb) (in[3] >> 64)) >> 52;
630
631 out[5] += ((limb) in[4]) >> 58;
632 out[5] += (((limb) (in[4] >> 64)) & bottom52bits) << 6;
633 out[6] += ((limb) (in[4] >> 64)) >> 52;
634
635 out[6] += ((limb) in[5]) >> 58;
636 out[6] += (((limb) (in[5] >> 64)) & bottom52bits) << 6;
637 out[7] += ((limb) (in[5] >> 64)) >> 52;
638
639 out[7] += ((limb) in[6]) >> 58;
640 out[7] += (((limb) (in[6] >> 64)) & bottom52bits) << 6;
641 out[8] += ((limb) (in[6] >> 64)) >> 52;
642
643 out[8] += ((limb) in[7]) >> 58;
644 out[8] += (((limb) (in[7] >> 64)) & bottom52bits) << 6;
83975c80
MC
645 /*-
646 * out[x > 1] < 2^58 + 2^6 + 2^58 + 2^12
647 * < 2^59 + 2^13
648 */
ae5c8664
MC
649 overflow1 = ((limb) (in[7] >> 64)) >> 52;
650
651 overflow1 += ((limb) in[8]) >> 58;
652 overflow1 += (((limb) (in[8] >> 64)) & bottom52bits) << 6;
653 overflow2 = ((limb) (in[8] >> 64)) >> 52;
654
655 overflow1 <<= 1; /* overflow1 < 2^13 + 2^7 + 2^59 */
656 overflow2 <<= 1; /* overflow2 < 2^13 */
657
658 out[0] += overflow1; /* out[0] < 2^60 */
659 out[1] += overflow2; /* out[1] < 2^59 + 2^6 + 2^13 */
660
661 out[1] += out[0] >> 58;
662 out[0] &= bottom58bits;
83975c80
MC
663 /*-
664 * out[0] < 2^58
665 * out[1] < 2^59 + 2^6 + 2^13 + 2^2
666 * < 2^59 + 2^14
667 */
ae5c8664 668}
9c37519b
BM
669
670static void felem_square_reduce(felem out, const felem in)
ae5c8664
MC
671{
672 largefelem tmp;
673 felem_square(tmp, in);
674 felem_reduce(out, tmp);
675}
9c37519b
BM
676
677static void felem_mul_reduce(felem out, const felem in1, const felem in2)
ae5c8664
MC
678{
679 largefelem tmp;
680 felem_mul(tmp, in1, in2);
681 felem_reduce(out, tmp);
682}
9c37519b 683
6977c7e2
TH
684/*-
685 * felem_inv calculates |out| = |in|^{-1}
9c37519b
BM
686 *
687 * Based on Fermat's Little Theorem:
688 * a^p = a (mod p)
689 * a^{p-1} = 1 (mod p)
690 * a^{p-2} = a^{-1} (mod p)
691 */
692static void felem_inv(felem out, const felem in)
ae5c8664
MC
693{
694 felem ftmp, ftmp2, ftmp3, ftmp4;
695 largefelem tmp;
696 unsigned i;
697
698 felem_square(tmp, in);
699 felem_reduce(ftmp, tmp); /* 2^1 */
700 felem_mul(tmp, in, ftmp);
701 felem_reduce(ftmp, tmp); /* 2^2 - 2^0 */
702 felem_assign(ftmp2, ftmp);
703 felem_square(tmp, ftmp);
704 felem_reduce(ftmp, tmp); /* 2^3 - 2^1 */
705 felem_mul(tmp, in, ftmp);
706 felem_reduce(ftmp, tmp); /* 2^3 - 2^0 */
707 felem_square(tmp, ftmp);
708 felem_reduce(ftmp, tmp); /* 2^4 - 2^1 */
709
710 felem_square(tmp, ftmp2);
711 felem_reduce(ftmp3, tmp); /* 2^3 - 2^1 */
712 felem_square(tmp, ftmp3);
713 felem_reduce(ftmp3, tmp); /* 2^4 - 2^2 */
714 felem_mul(tmp, ftmp3, ftmp2);
715 felem_reduce(ftmp3, tmp); /* 2^4 - 2^0 */
716
717 felem_assign(ftmp2, ftmp3);
718 felem_square(tmp, ftmp3);
719 felem_reduce(ftmp3, tmp); /* 2^5 - 2^1 */
720 felem_square(tmp, ftmp3);
721 felem_reduce(ftmp3, tmp); /* 2^6 - 2^2 */
722 felem_square(tmp, ftmp3);
723 felem_reduce(ftmp3, tmp); /* 2^7 - 2^3 */
724 felem_square(tmp, ftmp3);
725 felem_reduce(ftmp3, tmp); /* 2^8 - 2^4 */
726 felem_assign(ftmp4, ftmp3);
727 felem_mul(tmp, ftmp3, ftmp);
728 felem_reduce(ftmp4, tmp); /* 2^8 - 2^1 */
729 felem_square(tmp, ftmp4);
730 felem_reduce(ftmp4, tmp); /* 2^9 - 2^2 */
731 felem_mul(tmp, ftmp3, ftmp2);
732 felem_reduce(ftmp3, tmp); /* 2^8 - 2^0 */
733 felem_assign(ftmp2, ftmp3);
734
735 for (i = 0; i < 8; i++) {
736 felem_square(tmp, ftmp3);
737 felem_reduce(ftmp3, tmp); /* 2^16 - 2^8 */
738 }
739 felem_mul(tmp, ftmp3, ftmp2);
740 felem_reduce(ftmp3, tmp); /* 2^16 - 2^0 */
741 felem_assign(ftmp2, ftmp3);
742
743 for (i = 0; i < 16; i++) {
744 felem_square(tmp, ftmp3);
745 felem_reduce(ftmp3, tmp); /* 2^32 - 2^16 */
746 }
747 felem_mul(tmp, ftmp3, ftmp2);
748 felem_reduce(ftmp3, tmp); /* 2^32 - 2^0 */
749 felem_assign(ftmp2, ftmp3);
750
751 for (i = 0; i < 32; i++) {
752 felem_square(tmp, ftmp3);
753 felem_reduce(ftmp3, tmp); /* 2^64 - 2^32 */
754 }
755 felem_mul(tmp, ftmp3, ftmp2);
756 felem_reduce(ftmp3, tmp); /* 2^64 - 2^0 */
757 felem_assign(ftmp2, ftmp3);
758
759 for (i = 0; i < 64; i++) {
760 felem_square(tmp, ftmp3);
761 felem_reduce(ftmp3, tmp); /* 2^128 - 2^64 */
762 }
763 felem_mul(tmp, ftmp3, ftmp2);
764 felem_reduce(ftmp3, tmp); /* 2^128 - 2^0 */
765 felem_assign(ftmp2, ftmp3);
766
767 for (i = 0; i < 128; i++) {
768 felem_square(tmp, ftmp3);
769 felem_reduce(ftmp3, tmp); /* 2^256 - 2^128 */
770 }
771 felem_mul(tmp, ftmp3, ftmp2);
772 felem_reduce(ftmp3, tmp); /* 2^256 - 2^0 */
773 felem_assign(ftmp2, ftmp3);
774
775 for (i = 0; i < 256; i++) {
776 felem_square(tmp, ftmp3);
777 felem_reduce(ftmp3, tmp); /* 2^512 - 2^256 */
778 }
779 felem_mul(tmp, ftmp3, ftmp2);
780 felem_reduce(ftmp3, tmp); /* 2^512 - 2^0 */
781
782 for (i = 0; i < 9; i++) {
783 felem_square(tmp, ftmp3);
784 felem_reduce(ftmp3, tmp); /* 2^521 - 2^9 */
785 }
786 felem_mul(tmp, ftmp3, ftmp4);
787 felem_reduce(ftmp3, tmp); /* 2^512 - 2^2 */
788 felem_mul(tmp, ftmp3, in);
789 felem_reduce(out, tmp); /* 2^512 - 3 */
9c37519b
BM
790}
791
792/* This is 2^521-1, expressed as an felem */
ae5c8664
MC
793static const felem kPrime = {
794 0x03ffffffffffffff, 0x03ffffffffffffff, 0x03ffffffffffffff,
795 0x03ffffffffffffff, 0x03ffffffffffffff, 0x03ffffffffffffff,
796 0x03ffffffffffffff, 0x03ffffffffffffff, 0x01ffffffffffffff
797};
9c37519b 798
6977c7e2
TH
799/*-
800 * felem_is_zero returns a limb with all bits set if |in| == 0 (mod p) and 0
9c37519b
BM
801 * otherwise.
802 * On entry:
803 * in[i] < 2^59 + 2^14
804 */
805static limb felem_is_zero(const felem in)
ae5c8664
MC
806{
807 felem ftmp;
808 limb is_zero, is_p;
809 felem_assign(ftmp, in);
810
811 ftmp[0] += ftmp[8] >> 57;
812 ftmp[8] &= bottom57bits;
813 /* ftmp[8] < 2^57 */
814 ftmp[1] += ftmp[0] >> 58;
815 ftmp[0] &= bottom58bits;
816 ftmp[2] += ftmp[1] >> 58;
817 ftmp[1] &= bottom58bits;
818 ftmp[3] += ftmp[2] >> 58;
819 ftmp[2] &= bottom58bits;
820 ftmp[4] += ftmp[3] >> 58;
821 ftmp[3] &= bottom58bits;
822 ftmp[5] += ftmp[4] >> 58;
823 ftmp[4] &= bottom58bits;
824 ftmp[6] += ftmp[5] >> 58;
825 ftmp[5] &= bottom58bits;
826 ftmp[7] += ftmp[6] >> 58;
827 ftmp[6] &= bottom58bits;
828 ftmp[8] += ftmp[7] >> 58;
829 ftmp[7] &= bottom58bits;
830 /* ftmp[8] < 2^57 + 4 */
831
832 /*
833 * The ninth limb of 2*(2^521-1) is 0x03ffffffffffffff, which is greater
834 * than our bound for ftmp[8]. Therefore we only have to check if the
835 * zero is zero or 2^521-1.
836 */
837
838 is_zero = 0;
839 is_zero |= ftmp[0];
840 is_zero |= ftmp[1];
841 is_zero |= ftmp[2];
842 is_zero |= ftmp[3];
843 is_zero |= ftmp[4];
844 is_zero |= ftmp[5];
845 is_zero |= ftmp[6];
846 is_zero |= ftmp[7];
847 is_zero |= ftmp[8];
848
849 is_zero--;
850 /*
851 * We know that ftmp[i] < 2^63, therefore the only way that the top bit
852 * can be set is if is_zero was 0 before the decrement.
853 */
58cc2962 854 is_zero = 0 - (is_zero >> 63);
ae5c8664
MC
855
856 is_p = ftmp[0] ^ kPrime[0];
857 is_p |= ftmp[1] ^ kPrime[1];
858 is_p |= ftmp[2] ^ kPrime[2];
859 is_p |= ftmp[3] ^ kPrime[3];
860 is_p |= ftmp[4] ^ kPrime[4];
861 is_p |= ftmp[5] ^ kPrime[5];
862 is_p |= ftmp[6] ^ kPrime[6];
863 is_p |= ftmp[7] ^ kPrime[7];
864 is_p |= ftmp[8] ^ kPrime[8];
865
866 is_p--;
58cc2962 867 is_p = 0 - (is_p >> 63);
ae5c8664
MC
868
869 is_zero |= is_p;
870 return is_zero;
871}
9c37519b 872
e4c2e4cb 873static int felem_is_zero_int(const void *in)
ae5c8664
MC
874{
875 return (int)(felem_is_zero(in) & ((limb) 1));
876}
9c37519b 877
6977c7e2
TH
878/*-
879 * felem_contract converts |in| to its unique, minimal representation.
9c37519b
BM
880 * On entry:
881 * in[i] < 2^59 + 2^14
882 */
883static void felem_contract(felem out, const felem in)
ae5c8664
MC
884{
885 limb is_p, is_greater, sign;
886 static const limb two58 = ((limb) 1) << 58;
887
888 felem_assign(out, in);
889
890 out[0] += out[8] >> 57;
891 out[8] &= bottom57bits;
892 /* out[8] < 2^57 */
893 out[1] += out[0] >> 58;
894 out[0] &= bottom58bits;
895 out[2] += out[1] >> 58;
896 out[1] &= bottom58bits;
897 out[3] += out[2] >> 58;
898 out[2] &= bottom58bits;
899 out[4] += out[3] >> 58;
900 out[3] &= bottom58bits;
901 out[5] += out[4] >> 58;
902 out[4] &= bottom58bits;
903 out[6] += out[5] >> 58;
904 out[5] &= bottom58bits;
905 out[7] += out[6] >> 58;
906 out[6] &= bottom58bits;
907 out[8] += out[7] >> 58;
908 out[7] &= bottom58bits;
909 /* out[8] < 2^57 + 4 */
910
911 /*
912 * If the value is greater than 2^521-1 then we have to subtract 2^521-1
913 * out. See the comments in felem_is_zero regarding why we don't test for
914 * other multiples of the prime.
915 */
916
917 /*
918 * First, if |out| is equal to 2^521-1, we subtract it out to get zero.
919 */
920
921 is_p = out[0] ^ kPrime[0];
922 is_p |= out[1] ^ kPrime[1];
923 is_p |= out[2] ^ kPrime[2];
924 is_p |= out[3] ^ kPrime[3];
925 is_p |= out[4] ^ kPrime[4];
926 is_p |= out[5] ^ kPrime[5];
927 is_p |= out[6] ^ kPrime[6];
928 is_p |= out[7] ^ kPrime[7];
929 is_p |= out[8] ^ kPrime[8];
930
931 is_p--;
932 is_p &= is_p << 32;
933 is_p &= is_p << 16;
934 is_p &= is_p << 8;
935 is_p &= is_p << 4;
936 is_p &= is_p << 2;
937 is_p &= is_p << 1;
58cc2962 938 is_p = 0 - (is_p >> 63);
ae5c8664
MC
939 is_p = ~is_p;
940
941 /* is_p is 0 iff |out| == 2^521-1 and all ones otherwise */
942
943 out[0] &= is_p;
944 out[1] &= is_p;
945 out[2] &= is_p;
946 out[3] &= is_p;
947 out[4] &= is_p;
948 out[5] &= is_p;
949 out[6] &= is_p;
950 out[7] &= is_p;
951 out[8] &= is_p;
952
953 /*
954 * In order to test that |out| >= 2^521-1 we need only test if out[8] >>
955 * 57 is greater than zero as (2^521-1) + x >= 2^522
956 */
957 is_greater = out[8] >> 57;
958 is_greater |= is_greater << 32;
959 is_greater |= is_greater << 16;
960 is_greater |= is_greater << 8;
961 is_greater |= is_greater << 4;
962 is_greater |= is_greater << 2;
963 is_greater |= is_greater << 1;
58cc2962 964 is_greater = 0 - (is_greater >> 63);
ae5c8664
MC
965
966 out[0] -= kPrime[0] & is_greater;
967 out[1] -= kPrime[1] & is_greater;
968 out[2] -= kPrime[2] & is_greater;
969 out[3] -= kPrime[3] & is_greater;
970 out[4] -= kPrime[4] & is_greater;
971 out[5] -= kPrime[5] & is_greater;
972 out[6] -= kPrime[6] & is_greater;
973 out[7] -= kPrime[7] & is_greater;
974 out[8] -= kPrime[8] & is_greater;
975
976 /* Eliminate negative coefficients */
977 sign = -(out[0] >> 63);
978 out[0] += (two58 & sign);
979 out[1] -= (1 & sign);
980 sign = -(out[1] >> 63);
981 out[1] += (two58 & sign);
982 out[2] -= (1 & sign);
983 sign = -(out[2] >> 63);
984 out[2] += (two58 & sign);
985 out[3] -= (1 & sign);
986 sign = -(out[3] >> 63);
987 out[3] += (two58 & sign);
988 out[4] -= (1 & sign);
989 sign = -(out[4] >> 63);
990 out[4] += (two58 & sign);
991 out[5] -= (1 & sign);
992 sign = -(out[0] >> 63);
993 out[5] += (two58 & sign);
994 out[6] -= (1 & sign);
995 sign = -(out[6] >> 63);
996 out[6] += (two58 & sign);
997 out[7] -= (1 & sign);
998 sign = -(out[7] >> 63);
999 out[7] += (two58 & sign);
1000 out[8] -= (1 & sign);
1001 sign = -(out[5] >> 63);
1002 out[5] += (two58 & sign);
1003 out[6] -= (1 & sign);
1004 sign = -(out[6] >> 63);
1005 out[6] += (two58 & sign);
1006 out[7] -= (1 & sign);
1007 sign = -(out[7] >> 63);
1008 out[7] += (two58 & sign);
1009 out[8] -= (1 & sign);
1010}
9c37519b 1011
6977c7e2
TH
1012/*-
1013 * Group operations
9c37519b
BM
1014 * ----------------
1015 *
1016 * Building on top of the field operations we have the operations on the
1017 * elliptic curve group itself. Points on the curve are represented in Jacobian
1018 * coordinates */
1019
6977c7e2
TH
1020/*-
1021 * point_double calcuates 2*(x_in, y_in, z_in)
9c37519b
BM
1022 *
1023 * The method is taken from:
1024 * http://hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-3.html#doubling-dbl-2001-b
1025 *
1026 * Outputs can equal corresponding inputs, i.e., x_out == x_in is allowed.
1027 * while x_out == y_in is not (maybe this works, but it's not tested). */
1028static void
1029point_double(felem x_out, felem y_out, felem z_out,
ae5c8664
MC
1030 const felem x_in, const felem y_in, const felem z_in)
1031{
1032 largefelem tmp, tmp2;
1033 felem delta, gamma, beta, alpha, ftmp, ftmp2;
1034
1035 felem_assign(ftmp, x_in);
1036 felem_assign(ftmp2, x_in);
1037
1038 /* delta = z^2 */
1039 felem_square(tmp, z_in);
1040 felem_reduce(delta, tmp); /* delta[i] < 2^59 + 2^14 */
1041
1042 /* gamma = y^2 */
1043 felem_square(tmp, y_in);
1044 felem_reduce(gamma, tmp); /* gamma[i] < 2^59 + 2^14 */
1045
1046 /* beta = x*gamma */
1047 felem_mul(tmp, x_in, gamma);
1048 felem_reduce(beta, tmp); /* beta[i] < 2^59 + 2^14 */
1049
1050 /* alpha = 3*(x-delta)*(x+delta) */
1051 felem_diff64(ftmp, delta);
1052 /* ftmp[i] < 2^61 */
1053 felem_sum64(ftmp2, delta);
1054 /* ftmp2[i] < 2^60 + 2^15 */
1055 felem_scalar64(ftmp2, 3);
1056 /* ftmp2[i] < 3*2^60 + 3*2^15 */
1057 felem_mul(tmp, ftmp, ftmp2);
83975c80
MC
1058 /*-
1059 * tmp[i] < 17(3*2^121 + 3*2^76)
1060 * = 61*2^121 + 61*2^76
1061 * < 64*2^121 + 64*2^76
1062 * = 2^127 + 2^82
1063 * < 2^128
1064 */
ae5c8664
MC
1065 felem_reduce(alpha, tmp);
1066
1067 /* x' = alpha^2 - 8*beta */
1068 felem_square(tmp, alpha);
1069 /*
1070 * tmp[i] < 17*2^120 < 2^125
1071 */
1072 felem_assign(ftmp, beta);
1073 felem_scalar64(ftmp, 8);
1074 /* ftmp[i] < 2^62 + 2^17 */
1075 felem_diff_128_64(tmp, ftmp);
1076 /* tmp[i] < 2^125 + 2^63 + 2^62 + 2^17 */
1077 felem_reduce(x_out, tmp);
1078
1079 /* z' = (y + z)^2 - gamma - delta */
1080 felem_sum64(delta, gamma);
1081 /* delta[i] < 2^60 + 2^15 */
1082 felem_assign(ftmp, y_in);
1083 felem_sum64(ftmp, z_in);
1084 /* ftmp[i] < 2^60 + 2^15 */
1085 felem_square(tmp, ftmp);
1086 /*
1087 * tmp[i] < 17(2^122) < 2^127
1088 */
1089 felem_diff_128_64(tmp, delta);
1090 /* tmp[i] < 2^127 + 2^63 */
1091 felem_reduce(z_out, tmp);
1092
1093 /* y' = alpha*(4*beta - x') - 8*gamma^2 */
1094 felem_scalar64(beta, 4);
1095 /* beta[i] < 2^61 + 2^16 */
1096 felem_diff64(beta, x_out);
1097 /* beta[i] < 2^61 + 2^60 + 2^16 */
1098 felem_mul(tmp, alpha, beta);
83975c80
MC
1099 /*-
1100 * tmp[i] < 17*((2^59 + 2^14)(2^61 + 2^60 + 2^16))
1101 * = 17*(2^120 + 2^75 + 2^119 + 2^74 + 2^75 + 2^30)
1102 * = 17*(2^120 + 2^119 + 2^76 + 2^74 + 2^30)
1103 * < 2^128
1104 */
ae5c8664 1105 felem_square(tmp2, gamma);
83975c80
MC
1106 /*-
1107 * tmp2[i] < 17*(2^59 + 2^14)^2
1108 * = 17*(2^118 + 2^74 + 2^28)
1109 */
ae5c8664 1110 felem_scalar128(tmp2, 8);
83975c80
MC
1111 /*-
1112 * tmp2[i] < 8*17*(2^118 + 2^74 + 2^28)
1113 * = 2^125 + 2^121 + 2^81 + 2^77 + 2^35 + 2^31
1114 * < 2^126
1115 */
ae5c8664 1116 felem_diff128(tmp, tmp2);
83975c80
MC
1117 /*-
1118 * tmp[i] < 2^127 - 2^69 + 17(2^120 + 2^119 + 2^76 + 2^74 + 2^30)
1119 * = 2^127 + 2^124 + 2^122 + 2^120 + 2^118 + 2^80 + 2^78 + 2^76 +
1120 * 2^74 + 2^69 + 2^34 + 2^30
1121 * < 2^128
1122 */
ae5c8664
MC
1123 felem_reduce(y_out, tmp);
1124}
9c37519b
BM
1125
1126/* copy_conditional copies in to out iff mask is all ones. */
ae5c8664
MC
1127static void copy_conditional(felem out, const felem in, limb mask)
1128{
1129 unsigned i;
1130 for (i = 0; i < NLIMBS; ++i) {
1131 const limb tmp = mask & (in[i] ^ out[i]);
1132 out[i] ^= tmp;
1133 }
1134}
9c37519b 1135
6977c7e2
TH
1136/*-
1137 * point_add calcuates (x1, y1, z1) + (x2, y2, z2)
9c37519b
BM
1138 *
1139 * The method is taken from
1140 * http://hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-3.html#addition-add-2007-bl,
1141 * adapted for mixed addition (z2 = 1, or z2 = 0 for the point at infinity).
1142 *
1143 * This function includes a branch for checking whether the two input points
1144 * are equal (while not equal to the point at infinity). This case never
1145 * happens during single point multiplication, so there is no timing leak for
1146 * ECDH or ECDSA signing. */
1147static void point_add(felem x3, felem y3, felem z3,
ae5c8664
MC
1148 const felem x1, const felem y1, const felem z1,
1149 const int mixed, const felem x2, const felem y2,
1150 const felem z2)
1151{
1152 felem ftmp, ftmp2, ftmp3, ftmp4, ftmp5, ftmp6, x_out, y_out, z_out;
1153 largefelem tmp, tmp2;
1154 limb x_equal, y_equal, z1_is_zero, z2_is_zero;
1155
1156 z1_is_zero = felem_is_zero(z1);
1157 z2_is_zero = felem_is_zero(z2);
1158
1159 /* ftmp = z1z1 = z1**2 */
1160 felem_square(tmp, z1);
1161 felem_reduce(ftmp, tmp);
1162
1163 if (!mixed) {
1164 /* ftmp2 = z2z2 = z2**2 */
1165 felem_square(tmp, z2);
1166 felem_reduce(ftmp2, tmp);
1167
1168 /* u1 = ftmp3 = x1*z2z2 */
1169 felem_mul(tmp, x1, ftmp2);
1170 felem_reduce(ftmp3, tmp);
1171
1172 /* ftmp5 = z1 + z2 */
1173 felem_assign(ftmp5, z1);
1174 felem_sum64(ftmp5, z2);
1175 /* ftmp5[i] < 2^61 */
1176
1177 /* ftmp5 = (z1 + z2)**2 - z1z1 - z2z2 = 2*z1z2 */
1178 felem_square(tmp, ftmp5);
1179 /* tmp[i] < 17*2^122 */
1180 felem_diff_128_64(tmp, ftmp);
1181 /* tmp[i] < 17*2^122 + 2^63 */
1182 felem_diff_128_64(tmp, ftmp2);
1183 /* tmp[i] < 17*2^122 + 2^64 */
1184 felem_reduce(ftmp5, tmp);
1185
1186 /* ftmp2 = z2 * z2z2 */
1187 felem_mul(tmp, ftmp2, z2);
1188 felem_reduce(ftmp2, tmp);
1189
1190 /* s1 = ftmp6 = y1 * z2**3 */
1191 felem_mul(tmp, y1, ftmp2);
1192 felem_reduce(ftmp6, tmp);
1193 } else {
1194 /*
1195 * We'll assume z2 = 1 (special case z2 = 0 is handled later)
1196 */
1197
1198 /* u1 = ftmp3 = x1*z2z2 */
1199 felem_assign(ftmp3, x1);
1200
1201 /* ftmp5 = 2*z1z2 */
1202 felem_scalar(ftmp5, z1, 2);
1203
1204 /* s1 = ftmp6 = y1 * z2**3 */
1205 felem_assign(ftmp6, y1);
1206 }
1207
1208 /* u2 = x2*z1z1 */
1209 felem_mul(tmp, x2, ftmp);
1210 /* tmp[i] < 17*2^120 */
1211
1212 /* h = ftmp4 = u2 - u1 */
1213 felem_diff_128_64(tmp, ftmp3);
1214 /* tmp[i] < 17*2^120 + 2^63 */
1215 felem_reduce(ftmp4, tmp);
1216
1217 x_equal = felem_is_zero(ftmp4);
1218
1219 /* z_out = ftmp5 * h */
1220 felem_mul(tmp, ftmp5, ftmp4);
1221 felem_reduce(z_out, tmp);
1222
1223 /* ftmp = z1 * z1z1 */
1224 felem_mul(tmp, ftmp, z1);
1225 felem_reduce(ftmp, tmp);
1226
1227 /* s2 = tmp = y2 * z1**3 */
1228 felem_mul(tmp, y2, ftmp);
1229 /* tmp[i] < 17*2^120 */
1230
1231 /* r = ftmp5 = (s2 - s1)*2 */
1232 felem_diff_128_64(tmp, ftmp6);
1233 /* tmp[i] < 17*2^120 + 2^63 */
1234 felem_reduce(ftmp5, tmp);
1235 y_equal = felem_is_zero(ftmp5);
1236 felem_scalar64(ftmp5, 2);
1237 /* ftmp5[i] < 2^61 */
1238
1239 if (x_equal && y_equal && !z1_is_zero && !z2_is_zero) {
1240 point_double(x3, y3, z3, x1, y1, z1);
1241 return;
1242 }
1243
1244 /* I = ftmp = (2h)**2 */
1245 felem_assign(ftmp, ftmp4);
1246 felem_scalar64(ftmp, 2);
1247 /* ftmp[i] < 2^61 */
1248 felem_square(tmp, ftmp);
1249 /* tmp[i] < 17*2^122 */
1250 felem_reduce(ftmp, tmp);
1251
1252 /* J = ftmp2 = h * I */
1253 felem_mul(tmp, ftmp4, ftmp);
1254 felem_reduce(ftmp2, tmp);
1255
1256 /* V = ftmp4 = U1 * I */
1257 felem_mul(tmp, ftmp3, ftmp);
1258 felem_reduce(ftmp4, tmp);
1259
1260 /* x_out = r**2 - J - 2V */
1261 felem_square(tmp, ftmp5);
1262 /* tmp[i] < 17*2^122 */
1263 felem_diff_128_64(tmp, ftmp2);
1264 /* tmp[i] < 17*2^122 + 2^63 */
1265 felem_assign(ftmp3, ftmp4);
1266 felem_scalar64(ftmp4, 2);
1267 /* ftmp4[i] < 2^61 */
1268 felem_diff_128_64(tmp, ftmp4);
1269 /* tmp[i] < 17*2^122 + 2^64 */
1270 felem_reduce(x_out, tmp);
1271
1272 /* y_out = r(V-x_out) - 2 * s1 * J */
1273 felem_diff64(ftmp3, x_out);
1274 /*
1275 * ftmp3[i] < 2^60 + 2^60 = 2^61
1276 */
1277 felem_mul(tmp, ftmp5, ftmp3);
1278 /* tmp[i] < 17*2^122 */
1279 felem_mul(tmp2, ftmp6, ftmp2);
1280 /* tmp2[i] < 17*2^120 */
1281 felem_scalar128(tmp2, 2);
1282 /* tmp2[i] < 17*2^121 */
1283 felem_diff128(tmp, tmp2);
1284 /*-
1285 * tmp[i] < 2^127 - 2^69 + 17*2^122
1286 * = 2^126 - 2^122 - 2^6 - 2^2 - 1
1287 * < 2^127
1288 */
1289 felem_reduce(y_out, tmp);
1290
1291 copy_conditional(x_out, x2, z1_is_zero);
1292 copy_conditional(x_out, x1, z2_is_zero);
1293 copy_conditional(y_out, y2, z1_is_zero);
1294 copy_conditional(y_out, y1, z2_is_zero);
1295 copy_conditional(z_out, z2, z1_is_zero);
1296 copy_conditional(z_out, z1, z2_is_zero);
1297 felem_assign(x3, x_out);
1298 felem_assign(y3, y_out);
1299 felem_assign(z3, z_out);
1300}
9c37519b 1301
6977c7e2
TH
1302/*-
1303 * Base point pre computation
9c37519b
BM
1304 * --------------------------
1305 *
1306 * Two different sorts of precomputed tables are used in the following code.
1307 * Each contain various points on the curve, where each point is three field
1308 * elements (x, y, z).
1309 *
1310 * For the base point table, z is usually 1 (0 for the point at infinity).
1311 * This table has 16 elements:
1312 * index | bits | point
1313 * ------+---------+------------------------------
1314 * 0 | 0 0 0 0 | 0G
1315 * 1 | 0 0 0 1 | 1G
1316 * 2 | 0 0 1 0 | 2^130G
1317 * 3 | 0 0 1 1 | (2^130 + 1)G
1318 * 4 | 0 1 0 0 | 2^260G
1319 * 5 | 0 1 0 1 | (2^260 + 1)G
1320 * 6 | 0 1 1 0 | (2^260 + 2^130)G
1321 * 7 | 0 1 1 1 | (2^260 + 2^130 + 1)G
1322 * 8 | 1 0 0 0 | 2^390G
1323 * 9 | 1 0 0 1 | (2^390 + 1)G
1324 * 10 | 1 0 1 0 | (2^390 + 2^130)G
1325 * 11 | 1 0 1 1 | (2^390 + 2^130 + 1)G
1326 * 12 | 1 1 0 0 | (2^390 + 2^260)G
1327 * 13 | 1 1 0 1 | (2^390 + 2^260 + 1)G
1328 * 14 | 1 1 1 0 | (2^390 + 2^260 + 2^130)G
1329 * 15 | 1 1 1 1 | (2^390 + 2^260 + 2^130 + 1)G
1330 *
1331 * The reason for this is so that we can clock bits into four different
1332 * locations when doing simple scalar multiplies against the base point.
1333 *
1334 * Tables for other points have table[i] = iG for i in 0 .. 16. */
1335
1336/* gmul is the table of precomputed base points */
ae5c8664
MC
1337static const felem gmul[16][3] = { {{0, 0, 0, 0, 0, 0, 0, 0, 0},
1338 {0, 0, 0, 0, 0, 0, 0, 0, 0},
1339 {0, 0, 0, 0, 0, 0, 0, 0, 0}},
1340{{0x017e7e31c2e5bd66, 0x022cf0615a90a6fe, 0x00127a2ffa8de334,
1341 0x01dfbf9d64a3f877, 0x006b4d3dbaa14b5e, 0x014fed487e0a2bd8,
1342 0x015b4429c6481390, 0x03a73678fb2d988e, 0x00c6858e06b70404},
1343 {0x00be94769fd16650, 0x031c21a89cb09022, 0x039013fad0761353,
1344 0x02657bd099031542, 0x03273e662c97ee72, 0x01e6d11a05ebef45,
1345 0x03d1bd998f544495, 0x03001172297ed0b1, 0x011839296a789a3b},
1346 {1, 0, 0, 0, 0, 0, 0, 0, 0}},
1347{{0x0373faacbc875bae, 0x00f325023721c671, 0x00f666fd3dbde5ad,
1348 0x01a6932363f88ea7, 0x01fc6d9e13f9c47b, 0x03bcbffc2bbf734e,
1349 0x013ee3c3647f3a92, 0x029409fefe75d07d, 0x00ef9199963d85e5},
1350 {0x011173743ad5b178, 0x02499c7c21bf7d46, 0x035beaeabb8b1a58,
1351 0x00f989c4752ea0a3, 0x0101e1de48a9c1a3, 0x01a20076be28ba6c,
1352 0x02f8052e5eb2de95, 0x01bfe8f82dea117c, 0x0160074d3c36ddb7},
1353 {1, 0, 0, 0, 0, 0, 0, 0, 0}},
1354{{0x012f3fc373393b3b, 0x03d3d6172f1419fa, 0x02adc943c0b86873,
1355 0x00d475584177952b, 0x012a4d1673750ee2, 0x00512517a0f13b0c,
1356 0x02b184671a7b1734, 0x0315b84236f1a50a, 0x00a4afc472edbdb9},
1357 {0x00152a7077f385c4, 0x03044007d8d1c2ee, 0x0065829d61d52b52,
1358 0x00494ff6b6631d0d, 0x00a11d94d5f06bcf, 0x02d2f89474d9282e,
1359 0x0241c5727c06eeb9, 0x0386928710fbdb9d, 0x01f883f727b0dfbe},
1360 {1, 0, 0, 0, 0, 0, 0, 0, 0}},
1361{{0x019b0c3c9185544d, 0x006243a37c9d97db, 0x02ee3cbe030a2ad2,
1362 0x00cfdd946bb51e0d, 0x0271c00932606b91, 0x03f817d1ec68c561,
1363 0x03f37009806a369c, 0x03c1f30baf184fd5, 0x01091022d6d2f065},
1364 {0x0292c583514c45ed, 0x0316fca51f9a286c, 0x00300af507c1489a,
1365 0x0295f69008298cf1, 0x02c0ed8274943d7b, 0x016509b9b47a431e,
1366 0x02bc9de9634868ce, 0x005b34929bffcb09, 0x000c1a0121681524},
1367 {1, 0, 0, 0, 0, 0, 0, 0, 0}},
1368{{0x0286abc0292fb9f2, 0x02665eee9805b3f7, 0x01ed7455f17f26d6,
1369 0x0346355b83175d13, 0x006284944cd0a097, 0x0191895bcdec5e51,
1370 0x02e288370afda7d9, 0x03b22312bfefa67a, 0x01d104d3fc0613fe},
1371 {0x0092421a12f7e47f, 0x0077a83fa373c501, 0x03bd25c5f696bd0d,
1372 0x035c41e4d5459761, 0x01ca0d1742b24f53, 0x00aaab27863a509c,
1373 0x018b6de47df73917, 0x025c0b771705cd01, 0x01fd51d566d760a7},
1374 {1, 0, 0, 0, 0, 0, 0, 0, 0}},
1375{{0x01dd92ff6b0d1dbd, 0x039c5e2e8f8afa69, 0x0261ed13242c3b27,
1376 0x0382c6e67026e6a0, 0x01d60b10be2089f9, 0x03c15f3dce86723f,
1377 0x03c764a32d2a062d, 0x017307eac0fad056, 0x018207c0b96c5256},
1378 {0x0196a16d60e13154, 0x03e6ce74c0267030, 0x00ddbf2b4e52a5aa,
1379 0x012738241bbf31c8, 0x00ebe8dc04685a28, 0x024c2ad6d380d4a2,
1380 0x035ee062a6e62d0e, 0x0029ed74af7d3a0f, 0x00eef32aec142ebd},
1381 {1, 0, 0, 0, 0, 0, 0, 0, 0}},
1382{{0x00c31ec398993b39, 0x03a9f45bcda68253, 0x00ac733c24c70890,
1383 0x00872b111401ff01, 0x01d178c23195eafb, 0x03bca2c816b87f74,
1384 0x0261a9af46fbad7a, 0x0324b2a8dd3d28f9, 0x00918121d8f24e23},
1385 {0x032bc8c1ca983cd7, 0x00d869dfb08fc8c6, 0x01693cb61fce1516,
1386 0x012a5ea68f4e88a8, 0x010869cab88d7ae3, 0x009081ad277ceee1,
1387 0x033a77166d064cdc, 0x03955235a1fb3a95, 0x01251a4a9b25b65e},
1388 {1, 0, 0, 0, 0, 0, 0, 0, 0}},
1389{{0x00148a3a1b27f40b, 0x0123186df1b31fdc, 0x00026e7beaad34ce,
1390 0x01db446ac1d3dbba, 0x0299c1a33437eaec, 0x024540610183cbb7,
1391 0x0173bb0e9ce92e46, 0x02b937e43921214b, 0x01ab0436a9bf01b5},
1392 {0x0383381640d46948, 0x008dacbf0e7f330f, 0x03602122bcc3f318,
1393 0x01ee596b200620d6, 0x03bd0585fda430b3, 0x014aed77fd123a83,
1394 0x005ace749e52f742, 0x0390fe041da2b842, 0x0189a8ceb3299242},
1395 {1, 0, 0, 0, 0, 0, 0, 0, 0}},
1396{{0x012a19d6b3282473, 0x00c0915918b423ce, 0x023a954eb94405ae,
1397 0x00529f692be26158, 0x0289fa1b6fa4b2aa, 0x0198ae4ceea346ef,
1398 0x0047d8cdfbdedd49, 0x00cc8c8953f0f6b8, 0x001424abbff49203},
1399 {0x0256732a1115a03a, 0x0351bc38665c6733, 0x03f7b950fb4a6447,
1400 0x000afffa94c22155, 0x025763d0a4dab540, 0x000511e92d4fc283,
1401 0x030a7e9eda0ee96c, 0x004c3cd93a28bf0a, 0x017edb3a8719217f},
1402 {1, 0, 0, 0, 0, 0, 0, 0, 0}},
1403{{0x011de5675a88e673, 0x031d7d0f5e567fbe, 0x0016b2062c970ae5,
1404 0x03f4a2be49d90aa7, 0x03cef0bd13822866, 0x03f0923dcf774a6c,
1405 0x0284bebc4f322f72, 0x016ab2645302bb2c, 0x01793f95dace0e2a},
1406 {0x010646e13527a28f, 0x01ca1babd59dc5e7, 0x01afedfd9a5595df,
1407 0x01f15785212ea6b1, 0x0324e5d64f6ae3f4, 0x02d680f526d00645,
1408 0x0127920fadf627a7, 0x03b383f75df4f684, 0x0089e0057e783b0a},
1409 {1, 0, 0, 0, 0, 0, 0, 0, 0}},
1410{{0x00f334b9eb3c26c6, 0x0298fdaa98568dce, 0x01c2d24843a82292,
1411 0x020bcb24fa1b0711, 0x02cbdb3d2b1875e6, 0x0014907598f89422,
1412 0x03abe3aa43b26664, 0x02cbf47f720bc168, 0x0133b5e73014b79b},
1413 {0x034aab5dab05779d, 0x00cdc5d71fee9abb, 0x0399f16bd4bd9d30,
1414 0x03582fa592d82647, 0x02be1cdfb775b0e9, 0x0034f7cea32e94cb,
1415 0x0335a7f08f56f286, 0x03b707e9565d1c8b, 0x0015c946ea5b614f},
1416 {1, 0, 0, 0, 0, 0, 0, 0, 0}},
1417{{0x024676f6cff72255, 0x00d14625cac96378, 0x00532b6008bc3767,
1418 0x01fc16721b985322, 0x023355ea1b091668, 0x029de7afdc0317c3,
1419 0x02fc8a7ca2da037c, 0x02de1217d74a6f30, 0x013f7173175b73bf},
1420 {0x0344913f441490b5, 0x0200f9e272b61eca, 0x0258a246b1dd55d2,
1421 0x03753db9ea496f36, 0x025e02937a09c5ef, 0x030cbd3d14012692,
1422 0x01793a67e70dc72a, 0x03ec1d37048a662e, 0x006550f700c32a8d},
1423 {1, 0, 0, 0, 0, 0, 0, 0, 0}},
1424{{0x00d3f48a347eba27, 0x008e636649b61bd8, 0x00d3b93716778fb3,
1425 0x004d1915757bd209, 0x019d5311a3da44e0, 0x016d1afcbbe6aade,
1426 0x0241bf5f73265616, 0x0384672e5d50d39b, 0x005009fee522b684},
1427 {0x029b4fab064435fe, 0x018868ee095bbb07, 0x01ea3d6936cc92b8,
1428 0x000608b00f78a2f3, 0x02db911073d1c20f, 0x018205938470100a,
1429 0x01f1e4964cbe6ff2, 0x021a19a29eed4663, 0x01414485f42afa81},
1430 {1, 0, 0, 0, 0, 0, 0, 0, 0}},
1431{{0x01612b3a17f63e34, 0x03813992885428e6, 0x022b3c215b5a9608,
1432 0x029b4057e19f2fcb, 0x0384059a587af7e6, 0x02d6400ace6fe610,
1433 0x029354d896e8e331, 0x00c047ee6dfba65e, 0x0037720542e9d49d},
1434 {0x02ce9eed7c5e9278, 0x0374ed703e79643b, 0x01316c54c4072006,
1435 0x005aaa09054b2ee8, 0x002824000c840d57, 0x03d4eba24771ed86,
1436 0x0189c50aabc3bdae, 0x0338c01541e15510, 0x00466d56e38eed42},
1437 {1, 0, 0, 0, 0, 0, 0, 0, 0}},
1438{{0x007efd8330ad8bd6, 0x02465ed48047710b, 0x0034c6606b215e0c,
1439 0x016ae30c53cbf839, 0x01fa17bd37161216, 0x018ead4e61ce8ab9,
1440 0x005482ed5f5dee46, 0x037543755bba1d7f, 0x005e5ac7e70a9d0f},
1441 {0x0117e1bb2fdcb2a2, 0x03deea36249f40c4, 0x028d09b4a6246cb7,
1442 0x03524b8855bcf756, 0x023d7d109d5ceb58, 0x0178e43e3223ef9c,
1443 0x0154536a0c6e966a, 0x037964d1286ee9fe, 0x0199bcd90e125055},
1444 {1, 0, 0, 0, 0, 0, 0, 0, 0}}
1445};
1446
1447/*
1448 * select_point selects the |idx|th point from a precomputation table and
1449 * copies it to out.
1450 */
cc6e2a59 1451 /* pre_comp below is of the size provided in |size| */
ae5c8664
MC
1452static void select_point(const limb idx, unsigned int size,
1453 const felem pre_comp[][3], felem out[3])
1454{
1455 unsigned i, j;
1456 limb *outlimbs = &out[0][0];
1457 memset(outlimbs, 0, 3 * sizeof(felem));
1458
1459 for (i = 0; i < size; i++) {
1460 const limb *inlimbs = &pre_comp[i][0][0];
1461 limb mask = i ^ idx;
1462 mask |= mask >> 4;
1463 mask |= mask >> 2;
1464 mask |= mask >> 1;
1465 mask &= 1;
1466 mask--;
1467 for (j = 0; j < NLIMBS * 3; j++)
1468 outlimbs[j] |= inlimbs[j] & mask;
1469 }
1470}
9c37519b
BM
1471
1472/* get_bit returns the |i|th bit in |in| */
1473static char get_bit(const felem_bytearray in, int i)
ae5c8664
MC
1474{
1475 if (i < 0)
1476 return 0;
1477 return (in[i >> 3] >> (i & 7)) & 1;
1478}
9c37519b 1479
ae5c8664
MC
1480/*
1481 * Interleaved point multiplication using precomputed point multiples: The
1482 * small point multiples 0*P, 1*P, ..., 16*P are in pre_comp[], the scalars
1483 * in scalars[]. If g_scalar is non-NULL, we also add this multiple of the
1484 * generator, using certain (large) precomputed multiples in g_pre_comp.
1485 * Output point (X, Y, Z) is stored in x_out, y_out, z_out
1486 */
1487static void batch_mul(felem x_out, felem y_out, felem z_out,
1488 const felem_bytearray scalars[],
1489 const unsigned num_points, const u8 *g_scalar,
1490 const int mixed, const felem pre_comp[][17][3],
1491 const felem g_pre_comp[16][3])
1492{
1493 int i, skip;
1494 unsigned num, gen_mul = (g_scalar != NULL);
1495 felem nq[3], tmp[4];
1496 limb bits;
1497 u8 sign, digit;
1498
1499 /* set nq to the point at infinity */
1500 memset(nq, 0, 3 * sizeof(felem));
1501
1502 /*
1503 * Loop over all scalars msb-to-lsb, interleaving additions of multiples
1504 * of the generator (last quarter of rounds) and additions of other
1505 * points multiples (every 5th round).
1506 */
1507 skip = 1; /* save two point operations in the first
1508 * round */
1509 for (i = (num_points ? 520 : 130); i >= 0; --i) {
1510 /* double */
1511 if (!skip)
1512 point_double(nq[0], nq[1], nq[2], nq[0], nq[1], nq[2]);
1513
1514 /* add multiples of the generator */
1515 if (gen_mul && (i <= 130)) {
1516 bits = get_bit(g_scalar, i + 390) << 3;
1517 if (i < 130) {
1518 bits |= get_bit(g_scalar, i + 260) << 2;
1519 bits |= get_bit(g_scalar, i + 130) << 1;
1520 bits |= get_bit(g_scalar, i);
1521 }
1522 /* select the point to add, in constant time */
1523 select_point(bits, 16, g_pre_comp, tmp);
1524 if (!skip) {
1525 /* The 1 argument below is for "mixed" */
1526 point_add(nq[0], nq[1], nq[2],
1527 nq[0], nq[1], nq[2], 1, tmp[0], tmp[1], tmp[2]);
1528 } else {
1529 memcpy(nq, tmp, 3 * sizeof(felem));
1530 skip = 0;
1531 }
1532 }
1533
1534 /* do other additions every 5 doublings */
1535 if (num_points && (i % 5 == 0)) {
1536 /* loop over all scalars */
1537 for (num = 0; num < num_points; ++num) {
1538 bits = get_bit(scalars[num], i + 4) << 5;
1539 bits |= get_bit(scalars[num], i + 3) << 4;
1540 bits |= get_bit(scalars[num], i + 2) << 3;
1541 bits |= get_bit(scalars[num], i + 1) << 2;
1542 bits |= get_bit(scalars[num], i) << 1;
1543 bits |= get_bit(scalars[num], i - 1);
1544 ec_GFp_nistp_recode_scalar_bits(&sign, &digit, bits);
1545
1546 /*
1547 * select the point to add or subtract, in constant time
1548 */
1549 select_point(digit, 17, pre_comp[num], tmp);
1550 felem_neg(tmp[3], tmp[1]); /* (X, -Y, Z) is the negative
1551 * point */
1552 copy_conditional(tmp[1], tmp[3], (-(limb) sign));
1553
1554 if (!skip) {
1555 point_add(nq[0], nq[1], nq[2],
1556 nq[0], nq[1], nq[2],
1557 mixed, tmp[0], tmp[1], tmp[2]);
1558 } else {
1559 memcpy(nq, tmp, 3 * sizeof(felem));
1560 skip = 0;
1561 }
1562 }
1563 }
1564 }
1565 felem_assign(x_out, nq[0]);
1566 felem_assign(y_out, nq[1]);
1567 felem_assign(z_out, nq[2]);
1568}
9c37519b
BM
1569
1570/* Precomputation for the group generator. */
1571typedef struct {
ae5c8664
MC
1572 felem g_pre_comp[16][3];
1573 int references;
9c37519b
BM
1574} NISTP521_PRE_COMP;
1575
1576const EC_METHOD *EC_GFp_nistp521_method(void)
ae5c8664
MC
1577{
1578 static const EC_METHOD ret = {
1579 EC_FLAGS_DEFAULT_OCT,
1580 NID_X9_62_prime_field,
1581 ec_GFp_nistp521_group_init,
1582 ec_GFp_simple_group_finish,
1583 ec_GFp_simple_group_clear_finish,
1584 ec_GFp_nist_group_copy,
1585 ec_GFp_nistp521_group_set_curve,
1586 ec_GFp_simple_group_get_curve,
1587 ec_GFp_simple_group_get_degree,
1588 ec_GFp_simple_group_check_discriminant,
1589 ec_GFp_simple_point_init,
1590 ec_GFp_simple_point_finish,
1591 ec_GFp_simple_point_clear_finish,
1592 ec_GFp_simple_point_copy,
1593 ec_GFp_simple_point_set_to_infinity,
1594 ec_GFp_simple_set_Jprojective_coordinates_GFp,
1595 ec_GFp_simple_get_Jprojective_coordinates_GFp,
1596 ec_GFp_simple_point_set_affine_coordinates,
1597 ec_GFp_nistp521_point_get_affine_coordinates,
1598 0 /* point_set_compressed_coordinates */ ,
1599 0 /* point2oct */ ,
1600 0 /* oct2point */ ,
1601 ec_GFp_simple_add,
1602 ec_GFp_simple_dbl,
1603 ec_GFp_simple_invert,
1604 ec_GFp_simple_is_at_infinity,
1605 ec_GFp_simple_is_on_curve,
1606 ec_GFp_simple_cmp,
1607 ec_GFp_simple_make_affine,
1608 ec_GFp_simple_points_make_affine,
1609 ec_GFp_nistp521_points_mul,
1610 ec_GFp_nistp521_precompute_mult,
1611 ec_GFp_nistp521_have_precompute_mult,
1612 ec_GFp_nist_field_mul,
1613 ec_GFp_nist_field_sqr,
1614 0 /* field_div */ ,
1615 0 /* field_encode */ ,
1616 0 /* field_decode */ ,
1617 0 /* field_set_to_one */
1618 };
1619
1620 return &ret;
1621}
9c37519b
BM
1622
1623/******************************************************************************/
ae5c8664
MC
1624/*
1625 * FUNCTIONS TO MANAGE PRECOMPUTATION
9c37519b
BM
1626 */
1627
1628static NISTP521_PRE_COMP *nistp521_pre_comp_new()
ae5c8664
MC
1629{
1630 NISTP521_PRE_COMP *ret = NULL;
1631 ret = (NISTP521_PRE_COMP *) OPENSSL_malloc(sizeof(NISTP521_PRE_COMP));
1632 if (!ret) {
1633 ECerr(EC_F_NISTP521_PRE_COMP_NEW, ERR_R_MALLOC_FAILURE);
1634 return ret;
1635 }
1636 memset(ret->g_pre_comp, 0, sizeof(ret->g_pre_comp));
1637 ret->references = 1;
1638 return ret;
1639}
9c37519b
BM
1640
1641static void *nistp521_pre_comp_dup(void *src_)
ae5c8664
MC
1642{
1643 NISTP521_PRE_COMP *src = src_;
9c37519b 1644
ae5c8664
MC
1645 /* no need to actually copy, these objects never change! */
1646 CRYPTO_add(&src->references, 1, CRYPTO_LOCK_EC_PRE_COMP);
9c37519b 1647
ae5c8664
MC
1648 return src_;
1649}
9c37519b
BM
1650
1651static void nistp521_pre_comp_free(void *pre_)
ae5c8664
MC
1652{
1653 int i;
1654 NISTP521_PRE_COMP *pre = pre_;
9c37519b 1655
ae5c8664
MC
1656 if (!pre)
1657 return;
9c37519b 1658
ae5c8664
MC
1659 i = CRYPTO_add(&pre->references, -1, CRYPTO_LOCK_EC_PRE_COMP);
1660 if (i > 0)
1661 return;
9c37519b 1662
ae5c8664
MC
1663 OPENSSL_free(pre);
1664}
9c37519b
BM
1665
1666static void nistp521_pre_comp_clear_free(void *pre_)
ae5c8664
MC
1667{
1668 int i;
1669 NISTP521_PRE_COMP *pre = pre_;
9c37519b 1670
ae5c8664
MC
1671 if (!pre)
1672 return;
9c37519b 1673
ae5c8664
MC
1674 i = CRYPTO_add(&pre->references, -1, CRYPTO_LOCK_EC_PRE_COMP);
1675 if (i > 0)
1676 return;
9c37519b 1677
ae5c8664
MC
1678 OPENSSL_cleanse(pre, sizeof(*pre));
1679 OPENSSL_free(pre);
1680}
9c37519b
BM
1681
1682/******************************************************************************/
ae5c8664
MC
1683/*
1684 * OPENSSL EC_METHOD FUNCTIONS
9c37519b
BM
1685 */
1686
1687int ec_GFp_nistp521_group_init(EC_GROUP *group)
ae5c8664
MC
1688{
1689 int ret;
1690 ret = ec_GFp_simple_group_init(group);
1691 group->a_is_minus3 = 1;
1692 return ret;
1693}
9c37519b
BM
1694
1695int ec_GFp_nistp521_group_set_curve(EC_GROUP *group, const BIGNUM *p,
ae5c8664
MC
1696 const BIGNUM *a, const BIGNUM *b,
1697 BN_CTX *ctx)
1698{
1699 int ret = 0;
1700 BN_CTX *new_ctx = NULL;
1701 BIGNUM *curve_p, *curve_a, *curve_b;
1702
1703 if (ctx == NULL)
1704 if ((ctx = new_ctx = BN_CTX_new()) == NULL)
1705 return 0;
1706 BN_CTX_start(ctx);
1707 if (((curve_p = BN_CTX_get(ctx)) == NULL) ||
1708 ((curve_a = BN_CTX_get(ctx)) == NULL) ||
1709 ((curve_b = BN_CTX_get(ctx)) == NULL))
1710 goto err;
1711 BN_bin2bn(nistp521_curve_params[0], sizeof(felem_bytearray), curve_p);
1712 BN_bin2bn(nistp521_curve_params[1], sizeof(felem_bytearray), curve_a);
1713 BN_bin2bn(nistp521_curve_params[2], sizeof(felem_bytearray), curve_b);
1714 if ((BN_cmp(curve_p, p)) || (BN_cmp(curve_a, a)) || (BN_cmp(curve_b, b))) {
1715 ECerr(EC_F_EC_GFP_NISTP521_GROUP_SET_CURVE,
1716 EC_R_WRONG_CURVE_PARAMETERS);
1717 goto err;
1718 }
1719 group->field_mod_func = BN_nist_mod_521;
1720 ret = ec_GFp_simple_group_set_curve(group, p, a, b, ctx);
1721 err:
1722 BN_CTX_end(ctx);
1723 if (new_ctx != NULL)
1724 BN_CTX_free(new_ctx);
1725 return ret;
1726}
1727
1728/*
1729 * Takes the Jacobian coordinates (X, Y, Z) of a point and returns (X', Y') =
1730 * (X/Z^2, Y/Z^3)
1731 */
9c37519b 1732int ec_GFp_nistp521_point_get_affine_coordinates(const EC_GROUP *group,
ae5c8664
MC
1733 const EC_POINT *point,
1734 BIGNUM *x, BIGNUM *y,
1735 BN_CTX *ctx)
1736{
1737 felem z1, z2, x_in, y_in, x_out, y_out;
1738 largefelem tmp;
1739
1740 if (EC_POINT_is_at_infinity(group, point)) {
1741 ECerr(EC_F_EC_GFP_NISTP521_POINT_GET_AFFINE_COORDINATES,
1742 EC_R_POINT_AT_INFINITY);
1743 return 0;
1744 }
1745 if ((!BN_to_felem(x_in, &point->X)) || (!BN_to_felem(y_in, &point->Y)) ||
1746 (!BN_to_felem(z1, &point->Z)))
1747 return 0;
1748 felem_inv(z2, z1);
1749 felem_square(tmp, z2);
1750 felem_reduce(z1, tmp);
1751 felem_mul(tmp, x_in, z1);
1752 felem_reduce(x_in, tmp);
1753 felem_contract(x_out, x_in);
1754 if (x != NULL) {
1755 if (!felem_to_BN(x, x_out)) {
1756 ECerr(EC_F_EC_GFP_NISTP521_POINT_GET_AFFINE_COORDINATES,
1757 ERR_R_BN_LIB);
1758 return 0;
1759 }
1760 }
1761 felem_mul(tmp, z1, z2);
1762 felem_reduce(z1, tmp);
1763 felem_mul(tmp, y_in, z1);
1764 felem_reduce(y_in, tmp);
1765 felem_contract(y_out, y_in);
1766 if (y != NULL) {
1767 if (!felem_to_BN(y, y_out)) {
1768 ECerr(EC_F_EC_GFP_NISTP521_POINT_GET_AFFINE_COORDINATES,
1769 ERR_R_BN_LIB);
1770 return 0;
1771 }
1772 }
1773 return 1;
1774}
9c37519b 1775
cc6e2a59 1776/* points below is of size |num|, and tmp_felems is of size |num+1/ */
ae5c8664
MC
1777static void make_points_affine(size_t num, felem points[][3],
1778 felem tmp_felems[])
1779{
1780 /*
1781 * Runs in constant time, unless an input is the point at infinity (which
1782 * normally shouldn't happen).
1783 */
1784 ec_GFp_nistp_points_make_affine_internal(num,
1785 points,
1786 sizeof(felem),
1787 tmp_felems,
1788 (void (*)(void *))felem_one,
ae5c8664
MC
1789 felem_is_zero_int,
1790 (void (*)(void *, const void *))
1791 felem_assign,
1792 (void (*)(void *, const void *))
1793 felem_square_reduce, (void (*)
1794 (void *,
1795 const void
1796 *,
1797 const void
1798 *))
1799 felem_mul_reduce,
1800 (void (*)(void *, const void *))
1801 felem_inv,
1802 (void (*)(void *, const void *))
1803 felem_contract);
1804}
1805
1806/*
1807 * Computes scalar*generator + \sum scalars[i]*points[i], ignoring NULL
1808 * values Result is stored in r (r can equal one of the inputs).
1809 */
9c37519b 1810int ec_GFp_nistp521_points_mul(const EC_GROUP *group, EC_POINT *r,
ae5c8664
MC
1811 const BIGNUM *scalar, size_t num,
1812 const EC_POINT *points[],
1813 const BIGNUM *scalars[], BN_CTX *ctx)
1814{
1815 int ret = 0;
1816 int j;
1817 int mixed = 0;
1818 BN_CTX *new_ctx = NULL;
1819 BIGNUM *x, *y, *z, *tmp_scalar;
1820 felem_bytearray g_secret;
1821 felem_bytearray *secrets = NULL;
1822 felem(*pre_comp)[17][3] = NULL;
1823 felem *tmp_felems = NULL;
1824 felem_bytearray tmp;
1825 unsigned i, num_bytes;
1826 int have_pre_comp = 0;
1827 size_t num_points = num;
1828 felem x_in, y_in, z_in, x_out, y_out, z_out;
1829 NISTP521_PRE_COMP *pre = NULL;
1830 felem(*g_pre_comp)[3] = NULL;
1831 EC_POINT *generator = NULL;
1832 const EC_POINT *p = NULL;
1833 const BIGNUM *p_scalar = NULL;
1834
1835 if (ctx == NULL)
1836 if ((ctx = new_ctx = BN_CTX_new()) == NULL)
1837 return 0;
1838 BN_CTX_start(ctx);
1839 if (((x = BN_CTX_get(ctx)) == NULL) ||
1840 ((y = BN_CTX_get(ctx)) == NULL) ||
1841 ((z = BN_CTX_get(ctx)) == NULL) ||
1842 ((tmp_scalar = BN_CTX_get(ctx)) == NULL))
1843 goto err;
1844
1845 if (scalar != NULL) {
1846 pre = EC_EX_DATA_get_data(group->extra_data,
1847 nistp521_pre_comp_dup,
1848 nistp521_pre_comp_free,
1849 nistp521_pre_comp_clear_free);
1850 if (pre)
1851 /* we have precomputation, try to use it */
1852 g_pre_comp = &pre->g_pre_comp[0];
1853 else
1854 /* try to use the standard precomputation */
1855 g_pre_comp = (felem(*)[3]) gmul;
1856 generator = EC_POINT_new(group);
1857 if (generator == NULL)
1858 goto err;
1859 /* get the generator from precomputation */
1860 if (!felem_to_BN(x, g_pre_comp[1][0]) ||
1861 !felem_to_BN(y, g_pre_comp[1][1]) ||
1862 !felem_to_BN(z, g_pre_comp[1][2])) {
1863 ECerr(EC_F_EC_GFP_NISTP521_POINTS_MUL, ERR_R_BN_LIB);
1864 goto err;
1865 }
1866 if (!EC_POINT_set_Jprojective_coordinates_GFp(group,
1867 generator, x, y, z,
1868 ctx))
1869 goto err;
1870 if (0 == EC_POINT_cmp(group, generator, group->generator, ctx))
1871 /* precomputation matches generator */
1872 have_pre_comp = 1;
1873 else
1874 /*
1875 * we don't have valid precomputation: treat the generator as a
1876 * random point
1877 */
1878 num_points++;
1879 }
1880
1881 if (num_points > 0) {
1882 if (num_points >= 2) {
1883 /*
1884 * unless we precompute multiples for just one point, converting
1885 * those into affine form is time well spent
1886 */
1887 mixed = 1;
1888 }
1889 secrets = OPENSSL_malloc(num_points * sizeof(felem_bytearray));
1890 pre_comp = OPENSSL_malloc(num_points * 17 * 3 * sizeof(felem));
1891 if (mixed)
1892 tmp_felems =
1893 OPENSSL_malloc((num_points * 17 + 1) * sizeof(felem));
1894 if ((secrets == NULL) || (pre_comp == NULL)
1895 || (mixed && (tmp_felems == NULL))) {
1896 ECerr(EC_F_EC_GFP_NISTP521_POINTS_MUL, ERR_R_MALLOC_FAILURE);
1897 goto err;
1898 }
1899
1900 /*
1901 * we treat NULL scalars as 0, and NULL points as points at infinity,
1902 * i.e., they contribute nothing to the linear combination
1903 */
1904 memset(secrets, 0, num_points * sizeof(felem_bytearray));
1905 memset(pre_comp, 0, num_points * 17 * 3 * sizeof(felem));
1906 for (i = 0; i < num_points; ++i) {
1907 if (i == num)
1908 /*
1909 * we didn't have a valid precomputation, so we pick the
1910 * generator
1911 */
1912 {
1913 p = EC_GROUP_get0_generator(group);
1914 p_scalar = scalar;
1915 } else
1916 /* the i^th point */
1917 {
1918 p = points[i];
1919 p_scalar = scalars[i];
1920 }
1921 if ((p_scalar != NULL) && (p != NULL)) {
1922 /* reduce scalar to 0 <= scalar < 2^521 */
1923 if ((BN_num_bits(p_scalar) > 521)
1924 || (BN_is_negative(p_scalar))) {
1925 /*
1926 * this is an unusual input, and we don't guarantee
1927 * constant-timeness
1928 */
1929 if (!BN_nnmod(tmp_scalar, p_scalar, &group->order, ctx)) {
1930 ECerr(EC_F_EC_GFP_NISTP521_POINTS_MUL, ERR_R_BN_LIB);
1931 goto err;
1932 }
1933 num_bytes = BN_bn2bin(tmp_scalar, tmp);
1934 } else
1935 num_bytes = BN_bn2bin(p_scalar, tmp);
1936 flip_endian(secrets[i], tmp, num_bytes);
1937 /* precompute multiples */
1938 if ((!BN_to_felem(x_out, &p->X)) ||
1939 (!BN_to_felem(y_out, &p->Y)) ||
1940 (!BN_to_felem(z_out, &p->Z)))
1941 goto err;
1942 memcpy(pre_comp[i][1][0], x_out, sizeof(felem));
1943 memcpy(pre_comp[i][1][1], y_out, sizeof(felem));
1944 memcpy(pre_comp[i][1][2], z_out, sizeof(felem));
1945 for (j = 2; j <= 16; ++j) {
1946 if (j & 1) {
1947 point_add(pre_comp[i][j][0], pre_comp[i][j][1],
1948 pre_comp[i][j][2], pre_comp[i][1][0],
1949 pre_comp[i][1][1], pre_comp[i][1][2], 0,
1950 pre_comp[i][j - 1][0],
1951 pre_comp[i][j - 1][1],
1952 pre_comp[i][j - 1][2]);
1953 } else {
1954 point_double(pre_comp[i][j][0], pre_comp[i][j][1],
1955 pre_comp[i][j][2], pre_comp[i][j / 2][0],
1956 pre_comp[i][j / 2][1],
1957 pre_comp[i][j / 2][2]);
1958 }
1959 }
1960 }
1961 }
1962 if (mixed)
1963 make_points_affine(num_points * 17, pre_comp[0], tmp_felems);
1964 }
1965
1966 /* the scalar for the generator */
1967 if ((scalar != NULL) && (have_pre_comp)) {
1968 memset(g_secret, 0, sizeof(g_secret));
1969 /* reduce scalar to 0 <= scalar < 2^521 */
1970 if ((BN_num_bits(scalar) > 521) || (BN_is_negative(scalar))) {
1971 /*
1972 * this is an unusual input, and we don't guarantee
1973 * constant-timeness
1974 */
1975 if (!BN_nnmod(tmp_scalar, scalar, &group->order, ctx)) {
1976 ECerr(EC_F_EC_GFP_NISTP521_POINTS_MUL, ERR_R_BN_LIB);
1977 goto err;
1978 }
1979 num_bytes = BN_bn2bin(tmp_scalar, tmp);
1980 } else
1981 num_bytes = BN_bn2bin(scalar, tmp);
1982 flip_endian(g_secret, tmp, num_bytes);
1983 /* do the multiplication with generator precomputation */
1984 batch_mul(x_out, y_out, z_out,
1985 (const felem_bytearray(*))secrets, num_points,
1986 g_secret,
1987 mixed, (const felem(*)[17][3])pre_comp,
1988 (const felem(*)[3])g_pre_comp);
1989 } else
1990 /* do the multiplication without generator precomputation */
1991 batch_mul(x_out, y_out, z_out,
1992 (const felem_bytearray(*))secrets, num_points,
1993 NULL, mixed, (const felem(*)[17][3])pre_comp, NULL);
1994 /* reduce the output to its unique minimal representation */
1995 felem_contract(x_in, x_out);
1996 felem_contract(y_in, y_out);
1997 felem_contract(z_in, z_out);
1998 if ((!felem_to_BN(x, x_in)) || (!felem_to_BN(y, y_in)) ||
1999 (!felem_to_BN(z, z_in))) {
2000 ECerr(EC_F_EC_GFP_NISTP521_POINTS_MUL, ERR_R_BN_LIB);
2001 goto err;
2002 }
2003 ret = EC_POINT_set_Jprojective_coordinates_GFp(group, r, x, y, z, ctx);
2004
2005 err:
2006 BN_CTX_end(ctx);
2007 if (generator != NULL)
2008 EC_POINT_free(generator);
2009 if (new_ctx != NULL)
2010 BN_CTX_free(new_ctx);
2011 if (secrets != NULL)
2012 OPENSSL_free(secrets);
2013 if (pre_comp != NULL)
2014 OPENSSL_free(pre_comp);
2015 if (tmp_felems != NULL)
2016 OPENSSL_free(tmp_felems);
2017 return ret;
2018}
9c37519b
BM
2019
2020int ec_GFp_nistp521_precompute_mult(EC_GROUP *group, BN_CTX *ctx)
ae5c8664
MC
2021{
2022 int ret = 0;
2023 NISTP521_PRE_COMP *pre = NULL;
2024 int i, j;
2025 BN_CTX *new_ctx = NULL;
2026 BIGNUM *x, *y;
2027 EC_POINT *generator = NULL;
2028 felem tmp_felems[16];
2029
2030 /* throw away old precomputation */
2031 EC_EX_DATA_free_data(&group->extra_data, nistp521_pre_comp_dup,
2032 nistp521_pre_comp_free,
2033 nistp521_pre_comp_clear_free);
2034 if (ctx == NULL)
2035 if ((ctx = new_ctx = BN_CTX_new()) == NULL)
2036 return 0;
2037 BN_CTX_start(ctx);
2038 if (((x = BN_CTX_get(ctx)) == NULL) || ((y = BN_CTX_get(ctx)) == NULL))
2039 goto err;
2040 /* get the generator */
2041 if (group->generator == NULL)
2042 goto err;
2043 generator = EC_POINT_new(group);
2044 if (generator == NULL)
2045 goto err;
2046 BN_bin2bn(nistp521_curve_params[3], sizeof(felem_bytearray), x);
2047 BN_bin2bn(nistp521_curve_params[4], sizeof(felem_bytearray), y);
2048 if (!EC_POINT_set_affine_coordinates_GFp(group, generator, x, y, ctx))
2049 goto err;
2050 if ((pre = nistp521_pre_comp_new()) == NULL)
2051 goto err;
2052 /*
2053 * if the generator is the standard one, use built-in precomputation
2054 */
2055 if (0 == EC_POINT_cmp(group, generator, group->generator, ctx)) {
2056 memcpy(pre->g_pre_comp, gmul, sizeof(pre->g_pre_comp));
e94f52e0 2057 goto done;
ae5c8664
MC
2058 }
2059 if ((!BN_to_felem(pre->g_pre_comp[1][0], &group->generator->X)) ||
2060 (!BN_to_felem(pre->g_pre_comp[1][1], &group->generator->Y)) ||
2061 (!BN_to_felem(pre->g_pre_comp[1][2], &group->generator->Z)))
2062 goto err;
2063 /* compute 2^130*G, 2^260*G, 2^390*G */
2064 for (i = 1; i <= 4; i <<= 1) {
2065 point_double(pre->g_pre_comp[2 * i][0], pre->g_pre_comp[2 * i][1],
2066 pre->g_pre_comp[2 * i][2], pre->g_pre_comp[i][0],
2067 pre->g_pre_comp[i][1], pre->g_pre_comp[i][2]);
2068 for (j = 0; j < 129; ++j) {
2069 point_double(pre->g_pre_comp[2 * i][0],
2070 pre->g_pre_comp[2 * i][1],
2071 pre->g_pre_comp[2 * i][2],
2072 pre->g_pre_comp[2 * i][0],
2073 pre->g_pre_comp[2 * i][1],
2074 pre->g_pre_comp[2 * i][2]);
2075 }
2076 }
2077 /* g_pre_comp[0] is the point at infinity */
2078 memset(pre->g_pre_comp[0], 0, sizeof(pre->g_pre_comp[0]));
2079 /* the remaining multiples */
2080 /* 2^130*G + 2^260*G */
2081 point_add(pre->g_pre_comp[6][0], pre->g_pre_comp[6][1],
2082 pre->g_pre_comp[6][2], pre->g_pre_comp[4][0],
2083 pre->g_pre_comp[4][1], pre->g_pre_comp[4][2],
2084 0, pre->g_pre_comp[2][0], pre->g_pre_comp[2][1],
2085 pre->g_pre_comp[2][2]);
2086 /* 2^130*G + 2^390*G */
2087 point_add(pre->g_pre_comp[10][0], pre->g_pre_comp[10][1],
2088 pre->g_pre_comp[10][2], pre->g_pre_comp[8][0],
2089 pre->g_pre_comp[8][1], pre->g_pre_comp[8][2],
2090 0, pre->g_pre_comp[2][0], pre->g_pre_comp[2][1],
2091 pre->g_pre_comp[2][2]);
2092 /* 2^260*G + 2^390*G */
2093 point_add(pre->g_pre_comp[12][0], pre->g_pre_comp[12][1],
2094 pre->g_pre_comp[12][2], pre->g_pre_comp[8][0],
2095 pre->g_pre_comp[8][1], pre->g_pre_comp[8][2],
2096 0, pre->g_pre_comp[4][0], pre->g_pre_comp[4][1],
2097 pre->g_pre_comp[4][2]);
2098 /* 2^130*G + 2^260*G + 2^390*G */
2099 point_add(pre->g_pre_comp[14][0], pre->g_pre_comp[14][1],
2100 pre->g_pre_comp[14][2], pre->g_pre_comp[12][0],
2101 pre->g_pre_comp[12][1], pre->g_pre_comp[12][2],
2102 0, pre->g_pre_comp[2][0], pre->g_pre_comp[2][1],
2103 pre->g_pre_comp[2][2]);
2104 for (i = 1; i < 8; ++i) {
2105 /* odd multiples: add G */
2106 point_add(pre->g_pre_comp[2 * i + 1][0],
2107 pre->g_pre_comp[2 * i + 1][1],
2108 pre->g_pre_comp[2 * i + 1][2], pre->g_pre_comp[2 * i][0],
2109 pre->g_pre_comp[2 * i][1], pre->g_pre_comp[2 * i][2], 0,
2110 pre->g_pre_comp[1][0], pre->g_pre_comp[1][1],
2111 pre->g_pre_comp[1][2]);
2112 }
2113 make_points_affine(15, &(pre->g_pre_comp[1]), tmp_felems);
2114
e94f52e0 2115 done:
ae5c8664
MC
2116 if (!EC_EX_DATA_set_data(&group->extra_data, pre, nistp521_pre_comp_dup,
2117 nistp521_pre_comp_free,
2118 nistp521_pre_comp_clear_free))
2119 goto err;
2120 ret = 1;
2121 pre = NULL;
9c37519b 2122 err:
ae5c8664
MC
2123 BN_CTX_end(ctx);
2124 if (generator != NULL)
2125 EC_POINT_free(generator);
2126 if (new_ctx != NULL)
2127 BN_CTX_free(new_ctx);
2128 if (pre)
2129 nistp521_pre_comp_free(pre);
2130 return ret;
2131}
9c37519b
BM
2132
2133int ec_GFp_nistp521_have_precompute_mult(const EC_GROUP *group)
ae5c8664
MC
2134{
2135 if (EC_EX_DATA_get_data(group->extra_data, nistp521_pre_comp_dup,
2136 nistp521_pre_comp_free,
2137 nistp521_pre_comp_clear_free)
2138 != NULL)
2139 return 1;
2140 else
2141 return 0;
2142}
9c37519b
BM
2143
2144#else
ae5c8664 2145static void *dummy = &dummy;
9c37519b 2146#endif