1 /* crypto/ec/ecp_nistp521.c */
2 /*
3 * Written by Adam Langley (Google) for the OpenSSL project
4 */
5 /* Copyright 2011 Google Inc.
6 *
7 * Licensed under the Apache License, Version 2.0 (the "License");
8 *
9 * you may not use this file except in compliance with the License.
10 * You may obtain a copy of the License at
11 *
12 * http://www.apache.org/licenses/LICENSE-2.0
13 *
14 * Unless required by applicable law or agreed to in writing, software
15 * distributed under the License is distributed on an "AS IS" BASIS,
16 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 * See the License for the specific language governing permissions and
18 * limitations under the License.
19 */
20
21 /*
22 * A 64-bit implementation of the NIST P-521 elliptic curve point multiplication
23 *
24 * OpenSSL integration was taken from Emilia Kasper's work in ecp_nistp224.c.
25 * Otherwise based on Emilia's P224 work, which was inspired by my curve25519
26 * work which got its smarts from Daniel J. Bernstein's work on the same.
27 */
28
29 #include <openssl/opensslconf.h>
30 #ifndef OPENSSL_NO_EC_NISTP_64_GCC_128
31
32 # ifndef OPENSSL_SYS_VMS
33 # include <stdint.h>
34 # else
35 # include <inttypes.h>
36 # endif
37
38 # include <string.h>
39 # include <openssl/err.h>
40 # include "ec_lcl.h"
41
42 # if defined(__GNUC__) && (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 1))
43 /* even with gcc, the typedef won't work for 32-bit platforms */
44 typedef __uint128_t uint128_t; /* nonstandard; implemented by gcc on 64-bit
45 * platforms */
46 # else
47 # error "Need GCC 3.1 or later to define type uint128_t"
48 # endif
49
50 typedef uint8_t u8;
51 typedef uint64_t u64;
52 typedef int64_t s64;
53
54 /*
55 * The underlying field. P521 operates over GF(2^521-1). We can serialise an
56 * element of this field into 66 bytes where the most significant byte
57 * contains only a single bit. We call this an felem_bytearray.
58 */
59
60 typedef u8 felem_bytearray[66];
61
62 /*
63 * These are the parameters of P521, taken from FIPS 186-3, section D.1.2.5.
64 * These values are big-endian.
65 */
66 static const felem_bytearray nistp521_curve_params[5] = {
67 {0x01, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* p */
68 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
69 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
70 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
71 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
72 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
73 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
74 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
75 0xff, 0xff},
76 {0x01, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* a = -3 */
77 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
78 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
79 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
80 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
81 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
82 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
83 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
84 0xff, 0xfc},
85 {0x00, 0x51, 0x95, 0x3e, 0xb9, 0x61, 0x8e, 0x1c, /* b */
86 0x9a, 0x1f, 0x92, 0x9a, 0x21, 0xa0, 0xb6, 0x85,
87 0x40, 0xee, 0xa2, 0xda, 0x72, 0x5b, 0x99, 0xb3,
88 0x15, 0xf3, 0xb8, 0xb4, 0x89, 0x91, 0x8e, 0xf1,
89 0x09, 0xe1, 0x56, 0x19, 0x39, 0x51, 0xec, 0x7e,
90 0x93, 0x7b, 0x16, 0x52, 0xc0, 0xbd, 0x3b, 0xb1,
91 0xbf, 0x07, 0x35, 0x73, 0xdf, 0x88, 0x3d, 0x2c,
92 0x34, 0xf1, 0xef, 0x45, 0x1f, 0xd4, 0x6b, 0x50,
93 0x3f, 0x00},
94 {0x00, 0xc6, 0x85, 0x8e, 0x06, 0xb7, 0x04, 0x04, /* x */
95 0xe9, 0xcd, 0x9e, 0x3e, 0xcb, 0x66, 0x23, 0x95,
96 0xb4, 0x42, 0x9c, 0x64, 0x81, 0x39, 0x05, 0x3f,
97 0xb5, 0x21, 0xf8, 0x28, 0xaf, 0x60, 0x6b, 0x4d,
98 0x3d, 0xba, 0xa1, 0x4b, 0x5e, 0x77, 0xef, 0xe7,
99 0x59, 0x28, 0xfe, 0x1d, 0xc1, 0x27, 0xa2, 0xff,
100 0xa8, 0xde, 0x33, 0x48, 0xb3, 0xc1, 0x85, 0x6a,
101 0x42, 0x9b, 0xf9, 0x7e, 0x7e, 0x31, 0xc2, 0xe5,
102 0xbd, 0x66},
103 {0x01, 0x18, 0x39, 0x29, 0x6a, 0x78, 0x9a, 0x3b, /* y */
104 0xc0, 0x04, 0x5c, 0x8a, 0x5f, 0xb4, 0x2c, 0x7d,
105 0x1b, 0xd9, 0x98, 0xf5, 0x44, 0x49, 0x57, 0x9b,
106 0x44, 0x68, 0x17, 0xaf, 0xbd, 0x17, 0x27, 0x3e,
107 0x66, 0x2c, 0x97, 0xee, 0x72, 0x99, 0x5e, 0xf4,
108 0x26, 0x40, 0xc5, 0x50, 0xb9, 0x01, 0x3f, 0xad,
109 0x07, 0x61, 0x35, 0x3c, 0x70, 0x86, 0xa2, 0x72,
110 0xc2, 0x40, 0x88, 0xbe, 0x94, 0x76, 0x9f, 0xd1,
111 0x66, 0x50}
112 };
113
114 /*-
115 * The representation of field elements.
116 * ------------------------------------
117 *
118 * We represent field elements with nine values. These values are either 64 or
119 * 128 bits and the field element represented is:
120 * v[0]*2^0 + v[1]*2^58 + v[2]*2^116 + ... + v[8]*2^464 (mod p)
121 * Each of the nine values is called a 'limb'. Since the limbs are spaced only
122 * 58 bits apart, but are greater than 58 bits in length, the most significant
123 * bits of each limb overlap with the least significant bits of the next.
124 *
125 * A field element with 64-bit limbs is an 'felem'. One with 128-bit limbs is a
126 * 'largefelem' */
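/*-
 * Worked example (illustrative): the felem {5, 2, 0, 0, 0, 0, 0, 0, 0}
 * represents 5*2^0 + 2*2^58 = 5 + 2^59. Because each limb may hold more
 * than 58 bits, representations are redundant: {5 + 2*2^58, 0, 0, 0, 0, 0,
 * 0, 0, 0} denotes the same element before any reduction.
 */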
127
128 # define NLIMBS 9
129
130 typedef uint64_t limb;
131 typedef limb felem[NLIMBS];
132 typedef uint128_t largefelem[NLIMBS];
133
134 static const limb bottom57bits = 0x1ffffffffffffff;
135 static const limb bottom58bits = 0x3ffffffffffffff;
136
137 /*
138 * bin66_to_felem takes a little-endian byte array and converts it into felem
139 * form. This assumes that the CPU is little-endian.
140 */
141 static void bin66_to_felem(felem out, const u8 in[66])
142 {
143 out[0] = (*((limb *) & in[0])) & bottom58bits;
144 out[1] = (*((limb *) & in[7]) >> 2) & bottom58bits;
145 out[2] = (*((limb *) & in[14]) >> 4) & bottom58bits;
146 out[3] = (*((limb *) & in[21]) >> 6) & bottom58bits;
147 out[4] = (*((limb *) & in[29])) & bottom58bits;
148 out[5] = (*((limb *) & in[36]) >> 2) & bottom58bits;
149 out[6] = (*((limb *) & in[43]) >> 4) & bottom58bits;
150 out[7] = (*((limb *) & in[50]) >> 6) & bottom58bits;
151 out[8] = (*((limb *) & in[58])) & bottom57bits;
152 }
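/*-
 * Offset sketch: limb i begins at bit 58*i of the serialised value, so it
 * is read from byte (58*i)/8 with a right shift of (58*i) mod 8. For
 * example, limb 1 begins at bit 58 = 7*8 + 2, hence the read from in[7]
 * shifted right by 2 above, while limb 4 begins at bit 232 = 29*8 and
 * needs no shift.
 */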
153
154 /*
155 * felem_to_bin66 takes an felem and serialises into a little endian, 66 byte
156 * array. This assumes that the CPU is little-endian.
157 */
158 static void felem_to_bin66(u8 out[66], const felem in)
159 {
160 memset(out, 0, 66);
161 (*((limb *) & out[0])) = in[0];
162 (*((limb *) & out[7])) |= in[1] << 2;
163 (*((limb *) & out[14])) |= in[2] << 4;
164 (*((limb *) & out[21])) |= in[3] << 6;
165 (*((limb *) & out[29])) = in[4];
166 (*((limb *) & out[36])) |= in[5] << 2;
167 (*((limb *) & out[43])) |= in[6] << 4;
168 (*((limb *) & out[50])) |= in[7] << 6;
169 (*((limb *) & out[58])) = in[8];
170 }
171
172 /* To preserve endianness when using BN_bn2bin and BN_bin2bn */
173 static void flip_endian(u8 *out, const u8 *in, unsigned len)
174 {
175 unsigned i;
176 for (i = 0; i < len; ++i)
177 out[i] = in[len - 1 - i];
178 }
179
180 /* BN_to_felem converts an OpenSSL BIGNUM into an felem */
181 static int BN_to_felem(felem out, const BIGNUM *bn)
182 {
183 felem_bytearray b_in;
184 felem_bytearray b_out;
185 unsigned num_bytes;
186
187 /* BN_bn2bin eats leading zeroes */
188 memset(b_out, 0, sizeof b_out);
189 num_bytes = BN_num_bytes(bn);
190 if (num_bytes > sizeof b_out) {
191 ECerr(EC_F_BN_TO_FELEM, EC_R_BIGNUM_OUT_OF_RANGE);
192 return 0;
193 }
194 if (BN_is_negative(bn)) {
195 ECerr(EC_F_BN_TO_FELEM, EC_R_BIGNUM_OUT_OF_RANGE);
196 return 0;
197 }
198 num_bytes = BN_bn2bin(bn, b_in);
199 flip_endian(b_out, b_in, num_bytes);
200 bin66_to_felem(out, b_out);
201 return 1;
202 }
203
204 /* felem_to_BN converts an felem into an OpenSSL BIGNUM */
205 static BIGNUM *felem_to_BN(BIGNUM *out, const felem in)
206 {
207 felem_bytearray b_in, b_out;
208 felem_to_bin66(b_in, in);
209 flip_endian(b_out, b_in, sizeof b_out);
210 return BN_bin2bn(b_out, sizeof b_out, out);
211 }
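/*-
 * Usage sketch (illustrative only; kept in a comment so nothing here is
 * compiled): round-tripping a BIGNUM through the limb representation.
 *
 *     felem f;
 *     BIGNUM *bn = BN_new();
 *     BIGNUM *copy = BN_new();
 *     if (bn != NULL && copy != NULL && BN_one(bn) && BN_to_felem(f, bn))
 *         felem_to_BN(copy, f);
 *     BN_free(bn);
 *     BN_free(copy);
 */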
212
213 /*-
214 * Field operations
215 * ----------------
216 */
217
218 static void felem_one(felem out)
219 {
220 out[0] = 1;
221 out[1] = 0;
222 out[2] = 0;
223 out[3] = 0;
224 out[4] = 0;
225 out[5] = 0;
226 out[6] = 0;
227 out[7] = 0;
228 out[8] = 0;
229 }
230
231 static void felem_assign(felem out, const felem in)
232 {
233 out[0] = in[0];
234 out[1] = in[1];
235 out[2] = in[2];
236 out[3] = in[3];
237 out[4] = in[4];
238 out[5] = in[5];
239 out[6] = in[6];
240 out[7] = in[7];
241 out[8] = in[8];
242 }
243
244 /* felem_sum64 sets out = out + in. */
245 static void felem_sum64(felem out, const felem in)
246 {
247 out[0] += in[0];
248 out[1] += in[1];
249 out[2] += in[2];
250 out[3] += in[3];
251 out[4] += in[4];
252 out[5] += in[5];
253 out[6] += in[6];
254 out[7] += in[7];
255 out[8] += in[8];
256 }
257
258 /* felem_scalar sets out = in * scalar */
259 static void felem_scalar(felem out, const felem in, limb scalar)
260 {
261 out[0] = in[0] * scalar;
262 out[1] = in[1] * scalar;
263 out[2] = in[2] * scalar;
264 out[3] = in[3] * scalar;
265 out[4] = in[4] * scalar;
266 out[5] = in[5] * scalar;
267 out[6] = in[6] * scalar;
268 out[7] = in[7] * scalar;
269 out[8] = in[8] * scalar;
270 }
271
272 /* felem_scalar64 sets out = out * scalar */
273 static void felem_scalar64(felem out, limb scalar)
274 {
275 out[0] *= scalar;
276 out[1] *= scalar;
277 out[2] *= scalar;
278 out[3] *= scalar;
279 out[4] *= scalar;
280 out[5] *= scalar;
281 out[6] *= scalar;
282 out[7] *= scalar;
283 out[8] *= scalar;
284 }
285
286 /* felem_scalar128 sets out = out * scalar */
287 static void felem_scalar128(largefelem out, limb scalar)
288 {
289 out[0] *= scalar;
290 out[1] *= scalar;
291 out[2] *= scalar;
292 out[3] *= scalar;
293 out[4] *= scalar;
294 out[5] *= scalar;
295 out[6] *= scalar;
296 out[7] *= scalar;
297 out[8] *= scalar;
298 }
299
300 /*-
301 * felem_neg sets |out| to |-in|
302 * On entry:
303 * in[i] < 2^59 + 2^14
304 * On exit:
305 * out[i] < 2^62
306 */
307 static void felem_neg(felem out, const felem in)
308 {
309 /* In order to prevent underflow, we subtract from 0 mod p. */
310 static const limb two62m3 = (((limb) 1) << 62) - (((limb) 1) << 5);
311 static const limb two62m2 = (((limb) 1) << 62) - (((limb) 1) << 4);
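    /*
     * These constants encode 2^5 * p: limbs 1..8 hold 2^62 - 2^4 =
     * 2^4*(2^58 - 1) and limb 0 holds 2^4 less, so the value as a whole is
     * 2^4*(2^522 - 1) - 2^4 = 2^5*(2^521 - 1), i.e. 0 mod p.
     */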
312
313 out[0] = two62m3 - in[0];
314 out[1] = two62m2 - in[1];
315 out[2] = two62m2 - in[2];
316 out[3] = two62m2 - in[3];
317 out[4] = two62m2 - in[4];
318 out[5] = two62m2 - in[5];
319 out[6] = two62m2 - in[6];
320 out[7] = two62m2 - in[7];
321 out[8] = two62m2 - in[8];
322 }
323
324 /*-
325 * felem_diff64 subtracts |in| from |out|
326 * On entry:
327 * in[i] < 2^59 + 2^14
328 * On exit:
329 * out[i] < out[i] + 2^62
330 */
331 static void felem_diff64(felem out, const felem in)
332 {
333 /*
334 * In order to prevent underflow, we add 0 mod p before subtracting.
335 */
336 static const limb two62m3 = (((limb) 1) << 62) - (((limb) 1) << 5);
337 static const limb two62m2 = (((limb) 1) << 62) - (((limb) 1) << 4);
338
339 out[0] += two62m3 - in[0];
340 out[1] += two62m2 - in[1];
341 out[2] += two62m2 - in[2];
342 out[3] += two62m2 - in[3];
343 out[4] += two62m2 - in[4];
344 out[5] += two62m2 - in[5];
345 out[6] += two62m2 - in[6];
346 out[7] += two62m2 - in[7];
347 out[8] += two62m2 - in[8];
348 }
349
350 /*-
351 * felem_diff_128_64 subtracts |in| from |out|
352 * On entry:
353 * in[i] < 2^62 + 2^17
354 * On exit:
355 * out[i] < out[i] + 2^63
356 */
357 static void felem_diff_128_64(largefelem out, const felem in)
358 {
359 /*
360 * In order to prevent underflow, we add 0 mod p before subtracting.
361 */
362     static const limb two63m6 = (((limb) 1) << 63) - (((limb) 1) << 6);
363     static const limb two63m5 = (((limb) 1) << 63) - (((limb) 1) << 5);
364
365 out[0] += two63m6 - in[0];
366 out[1] += two63m5 - in[1];
367 out[2] += two63m5 - in[2];
368 out[3] += two63m5 - in[3];
369 out[4] += two63m5 - in[4];
370 out[5] += two63m5 - in[5];
371 out[6] += two63m5 - in[6];
372 out[7] += two63m5 - in[7];
373 out[8] += two63m5 - in[8];
374 }
375
376 /*-
377  * felem_diff128 subtracts |in| from |out|
378 * On entry:
379 * in[i] < 2^126
380 * On exit:
381 * out[i] < out[i] + 2^127 - 2^69
382 */
383 static void felem_diff128(largefelem out, const largefelem in)
384 {
385 /*
386 * In order to prevent underflow, we add 0 mod p before subtracting.
387 */
388 static const uint128_t two127m70 =
389 (((uint128_t) 1) << 127) - (((uint128_t) 1) << 70);
390 static const uint128_t two127m69 =
391 (((uint128_t) 1) << 127) - (((uint128_t) 1) << 69);
392
393 out[0] += (two127m70 - in[0]);
394 out[1] += (two127m69 - in[1]);
395 out[2] += (two127m69 - in[2]);
396 out[3] += (two127m69 - in[3]);
397 out[4] += (two127m69 - in[4]);
398 out[5] += (two127m69 - in[5]);
399 out[6] += (two127m69 - in[6]);
400 out[7] += (two127m69 - in[7]);
401 out[8] += (two127m69 - in[8]);
402 }
403
404 /*-
405 * felem_square sets |out| = |in|^2
406 * On entry:
407 * in[i] < 2^62
408 * On exit:
409 * out[i] < 17 * max(in[i]) * max(in[i])
410 */
411 static void felem_square(largefelem out, const felem in)
412 {
413 felem inx2, inx4;
414 felem_scalar(inx2, in, 2);
415 felem_scalar(inx4, in, 4);
416
417 /*-
418      * We have many cases where we want to do
419 * in[x] * in[y] +
420 * in[y] * in[x]
421 * This is obviously just
422 * 2 * in[x] * in[y]
423 * However, rather than do the doubling on the 128 bit result, we
424 * double one of the inputs to the multiplication by reading from
425 * |inx2| */
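    /*-
     * For example, the coefficient of 2^58 in in^2 is
     *     in[0]*in[1] + in[1]*in[0] = in[0]*(2*in[1]) = in[0]*inx2[1],
     * which is how out[1] below is formed.
     */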
426
427 out[0] = ((uint128_t) in[0]) * in[0];
428 out[1] = ((uint128_t) in[0]) * inx2[1];
429 out[2] = ((uint128_t) in[0]) * inx2[2] + ((uint128_t) in[1]) * in[1];
430 out[3] = ((uint128_t) in[0]) * inx2[3] + ((uint128_t) in[1]) * inx2[2];
431 out[4] = ((uint128_t) in[0]) * inx2[4] +
432 ((uint128_t) in[1]) * inx2[3] + ((uint128_t) in[2]) * in[2];
433 out[5] = ((uint128_t) in[0]) * inx2[5] +
434 ((uint128_t) in[1]) * inx2[4] + ((uint128_t) in[2]) * inx2[3];
435 out[6] = ((uint128_t) in[0]) * inx2[6] +
436 ((uint128_t) in[1]) * inx2[5] +
437 ((uint128_t) in[2]) * inx2[4] + ((uint128_t) in[3]) * in[3];
438 out[7] = ((uint128_t) in[0]) * inx2[7] +
439 ((uint128_t) in[1]) * inx2[6] +
440 ((uint128_t) in[2]) * inx2[5] + ((uint128_t) in[3]) * inx2[4];
441 out[8] = ((uint128_t) in[0]) * inx2[8] +
442 ((uint128_t) in[1]) * inx2[7] +
443 ((uint128_t) in[2]) * inx2[6] +
444 ((uint128_t) in[3]) * inx2[5] + ((uint128_t) in[4]) * in[4];
445
446 /*
447 * The remaining limbs fall above 2^521, with the first falling at 2^522.
448 * They correspond to locations one bit up from the limbs produced above
449 * so we would have to multiply by two to align them. Again, rather than
450 * operate on the 128-bit result, we double one of the inputs to the
451 * multiplication. If we want to double for both this reason, and the
452 * reason above, then we end up multiplying by four.
453 */
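    /*-
     * Worked example: the term in[1]*in[8] has weight 2^(58+464) = 2^522.
     * Since 2^521 = 1 (mod p), 2^522 = 2 (mod p), and the term also occurs
     * twice by symmetry, so it contributes 4*in[1]*in[8] at weight 2^0,
     * i.e. the in[1]*inx4[8] term of out[0] below.
     */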
454
455 /* 9 */
456 out[0] += ((uint128_t) in[1]) * inx4[8] +
457 ((uint128_t) in[2]) * inx4[7] +
458 ((uint128_t) in[3]) * inx4[6] + ((uint128_t) in[4]) * inx4[5];
459
460 /* 10 */
461 out[1] += ((uint128_t) in[2]) * inx4[8] +
462 ((uint128_t) in[3]) * inx4[7] +
463 ((uint128_t) in[4]) * inx4[6] + ((uint128_t) in[5]) * inx2[5];
464
465 /* 11 */
466 out[2] += ((uint128_t) in[3]) * inx4[8] +
467 ((uint128_t) in[4]) * inx4[7] + ((uint128_t) in[5]) * inx4[6];
468
469 /* 12 */
470 out[3] += ((uint128_t) in[4]) * inx4[8] +
471 ((uint128_t) in[5]) * inx4[7] + ((uint128_t) in[6]) * inx2[6];
472
473 /* 13 */
474 out[4] += ((uint128_t) in[5]) * inx4[8] + ((uint128_t) in[6]) * inx4[7];
475
476 /* 14 */
477 out[5] += ((uint128_t) in[6]) * inx4[8] + ((uint128_t) in[7]) * inx2[7];
478
479 /* 15 */
480 out[6] += ((uint128_t) in[7]) * inx4[8];
481
482 /* 16 */
483 out[7] += ((uint128_t) in[8]) * inx2[8];
484 }
485
486 /*-
487 * felem_mul sets |out| = |in1| * |in2|
488 * On entry:
489 * in1[i] < 2^64
490 * in2[i] < 2^63
491 * On exit:
492 * out[i] < 17 * max(in1[i]) * max(in2[i])
493 */
494 static void felem_mul(largefelem out, const felem in1, const felem in2)
495 {
496 felem in2x2;
497 felem_scalar(in2x2, in2, 2);
498
499 out[0] = ((uint128_t) in1[0]) * in2[0];
500
501 out[1] = ((uint128_t) in1[0]) * in2[1] + ((uint128_t) in1[1]) * in2[0];
502
503 out[2] = ((uint128_t) in1[0]) * in2[2] +
504 ((uint128_t) in1[1]) * in2[1] + ((uint128_t) in1[2]) * in2[0];
505
506 out[3] = ((uint128_t) in1[0]) * in2[3] +
507 ((uint128_t) in1[1]) * in2[2] +
508 ((uint128_t) in1[2]) * in2[1] + ((uint128_t) in1[3]) * in2[0];
509
510 out[4] = ((uint128_t) in1[0]) * in2[4] +
511 ((uint128_t) in1[1]) * in2[3] +
512 ((uint128_t) in1[2]) * in2[2] +
513 ((uint128_t) in1[3]) * in2[1] + ((uint128_t) in1[4]) * in2[0];
514
515 out[5] = ((uint128_t) in1[0]) * in2[5] +
516 ((uint128_t) in1[1]) * in2[4] +
517 ((uint128_t) in1[2]) * in2[3] +
518 ((uint128_t) in1[3]) * in2[2] +
519 ((uint128_t) in1[4]) * in2[1] + ((uint128_t) in1[5]) * in2[0];
520
521 out[6] = ((uint128_t) in1[0]) * in2[6] +
522 ((uint128_t) in1[1]) * in2[5] +
523 ((uint128_t) in1[2]) * in2[4] +
524 ((uint128_t) in1[3]) * in2[3] +
525 ((uint128_t) in1[4]) * in2[2] +
526 ((uint128_t) in1[5]) * in2[1] + ((uint128_t) in1[6]) * in2[0];
527
528 out[7] = ((uint128_t) in1[0]) * in2[7] +
529 ((uint128_t) in1[1]) * in2[6] +
530 ((uint128_t) in1[2]) * in2[5] +
531 ((uint128_t) in1[3]) * in2[4] +
532 ((uint128_t) in1[4]) * in2[3] +
533 ((uint128_t) in1[5]) * in2[2] +
534 ((uint128_t) in1[6]) * in2[1] + ((uint128_t) in1[7]) * in2[0];
535
536 out[8] = ((uint128_t) in1[0]) * in2[8] +
537 ((uint128_t) in1[1]) * in2[7] +
538 ((uint128_t) in1[2]) * in2[6] +
539 ((uint128_t) in1[3]) * in2[5] +
540 ((uint128_t) in1[4]) * in2[4] +
541 ((uint128_t) in1[5]) * in2[3] +
542 ((uint128_t) in1[6]) * in2[2] +
543 ((uint128_t) in1[7]) * in2[1] + ((uint128_t) in1[8]) * in2[0];
544
545 /* See comment in felem_square about the use of in2x2 here */
546
547 out[0] += ((uint128_t) in1[1]) * in2x2[8] +
548 ((uint128_t) in1[2]) * in2x2[7] +
549 ((uint128_t) in1[3]) * in2x2[6] +
550 ((uint128_t) in1[4]) * in2x2[5] +
551 ((uint128_t) in1[5]) * in2x2[4] +
552 ((uint128_t) in1[6]) * in2x2[3] +
553 ((uint128_t) in1[7]) * in2x2[2] + ((uint128_t) in1[8]) * in2x2[1];
554
555 out[1] += ((uint128_t) in1[2]) * in2x2[8] +
556 ((uint128_t) in1[3]) * in2x2[7] +
557 ((uint128_t) in1[4]) * in2x2[6] +
558 ((uint128_t) in1[5]) * in2x2[5] +
559 ((uint128_t) in1[6]) * in2x2[4] +
560 ((uint128_t) in1[7]) * in2x2[3] + ((uint128_t) in1[8]) * in2x2[2];
561
562 out[2] += ((uint128_t) in1[3]) * in2x2[8] +
563 ((uint128_t) in1[4]) * in2x2[7] +
564 ((uint128_t) in1[5]) * in2x2[6] +
565 ((uint128_t) in1[6]) * in2x2[5] +
566 ((uint128_t) in1[7]) * in2x2[4] + ((uint128_t) in1[8]) * in2x2[3];
567
568 out[3] += ((uint128_t) in1[4]) * in2x2[8] +
569 ((uint128_t) in1[5]) * in2x2[7] +
570 ((uint128_t) in1[6]) * in2x2[6] +
571 ((uint128_t) in1[7]) * in2x2[5] + ((uint128_t) in1[8]) * in2x2[4];
572
573 out[4] += ((uint128_t) in1[5]) * in2x2[8] +
574 ((uint128_t) in1[6]) * in2x2[7] +
575 ((uint128_t) in1[7]) * in2x2[6] + ((uint128_t) in1[8]) * in2x2[5];
576
577 out[5] += ((uint128_t) in1[6]) * in2x2[8] +
578 ((uint128_t) in1[7]) * in2x2[7] + ((uint128_t) in1[8]) * in2x2[6];
579
580 out[6] += ((uint128_t) in1[7]) * in2x2[8] +
581 ((uint128_t) in1[8]) * in2x2[7];
582
583 out[7] += ((uint128_t) in1[8]) * in2x2[8];
584 }
585
586 static const limb bottom52bits = 0xfffffffffffff;
587
588 /*-
589 * felem_reduce converts a largefelem to an felem.
590 * On entry:
591 * in[i] < 2^128
592 * On exit:
593 * out[i] < 2^59 + 2^14
594 */
595 static void felem_reduce(felem out, const largefelem in)
596 {
597 u64 overflow1, overflow2;
598
599 out[0] = ((limb) in[0]) & bottom58bits;
600 out[1] = ((limb) in[1]) & bottom58bits;
601 out[2] = ((limb) in[2]) & bottom58bits;
602 out[3] = ((limb) in[3]) & bottom58bits;
603 out[4] = ((limb) in[4]) & bottom58bits;
604 out[5] = ((limb) in[5]) & bottom58bits;
605 out[6] = ((limb) in[6]) & bottom58bits;
606 out[7] = ((limb) in[7]) & bottom58bits;
607 out[8] = ((limb) in[8]) & bottom58bits;
608
609 /* out[i] < 2^58 */
610
611 out[1] += ((limb) in[0]) >> 58;
612 out[1] += (((limb) (in[0] >> 64)) & bottom52bits) << 6;
613 /*-
614 * out[1] < 2^58 + 2^6 + 2^58
615 * = 2^59 + 2^6
616 */
617 out[2] += ((limb) (in[0] >> 64)) >> 52;
618
619 out[2] += ((limb) in[1]) >> 58;
620 out[2] += (((limb) (in[1] >> 64)) & bottom52bits) << 6;
621 out[3] += ((limb) (in[1] >> 64)) >> 52;
622
623 out[3] += ((limb) in[2]) >> 58;
624 out[3] += (((limb) (in[2] >> 64)) & bottom52bits) << 6;
625 out[4] += ((limb) (in[2] >> 64)) >> 52;
626
627 out[4] += ((limb) in[3]) >> 58;
628 out[4] += (((limb) (in[3] >> 64)) & bottom52bits) << 6;
629 out[5] += ((limb) (in[3] >> 64)) >> 52;
630
631 out[5] += ((limb) in[4]) >> 58;
632 out[5] += (((limb) (in[4] >> 64)) & bottom52bits) << 6;
633 out[6] += ((limb) (in[4] >> 64)) >> 52;
634
635 out[6] += ((limb) in[5]) >> 58;
636 out[6] += (((limb) (in[5] >> 64)) & bottom52bits) << 6;
637 out[7] += ((limb) (in[5] >> 64)) >> 52;
638
639 out[7] += ((limb) in[6]) >> 58;
640 out[7] += (((limb) (in[6] >> 64)) & bottom52bits) << 6;
641 out[8] += ((limb) (in[6] >> 64)) >> 52;
642
643 out[8] += ((limb) in[7]) >> 58;
644 out[8] += (((limb) (in[7] >> 64)) & bottom52bits) << 6;
645 /*-
646 * out[x > 1] < 2^58 + 2^6 + 2^58 + 2^12
647 * < 2^59 + 2^13
648 */
649 overflow1 = ((limb) (in[7] >> 64)) >> 52;
650
651 overflow1 += ((limb) in[8]) >> 58;
652 overflow1 += (((limb) (in[8] >> 64)) & bottom52bits) << 6;
653 overflow2 = ((limb) (in[8] >> 64)) >> 52;
654
655 overflow1 <<= 1; /* overflow1 < 2^13 + 2^7 + 2^59 */
656 overflow2 <<= 1; /* overflow2 < 2^13 */
657
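    /*-
     * overflow1 holds the part of |in| with weight 2^522 and overflow2 the
     * part with weight 2^580 = 2^58 * 2^522. Since 2^521 = 1 (mod p), a
     * chunk at weight 2^522 folds back in as twice its value at weight 2^0,
     * and one at weight 2^580 as twice its value at weight 2^58, hence the
     * shifts above and the additions into out[0] and out[1] below.
     */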
658 out[0] += overflow1; /* out[0] < 2^60 */
659 out[1] += overflow2; /* out[1] < 2^59 + 2^6 + 2^13 */
660
661 out[1] += out[0] >> 58;
662 out[0] &= bottom58bits;
663 /*-
664 * out[0] < 2^58
665 * out[1] < 2^59 + 2^6 + 2^13 + 2^2
666 * < 2^59 + 2^14
667 */
668 }
669
670 static void felem_square_reduce(felem out, const felem in)
671 {
672 largefelem tmp;
673 felem_square(tmp, in);
674 felem_reduce(out, tmp);
675 }
676
677 static void felem_mul_reduce(felem out, const felem in1, const felem in2)
678 {
679 largefelem tmp;
680 felem_mul(tmp, in1, in2);
681 felem_reduce(out, tmp);
682 }
683
684 /*-
685 * felem_inv calculates |out| = |in|^{-1}
686 *
687 * Based on Fermat's Little Theorem:
688 * a^p = a (mod p)
689 * a^{p-1} = 1 (mod p)
690 * a^{p-2} = a^{-1} (mod p)
691 */
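/*-
 * The exponent used is p - 2 = 2^521 - 3, whose binary expansion is 519
 * ones followed by "01". The chain below first builds in^(2^k - 1) for
 * k = 2, 4, 8, 16, 32, 64, 128, 256, 512 by blocks of squarings each
 * followed by a multiply, then appends the trailing bits with nine more
 * squarings and two final multiplications.
 */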
692 static void felem_inv(felem out, const felem in)
693 {
694 felem ftmp, ftmp2, ftmp3, ftmp4;
695 largefelem tmp;
696 unsigned i;
697
698 felem_square(tmp, in);
699 felem_reduce(ftmp, tmp); /* 2^1 */
700 felem_mul(tmp, in, ftmp);
701 felem_reduce(ftmp, tmp); /* 2^2 - 2^0 */
702 felem_assign(ftmp2, ftmp);
703 felem_square(tmp, ftmp);
704 felem_reduce(ftmp, tmp); /* 2^3 - 2^1 */
705 felem_mul(tmp, in, ftmp);
706 felem_reduce(ftmp, tmp); /* 2^3 - 2^0 */
707 felem_square(tmp, ftmp);
708 felem_reduce(ftmp, tmp); /* 2^4 - 2^1 */
709
710 felem_square(tmp, ftmp2);
711 felem_reduce(ftmp3, tmp); /* 2^3 - 2^1 */
712 felem_square(tmp, ftmp3);
713 felem_reduce(ftmp3, tmp); /* 2^4 - 2^2 */
714 felem_mul(tmp, ftmp3, ftmp2);
715 felem_reduce(ftmp3, tmp); /* 2^4 - 2^0 */
716
717 felem_assign(ftmp2, ftmp3);
718 felem_square(tmp, ftmp3);
719 felem_reduce(ftmp3, tmp); /* 2^5 - 2^1 */
720 felem_square(tmp, ftmp3);
721 felem_reduce(ftmp3, tmp); /* 2^6 - 2^2 */
722 felem_square(tmp, ftmp3);
723 felem_reduce(ftmp3, tmp); /* 2^7 - 2^3 */
724 felem_square(tmp, ftmp3);
725 felem_reduce(ftmp3, tmp); /* 2^8 - 2^4 */
726 felem_assign(ftmp4, ftmp3);
727 felem_mul(tmp, ftmp3, ftmp);
728 felem_reduce(ftmp4, tmp); /* 2^8 - 2^1 */
729 felem_square(tmp, ftmp4);
730 felem_reduce(ftmp4, tmp); /* 2^9 - 2^2 */
731 felem_mul(tmp, ftmp3, ftmp2);
732 felem_reduce(ftmp3, tmp); /* 2^8 - 2^0 */
733 felem_assign(ftmp2, ftmp3);
734
735 for (i = 0; i < 8; i++) {
736 felem_square(tmp, ftmp3);
737 felem_reduce(ftmp3, tmp); /* 2^16 - 2^8 */
738 }
739 felem_mul(tmp, ftmp3, ftmp2);
740 felem_reduce(ftmp3, tmp); /* 2^16 - 2^0 */
741 felem_assign(ftmp2, ftmp3);
742
743 for (i = 0; i < 16; i++) {
744 felem_square(tmp, ftmp3);
745 felem_reduce(ftmp3, tmp); /* 2^32 - 2^16 */
746 }
747 felem_mul(tmp, ftmp3, ftmp2);
748 felem_reduce(ftmp3, tmp); /* 2^32 - 2^0 */
749 felem_assign(ftmp2, ftmp3);
750
751 for (i = 0; i < 32; i++) {
752 felem_square(tmp, ftmp3);
753 felem_reduce(ftmp3, tmp); /* 2^64 - 2^32 */
754 }
755 felem_mul(tmp, ftmp3, ftmp2);
756 felem_reduce(ftmp3, tmp); /* 2^64 - 2^0 */
757 felem_assign(ftmp2, ftmp3);
758
759 for (i = 0; i < 64; i++) {
760 felem_square(tmp, ftmp3);
761 felem_reduce(ftmp3, tmp); /* 2^128 - 2^64 */
762 }
763 felem_mul(tmp, ftmp3, ftmp2);
764 felem_reduce(ftmp3, tmp); /* 2^128 - 2^0 */
765 felem_assign(ftmp2, ftmp3);
766
767 for (i = 0; i < 128; i++) {
768 felem_square(tmp, ftmp3);
769 felem_reduce(ftmp3, tmp); /* 2^256 - 2^128 */
770 }
771 felem_mul(tmp, ftmp3, ftmp2);
772 felem_reduce(ftmp3, tmp); /* 2^256 - 2^0 */
773 felem_assign(ftmp2, ftmp3);
774
775 for (i = 0; i < 256; i++) {
776 felem_square(tmp, ftmp3);
777 felem_reduce(ftmp3, tmp); /* 2^512 - 2^256 */
778 }
779 felem_mul(tmp, ftmp3, ftmp2);
780 felem_reduce(ftmp3, tmp); /* 2^512 - 2^0 */
781
782 for (i = 0; i < 9; i++) {
783 felem_square(tmp, ftmp3);
784 felem_reduce(ftmp3, tmp); /* 2^521 - 2^9 */
785 }
786 felem_mul(tmp, ftmp3, ftmp4);
787     felem_reduce(ftmp3, tmp);   /* 2^521 - 2^2 */
788 felem_mul(tmp, ftmp3, in);
789     felem_reduce(out, tmp);     /* 2^521 - 3 */
790 }
791
792 /* This is 2^521-1, expressed as an felem */
793 static const felem kPrime = {
794 0x03ffffffffffffff, 0x03ffffffffffffff, 0x03ffffffffffffff,
795 0x03ffffffffffffff, 0x03ffffffffffffff, 0x03ffffffffffffff,
796 0x03ffffffffffffff, 0x03ffffffffffffff, 0x01ffffffffffffff
797 };
798
799 /*-
800 * felem_is_zero returns a limb with all bits set if |in| == 0 (mod p) and 0
801 * otherwise.
802 * On entry:
803 * in[i] < 2^59 + 2^14
804 */
805 static limb felem_is_zero(const felem in)
806 {
807 felem ftmp;
808 limb is_zero, is_p;
809 felem_assign(ftmp, in);
810
811 ftmp[0] += ftmp[8] >> 57;
812 ftmp[8] &= bottom57bits;
813 /* ftmp[8] < 2^57 */
814 ftmp[1] += ftmp[0] >> 58;
815 ftmp[0] &= bottom58bits;
816 ftmp[2] += ftmp[1] >> 58;
817 ftmp[1] &= bottom58bits;
818 ftmp[3] += ftmp[2] >> 58;
819 ftmp[2] &= bottom58bits;
820 ftmp[4] += ftmp[3] >> 58;
821 ftmp[3] &= bottom58bits;
822 ftmp[5] += ftmp[4] >> 58;
823 ftmp[4] &= bottom58bits;
824 ftmp[6] += ftmp[5] >> 58;
825 ftmp[5] &= bottom58bits;
826 ftmp[7] += ftmp[6] >> 58;
827 ftmp[6] &= bottom58bits;
828 ftmp[8] += ftmp[7] >> 58;
829 ftmp[7] &= bottom58bits;
830 /* ftmp[8] < 2^57 + 4 */
831
832 /*
833 * The ninth limb of 2*(2^521-1) is 0x03ffffffffffffff, which is greater
834 * than our bound for ftmp[8]. Therefore we only have to check if the
835      * value is zero or 2^521-1.
836 */
837
838 is_zero = 0;
839 is_zero |= ftmp[0];
840 is_zero |= ftmp[1];
841 is_zero |= ftmp[2];
842 is_zero |= ftmp[3];
843 is_zero |= ftmp[4];
844 is_zero |= ftmp[5];
845 is_zero |= ftmp[6];
846 is_zero |= ftmp[7];
847 is_zero |= ftmp[8];
848
849 is_zero--;
850 /*
851 * We know that ftmp[i] < 2^63, therefore the only way that the top bit
852 * can be set is if is_zero was 0 before the decrement.
853 */
854 is_zero = ((s64) is_zero) >> 63;
855
856 is_p = ftmp[0] ^ kPrime[0];
857 is_p |= ftmp[1] ^ kPrime[1];
858 is_p |= ftmp[2] ^ kPrime[2];
859 is_p |= ftmp[3] ^ kPrime[3];
860 is_p |= ftmp[4] ^ kPrime[4];
861 is_p |= ftmp[5] ^ kPrime[5];
862 is_p |= ftmp[6] ^ kPrime[6];
863 is_p |= ftmp[7] ^ kPrime[7];
864 is_p |= ftmp[8] ^ kPrime[8];
865
866 is_p--;
867 is_p = ((s64) is_p) >> 63;
868
869 is_zero |= is_p;
870 return is_zero;
871 }
872
873 static int felem_is_zero_int(const felem in)
874 {
875 return (int)(felem_is_zero(in) & ((limb) 1));
876 }
877
878 /*-
879 * felem_contract converts |in| to its unique, minimal representation.
880 * On entry:
881 * in[i] < 2^59 + 2^14
882 */
883 static void felem_contract(felem out, const felem in)
884 {
885 limb is_p, is_greater, sign;
886 static const limb two58 = ((limb) 1) << 58;
887
888 felem_assign(out, in);
889
890 out[0] += out[8] >> 57;
891 out[8] &= bottom57bits;
892 /* out[8] < 2^57 */
893 out[1] += out[0] >> 58;
894 out[0] &= bottom58bits;
895 out[2] += out[1] >> 58;
896 out[1] &= bottom58bits;
897 out[3] += out[2] >> 58;
898 out[2] &= bottom58bits;
899 out[4] += out[3] >> 58;
900 out[3] &= bottom58bits;
901 out[5] += out[4] >> 58;
902 out[4] &= bottom58bits;
903 out[6] += out[5] >> 58;
904 out[5] &= bottom58bits;
905 out[7] += out[6] >> 58;
906 out[6] &= bottom58bits;
907 out[8] += out[7] >> 58;
908 out[7] &= bottom58bits;
909 /* out[8] < 2^57 + 4 */
910
911 /*
912 * If the value is greater than 2^521-1 then we have to subtract 2^521-1
913 * out. See the comments in felem_is_zero regarding why we don't test for
914 * other multiples of the prime.
915 */
916
917 /*
918 * First, if |out| is equal to 2^521-1, we subtract it out to get zero.
919 */
920
921 is_p = out[0] ^ kPrime[0];
922 is_p |= out[1] ^ kPrime[1];
923 is_p |= out[2] ^ kPrime[2];
924 is_p |= out[3] ^ kPrime[3];
925 is_p |= out[4] ^ kPrime[4];
926 is_p |= out[5] ^ kPrime[5];
927 is_p |= out[6] ^ kPrime[6];
928 is_p |= out[7] ^ kPrime[7];
929 is_p |= out[8] ^ kPrime[8];
930
931 is_p--;
932 is_p &= is_p << 32;
933 is_p &= is_p << 16;
934 is_p &= is_p << 8;
935 is_p &= is_p << 4;
936 is_p &= is_p << 2;
937 is_p &= is_p << 1;
938 is_p = ((s64) is_p) >> 63;
939 is_p = ~is_p;
940
941 /* is_p is 0 iff |out| == 2^521-1 and all ones otherwise */
942
943 out[0] &= is_p;
944 out[1] &= is_p;
945 out[2] &= is_p;
946 out[3] &= is_p;
947 out[4] &= is_p;
948 out[5] &= is_p;
949 out[6] &= is_p;
950 out[7] &= is_p;
951 out[8] &= is_p;
952
953 /*
954 * In order to test that |out| >= 2^521-1 we need only test if out[8] >>
955 * 57 is greater than zero as (2^521-1) + x >= 2^522
956 */
957 is_greater = out[8] >> 57;
958 is_greater |= is_greater << 32;
959 is_greater |= is_greater << 16;
960 is_greater |= is_greater << 8;
961 is_greater |= is_greater << 4;
962 is_greater |= is_greater << 2;
963 is_greater |= is_greater << 1;
964 is_greater = ((s64) is_greater) >> 63;
965
966 out[0] -= kPrime[0] & is_greater;
967 out[1] -= kPrime[1] & is_greater;
968 out[2] -= kPrime[2] & is_greater;
969 out[3] -= kPrime[3] & is_greater;
970 out[4] -= kPrime[4] & is_greater;
971 out[5] -= kPrime[5] & is_greater;
972 out[6] -= kPrime[6] & is_greater;
973 out[7] -= kPrime[7] & is_greater;
974 out[8] -= kPrime[8] & is_greater;
975
976 /* Eliminate negative coefficients */
977 sign = -(out[0] >> 63);
978 out[0] += (two58 & sign);
979 out[1] -= (1 & sign);
980 sign = -(out[1] >> 63);
981 out[1] += (two58 & sign);
982 out[2] -= (1 & sign);
983 sign = -(out[2] >> 63);
984 out[2] += (two58 & sign);
985 out[3] -= (1 & sign);
986 sign = -(out[3] >> 63);
987 out[3] += (two58 & sign);
988 out[4] -= (1 & sign);
989 sign = -(out[4] >> 63);
990 out[4] += (two58 & sign);
991 out[5] -= (1 & sign);
992 sign = -(out[0] >> 63);
993 out[5] += (two58 & sign);
994 out[6] -= (1 & sign);
995 sign = -(out[6] >> 63);
996 out[6] += (two58 & sign);
997 out[7] -= (1 & sign);
998 sign = -(out[7] >> 63);
999 out[7] += (two58 & sign);
1000 out[8] -= (1 & sign);
1001 sign = -(out[5] >> 63);
1002 out[5] += (two58 & sign);
1003 out[6] -= (1 & sign);
1004 sign = -(out[6] >> 63);
1005 out[6] += (two58 & sign);
1006 out[7] -= (1 & sign);
1007 sign = -(out[7] >> 63);
1008 out[7] += (two58 & sign);
1009 out[8] -= (1 & sign);
1010 }
1011
1012 /*-
1013 * Group operations
1014 * ----------------
1015 *
1016 * Building on top of the field operations we have the operations on the
1017 * elliptic curve group itself. Points on the curve are represented in Jacobian
1018 * coordinates */
1019
1020 /*-
1021  * point_double calculates 2*(x_in, y_in, z_in)
1022 *
1023 * The method is taken from:
1024 * http://hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-3.html#doubling-dbl-2001-b
1025 *
1026  * Outputs can equal corresponding inputs, i.e., x_out == x_in is allowed,
1027 * while x_out == y_in is not (maybe this works, but it's not tested). */
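/*-
 * Summary of the formulas computed below (matching the inline comments):
 *   delta = z^2, gamma = y^2, beta = x*gamma,
 *   alpha = 3*(x - delta)*(x + delta),
 *   x' = alpha^2 - 8*beta,
 *   z' = (y + z)^2 - gamma - delta,
 *   y' = alpha*(4*beta - x') - 8*gamma^2.
 */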
1028 static void
1029 point_double(felem x_out, felem y_out, felem z_out,
1030 const felem x_in, const felem y_in, const felem z_in)
1031 {
1032 largefelem tmp, tmp2;
1033 felem delta, gamma, beta, alpha, ftmp, ftmp2;
1034
1035 felem_assign(ftmp, x_in);
1036 felem_assign(ftmp2, x_in);
1037
1038 /* delta = z^2 */
1039 felem_square(tmp, z_in);
1040 felem_reduce(delta, tmp); /* delta[i] < 2^59 + 2^14 */
1041
1042 /* gamma = y^2 */
1043 felem_square(tmp, y_in);
1044 felem_reduce(gamma, tmp); /* gamma[i] < 2^59 + 2^14 */
1045
1046 /* beta = x*gamma */
1047 felem_mul(tmp, x_in, gamma);
1048 felem_reduce(beta, tmp); /* beta[i] < 2^59 + 2^14 */
1049
1050 /* alpha = 3*(x-delta)*(x+delta) */
1051 felem_diff64(ftmp, delta);
1052 /* ftmp[i] < 2^61 */
1053 felem_sum64(ftmp2, delta);
1054 /* ftmp2[i] < 2^60 + 2^15 */
1055 felem_scalar64(ftmp2, 3);
1056 /* ftmp2[i] < 3*2^60 + 3*2^15 */
1057 felem_mul(tmp, ftmp, ftmp2);
1058 /*-
1059 * tmp[i] < 17(3*2^121 + 3*2^76)
1060      *        = 51*2^121 + 51*2^76
1061 * < 64*2^121 + 64*2^76
1062 * = 2^127 + 2^82
1063 * < 2^128
1064 */
1065 felem_reduce(alpha, tmp);
1066
1067 /* x' = alpha^2 - 8*beta */
1068 felem_square(tmp, alpha);
1069 /*
1070 * tmp[i] < 17*2^120 < 2^125
1071 */
1072 felem_assign(ftmp, beta);
1073 felem_scalar64(ftmp, 8);
1074 /* ftmp[i] < 2^62 + 2^17 */
1075 felem_diff_128_64(tmp, ftmp);
1076 /* tmp[i] < 2^125 + 2^63 + 2^62 + 2^17 */
1077 felem_reduce(x_out, tmp);
1078
1079 /* z' = (y + z)^2 - gamma - delta */
1080 felem_sum64(delta, gamma);
1081 /* delta[i] < 2^60 + 2^15 */
1082 felem_assign(ftmp, y_in);
1083 felem_sum64(ftmp, z_in);
1084 /* ftmp[i] < 2^60 + 2^15 */
1085 felem_square(tmp, ftmp);
1086 /*
1087 * tmp[i] < 17(2^122) < 2^127
1088 */
1089 felem_diff_128_64(tmp, delta);
1090 /* tmp[i] < 2^127 + 2^63 */
1091 felem_reduce(z_out, tmp);
1092
1093 /* y' = alpha*(4*beta - x') - 8*gamma^2 */
1094 felem_scalar64(beta, 4);
1095 /* beta[i] < 2^61 + 2^16 */
1096 felem_diff64(beta, x_out);
1097 /* beta[i] < 2^61 + 2^60 + 2^16 */
1098 felem_mul(tmp, alpha, beta);
1099 /*-
1100 * tmp[i] < 17*((2^59 + 2^14)(2^61 + 2^60 + 2^16))
1101 * = 17*(2^120 + 2^75 + 2^119 + 2^74 + 2^75 + 2^30)
1102 * = 17*(2^120 + 2^119 + 2^76 + 2^74 + 2^30)
1103 * < 2^128
1104 */
1105 felem_square(tmp2, gamma);
1106 /*-
1107 * tmp2[i] < 17*(2^59 + 2^14)^2
1108 * = 17*(2^118 + 2^74 + 2^28)
1109 */
1110 felem_scalar128(tmp2, 8);
1111 /*-
1112 * tmp2[i] < 8*17*(2^118 + 2^74 + 2^28)
1113 * = 2^125 + 2^121 + 2^81 + 2^77 + 2^35 + 2^31
1114 * < 2^126
1115 */
1116 felem_diff128(tmp, tmp2);
1117 /*-
1118 * tmp[i] < 2^127 - 2^69 + 17(2^120 + 2^119 + 2^76 + 2^74 + 2^30)
1119 * = 2^127 + 2^124 + 2^122 + 2^120 + 2^118 + 2^80 + 2^78 + 2^76 +
1120 * 2^74 + 2^69 + 2^34 + 2^30
1121 * < 2^128
1122 */
1123 felem_reduce(y_out, tmp);
1124 }
1125
1126 /* copy_conditional copies in to out iff mask is all ones. */
1127 static void copy_conditional(felem out, const felem in, limb mask)
1128 {
1129 unsigned i;
1130 for (i = 0; i < NLIMBS; ++i) {
1131 const limb tmp = mask & (in[i] ^ out[i]);
1132 out[i] ^= tmp;
1133 }
1134 }
1135
1136 /*-
1137  * point_add calculates (x1, y1, z1) + (x2, y2, z2)
1138 *
1139 * The method is taken from
1140 * http://hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-3.html#addition-add-2007-bl,
1141 * adapted for mixed addition (z2 = 1, or z2 = 0 for the point at infinity).
1142 *
1143 * This function includes a branch for checking whether the two input points
1144 * are equal (while not equal to the point at infinity). This case never
1145 * happens during single point multiplication, so there is no timing leak for
1146 * ECDH or ECDSA signing. */
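/*-
 * Summary of the quantities computed below (matching the inline comments),
 * with z1z1 = z1^2 and z2z2 = z2^2: u1 = x1*z2z2, u2 = x2*z1z1,
 * s1 = y1*z2^3, s2 = y2*z1^3, h = u2 - u1, r = 2*(s2 - s1), I = (2h)^2,
 * J = h*I and V = u1*I, and then
 *   x_out = r^2 - J - 2V,
 *   y_out = r*(V - x_out) - 2*s1*J,
 *   z_out = ((z1 + z2)^2 - z1z1 - z2z2)*h.
 */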
1147 static void point_add(felem x3, felem y3, felem z3,
1148 const felem x1, const felem y1, const felem z1,
1149 const int mixed, const felem x2, const felem y2,
1150 const felem z2)
1151 {
1152 felem ftmp, ftmp2, ftmp3, ftmp4, ftmp5, ftmp6, x_out, y_out, z_out;
1153 largefelem tmp, tmp2;
1154 limb x_equal, y_equal, z1_is_zero, z2_is_zero;
1155
1156 z1_is_zero = felem_is_zero(z1);
1157 z2_is_zero = felem_is_zero(z2);
1158
1159 /* ftmp = z1z1 = z1**2 */
1160 felem_square(tmp, z1);
1161 felem_reduce(ftmp, tmp);
1162
1163 if (!mixed) {
1164 /* ftmp2 = z2z2 = z2**2 */
1165 felem_square(tmp, z2);
1166 felem_reduce(ftmp2, tmp);
1167
1168 /* u1 = ftmp3 = x1*z2z2 */
1169 felem_mul(tmp, x1, ftmp2);
1170 felem_reduce(ftmp3, tmp);
1171
1172 /* ftmp5 = z1 + z2 */
1173 felem_assign(ftmp5, z1);
1174 felem_sum64(ftmp5, z2);
1175 /* ftmp5[i] < 2^61 */
1176
1177 /* ftmp5 = (z1 + z2)**2 - z1z1 - z2z2 = 2*z1z2 */
1178 felem_square(tmp, ftmp5);
1179 /* tmp[i] < 17*2^122 */
1180 felem_diff_128_64(tmp, ftmp);
1181 /* tmp[i] < 17*2^122 + 2^63 */
1182 felem_diff_128_64(tmp, ftmp2);
1183 /* tmp[i] < 17*2^122 + 2^64 */
1184 felem_reduce(ftmp5, tmp);
1185
1186 /* ftmp2 = z2 * z2z2 */
1187 felem_mul(tmp, ftmp2, z2);
1188 felem_reduce(ftmp2, tmp);
1189
1190 /* s1 = ftmp6 = y1 * z2**3 */
1191 felem_mul(tmp, y1, ftmp2);
1192 felem_reduce(ftmp6, tmp);
1193 } else {
1194 /*
1195 * We'll assume z2 = 1 (special case z2 = 0 is handled later)
1196 */
1197
1198 /* u1 = ftmp3 = x1*z2z2 */
1199 felem_assign(ftmp3, x1);
1200
1201 /* ftmp5 = 2*z1z2 */
1202 felem_scalar(ftmp5, z1, 2);
1203
1204 /* s1 = ftmp6 = y1 * z2**3 */
1205 felem_assign(ftmp6, y1);
1206 }
1207
1208 /* u2 = x2*z1z1 */
1209 felem_mul(tmp, x2, ftmp);
1210 /* tmp[i] < 17*2^120 */
1211
1212 /* h = ftmp4 = u2 - u1 */
1213 felem_diff_128_64(tmp, ftmp3);
1214 /* tmp[i] < 17*2^120 + 2^63 */
1215 felem_reduce(ftmp4, tmp);
1216
1217 x_equal = felem_is_zero(ftmp4);
1218
1219 /* z_out = ftmp5 * h */
1220 felem_mul(tmp, ftmp5, ftmp4);
1221 felem_reduce(z_out, tmp);
1222
1223 /* ftmp = z1 * z1z1 */
1224 felem_mul(tmp, ftmp, z1);
1225 felem_reduce(ftmp, tmp);
1226
1227 /* s2 = tmp = y2 * z1**3 */
1228 felem_mul(tmp, y2, ftmp);
1229 /* tmp[i] < 17*2^120 */
1230
1231 /* r = ftmp5 = (s2 - s1)*2 */
1232 felem_diff_128_64(tmp, ftmp6);
1233 /* tmp[i] < 17*2^120 + 2^63 */
1234 felem_reduce(ftmp5, tmp);
1235 y_equal = felem_is_zero(ftmp5);
1236 felem_scalar64(ftmp5, 2);
1237 /* ftmp5[i] < 2^61 */
1238
1239 if (x_equal && y_equal && !z1_is_zero && !z2_is_zero) {
1240 point_double(x3, y3, z3, x1, y1, z1);
1241 return;
1242 }
1243
1244 /* I = ftmp = (2h)**2 */
1245 felem_assign(ftmp, ftmp4);
1246 felem_scalar64(ftmp, 2);
1247 /* ftmp[i] < 2^61 */
1248 felem_square(tmp, ftmp);
1249 /* tmp[i] < 17*2^122 */
1250 felem_reduce(ftmp, tmp);
1251
1252 /* J = ftmp2 = h * I */
1253 felem_mul(tmp, ftmp4, ftmp);
1254 felem_reduce(ftmp2, tmp);
1255
1256 /* V = ftmp4 = U1 * I */
1257 felem_mul(tmp, ftmp3, ftmp);
1258 felem_reduce(ftmp4, tmp);
1259
1260 /* x_out = r**2 - J - 2V */
1261 felem_square(tmp, ftmp5);
1262 /* tmp[i] < 17*2^122 */
1263 felem_diff_128_64(tmp, ftmp2);
1264 /* tmp[i] < 17*2^122 + 2^63 */
1265 felem_assign(ftmp3, ftmp4);
1266 felem_scalar64(ftmp4, 2);
1267 /* ftmp4[i] < 2^61 */
1268 felem_diff_128_64(tmp, ftmp4);
1269 /* tmp[i] < 17*2^122 + 2^64 */
1270 felem_reduce(x_out, tmp);
1271
1272 /* y_out = r(V-x_out) - 2 * s1 * J */
1273 felem_diff64(ftmp3, x_out);
1274 /*
1275 * ftmp3[i] < 2^60 + 2^60 = 2^61
1276 */
1277 felem_mul(tmp, ftmp5, ftmp3);
1278 /* tmp[i] < 17*2^122 */
1279 felem_mul(tmp2, ftmp6, ftmp2);
1280 /* tmp2[i] < 17*2^120 */
1281 felem_scalar128(tmp2, 2);
1282 /* tmp2[i] < 17*2^121 */
1283 felem_diff128(tmp, tmp2);
1284 /*-
1285 * tmp[i] < 2^127 - 2^69 + 17*2^122
1286      *       = 2^127 + 2^126 + 2^122 - 2^69
1287      *       < 2^128
1288 */
1289 felem_reduce(y_out, tmp);
1290
1291 copy_conditional(x_out, x2, z1_is_zero);
1292 copy_conditional(x_out, x1, z2_is_zero);
1293 copy_conditional(y_out, y2, z1_is_zero);
1294 copy_conditional(y_out, y1, z2_is_zero);
1295 copy_conditional(z_out, z2, z1_is_zero);
1296 copy_conditional(z_out, z1, z2_is_zero);
1297 felem_assign(x3, x_out);
1298 felem_assign(y3, y_out);
1299 felem_assign(z3, z_out);
1300 }
1301
1302 /*-
1303 * Base point pre computation
1304 * --------------------------
1305 *
1306 * Two different sorts of precomputed tables are used in the following code.
1307 * Each contain various points on the curve, where each point is three field
1308 * elements (x, y, z).
1309 *
1310 * For the base point table, z is usually 1 (0 for the point at infinity).
1311 * This table has 16 elements:
1312 * index | bits | point
1313 * ------+---------+------------------------------
1314 * 0 | 0 0 0 0 | 0G
1315 * 1 | 0 0 0 1 | 1G
1316 * 2 | 0 0 1 0 | 2^130G
1317 * 3 | 0 0 1 1 | (2^130 + 1)G
1318 * 4 | 0 1 0 0 | 2^260G
1319 * 5 | 0 1 0 1 | (2^260 + 1)G
1320 * 6 | 0 1 1 0 | (2^260 + 2^130)G
1321 * 7 | 0 1 1 1 | (2^260 + 2^130 + 1)G
1322 * 8 | 1 0 0 0 | 2^390G
1323 * 9 | 1 0 0 1 | (2^390 + 1)G
1324 * 10 | 1 0 1 0 | (2^390 + 2^130)G
1325 * 11 | 1 0 1 1 | (2^390 + 2^130 + 1)G
1326 * 12 | 1 1 0 0 | (2^390 + 2^260)G
1327 * 13 | 1 1 0 1 | (2^390 + 2^260 + 1)G
1328 * 14 | 1 1 1 0 | (2^390 + 2^260 + 2^130)G
1329 * 15 | 1 1 1 1 | (2^390 + 2^260 + 2^130 + 1)G
1330 *
1331 * The reason for this is so that we can clock bits into four different
1332 * locations when doing simple scalar multiplies against the base point.
1333 *
1334 * Tables for other points have table[i] = iG for i in 0 .. 16. */
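/*-
 * Example: when batch_mul below processes bit position i of a generator
 * scalar (0 <= i < 130), it gathers the scalar bits at positions i, i + 130,
 * i + 260 and i + 390 into a 4-bit index into this table, so one sweep of
 * roughly 130 doublings covers all 521 scalar bits.
 */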
1335
1336 /* gmul is the table of precomputed base points */
1337 static const felem gmul[16][3] = { {{0, 0, 0, 0, 0, 0, 0, 0, 0},
1338 {0, 0, 0, 0, 0, 0, 0, 0, 0},
1339 {0, 0, 0, 0, 0, 0, 0, 0, 0}},
1340 {{0x017e7e31c2e5bd66, 0x022cf0615a90a6fe, 0x00127a2ffa8de334,
1341 0x01dfbf9d64a3f877, 0x006b4d3dbaa14b5e, 0x014fed487e0a2bd8,
1342 0x015b4429c6481390, 0x03a73678fb2d988e, 0x00c6858e06b70404},
1343 {0x00be94769fd16650, 0x031c21a89cb09022, 0x039013fad0761353,
1344 0x02657bd099031542, 0x03273e662c97ee72, 0x01e6d11a05ebef45,
1345 0x03d1bd998f544495, 0x03001172297ed0b1, 0x011839296a789a3b},
1346 {1, 0, 0, 0, 0, 0, 0, 0, 0}},
1347 {{0x0373faacbc875bae, 0x00f325023721c671, 0x00f666fd3dbde5ad,
1348 0x01a6932363f88ea7, 0x01fc6d9e13f9c47b, 0x03bcbffc2bbf734e,
1349 0x013ee3c3647f3a92, 0x029409fefe75d07d, 0x00ef9199963d85e5},
1350 {0x011173743ad5b178, 0x02499c7c21bf7d46, 0x035beaeabb8b1a58,
1351 0x00f989c4752ea0a3, 0x0101e1de48a9c1a3, 0x01a20076be28ba6c,
1352 0x02f8052e5eb2de95, 0x01bfe8f82dea117c, 0x0160074d3c36ddb7},
1353 {1, 0, 0, 0, 0, 0, 0, 0, 0}},
1354 {{0x012f3fc373393b3b, 0x03d3d6172f1419fa, 0x02adc943c0b86873,
1355 0x00d475584177952b, 0x012a4d1673750ee2, 0x00512517a0f13b0c,
1356 0x02b184671a7b1734, 0x0315b84236f1a50a, 0x00a4afc472edbdb9},
1357 {0x00152a7077f385c4, 0x03044007d8d1c2ee, 0x0065829d61d52b52,
1358 0x00494ff6b6631d0d, 0x00a11d94d5f06bcf, 0x02d2f89474d9282e,
1359 0x0241c5727c06eeb9, 0x0386928710fbdb9d, 0x01f883f727b0dfbe},
1360 {1, 0, 0, 0, 0, 0, 0, 0, 0}},
1361 {{0x019b0c3c9185544d, 0x006243a37c9d97db, 0x02ee3cbe030a2ad2,
1362 0x00cfdd946bb51e0d, 0x0271c00932606b91, 0x03f817d1ec68c561,
1363 0x03f37009806a369c, 0x03c1f30baf184fd5, 0x01091022d6d2f065},
1364 {0x0292c583514c45ed, 0x0316fca51f9a286c, 0x00300af507c1489a,
1365 0x0295f69008298cf1, 0x02c0ed8274943d7b, 0x016509b9b47a431e,
1366 0x02bc9de9634868ce, 0x005b34929bffcb09, 0x000c1a0121681524},
1367 {1, 0, 0, 0, 0, 0, 0, 0, 0}},
1368 {{0x0286abc0292fb9f2, 0x02665eee9805b3f7, 0x01ed7455f17f26d6,
1369 0x0346355b83175d13, 0x006284944cd0a097, 0x0191895bcdec5e51,
1370 0x02e288370afda7d9, 0x03b22312bfefa67a, 0x01d104d3fc0613fe},
1371 {0x0092421a12f7e47f, 0x0077a83fa373c501, 0x03bd25c5f696bd0d,
1372 0x035c41e4d5459761, 0x01ca0d1742b24f53, 0x00aaab27863a509c,
1373 0x018b6de47df73917, 0x025c0b771705cd01, 0x01fd51d566d760a7},
1374 {1, 0, 0, 0, 0, 0, 0, 0, 0}},
1375 {{0x01dd92ff6b0d1dbd, 0x039c5e2e8f8afa69, 0x0261ed13242c3b27,
1376 0x0382c6e67026e6a0, 0x01d60b10be2089f9, 0x03c15f3dce86723f,
1377 0x03c764a32d2a062d, 0x017307eac0fad056, 0x018207c0b96c5256},
1378 {0x0196a16d60e13154, 0x03e6ce74c0267030, 0x00ddbf2b4e52a5aa,
1379 0x012738241bbf31c8, 0x00ebe8dc04685a28, 0x024c2ad6d380d4a2,
1380 0x035ee062a6e62d0e, 0x0029ed74af7d3a0f, 0x00eef32aec142ebd},
1381 {1, 0, 0, 0, 0, 0, 0, 0, 0}},
1382 {{0x00c31ec398993b39, 0x03a9f45bcda68253, 0x00ac733c24c70890,
1383 0x00872b111401ff01, 0x01d178c23195eafb, 0x03bca2c816b87f74,
1384 0x0261a9af46fbad7a, 0x0324b2a8dd3d28f9, 0x00918121d8f24e23},
1385 {0x032bc8c1ca983cd7, 0x00d869dfb08fc8c6, 0x01693cb61fce1516,
1386 0x012a5ea68f4e88a8, 0x010869cab88d7ae3, 0x009081ad277ceee1,
1387 0x033a77166d064cdc, 0x03955235a1fb3a95, 0x01251a4a9b25b65e},
1388 {1, 0, 0, 0, 0, 0, 0, 0, 0}},
1389 {{0x00148a3a1b27f40b, 0x0123186df1b31fdc, 0x00026e7beaad34ce,
1390 0x01db446ac1d3dbba, 0x0299c1a33437eaec, 0x024540610183cbb7,
1391 0x0173bb0e9ce92e46, 0x02b937e43921214b, 0x01ab0436a9bf01b5},
1392 {0x0383381640d46948, 0x008dacbf0e7f330f, 0x03602122bcc3f318,
1393 0x01ee596b200620d6, 0x03bd0585fda430b3, 0x014aed77fd123a83,
1394 0x005ace749e52f742, 0x0390fe041da2b842, 0x0189a8ceb3299242},
1395 {1, 0, 0, 0, 0, 0, 0, 0, 0}},
1396 {{0x012a19d6b3282473, 0x00c0915918b423ce, 0x023a954eb94405ae,
1397 0x00529f692be26158, 0x0289fa1b6fa4b2aa, 0x0198ae4ceea346ef,
1398 0x0047d8cdfbdedd49, 0x00cc8c8953f0f6b8, 0x001424abbff49203},
1399 {0x0256732a1115a03a, 0x0351bc38665c6733, 0x03f7b950fb4a6447,
1400 0x000afffa94c22155, 0x025763d0a4dab540, 0x000511e92d4fc283,
1401 0x030a7e9eda0ee96c, 0x004c3cd93a28bf0a, 0x017edb3a8719217f},
1402 {1, 0, 0, 0, 0, 0, 0, 0, 0}},
1403 {{0x011de5675a88e673, 0x031d7d0f5e567fbe, 0x0016b2062c970ae5,
1404 0x03f4a2be49d90aa7, 0x03cef0bd13822866, 0x03f0923dcf774a6c,
1405 0x0284bebc4f322f72, 0x016ab2645302bb2c, 0x01793f95dace0e2a},
1406 {0x010646e13527a28f, 0x01ca1babd59dc5e7, 0x01afedfd9a5595df,
1407 0x01f15785212ea6b1, 0x0324e5d64f6ae3f4, 0x02d680f526d00645,
1408 0x0127920fadf627a7, 0x03b383f75df4f684, 0x0089e0057e783b0a},
1409 {1, 0, 0, 0, 0, 0, 0, 0, 0}},
1410 {{0x00f334b9eb3c26c6, 0x0298fdaa98568dce, 0x01c2d24843a82292,
1411 0x020bcb24fa1b0711, 0x02cbdb3d2b1875e6, 0x0014907598f89422,
1412 0x03abe3aa43b26664, 0x02cbf47f720bc168, 0x0133b5e73014b79b},
1413 {0x034aab5dab05779d, 0x00cdc5d71fee9abb, 0x0399f16bd4bd9d30,
1414 0x03582fa592d82647, 0x02be1cdfb775b0e9, 0x0034f7cea32e94cb,
1415 0x0335a7f08f56f286, 0x03b707e9565d1c8b, 0x0015c946ea5b614f},
1416 {1, 0, 0, 0, 0, 0, 0, 0, 0}},
1417 {{0x024676f6cff72255, 0x00d14625cac96378, 0x00532b6008bc3767,
1418 0x01fc16721b985322, 0x023355ea1b091668, 0x029de7afdc0317c3,
1419 0x02fc8a7ca2da037c, 0x02de1217d74a6f30, 0x013f7173175b73bf},
1420 {0x0344913f441490b5, 0x0200f9e272b61eca, 0x0258a246b1dd55d2,
1421 0x03753db9ea496f36, 0x025e02937a09c5ef, 0x030cbd3d14012692,
1422 0x01793a67e70dc72a, 0x03ec1d37048a662e, 0x006550f700c32a8d},
1423 {1, 0, 0, 0, 0, 0, 0, 0, 0}},
1424 {{0x00d3f48a347eba27, 0x008e636649b61bd8, 0x00d3b93716778fb3,
1425 0x004d1915757bd209, 0x019d5311a3da44e0, 0x016d1afcbbe6aade,
1426 0x0241bf5f73265616, 0x0384672e5d50d39b, 0x005009fee522b684},
1427 {0x029b4fab064435fe, 0x018868ee095bbb07, 0x01ea3d6936cc92b8,
1428 0x000608b00f78a2f3, 0x02db911073d1c20f, 0x018205938470100a,
1429 0x01f1e4964cbe6ff2, 0x021a19a29eed4663, 0x01414485f42afa81},
1430 {1, 0, 0, 0, 0, 0, 0, 0, 0}},
1431 {{0x01612b3a17f63e34, 0x03813992885428e6, 0x022b3c215b5a9608,
1432 0x029b4057e19f2fcb, 0x0384059a587af7e6, 0x02d6400ace6fe610,
1433 0x029354d896e8e331, 0x00c047ee6dfba65e, 0x0037720542e9d49d},
1434 {0x02ce9eed7c5e9278, 0x0374ed703e79643b, 0x01316c54c4072006,
1435 0x005aaa09054b2ee8, 0x002824000c840d57, 0x03d4eba24771ed86,
1436 0x0189c50aabc3bdae, 0x0338c01541e15510, 0x00466d56e38eed42},
1437 {1, 0, 0, 0, 0, 0, 0, 0, 0}},
1438 {{0x007efd8330ad8bd6, 0x02465ed48047710b, 0x0034c6606b215e0c,
1439 0x016ae30c53cbf839, 0x01fa17bd37161216, 0x018ead4e61ce8ab9,
1440 0x005482ed5f5dee46, 0x037543755bba1d7f, 0x005e5ac7e70a9d0f},
1441 {0x0117e1bb2fdcb2a2, 0x03deea36249f40c4, 0x028d09b4a6246cb7,
1442 0x03524b8855bcf756, 0x023d7d109d5ceb58, 0x0178e43e3223ef9c,
1443 0x0154536a0c6e966a, 0x037964d1286ee9fe, 0x0199bcd90e125055},
1444 {1, 0, 0, 0, 0, 0, 0, 0, 0}}
1445 };
1446
1447 /*
1448 * select_point selects the |idx|th point from a precomputation table and
1449 * copies it to out.
1450 */
1451 /* pre_comp below is of the size provided in |size| */
1452 static void select_point(const limb idx, unsigned int size,
1453 const felem pre_comp[][3], felem out[3])
1454 {
1455 unsigned i, j;
1456 limb *outlimbs = &out[0][0];
1457 memset(outlimbs, 0, 3 * sizeof(felem));
1458
1459 for (i = 0; i < size; i++) {
1460 const limb *inlimbs = &pre_comp[i][0][0];
1461 limb mask = i ^ idx;
1462 mask |= mask >> 4;
1463 mask |= mask >> 2;
1464 mask |= mask >> 1;
1465 mask &= 1;
1466 mask--;
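        /* mask is now all ones when i == idx and all zeroes otherwise */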
1467 for (j = 0; j < NLIMBS * 3; j++)
1468 outlimbs[j] |= inlimbs[j] & mask;
1469 }
1470 }
1471
1472 /* get_bit returns the |i|th bit in |in| */
1473 static char get_bit(const felem_bytearray in, int i)
1474 {
1475 if (i < 0)
1476 return 0;
1477 return (in[i >> 3] >> (i & 7)) & 1;
1478 }
1479
1480 /*
1481 * Interleaved point multiplication using precomputed point multiples: The
1482 * small point multiples 0*P, 1*P, ..., 16*P are in pre_comp[], the scalars
1483 * in scalars[]. If g_scalar is non-NULL, we also add this multiple of the
1484 * generator, using certain (large) precomputed multiples in g_pre_comp.
1485 * Output point (X, Y, Z) is stored in x_out, y_out, z_out
1486  * Output point (X, Y, Z) is stored in x_out, y_out, z_out */
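/*-
 * Windowing note (sketch): for the non-generator points, every fifth
 * doubling consumes a six-bit window of each scalar (five new bits plus one
 * bit shared with the previous window); ec_GFp_nistp_recode_scalar_bits
 * turns the window into a sign and a digit indexing the 17-entry pre_comp
 * table, and the sign is applied by conditionally negating the selected
 * point's y coordinate.
 */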
1487 static void batch_mul(felem x_out, felem y_out, felem z_out,
1488 const felem_bytearray scalars[],
1489 const unsigned num_points, const u8 *g_scalar,
1490 const int mixed, const felem pre_comp[][17][3],
1491 const felem g_pre_comp[16][3])
1492 {
1493 int i, skip;
1494 unsigned num, gen_mul = (g_scalar != NULL);
1495 felem nq[3], tmp[4];
1496 limb bits;
1497 u8 sign, digit;
1498
1499 /* set nq to the point at infinity */
1500 memset(nq, 0, 3 * sizeof(felem));
1501
1502 /*
1503 * Loop over all scalars msb-to-lsb, interleaving additions of multiples
1504 * of the generator (last quarter of rounds) and additions of other
1505 * points multiples (every 5th round).
1506 */
1507 skip = 1; /* save two point operations in the first
1508 * round */
1509 for (i = (num_points ? 520 : 130); i >= 0; --i) {
1510 /* double */
1511 if (!skip)
1512 point_double(nq[0], nq[1], nq[2], nq[0], nq[1], nq[2]);
1513
1514 /* add multiples of the generator */
1515 if (gen_mul && (i <= 130)) {
1516 bits = get_bit(g_scalar, i + 390) << 3;
1517 if (i < 130) {
1518 bits |= get_bit(g_scalar, i + 260) << 2;
1519 bits |= get_bit(g_scalar, i + 130) << 1;
1520 bits |= get_bit(g_scalar, i);
1521 }
1522 /* select the point to add, in constant time */
1523 select_point(bits, 16, g_pre_comp, tmp);
1524 if (!skip) {
1525 /* The 1 argument below is for "mixed" */
1526 point_add(nq[0], nq[1], nq[2],
1527 nq[0], nq[1], nq[2], 1, tmp[0], tmp[1], tmp[2]);
1528 } else {
1529 memcpy(nq, tmp, 3 * sizeof(felem));
1530 skip = 0;
1531 }
1532 }
1533
1534 /* do other additions every 5 doublings */
1535 if (num_points && (i % 5 == 0)) {
1536 /* loop over all scalars */
1537 for (num = 0; num < num_points; ++num) {
1538 bits = get_bit(scalars[num], i + 4) << 5;
1539 bits |= get_bit(scalars[num], i + 3) << 4;
1540 bits |= get_bit(scalars[num], i + 2) << 3;
1541 bits |= get_bit(scalars[num], i + 1) << 2;
1542 bits |= get_bit(scalars[num], i) << 1;
1543 bits |= get_bit(scalars[num], i - 1);
1544 ec_GFp_nistp_recode_scalar_bits(&sign, &digit, bits);
1545
1546 /*
1547 * select the point to add or subtract, in constant time
1548 */
1549 select_point(digit, 17, pre_comp[num], tmp);
1550 felem_neg(tmp[3], tmp[1]); /* (X, -Y, Z) is the negative
1551 * point */
1552 copy_conditional(tmp[1], tmp[3], (-(limb) sign));
1553
1554 if (!skip) {
1555 point_add(nq[0], nq[1], nq[2],
1556 nq[0], nq[1], nq[2],
1557 mixed, tmp[0], tmp[1], tmp[2]);
1558 } else {
1559 memcpy(nq, tmp, 3 * sizeof(felem));
1560 skip = 0;
1561 }
1562 }
1563 }
1564 }
1565 felem_assign(x_out, nq[0]);
1566 felem_assign(y_out, nq[1]);
1567 felem_assign(z_out, nq[2]);
1568 }
1569
1570 /* Precomputation for the group generator. */
1571 typedef struct {
1572 felem g_pre_comp[16][3];
1573 int references;
1574 } NISTP521_PRE_COMP;
1575
1576 const EC_METHOD *EC_GFp_nistp521_method(void)
1577 {
1578 static const EC_METHOD ret = {
1579 EC_FLAGS_DEFAULT_OCT,
1580 NID_X9_62_prime_field,
1581 ec_GFp_nistp521_group_init,
1582 ec_GFp_simple_group_finish,
1583 ec_GFp_simple_group_clear_finish,
1584 ec_GFp_nist_group_copy,
1585 ec_GFp_nistp521_group_set_curve,
1586 ec_GFp_simple_group_get_curve,
1587 ec_GFp_simple_group_get_degree,
1588 ec_GFp_simple_group_check_discriminant,
1589 ec_GFp_simple_point_init,
1590 ec_GFp_simple_point_finish,
1591 ec_GFp_simple_point_clear_finish,
1592 ec_GFp_simple_point_copy,
1593 ec_GFp_simple_point_set_to_infinity,
1594 ec_GFp_simple_set_Jprojective_coordinates_GFp,
1595 ec_GFp_simple_get_Jprojective_coordinates_GFp,
1596 ec_GFp_simple_point_set_affine_coordinates,
1597 ec_GFp_nistp521_point_get_affine_coordinates,
1598 0 /* point_set_compressed_coordinates */ ,
1599 0 /* point2oct */ ,
1600 0 /* oct2point */ ,
1601 ec_GFp_simple_add,
1602 ec_GFp_simple_dbl,
1603 ec_GFp_simple_invert,
1604 ec_GFp_simple_is_at_infinity,
1605 ec_GFp_simple_is_on_curve,
1606 ec_GFp_simple_cmp,
1607 ec_GFp_simple_make_affine,
1608 ec_GFp_simple_points_make_affine,
1609 ec_GFp_nistp521_points_mul,
1610 ec_GFp_nistp521_precompute_mult,
1611 ec_GFp_nistp521_have_precompute_mult,
1612 ec_GFp_nist_field_mul,
1613 ec_GFp_nist_field_sqr,
1614 0 /* field_div */ ,
1615 0 /* field_encode */ ,
1616 0 /* field_decode */ ,
1617 0 /* field_set_to_one */
1618 };
1619
1620 return &ret;
1621 }
1622
1623 /******************************************************************************/
1624 /*
1625 * FUNCTIONS TO MANAGE PRECOMPUTATION
1626 */
1627
1628 static NISTP521_PRE_COMP *nistp521_pre_comp_new()
1629 {
1630 NISTP521_PRE_COMP *ret = NULL;
1631 ret = (NISTP521_PRE_COMP *) OPENSSL_malloc(sizeof(NISTP521_PRE_COMP));
1632 if (!ret) {
1633 ECerr(EC_F_NISTP521_PRE_COMP_NEW, ERR_R_MALLOC_FAILURE);
1634 return ret;
1635 }
1636 memset(ret->g_pre_comp, 0, sizeof(ret->g_pre_comp));
1637 ret->references = 1;
1638 return ret;
1639 }
1640
1641 static void *nistp521_pre_comp_dup(void *src_)
1642 {
1643 NISTP521_PRE_COMP *src = src_;
1644
1645 /* no need to actually copy, these objects never change! */
1646 CRYPTO_add(&src->references, 1, CRYPTO_LOCK_EC_PRE_COMP);
1647
1648 return src_;
1649 }
1650
1651 static void nistp521_pre_comp_free(void *pre_)
1652 {
1653 int i;
1654 NISTP521_PRE_COMP *pre = pre_;
1655
1656 if (!pre)
1657 return;
1658
1659 i = CRYPTO_add(&pre->references, -1, CRYPTO_LOCK_EC_PRE_COMP);
1660 if (i > 0)
1661 return;
1662
1663 OPENSSL_free(pre);
1664 }
1665
1666 static void nistp521_pre_comp_clear_free(void *pre_)
1667 {
1668 int i;
1669 NISTP521_PRE_COMP *pre = pre_;
1670
1671 if (!pre)
1672 return;
1673
1674 i = CRYPTO_add(&pre->references, -1, CRYPTO_LOCK_EC_PRE_COMP);
1675 if (i > 0)
1676 return;
1677
1678 OPENSSL_cleanse(pre, sizeof(*pre));
1679 OPENSSL_free(pre);
1680 }
1681
1682 /******************************************************************************/
1683 /*
1684 * OPENSSL EC_METHOD FUNCTIONS
1685 */
1686
1687 int ec_GFp_nistp521_group_init(EC_GROUP *group)
1688 {
1689 int ret;
1690 ret = ec_GFp_simple_group_init(group);
1691 group->a_is_minus3 = 1;
1692 return ret;
1693 }
1694
1695 int ec_GFp_nistp521_group_set_curve(EC_GROUP *group, const BIGNUM *p,
1696 const BIGNUM *a, const BIGNUM *b,
1697 BN_CTX *ctx)
1698 {
1699 int ret = 0;
1700 BN_CTX *new_ctx = NULL;
1701 BIGNUM *curve_p, *curve_a, *curve_b;
1702
1703 if (ctx == NULL)
1704 if ((ctx = new_ctx = BN_CTX_new()) == NULL)
1705 return 0;
1706 BN_CTX_start(ctx);
1707 if (((curve_p = BN_CTX_get(ctx)) == NULL) ||
1708 ((curve_a = BN_CTX_get(ctx)) == NULL) ||
1709 ((curve_b = BN_CTX_get(ctx)) == NULL))
1710 goto err;
1711 BN_bin2bn(nistp521_curve_params[0], sizeof(felem_bytearray), curve_p);
1712 BN_bin2bn(nistp521_curve_params[1], sizeof(felem_bytearray), curve_a);
1713 BN_bin2bn(nistp521_curve_params[2], sizeof(felem_bytearray), curve_b);
1714 if ((BN_cmp(curve_p, p)) || (BN_cmp(curve_a, a)) || (BN_cmp(curve_b, b))) {
1715 ECerr(EC_F_EC_GFP_NISTP521_GROUP_SET_CURVE,
1716 EC_R_WRONG_CURVE_PARAMETERS);
1717 goto err;
1718 }
1719 group->field_mod_func = BN_nist_mod_521;
1720 ret = ec_GFp_simple_group_set_curve(group, p, a, b, ctx);
1721 err:
1722 BN_CTX_end(ctx);
1723 if (new_ctx != NULL)
1724 BN_CTX_free(new_ctx);
1725 return ret;
1726 }
1727
1728 /*
1729 * Takes the Jacobian coordinates (X, Y, Z) of a point and returns (X', Y') =
1730 * (X/Z^2, Y/Z^3)
1731 */
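/*-
 * Sketch of the computation below: with z2 = Z^{-1}, the code forms
 * z1 = z2^2 and X' = X * z1, then z1 = z2^3 and Y' = Y * z1, and finally
 * calls felem_contract to obtain the unique reduced representations.
 */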
1732 int ec_GFp_nistp521_point_get_affine_coordinates(const EC_GROUP *group,
1733 const EC_POINT *point,
1734 BIGNUM *x, BIGNUM *y,
1735 BN_CTX *ctx)
1736 {
1737 felem z1, z2, x_in, y_in, x_out, y_out;
1738 largefelem tmp;
1739
1740 if (EC_POINT_is_at_infinity(group, point)) {
1741 ECerr(EC_F_EC_GFP_NISTP521_POINT_GET_AFFINE_COORDINATES,
1742 EC_R_POINT_AT_INFINITY);
1743 return 0;
1744 }
1745 if ((!BN_to_felem(x_in, &point->X)) || (!BN_to_felem(y_in, &point->Y)) ||
1746 (!BN_to_felem(z1, &point->Z)))
1747 return 0;
1748 felem_inv(z2, z1);
1749 felem_square(tmp, z2);
1750 felem_reduce(z1, tmp);
1751 felem_mul(tmp, x_in, z1);
1752 felem_reduce(x_in, tmp);
1753 felem_contract(x_out, x_in);
1754 if (x != NULL) {
1755 if (!felem_to_BN(x, x_out)) {
1756 ECerr(EC_F_EC_GFP_NISTP521_POINT_GET_AFFINE_COORDINATES,
1757 ERR_R_BN_LIB);
1758 return 0;
1759 }
1760 }
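/* z1 now holds Z^-2; multiply by z2 = Z^-1 to obtain Z^-3, then
 * y' = Y * Z^-3 */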
1761 felem_mul(tmp, z1, z2);
1762 felem_reduce(z1, tmp);
1763 felem_mul(tmp, y_in, z1);
1764 felem_reduce(y_in, tmp);
1765 felem_contract(y_out, y_in);
1766 if (y != NULL) {
1767 if (!felem_to_BN(y, y_out)) {
1768 ECerr(EC_F_EC_GFP_NISTP521_POINT_GET_AFFINE_COORDINATES,
1769 ERR_R_BN_LIB);
1770 return 0;
1771 }
1772 }
1773 return 1;
1774 }
1775
/* points below is of size |num|, and tmp_felems is of size |num+1| */
1777 static void make_points_affine(size_t num, felem points[][3],
1778 felem tmp_felems[])
1779 {
1780 /*
1781 * Runs in constant time, unless an input is the point at infinity (which
1782 * normally shouldn't happen).
1783 */
ec_GFp_nistp_points_make_affine_internal(num, points, sizeof(felem),
    tmp_felems,
    (void (*)(void *))felem_one,
    (int (*)(const void *))felem_is_zero_int,
    (void (*)(void *, const void *))felem_assign,
    (void (*)(void *, const void *))felem_square_reduce,
    (void (*)(void *, const void *, const void *))felem_mul_reduce,
    (void (*)(void *, const void *))felem_inv,
    (void (*)(void *, const void *))felem_contract);
1805 }
1806
1807 /*
1808 * Computes scalar*generator + \sum scalars[i]*points[i], ignoring NULL
* values. The result is stored in r (r can equal one of the inputs).
1810 */
1811 int ec_GFp_nistp521_points_mul(const EC_GROUP *group, EC_POINT *r,
1812 const BIGNUM *scalar, size_t num,
1813 const EC_POINT *points[],
1814 const BIGNUM *scalars[], BN_CTX *ctx)
1815 {
1816 int ret = 0;
1817 int j;
1818 int mixed = 0;
1819 BN_CTX *new_ctx = NULL;
1820 BIGNUM *x, *y, *z, *tmp_scalar;
1821 felem_bytearray g_secret;
1822 felem_bytearray *secrets = NULL;
1823 felem(*pre_comp)[17][3] = NULL;
1824 felem *tmp_felems = NULL;
1825 felem_bytearray tmp;
1826 unsigned i, num_bytes;
1827 int have_pre_comp = 0;
1828 size_t num_points = num;
1829 felem x_in, y_in, z_in, x_out, y_out, z_out;
1830 NISTP521_PRE_COMP *pre = NULL;
1831 felem(*g_pre_comp)[3] = NULL;
1832 EC_POINT *generator = NULL;
1833 const EC_POINT *p = NULL;
1834 const BIGNUM *p_scalar = NULL;
1835
1836 if (ctx == NULL)
1837 if ((ctx = new_ctx = BN_CTX_new()) == NULL)
1838 return 0;
1839 BN_CTX_start(ctx);
1840 if (((x = BN_CTX_get(ctx)) == NULL) ||
1841 ((y = BN_CTX_get(ctx)) == NULL) ||
1842 ((z = BN_CTX_get(ctx)) == NULL) ||
1843 ((tmp_scalar = BN_CTX_get(ctx)) == NULL))
1844 goto err;
1845
1846 if (scalar != NULL) {
1847 pre = EC_EX_DATA_get_data(group->extra_data,
1848 nistp521_pre_comp_dup,
1849 nistp521_pre_comp_free,
1850 nistp521_pre_comp_clear_free);
1851 if (pre)
1852 /* we have precomputation, try to use it */
1853 g_pre_comp = &pre->g_pre_comp[0];
1854 else
1855 /* try to use the standard precomputation */
1856 g_pre_comp = (felem(*)[3]) gmul;
1857 generator = EC_POINT_new(group);
1858 if (generator == NULL)
1859 goto err;
1860 /* get the generator from precomputation */
1861 if (!felem_to_BN(x, g_pre_comp[1][0]) ||
1862 !felem_to_BN(y, g_pre_comp[1][1]) ||
1863 !felem_to_BN(z, g_pre_comp[1][2])) {
1864 ECerr(EC_F_EC_GFP_NISTP521_POINTS_MUL, ERR_R_BN_LIB);
1865 goto err;
1866 }
1867 if (!EC_POINT_set_Jprojective_coordinates_GFp(group,
1868 generator, x, y, z,
1869 ctx))
1870 goto err;
1871 if (0 == EC_POINT_cmp(group, generator, group->generator, ctx))
1872 /* precomputation matches generator */
1873 have_pre_comp = 1;
1874 else
1875 /*
1876 * we don't have valid precomputation: treat the generator as a
1877 * random point
1878 */
1879 num_points++;
1880 }
1881
1882 if (num_points > 0) {
1883 if (num_points >= 2) {
1884 /*
1885 * unless we precompute multiples for just one point, converting
1886 * those into affine form is time well spent
1887 */
1888 mixed = 1;
1889 }
1890 secrets = OPENSSL_malloc(num_points * sizeof(felem_bytearray));
1891 pre_comp = OPENSSL_malloc(num_points * 17 * 3 * sizeof(felem));
1892 if (mixed)
1893 tmp_felems =
1894 OPENSSL_malloc((num_points * 17 + 1) * sizeof(felem));
1895 if ((secrets == NULL) || (pre_comp == NULL)
1896 || (mixed && (tmp_felems == NULL))) {
1897 ECerr(EC_F_EC_GFP_NISTP521_POINTS_MUL, ERR_R_MALLOC_FAILURE);
1898 goto err;
1899 }
1900
1901 /*
1902 * we treat NULL scalars as 0, and NULL points as points at infinity,
1903 * i.e., they contribute nothing to the linear combination
1904 */
1905 memset(secrets, 0, num_points * sizeof(felem_bytearray));
1906 memset(pre_comp, 0, num_points * 17 * 3 * sizeof(felem));
1907 for (i = 0; i < num_points; ++i) {
if (i == num) {
    /*
     * we didn't have a valid precomputation, so we pick the
     * generator
     */
    p = EC_GROUP_get0_generator(group);
    p_scalar = scalar;
} else {
    /* the i^th point */
    p = points[i];
    p_scalar = scalars[i];
}
1922 if ((p_scalar != NULL) && (p != NULL)) {
1923 /* reduce scalar to 0 <= scalar < 2^521 */
1924 if ((BN_num_bits(p_scalar) > 521)
1925 || (BN_is_negative(p_scalar))) {
1926 /*
1927 * this is an unusual input, and we don't guarantee
1928 * constant-timeness
1929 */
1930 if (!BN_nnmod(tmp_scalar, p_scalar, &group->order, ctx)) {
1931 ECerr(EC_F_EC_GFP_NISTP521_POINTS_MUL, ERR_R_BN_LIB);
1932 goto err;
1933 }
1934 num_bytes = BN_bn2bin(tmp_scalar, tmp);
1935 } else
1936 num_bytes = BN_bn2bin(p_scalar, tmp);
1937 flip_endian(secrets[i], tmp, num_bytes);
1938 /* precompute multiples */
1939 if ((!BN_to_felem(x_out, &p->X)) ||
1940 (!BN_to_felem(y_out, &p->Y)) ||
1941 (!BN_to_felem(z_out, &p->Z)))
1942 goto err;
1943 memcpy(pre_comp[i][1][0], x_out, sizeof(felem));
1944 memcpy(pre_comp[i][1][1], y_out, sizeof(felem));
1945 memcpy(pre_comp[i][1][2], z_out, sizeof(felem));
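/* fill in pre_comp[i][j] = [j]P: even entries by doubling [j/2]P, odd
 * entries by adding P to [j-1]P */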
1946 for (j = 2; j <= 16; ++j) {
1947 if (j & 1) {
1948 point_add(pre_comp[i][j][0], pre_comp[i][j][1],
1949 pre_comp[i][j][2], pre_comp[i][1][0],
1950 pre_comp[i][1][1], pre_comp[i][1][2], 0,
1951 pre_comp[i][j - 1][0],
1952 pre_comp[i][j - 1][1],
1953 pre_comp[i][j - 1][2]);
1954 } else {
1955 point_double(pre_comp[i][j][0], pre_comp[i][j][1],
1956 pre_comp[i][j][2], pre_comp[i][j / 2][0],
1957 pre_comp[i][j / 2][1],
1958 pre_comp[i][j / 2][2]);
1959 }
1960 }
1961 }
1962 }
1963 if (mixed)
1964 make_points_affine(num_points * 17, pre_comp[0], tmp_felems);
1965 }
1966
1967 /* the scalar for the generator */
1968 if ((scalar != NULL) && (have_pre_comp)) {
1969 memset(g_secret, 0, sizeof(g_secret));
1970 /* reduce scalar to 0 <= scalar < 2^521 */
1971 if ((BN_num_bits(scalar) > 521) || (BN_is_negative(scalar))) {
1972 /*
1973 * this is an unusual input, and we don't guarantee
1974 * constant-timeness
1975 */
1976 if (!BN_nnmod(tmp_scalar, scalar, &group->order, ctx)) {
1977 ECerr(EC_F_EC_GFP_NISTP521_POINTS_MUL, ERR_R_BN_LIB);
1978 goto err;
1979 }
1980 num_bytes = BN_bn2bin(tmp_scalar, tmp);
1981 } else
1982 num_bytes = BN_bn2bin(scalar, tmp);
1983 flip_endian(g_secret, tmp, num_bytes);
1984 /* do the multiplication with generator precomputation */
1985 batch_mul(x_out, y_out, z_out,
1986 (const felem_bytearray(*))secrets, num_points,
1987 g_secret,
1988 mixed, (const felem(*)[17][3])pre_comp,
1989 (const felem(*)[3])g_pre_comp);
1990 } else
1991 /* do the multiplication without generator precomputation */
1992 batch_mul(x_out, y_out, z_out,
1993 (const felem_bytearray(*))secrets, num_points,
1994 NULL, mixed, (const felem(*)[17][3])pre_comp, NULL);
1995 /* reduce the output to its unique minimal representation */
1996 felem_contract(x_in, x_out);
1997 felem_contract(y_in, y_out);
1998 felem_contract(z_in, z_out);
1999 if ((!felem_to_BN(x, x_in)) || (!felem_to_BN(y, y_in)) ||
2000 (!felem_to_BN(z, z_in))) {
2001 ECerr(EC_F_EC_GFP_NISTP521_POINTS_MUL, ERR_R_BN_LIB);
2002 goto err;
2003 }
2004 ret = EC_POINT_set_Jprojective_coordinates_GFp(group, r, x, y, z, ctx);
2005
2006 err:
2007 BN_CTX_end(ctx);
2008 if (generator != NULL)
2009 EC_POINT_free(generator);
2010 if (new_ctx != NULL)
2011 BN_CTX_free(new_ctx);
2012 if (secrets != NULL)
2013 OPENSSL_free(secrets);
2014 if (pre_comp != NULL)
2015 OPENSSL_free(pre_comp);
2016 if (tmp_felems != NULL)
2017 OPENSSL_free(tmp_felems);
2018 return ret;
2019 }
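
/*
 * Illustrative usage sketch (not part of this file): applications normally
 * reach the multiplication routine above through the generic EC interface,
 * assuming the group's EC_METHOD dispatches to this implementation:
 *
 *     EC_GROUP *grp = EC_GROUP_new_by_curve_name(NID_secp521r1);
 *     EC_POINT *res = EC_POINT_new(grp);
 *     BIGNUM *k = BN_new();
 *     // ... set k to the desired scalar (check all return values) ...
 *     EC_POINT_mul(grp, res, k, NULL, NULL, NULL);   // res = k*G
 *
 * EC_POINTs_mul() provides the general form
 * scalar*G + sum(scalars[i]*points[i]).
 */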
2020
2021 int ec_GFp_nistp521_precompute_mult(EC_GROUP *group, BN_CTX *ctx)
2022 {
2023 int ret = 0;
2024 NISTP521_PRE_COMP *pre = NULL;
2025 int i, j;
2026 BN_CTX *new_ctx = NULL;
2027 BIGNUM *x, *y;
2028 EC_POINT *generator = NULL;
2029 felem tmp_felems[16];
2030
2031 /* throw away old precomputation */
2032 EC_EX_DATA_free_data(&group->extra_data, nistp521_pre_comp_dup,
2033 nistp521_pre_comp_free,
2034 nistp521_pre_comp_clear_free);
2035 if (ctx == NULL)
2036 if ((ctx = new_ctx = BN_CTX_new()) == NULL)
2037 return 0;
2038 BN_CTX_start(ctx);
2039 if (((x = BN_CTX_get(ctx)) == NULL) || ((y = BN_CTX_get(ctx)) == NULL))
2040 goto err;
2041 /* get the generator */
2042 if (group->generator == NULL)
2043 goto err;
2044 generator = EC_POINT_new(group);
2045 if (generator == NULL)
2046 goto err;
2047 BN_bin2bn(nistp521_curve_params[3], sizeof(felem_bytearray), x);
2048 BN_bin2bn(nistp521_curve_params[4], sizeof(felem_bytearray), y);
2049 if (!EC_POINT_set_affine_coordinates_GFp(group, generator, x, y, ctx))
2050 goto err;
2051 if ((pre = nistp521_pre_comp_new()) == NULL)
2052 goto err;
2053 /*
2054 * if the generator is the standard one, use built-in precomputation
2055 */
2056 if (0 == EC_POINT_cmp(group, generator, group->generator, ctx)) {
2057 memcpy(pre->g_pre_comp, gmul, sizeof(pre->g_pre_comp));
2058 ret = 1;
2059 goto err;
2060 }
2061 if ((!BN_to_felem(pre->g_pre_comp[1][0], &group->generator->X)) ||
2062 (!BN_to_felem(pre->g_pre_comp[1][1], &group->generator->Y)) ||
2063 (!BN_to_felem(pre->g_pre_comp[1][2], &group->generator->Z)))
2064 goto err;
2065 /* compute 2^130*G, 2^260*G, 2^390*G */
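/* each pass of the outer loop below performs 130 doublings, so entries 2,
 * 4 and 8 of g_pre_comp end up holding 2^130*G, 2^260*G and 2^390*G */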
2066 for (i = 1; i <= 4; i <<= 1) {
2067 point_double(pre->g_pre_comp[2 * i][0], pre->g_pre_comp[2 * i][1],
2068 pre->g_pre_comp[2 * i][2], pre->g_pre_comp[i][0],
2069 pre->g_pre_comp[i][1], pre->g_pre_comp[i][2]);
2070 for (j = 0; j < 129; ++j) {
2071 point_double(pre->g_pre_comp[2 * i][0],
2072 pre->g_pre_comp[2 * i][1],
2073 pre->g_pre_comp[2 * i][2],
2074 pre->g_pre_comp[2 * i][0],
2075 pre->g_pre_comp[2 * i][1],
2076 pre->g_pre_comp[2 * i][2]);
2077 }
2078 }
2079 /* g_pre_comp[0] is the point at infinity */
2080 memset(pre->g_pre_comp[0], 0, sizeof(pre->g_pre_comp[0]));
2081 /* the remaining multiples */
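/* entry i of g_pre_comp holds b0*G + b1*2^130*G + b2*2^260*G + b3*2^390*G,
 * where b3 b2 b1 b0 is the binary representation of i */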
2082 /* 2^130*G + 2^260*G */
2083 point_add(pre->g_pre_comp[6][0], pre->g_pre_comp[6][1],
2084 pre->g_pre_comp[6][2], pre->g_pre_comp[4][0],
2085 pre->g_pre_comp[4][1], pre->g_pre_comp[4][2],
2086 0, pre->g_pre_comp[2][0], pre->g_pre_comp[2][1],
2087 pre->g_pre_comp[2][2]);
2088 /* 2^130*G + 2^390*G */
2089 point_add(pre->g_pre_comp[10][0], pre->g_pre_comp[10][1],
2090 pre->g_pre_comp[10][2], pre->g_pre_comp[8][0],
2091 pre->g_pre_comp[8][1], pre->g_pre_comp[8][2],
2092 0, pre->g_pre_comp[2][0], pre->g_pre_comp[2][1],
2093 pre->g_pre_comp[2][2]);
2094 /* 2^260*G + 2^390*G */
2095 point_add(pre->g_pre_comp[12][0], pre->g_pre_comp[12][1],
2096 pre->g_pre_comp[12][2], pre->g_pre_comp[8][0],
2097 pre->g_pre_comp[8][1], pre->g_pre_comp[8][2],
2098 0, pre->g_pre_comp[4][0], pre->g_pre_comp[4][1],
2099 pre->g_pre_comp[4][2]);
2100 /* 2^130*G + 2^260*G + 2^390*G */
2101 point_add(pre->g_pre_comp[14][0], pre->g_pre_comp[14][1],
2102 pre->g_pre_comp[14][2], pre->g_pre_comp[12][0],
2103 pre->g_pre_comp[12][1], pre->g_pre_comp[12][2],
2104 0, pre->g_pre_comp[2][0], pre->g_pre_comp[2][1],
2105 pre->g_pre_comp[2][2]);
2106 for (i = 1; i < 8; ++i) {
2107 /* odd multiples: add G */
2108 point_add(pre->g_pre_comp[2 * i + 1][0],
2109 pre->g_pre_comp[2 * i + 1][1],
2110 pre->g_pre_comp[2 * i + 1][2], pre->g_pre_comp[2 * i][0],
2111 pre->g_pre_comp[2 * i][1], pre->g_pre_comp[2 * i][2], 0,
2112 pre->g_pre_comp[1][0], pre->g_pre_comp[1][1],
2113 pre->g_pre_comp[1][2]);
2114 }
2115 make_points_affine(15, &(pre->g_pre_comp[1]), tmp_felems);
2116
2117 if (!EC_EX_DATA_set_data(&group->extra_data, pre, nistp521_pre_comp_dup,
2118 nistp521_pre_comp_free,
2119 nistp521_pre_comp_clear_free))
2120 goto err;
2121 ret = 1;
2122 pre = NULL;
2123 err:
2124 BN_CTX_end(ctx);
2125 if (generator != NULL)
2126 EC_POINT_free(generator);
2127 if (new_ctx != NULL)
2128 BN_CTX_free(new_ctx);
2129 if (pre)
2130 nistp521_pre_comp_free(pre);
2131 return ret;
2132 }
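
/*
 * Illustrative sketch (assuming this EC_METHOD is in use): the
 * precomputation above is normally triggered through the generic wrapper,
 * e.g.
 *
 *     EC_GROUP_precompute_mult(grp, NULL);
 *
 * after which EC_GROUP_have_precompute_mult(grp) returns 1 via the helper
 * below.
 */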
2133
2134 int ec_GFp_nistp521_have_precompute_mult(const EC_GROUP *group)
2135 {
2136 if (EC_EX_DATA_get_data(group->extra_data, nistp521_pre_comp_dup,
2137 nistp521_pre_comp_free,
2138 nistp521_pre_comp_clear_free)
2139 != NULL)
2140 return 1;
2141 else
2142 return 0;
2143 }
2144
2145 #else
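/* a dummy object so that the translation unit is not empty when the
 * NISTP implementation is compiled out */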
2146 static void *dummy = &dummy;
2147 #endif