]> git.ipfire.org Git - thirdparty/openssl.git/blob - crypto/ec/ecp_nistp521.c
Further comment amendments to preserve formatting prior to source reformat
[thirdparty/openssl.git] / crypto / ec / ecp_nistp521.c
1 /* crypto/ec/ecp_nistp521.c */
2 /*
3 * Written by Adam Langley (Google) for the OpenSSL project
4 */
5 /* Copyright 2011 Google Inc.
6 *
7 * Licensed under the Apache License, Version 2.0 (the "License");
8 *
9 * you may not use this file except in compliance with the License.
10 * You may obtain a copy of the License at
11 *
12 * http://www.apache.org/licenses/LICENSE-2.0
13 *
14 * Unless required by applicable law or agreed to in writing, software
15 * distributed under the License is distributed on an "AS IS" BASIS,
16 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 * See the License for the specific language governing permissions and
18 * limitations under the License.
19 */
20
21 /*
22 * A 64-bit implementation of the NIST P-521 elliptic curve point multiplication
23 *
24 * OpenSSL integration was taken from Emilia Kasper's work in ecp_nistp224.c.
25 * Otherwise based on Emilia's P224 work, which was inspired by my curve25519
26 * work which got its smarts from Daniel J. Bernstein's work on the same.
27 */
28
29 #include <openssl/opensslconf.h>
30 #ifndef OPENSSL_NO_EC_NISTP_64_GCC_128
31
32 #ifndef OPENSSL_SYS_VMS
33 #include <stdint.h>
34 #else
35 #include <inttypes.h>
36 #endif
37
38 #include <string.h>
39 #include <openssl/err.h>
40 #include "ec_lcl.h"
41
42 #if defined(__GNUC__) && (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 1))
43 /* even with gcc, the typedef won't work for 32-bit platforms */
44 typedef __uint128_t uint128_t; /* nonstandard; implemented by gcc on 64-bit platforms */
45 #else
46 #error "Need GCC 3.1 or later to define type uint128_t"
47 #endif
48
49 typedef uint8_t u8;
50 typedef uint64_t u64;
51 typedef int64_t s64;
52
53 /* The underlying field.
54 *
55 * P521 operates over GF(2^521-1). We can serialise an element of this field
56 * into 66 bytes where the most significant byte contains only a single bit. We
57 * call this an felem_bytearray. */
58
59 typedef u8 felem_bytearray[66];
60
61 /* These are the parameters of P521, taken from FIPS 186-3, section D.1.2.5.
62 * These values are big-endian. */
63 static const felem_bytearray nistp521_curve_params[5] =
64 {
65 {0x01, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* p */
66 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
67 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
68 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
69 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
70 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
71 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
72 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
73 0xff, 0xff},
74 {0x01, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* a = -3 */
75 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
76 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
77 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
78 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
79 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
80 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
81 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
82 0xff, 0xfc},
83 {0x00, 0x51, 0x95, 0x3e, 0xb9, 0x61, 0x8e, 0x1c, /* b */
84 0x9a, 0x1f, 0x92, 0x9a, 0x21, 0xa0, 0xb6, 0x85,
85 0x40, 0xee, 0xa2, 0xda, 0x72, 0x5b, 0x99, 0xb3,
86 0x15, 0xf3, 0xb8, 0xb4, 0x89, 0x91, 0x8e, 0xf1,
87 0x09, 0xe1, 0x56, 0x19, 0x39, 0x51, 0xec, 0x7e,
88 0x93, 0x7b, 0x16, 0x52, 0xc0, 0xbd, 0x3b, 0xb1,
89 0xbf, 0x07, 0x35, 0x73, 0xdf, 0x88, 0x3d, 0x2c,
90 0x34, 0xf1, 0xef, 0x45, 0x1f, 0xd4, 0x6b, 0x50,
91 0x3f, 0x00},
92 {0x00, 0xc6, 0x85, 0x8e, 0x06, 0xb7, 0x04, 0x04, /* x */
93 0xe9, 0xcd, 0x9e, 0x3e, 0xcb, 0x66, 0x23, 0x95,
94 0xb4, 0x42, 0x9c, 0x64, 0x81, 0x39, 0x05, 0x3f,
95 0xb5, 0x21, 0xf8, 0x28, 0xaf, 0x60, 0x6b, 0x4d,
96 0x3d, 0xba, 0xa1, 0x4b, 0x5e, 0x77, 0xef, 0xe7,
97 0x59, 0x28, 0xfe, 0x1d, 0xc1, 0x27, 0xa2, 0xff,
98 0xa8, 0xde, 0x33, 0x48, 0xb3, 0xc1, 0x85, 0x6a,
99 0x42, 0x9b, 0xf9, 0x7e, 0x7e, 0x31, 0xc2, 0xe5,
100 0xbd, 0x66},
101 {0x01, 0x18, 0x39, 0x29, 0x6a, 0x78, 0x9a, 0x3b, /* y */
102 0xc0, 0x04, 0x5c, 0x8a, 0x5f, 0xb4, 0x2c, 0x7d,
103 0x1b, 0xd9, 0x98, 0xf5, 0x44, 0x49, 0x57, 0x9b,
104 0x44, 0x68, 0x17, 0xaf, 0xbd, 0x17, 0x27, 0x3e,
105 0x66, 0x2c, 0x97, 0xee, 0x72, 0x99, 0x5e, 0xf4,
106 0x26, 0x40, 0xc5, 0x50, 0xb9, 0x01, 0x3f, 0xad,
107 0x07, 0x61, 0x35, 0x3c, 0x70, 0x86, 0xa2, 0x72,
108 0xc2, 0x40, 0x88, 0xbe, 0x94, 0x76, 0x9f, 0xd1,
109 0x66, 0x50}
110 };
111
112 /*-
113 * The representation of field elements.
114 * ------------------------------------
115 *
116 * We represent field elements with nine values. These values are either 64 or
117 * 128 bits and the field element represented is:
118 * v[0]*2^0 + v[1]*2^58 + v[2]*2^116 + ... + v[8]*2^464 (mod p)
119 * Each of the nine values is called a 'limb'. Since the limbs are spaced only
120 * 58 bits apart, but are greater than 58 bits in length, the most significant
121 * bits of each limb overlap with the least significant bits of the next.
122 *
123 * A field element with 64-bit limbs is an 'felem'. One with 128-bit limbs is a
124 * 'largefelem' */
125
126 #define NLIMBS 9
127
128 typedef uint64_t limb;
129 typedef limb felem[NLIMBS];
130 typedef uint128_t largefelem[NLIMBS];
131
132 static const limb bottom57bits = 0x1ffffffffffffff;
133 static const limb bottom58bits = 0x3ffffffffffffff;
134
135 /* bin66_to_felem takes a little-endian byte array and converts it into felem
136 * form. This assumes that the CPU is little-endian. */
137 static void bin66_to_felem(felem out, const u8 in[66])
138 {
139 out[0] = (*((limb*) &in[0])) & bottom58bits;
140 out[1] = (*((limb*) &in[7]) >> 2) & bottom58bits;
141 out[2] = (*((limb*) &in[14]) >> 4) & bottom58bits;
142 out[3] = (*((limb*) &in[21]) >> 6) & bottom58bits;
143 out[4] = (*((limb*) &in[29])) & bottom58bits;
144 out[5] = (*((limb*) &in[36]) >> 2) & bottom58bits;
145 out[6] = (*((limb*) &in[43]) >> 4) & bottom58bits;
146 out[7] = (*((limb*) &in[50]) >> 6) & bottom58bits;
147 out[8] = (*((limb*) &in[58])) & bottom57bits;
148 }
149
150 /* felem_to_bin66 takes an felem and serialises into a little endian, 66 byte
151 * array. This assumes that the CPU is little-endian. */
152 static void felem_to_bin66(u8 out[66], const felem in)
153 {
154 memset(out, 0, 66);
155 (*((limb*) &out[0])) = in[0];
156 (*((limb*) &out[7])) |= in[1] << 2;
157 (*((limb*) &out[14])) |= in[2] << 4;
158 (*((limb*) &out[21])) |= in[3] << 6;
159 (*((limb*) &out[29])) = in[4];
160 (*((limb*) &out[36])) |= in[5] << 2;
161 (*((limb*) &out[43])) |= in[6] << 4;
162 (*((limb*) &out[50])) |= in[7] << 6;
163 (*((limb*) &out[58])) = in[8];
164 }
165
166 /* To preserve endianness when using BN_bn2bin and BN_bin2bn */
167 static void flip_endian(u8 *out, const u8 *in, unsigned len)
168 {
169 unsigned i;
170 for (i = 0; i < len; ++i)
171 out[i] = in[len-1-i];
172 }
173
174 /* BN_to_felem converts an OpenSSL BIGNUM into an felem */
175 static int BN_to_felem(felem out, const BIGNUM *bn)
176 {
177 felem_bytearray b_in;
178 felem_bytearray b_out;
179 unsigned num_bytes;
180
181 /* BN_bn2bin eats leading zeroes */
182 memset(b_out, 0, sizeof b_out);
183 num_bytes = BN_num_bytes(bn);
184 if (num_bytes > sizeof b_out)
185 {
186 ECerr(EC_F_BN_TO_FELEM, EC_R_BIGNUM_OUT_OF_RANGE);
187 return 0;
188 }
189 if (BN_is_negative(bn))
190 {
191 ECerr(EC_F_BN_TO_FELEM, EC_R_BIGNUM_OUT_OF_RANGE);
192 return 0;
193 }
194 num_bytes = BN_bn2bin(bn, b_in);
195 flip_endian(b_out, b_in, num_bytes);
196 bin66_to_felem(out, b_out);
197 return 1;
198 }
199
200 /* felem_to_BN converts an felem into an OpenSSL BIGNUM */
201 static BIGNUM *felem_to_BN(BIGNUM *out, const felem in)
202 {
203 felem_bytearray b_in, b_out;
204 felem_to_bin66(b_in, in);
205 flip_endian(b_out, b_in, sizeof b_out);
206 return BN_bin2bn(b_out, sizeof b_out, out);
207 }
208
209
210 /*-
211 * Field operations
212 * ----------------
213 */
214
215 static void felem_one(felem out)
216 {
217 out[0] = 1;
218 out[1] = 0;
219 out[2] = 0;
220 out[3] = 0;
221 out[4] = 0;
222 out[5] = 0;
223 out[6] = 0;
224 out[7] = 0;
225 out[8] = 0;
226 }
227
228 static void felem_assign(felem out, const felem in)
229 {
230 out[0] = in[0];
231 out[1] = in[1];
232 out[2] = in[2];
233 out[3] = in[3];
234 out[4] = in[4];
235 out[5] = in[5];
236 out[6] = in[6];
237 out[7] = in[7];
238 out[8] = in[8];
239 }
240
241 /* felem_sum64 sets out = out + in. */
242 static void felem_sum64(felem out, const felem in)
243 {
244 out[0] += in[0];
245 out[1] += in[1];
246 out[2] += in[2];
247 out[3] += in[3];
248 out[4] += in[4];
249 out[5] += in[5];
250 out[6] += in[6];
251 out[7] += in[7];
252 out[8] += in[8];
253 }
254
255 /* felem_scalar sets out = in * scalar */
256 static void felem_scalar(felem out, const felem in, limb scalar)
257 {
258 out[0] = in[0] * scalar;
259 out[1] = in[1] * scalar;
260 out[2] = in[2] * scalar;
261 out[3] = in[3] * scalar;
262 out[4] = in[4] * scalar;
263 out[5] = in[5] * scalar;
264 out[6] = in[6] * scalar;
265 out[7] = in[7] * scalar;
266 out[8] = in[8] * scalar;
267 }
268
269 /* felem_scalar64 sets out = out * scalar */
270 static void felem_scalar64(felem out, limb scalar)
271 {
272 out[0] *= scalar;
273 out[1] *= scalar;
274 out[2] *= scalar;
275 out[3] *= scalar;
276 out[4] *= scalar;
277 out[5] *= scalar;
278 out[6] *= scalar;
279 out[7] *= scalar;
280 out[8] *= scalar;
281 }
282
283 /* felem_scalar128 sets out = out * scalar */
284 static void felem_scalar128(largefelem out, limb scalar)
285 {
286 out[0] *= scalar;
287 out[1] *= scalar;
288 out[2] *= scalar;
289 out[3] *= scalar;
290 out[4] *= scalar;
291 out[5] *= scalar;
292 out[6] *= scalar;
293 out[7] *= scalar;
294 out[8] *= scalar;
295 }
296
297 /*-
298 * felem_neg sets |out| to |-in|
299 * On entry:
300 * in[i] < 2^59 + 2^14
301 * On exit:
302 * out[i] < 2^62
303 */
304 static void felem_neg(felem out, const felem in)
305 {
306 /* In order to prevent underflow, we subtract from 0 mod p. */
307 static const limb two62m3 = (((limb)1) << 62) - (((limb)1) << 5);
308 static const limb two62m2 = (((limb)1) << 62) - (((limb)1) << 4);
309
310 out[0] = two62m3 - in[0];
311 out[1] = two62m2 - in[1];
312 out[2] = two62m2 - in[2];
313 out[3] = two62m2 - in[3];
314 out[4] = two62m2 - in[4];
315 out[5] = two62m2 - in[5];
316 out[6] = two62m2 - in[6];
317 out[7] = two62m2 - in[7];
318 out[8] = two62m2 - in[8];
319 }
320
321 /*-
322 * felem_diff64 subtracts |in| from |out|
323 * On entry:
324 * in[i] < 2^59 + 2^14
325 * On exit:
326 * out[i] < out[i] + 2^62
327 */
328 static void felem_diff64(felem out, const felem in)
329 {
330 /* In order to prevent underflow, we add 0 mod p before subtracting. */
331 static const limb two62m3 = (((limb)1) << 62) - (((limb)1) << 5);
332 static const limb two62m2 = (((limb)1) << 62) - (((limb)1) << 4);
333
334 out[0] += two62m3 - in[0];
335 out[1] += two62m2 - in[1];
336 out[2] += two62m2 - in[2];
337 out[3] += two62m2 - in[3];
338 out[4] += two62m2 - in[4];
339 out[5] += two62m2 - in[5];
340 out[6] += two62m2 - in[6];
341 out[7] += two62m2 - in[7];
342 out[8] += two62m2 - in[8];
343 }
344
345 /*-
346 * felem_diff_128_64 subtracts |in| from |out|
347 * On entry:
348 * in[i] < 2^62 + 2^17
349 * On exit:
350 * out[i] < out[i] + 2^63
351 */
352 static void felem_diff_128_64(largefelem out, const felem in)
353 {
354 /* In order to prevent underflow, we add 0 mod p before subtracting. */
355 static const limb two63m6 = (((limb)1) << 62) - (((limb)1) << 5);
356 static const limb two63m5 = (((limb)1) << 62) - (((limb)1) << 4);
357
358 out[0] += two63m6 - in[0];
359 out[1] += two63m5 - in[1];
360 out[2] += two63m5 - in[2];
361 out[3] += two63m5 - in[3];
362 out[4] += two63m5 - in[4];
363 out[5] += two63m5 - in[5];
364 out[6] += two63m5 - in[6];
365 out[7] += two63m5 - in[7];
366 out[8] += two63m5 - in[8];
367 }
368
369 /*-
370 * felem_diff_128_64 subtracts |in| from |out|
371 * On entry:
372 * in[i] < 2^126
373 * On exit:
374 * out[i] < out[i] + 2^127 - 2^69
375 */
376 static void felem_diff128(largefelem out, const largefelem in)
377 {
378 /* In order to prevent underflow, we add 0 mod p before subtracting. */
379 static const uint128_t two127m70 = (((uint128_t)1) << 127) - (((uint128_t)1) << 70);
380 static const uint128_t two127m69 = (((uint128_t)1) << 127) - (((uint128_t)1) << 69);
381
382 out[0] += (two127m70 - in[0]);
383 out[1] += (two127m69 - in[1]);
384 out[2] += (two127m69 - in[2]);
385 out[3] += (two127m69 - in[3]);
386 out[4] += (two127m69 - in[4]);
387 out[5] += (two127m69 - in[5]);
388 out[6] += (two127m69 - in[6]);
389 out[7] += (two127m69 - in[7]);
390 out[8] += (two127m69 - in[8]);
391 }
392
393 /*-
394 * felem_square sets |out| = |in|^2
395 * On entry:
396 * in[i] < 2^62
397 * On exit:
398 * out[i] < 17 * max(in[i]) * max(in[i])
399 */
400 static void felem_square(largefelem out, const felem in)
401 {
402 felem inx2, inx4;
403 felem_scalar(inx2, in, 2);
404 felem_scalar(inx4, in, 4);
405
406 /*-
407 * We have many cases were we want to do
408 * in[x] * in[y] +
409 * in[y] * in[x]
410 * This is obviously just
411 * 2 * in[x] * in[y]
412 * However, rather than do the doubling on the 128 bit result, we
413 * double one of the inputs to the multiplication by reading from
414 * |inx2| */
415
416 out[0] = ((uint128_t) in[0]) * in[0];
417 out[1] = ((uint128_t) in[0]) * inx2[1];
418 out[2] = ((uint128_t) in[0]) * inx2[2] +
419 ((uint128_t) in[1]) * in[1];
420 out[3] = ((uint128_t) in[0]) * inx2[3] +
421 ((uint128_t) in[1]) * inx2[2];
422 out[4] = ((uint128_t) in[0]) * inx2[4] +
423 ((uint128_t) in[1]) * inx2[3] +
424 ((uint128_t) in[2]) * in[2];
425 out[5] = ((uint128_t) in[0]) * inx2[5] +
426 ((uint128_t) in[1]) * inx2[4] +
427 ((uint128_t) in[2]) * inx2[3];
428 out[6] = ((uint128_t) in[0]) * inx2[6] +
429 ((uint128_t) in[1]) * inx2[5] +
430 ((uint128_t) in[2]) * inx2[4] +
431 ((uint128_t) in[3]) * in[3];
432 out[7] = ((uint128_t) in[0]) * inx2[7] +
433 ((uint128_t) in[1]) * inx2[6] +
434 ((uint128_t) in[2]) * inx2[5] +
435 ((uint128_t) in[3]) * inx2[4];
436 out[8] = ((uint128_t) in[0]) * inx2[8] +
437 ((uint128_t) in[1]) * inx2[7] +
438 ((uint128_t) in[2]) * inx2[6] +
439 ((uint128_t) in[3]) * inx2[5] +
440 ((uint128_t) in[4]) * in[4];
441
442 /* The remaining limbs fall above 2^521, with the first falling at
443 * 2^522. They correspond to locations one bit up from the limbs
444 * produced above so we would have to multiply by two to align them.
445 * Again, rather than operate on the 128-bit result, we double one of
446 * the inputs to the multiplication. If we want to double for both this
447 * reason, and the reason above, then we end up multiplying by four. */
448
449 /* 9 */
450 out[0] += ((uint128_t) in[1]) * inx4[8] +
451 ((uint128_t) in[2]) * inx4[7] +
452 ((uint128_t) in[3]) * inx4[6] +
453 ((uint128_t) in[4]) * inx4[5];
454
455 /* 10 */
456 out[1] += ((uint128_t) in[2]) * inx4[8] +
457 ((uint128_t) in[3]) * inx4[7] +
458 ((uint128_t) in[4]) * inx4[6] +
459 ((uint128_t) in[5]) * inx2[5];
460
461 /* 11 */
462 out[2] += ((uint128_t) in[3]) * inx4[8] +
463 ((uint128_t) in[4]) * inx4[7] +
464 ((uint128_t) in[5]) * inx4[6];
465
466 /* 12 */
467 out[3] += ((uint128_t) in[4]) * inx4[8] +
468 ((uint128_t) in[5]) * inx4[7] +
469 ((uint128_t) in[6]) * inx2[6];
470
471 /* 13 */
472 out[4] += ((uint128_t) in[5]) * inx4[8] +
473 ((uint128_t) in[6]) * inx4[7];
474
475 /* 14 */
476 out[5] += ((uint128_t) in[6]) * inx4[8] +
477 ((uint128_t) in[7]) * inx2[7];
478
479 /* 15 */
480 out[6] += ((uint128_t) in[7]) * inx4[8];
481
482 /* 16 */
483 out[7] += ((uint128_t) in[8]) * inx2[8];
484 }
485
486 /*-
487 * felem_mul sets |out| = |in1| * |in2|
488 * On entry:
489 * in1[i] < 2^64
490 * in2[i] < 2^63
491 * On exit:
492 * out[i] < 17 * max(in1[i]) * max(in2[i])
493 */
494 static void felem_mul(largefelem out, const felem in1, const felem in2)
495 {
496 felem in2x2;
497 felem_scalar(in2x2, in2, 2);
498
499 out[0] = ((uint128_t) in1[0]) * in2[0];
500
501 out[1] = ((uint128_t) in1[0]) * in2[1] +
502 ((uint128_t) in1[1]) * in2[0];
503
504 out[2] = ((uint128_t) in1[0]) * in2[2] +
505 ((uint128_t) in1[1]) * in2[1] +
506 ((uint128_t) in1[2]) * in2[0];
507
508 out[3] = ((uint128_t) in1[0]) * in2[3] +
509 ((uint128_t) in1[1]) * in2[2] +
510 ((uint128_t) in1[2]) * in2[1] +
511 ((uint128_t) in1[3]) * in2[0];
512
513 out[4] = ((uint128_t) in1[0]) * in2[4] +
514 ((uint128_t) in1[1]) * in2[3] +
515 ((uint128_t) in1[2]) * in2[2] +
516 ((uint128_t) in1[3]) * in2[1] +
517 ((uint128_t) in1[4]) * in2[0];
518
519 out[5] = ((uint128_t) in1[0]) * in2[5] +
520 ((uint128_t) in1[1]) * in2[4] +
521 ((uint128_t) in1[2]) * in2[3] +
522 ((uint128_t) in1[3]) * in2[2] +
523 ((uint128_t) in1[4]) * in2[1] +
524 ((uint128_t) in1[5]) * in2[0];
525
526 out[6] = ((uint128_t) in1[0]) * in2[6] +
527 ((uint128_t) in1[1]) * in2[5] +
528 ((uint128_t) in1[2]) * in2[4] +
529 ((uint128_t) in1[3]) * in2[3] +
530 ((uint128_t) in1[4]) * in2[2] +
531 ((uint128_t) in1[5]) * in2[1] +
532 ((uint128_t) in1[6]) * in2[0];
533
534 out[7] = ((uint128_t) in1[0]) * in2[7] +
535 ((uint128_t) in1[1]) * in2[6] +
536 ((uint128_t) in1[2]) * in2[5] +
537 ((uint128_t) in1[3]) * in2[4] +
538 ((uint128_t) in1[4]) * in2[3] +
539 ((uint128_t) in1[5]) * in2[2] +
540 ((uint128_t) in1[6]) * in2[1] +
541 ((uint128_t) in1[7]) * in2[0];
542
543 out[8] = ((uint128_t) in1[0]) * in2[8] +
544 ((uint128_t) in1[1]) * in2[7] +
545 ((uint128_t) in1[2]) * in2[6] +
546 ((uint128_t) in1[3]) * in2[5] +
547 ((uint128_t) in1[4]) * in2[4] +
548 ((uint128_t) in1[5]) * in2[3] +
549 ((uint128_t) in1[6]) * in2[2] +
550 ((uint128_t) in1[7]) * in2[1] +
551 ((uint128_t) in1[8]) * in2[0];
552
553 /* See comment in felem_square about the use of in2x2 here */
554
555 out[0] += ((uint128_t) in1[1]) * in2x2[8] +
556 ((uint128_t) in1[2]) * in2x2[7] +
557 ((uint128_t) in1[3]) * in2x2[6] +
558 ((uint128_t) in1[4]) * in2x2[5] +
559 ((uint128_t) in1[5]) * in2x2[4] +
560 ((uint128_t) in1[6]) * in2x2[3] +
561 ((uint128_t) in1[7]) * in2x2[2] +
562 ((uint128_t) in1[8]) * in2x2[1];
563
564 out[1] += ((uint128_t) in1[2]) * in2x2[8] +
565 ((uint128_t) in1[3]) * in2x2[7] +
566 ((uint128_t) in1[4]) * in2x2[6] +
567 ((uint128_t) in1[5]) * in2x2[5] +
568 ((uint128_t) in1[6]) * in2x2[4] +
569 ((uint128_t) in1[7]) * in2x2[3] +
570 ((uint128_t) in1[8]) * in2x2[2];
571
572 out[2] += ((uint128_t) in1[3]) * in2x2[8] +
573 ((uint128_t) in1[4]) * in2x2[7] +
574 ((uint128_t) in1[5]) * in2x2[6] +
575 ((uint128_t) in1[6]) * in2x2[5] +
576 ((uint128_t) in1[7]) * in2x2[4] +
577 ((uint128_t) in1[8]) * in2x2[3];
578
579 out[3] += ((uint128_t) in1[4]) * in2x2[8] +
580 ((uint128_t) in1[5]) * in2x2[7] +
581 ((uint128_t) in1[6]) * in2x2[6] +
582 ((uint128_t) in1[7]) * in2x2[5] +
583 ((uint128_t) in1[8]) * in2x2[4];
584
585 out[4] += ((uint128_t) in1[5]) * in2x2[8] +
586 ((uint128_t) in1[6]) * in2x2[7] +
587 ((uint128_t) in1[7]) * in2x2[6] +
588 ((uint128_t) in1[8]) * in2x2[5];
589
590 out[5] += ((uint128_t) in1[6]) * in2x2[8] +
591 ((uint128_t) in1[7]) * in2x2[7] +
592 ((uint128_t) in1[8]) * in2x2[6];
593
594 out[6] += ((uint128_t) in1[7]) * in2x2[8] +
595 ((uint128_t) in1[8]) * in2x2[7];
596
597 out[7] += ((uint128_t) in1[8]) * in2x2[8];
598 }
599
600 static const limb bottom52bits = 0xfffffffffffff;
601
602 /*-
603 * felem_reduce converts a largefelem to an felem.
604 * On entry:
605 * in[i] < 2^128
606 * On exit:
607 * out[i] < 2^59 + 2^14
608 */
609 static void felem_reduce(felem out, const largefelem in)
610 {
611 u64 overflow1, overflow2;
612
613 out[0] = ((limb) in[0]) & bottom58bits;
614 out[1] = ((limb) in[1]) & bottom58bits;
615 out[2] = ((limb) in[2]) & bottom58bits;
616 out[3] = ((limb) in[3]) & bottom58bits;
617 out[4] = ((limb) in[4]) & bottom58bits;
618 out[5] = ((limb) in[5]) & bottom58bits;
619 out[6] = ((limb) in[6]) & bottom58bits;
620 out[7] = ((limb) in[7]) & bottom58bits;
621 out[8] = ((limb) in[8]) & bottom58bits;
622
623 /* out[i] < 2^58 */
624
625 out[1] += ((limb) in[0]) >> 58;
626 out[1] += (((limb) (in[0] >> 64)) & bottom52bits) << 6;
627 /* out[1] < 2^58 + 2^6 + 2^58
628 * = 2^59 + 2^6 */
629 out[2] += ((limb) (in[0] >> 64)) >> 52;
630
631 out[2] += ((limb) in[1]) >> 58;
632 out[2] += (((limb) (in[1] >> 64)) & bottom52bits) << 6;
633 out[3] += ((limb) (in[1] >> 64)) >> 52;
634
635 out[3] += ((limb) in[2]) >> 58;
636 out[3] += (((limb) (in[2] >> 64)) & bottom52bits) << 6;
637 out[4] += ((limb) (in[2] >> 64)) >> 52;
638
639 out[4] += ((limb) in[3]) >> 58;
640 out[4] += (((limb) (in[3] >> 64)) & bottom52bits) << 6;
641 out[5] += ((limb) (in[3] >> 64)) >> 52;
642
643 out[5] += ((limb) in[4]) >> 58;
644 out[5] += (((limb) (in[4] >> 64)) & bottom52bits) << 6;
645 out[6] += ((limb) (in[4] >> 64)) >> 52;
646
647 out[6] += ((limb) in[5]) >> 58;
648 out[6] += (((limb) (in[5] >> 64)) & bottom52bits) << 6;
649 out[7] += ((limb) (in[5] >> 64)) >> 52;
650
651 out[7] += ((limb) in[6]) >> 58;
652 out[7] += (((limb) (in[6] >> 64)) & bottom52bits) << 6;
653 out[8] += ((limb) (in[6] >> 64)) >> 52;
654
655 out[8] += ((limb) in[7]) >> 58;
656 out[8] += (((limb) (in[7] >> 64)) & bottom52bits) << 6;
657 /* out[x > 1] < 2^58 + 2^6 + 2^58 + 2^12
658 * < 2^59 + 2^13 */
659 overflow1 = ((limb) (in[7] >> 64)) >> 52;
660
661 overflow1 += ((limb) in[8]) >> 58;
662 overflow1 += (((limb) (in[8] >> 64)) & bottom52bits) << 6;
663 overflow2 = ((limb) (in[8] >> 64)) >> 52;
664
665 overflow1 <<= 1; /* overflow1 < 2^13 + 2^7 + 2^59 */
666 overflow2 <<= 1; /* overflow2 < 2^13 */
667
668 out[0] += overflow1; /* out[0] < 2^60 */
669 out[1] += overflow2; /* out[1] < 2^59 + 2^6 + 2^13 */
670
671 out[1] += out[0] >> 58; out[0] &= bottom58bits;
672 /* out[0] < 2^58
673 * out[1] < 2^59 + 2^6 + 2^13 + 2^2
674 * < 2^59 + 2^14 */
675 }
676
677 static void felem_square_reduce(felem out, const felem in)
678 {
679 largefelem tmp;
680 felem_square(tmp, in);
681 felem_reduce(out, tmp);
682 }
683
684 static void felem_mul_reduce(felem out, const felem in1, const felem in2)
685 {
686 largefelem tmp;
687 felem_mul(tmp, in1, in2);
688 felem_reduce(out, tmp);
689 }
690
691 /*-
692 * felem_inv calculates |out| = |in|^{-1}
693 *
694 * Based on Fermat's Little Theorem:
695 * a^p = a (mod p)
696 * a^{p-1} = 1 (mod p)
697 * a^{p-2} = a^{-1} (mod p)
698 */
699 static void felem_inv(felem out, const felem in)
700 {
701 felem ftmp, ftmp2, ftmp3, ftmp4;
702 largefelem tmp;
703 unsigned i;
704
705 felem_square(tmp, in); felem_reduce(ftmp, tmp); /* 2^1 */
706 felem_mul(tmp, in, ftmp); felem_reduce(ftmp, tmp); /* 2^2 - 2^0 */
707 felem_assign(ftmp2, ftmp);
708 felem_square(tmp, ftmp); felem_reduce(ftmp, tmp); /* 2^3 - 2^1 */
709 felem_mul(tmp, in, ftmp); felem_reduce(ftmp, tmp); /* 2^3 - 2^0 */
710 felem_square(tmp, ftmp); felem_reduce(ftmp, tmp); /* 2^4 - 2^1 */
711
712 felem_square(tmp, ftmp2); felem_reduce(ftmp3, tmp); /* 2^3 - 2^1 */
713 felem_square(tmp, ftmp3); felem_reduce(ftmp3, tmp); /* 2^4 - 2^2 */
714 felem_mul(tmp, ftmp3, ftmp2); felem_reduce(ftmp3, tmp); /* 2^4 - 2^0 */
715
716 felem_assign(ftmp2, ftmp3);
717 felem_square(tmp, ftmp3); felem_reduce(ftmp3, tmp); /* 2^5 - 2^1 */
718 felem_square(tmp, ftmp3); felem_reduce(ftmp3, tmp); /* 2^6 - 2^2 */
719 felem_square(tmp, ftmp3); felem_reduce(ftmp3, tmp); /* 2^7 - 2^3 */
720 felem_square(tmp, ftmp3); felem_reduce(ftmp3, tmp); /* 2^8 - 2^4 */
721 felem_assign(ftmp4, ftmp3);
722 felem_mul(tmp, ftmp3, ftmp); felem_reduce(ftmp4, tmp); /* 2^8 - 2^1 */
723 felem_square(tmp, ftmp4); felem_reduce(ftmp4, tmp); /* 2^9 - 2^2 */
724 felem_mul(tmp, ftmp3, ftmp2); felem_reduce(ftmp3, tmp); /* 2^8 - 2^0 */
725 felem_assign(ftmp2, ftmp3);
726
727 for (i = 0; i < 8; i++)
728 {
729 felem_square(tmp, ftmp3); felem_reduce(ftmp3, tmp); /* 2^16 - 2^8 */
730 }
731 felem_mul(tmp, ftmp3, ftmp2); felem_reduce(ftmp3, tmp); /* 2^16 - 2^0 */
732 felem_assign(ftmp2, ftmp3);
733
734 for (i = 0; i < 16; i++)
735 {
736 felem_square(tmp, ftmp3); felem_reduce(ftmp3, tmp); /* 2^32 - 2^16 */
737 }
738 felem_mul(tmp, ftmp3, ftmp2); felem_reduce(ftmp3, tmp); /* 2^32 - 2^0 */
739 felem_assign(ftmp2, ftmp3);
740
741 for (i = 0; i < 32; i++)
742 {
743 felem_square(tmp, ftmp3); felem_reduce(ftmp3, tmp); /* 2^64 - 2^32 */
744 }
745 felem_mul(tmp, ftmp3, ftmp2); felem_reduce(ftmp3, tmp); /* 2^64 - 2^0 */
746 felem_assign(ftmp2, ftmp3);
747
748 for (i = 0; i < 64; i++)
749 {
750 felem_square(tmp, ftmp3); felem_reduce(ftmp3, tmp); /* 2^128 - 2^64 */
751 }
752 felem_mul(tmp, ftmp3, ftmp2); felem_reduce(ftmp3, tmp); /* 2^128 - 2^0 */
753 felem_assign(ftmp2, ftmp3);
754
755 for (i = 0; i < 128; i++)
756 {
757 felem_square(tmp, ftmp3); felem_reduce(ftmp3, tmp); /* 2^256 - 2^128 */
758 }
759 felem_mul(tmp, ftmp3, ftmp2); felem_reduce(ftmp3, tmp); /* 2^256 - 2^0 */
760 felem_assign(ftmp2, ftmp3);
761
762 for (i = 0; i < 256; i++)
763 {
764 felem_square(tmp, ftmp3); felem_reduce(ftmp3, tmp); /* 2^512 - 2^256 */
765 }
766 felem_mul(tmp, ftmp3, ftmp2); felem_reduce(ftmp3, tmp); /* 2^512 - 2^0 */
767
768 for (i = 0; i < 9; i++)
769 {
770 felem_square(tmp, ftmp3); felem_reduce(ftmp3, tmp); /* 2^521 - 2^9 */
771 }
772 felem_mul(tmp, ftmp3, ftmp4); felem_reduce(ftmp3, tmp); /* 2^512 - 2^2 */
773 felem_mul(tmp, ftmp3, in); felem_reduce(out, tmp); /* 2^512 - 3 */
774 }
775
776 /* This is 2^521-1, expressed as an felem */
777 static const felem kPrime =
778 {
779 0x03ffffffffffffff, 0x03ffffffffffffff, 0x03ffffffffffffff,
780 0x03ffffffffffffff, 0x03ffffffffffffff, 0x03ffffffffffffff,
781 0x03ffffffffffffff, 0x03ffffffffffffff, 0x01ffffffffffffff
782 };
783
784 /*-
785 * felem_is_zero returns a limb with all bits set if |in| == 0 (mod p) and 0
786 * otherwise.
787 * On entry:
788 * in[i] < 2^59 + 2^14
789 */
790 static limb felem_is_zero(const felem in)
791 {
792 felem ftmp;
793 limb is_zero, is_p;
794 felem_assign(ftmp, in);
795
796 ftmp[0] += ftmp[8] >> 57; ftmp[8] &= bottom57bits;
797 /* ftmp[8] < 2^57 */
798 ftmp[1] += ftmp[0] >> 58; ftmp[0] &= bottom58bits;
799 ftmp[2] += ftmp[1] >> 58; ftmp[1] &= bottom58bits;
800 ftmp[3] += ftmp[2] >> 58; ftmp[2] &= bottom58bits;
801 ftmp[4] += ftmp[3] >> 58; ftmp[3] &= bottom58bits;
802 ftmp[5] += ftmp[4] >> 58; ftmp[4] &= bottom58bits;
803 ftmp[6] += ftmp[5] >> 58; ftmp[5] &= bottom58bits;
804 ftmp[7] += ftmp[6] >> 58; ftmp[6] &= bottom58bits;
805 ftmp[8] += ftmp[7] >> 58; ftmp[7] &= bottom58bits;
806 /* ftmp[8] < 2^57 + 4 */
807
808 /* The ninth limb of 2*(2^521-1) is 0x03ffffffffffffff, which is
809 * greater than our bound for ftmp[8]. Therefore we only have to check
810 * if the zero is zero or 2^521-1. */
811
812 is_zero = 0;
813 is_zero |= ftmp[0];
814 is_zero |= ftmp[1];
815 is_zero |= ftmp[2];
816 is_zero |= ftmp[3];
817 is_zero |= ftmp[4];
818 is_zero |= ftmp[5];
819 is_zero |= ftmp[6];
820 is_zero |= ftmp[7];
821 is_zero |= ftmp[8];
822
823 is_zero--;
824 /* We know that ftmp[i] < 2^63, therefore the only way that the top bit
825 * can be set is if is_zero was 0 before the decrement. */
826 is_zero = ((s64) is_zero) >> 63;
827
828 is_p = ftmp[0] ^ kPrime[0];
829 is_p |= ftmp[1] ^ kPrime[1];
830 is_p |= ftmp[2] ^ kPrime[2];
831 is_p |= ftmp[3] ^ kPrime[3];
832 is_p |= ftmp[4] ^ kPrime[4];
833 is_p |= ftmp[5] ^ kPrime[5];
834 is_p |= ftmp[6] ^ kPrime[6];
835 is_p |= ftmp[7] ^ kPrime[7];
836 is_p |= ftmp[8] ^ kPrime[8];
837
838 is_p--;
839 is_p = ((s64) is_p) >> 63;
840
841 is_zero |= is_p;
842 return is_zero;
843 }
844
845 static int felem_is_zero_int(const felem in)
846 {
847 return (int) (felem_is_zero(in) & ((limb)1));
848 }
849
850 /*-
851 * felem_contract converts |in| to its unique, minimal representation.
852 * On entry:
853 * in[i] < 2^59 + 2^14
854 */
855 static void felem_contract(felem out, const felem in)
856 {
857 limb is_p, is_greater, sign;
858 static const limb two58 = ((limb)1) << 58;
859
860 felem_assign(out, in);
861
862 out[0] += out[8] >> 57; out[8] &= bottom57bits;
863 /* out[8] < 2^57 */
864 out[1] += out[0] >> 58; out[0] &= bottom58bits;
865 out[2] += out[1] >> 58; out[1] &= bottom58bits;
866 out[3] += out[2] >> 58; out[2] &= bottom58bits;
867 out[4] += out[3] >> 58; out[3] &= bottom58bits;
868 out[5] += out[4] >> 58; out[4] &= bottom58bits;
869 out[6] += out[5] >> 58; out[5] &= bottom58bits;
870 out[7] += out[6] >> 58; out[6] &= bottom58bits;
871 out[8] += out[7] >> 58; out[7] &= bottom58bits;
872 /* out[8] < 2^57 + 4 */
873
874 /* If the value is greater than 2^521-1 then we have to subtract
875 * 2^521-1 out. See the comments in felem_is_zero regarding why we
876 * don't test for other multiples of the prime. */
877
878 /* First, if |out| is equal to 2^521-1, we subtract it out to get zero. */
879
880 is_p = out[0] ^ kPrime[0];
881 is_p |= out[1] ^ kPrime[1];
882 is_p |= out[2] ^ kPrime[2];
883 is_p |= out[3] ^ kPrime[3];
884 is_p |= out[4] ^ kPrime[4];
885 is_p |= out[5] ^ kPrime[5];
886 is_p |= out[6] ^ kPrime[6];
887 is_p |= out[7] ^ kPrime[7];
888 is_p |= out[8] ^ kPrime[8];
889
890 is_p--;
891 is_p &= is_p << 32;
892 is_p &= is_p << 16;
893 is_p &= is_p << 8;
894 is_p &= is_p << 4;
895 is_p &= is_p << 2;
896 is_p &= is_p << 1;
897 is_p = ((s64) is_p) >> 63;
898 is_p = ~is_p;
899
900 /* is_p is 0 iff |out| == 2^521-1 and all ones otherwise */
901
902 out[0] &= is_p;
903 out[1] &= is_p;
904 out[2] &= is_p;
905 out[3] &= is_p;
906 out[4] &= is_p;
907 out[5] &= is_p;
908 out[6] &= is_p;
909 out[7] &= is_p;
910 out[8] &= is_p;
911
912 /* In order to test that |out| >= 2^521-1 we need only test if out[8]
913 * >> 57 is greater than zero as (2^521-1) + x >= 2^522 */
914 is_greater = out[8] >> 57;
915 is_greater |= is_greater << 32;
916 is_greater |= is_greater << 16;
917 is_greater |= is_greater << 8;
918 is_greater |= is_greater << 4;
919 is_greater |= is_greater << 2;
920 is_greater |= is_greater << 1;
921 is_greater = ((s64) is_greater) >> 63;
922
923 out[0] -= kPrime[0] & is_greater;
924 out[1] -= kPrime[1] & is_greater;
925 out[2] -= kPrime[2] & is_greater;
926 out[3] -= kPrime[3] & is_greater;
927 out[4] -= kPrime[4] & is_greater;
928 out[5] -= kPrime[5] & is_greater;
929 out[6] -= kPrime[6] & is_greater;
930 out[7] -= kPrime[7] & is_greater;
931 out[8] -= kPrime[8] & is_greater;
932
933 /* Eliminate negative coefficients */
934 sign = -(out[0] >> 63); out[0] += (two58 & sign); out[1] -= (1 & sign);
935 sign = -(out[1] >> 63); out[1] += (two58 & sign); out[2] -= (1 & sign);
936 sign = -(out[2] >> 63); out[2] += (two58 & sign); out[3] -= (1 & sign);
937 sign = -(out[3] >> 63); out[3] += (two58 & sign); out[4] -= (1 & sign);
938 sign = -(out[4] >> 63); out[4] += (two58 & sign); out[5] -= (1 & sign);
939 sign = -(out[0] >> 63); out[5] += (two58 & sign); out[6] -= (1 & sign);
940 sign = -(out[6] >> 63); out[6] += (two58 & sign); out[7] -= (1 & sign);
941 sign = -(out[7] >> 63); out[7] += (two58 & sign); out[8] -= (1 & sign);
942 sign = -(out[5] >> 63); out[5] += (two58 & sign); out[6] -= (1 & sign);
943 sign = -(out[6] >> 63); out[6] += (two58 & sign); out[7] -= (1 & sign);
944 sign = -(out[7] >> 63); out[7] += (two58 & sign); out[8] -= (1 & sign);
945 }
946
947 /*-
948 * Group operations
949 * ----------------
950 *
951 * Building on top of the field operations we have the operations on the
952 * elliptic curve group itself. Points on the curve are represented in Jacobian
953 * coordinates */
954
955 /*-
956 * point_double calcuates 2*(x_in, y_in, z_in)
957 *
958 * The method is taken from:
959 * http://hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-3.html#doubling-dbl-2001-b
960 *
961 * Outputs can equal corresponding inputs, i.e., x_out == x_in is allowed.
962 * while x_out == y_in is not (maybe this works, but it's not tested). */
963 static void
964 point_double(felem x_out, felem y_out, felem z_out,
965 const felem x_in, const felem y_in, const felem z_in)
966 {
967 largefelem tmp, tmp2;
968 felem delta, gamma, beta, alpha, ftmp, ftmp2;
969
970 felem_assign(ftmp, x_in);
971 felem_assign(ftmp2, x_in);
972
973 /* delta = z^2 */
974 felem_square(tmp, z_in);
975 felem_reduce(delta, tmp); /* delta[i] < 2^59 + 2^14 */
976
977 /* gamma = y^2 */
978 felem_square(tmp, y_in);
979 felem_reduce(gamma, tmp); /* gamma[i] < 2^59 + 2^14 */
980
981 /* beta = x*gamma */
982 felem_mul(tmp, x_in, gamma);
983 felem_reduce(beta, tmp); /* beta[i] < 2^59 + 2^14 */
984
985 /* alpha = 3*(x-delta)*(x+delta) */
986 felem_diff64(ftmp, delta);
987 /* ftmp[i] < 2^61 */
988 felem_sum64(ftmp2, delta);
989 /* ftmp2[i] < 2^60 + 2^15 */
990 felem_scalar64(ftmp2, 3);
991 /* ftmp2[i] < 3*2^60 + 3*2^15 */
992 felem_mul(tmp, ftmp, ftmp2);
993 /*-
994 * tmp[i] < 17(3*2^121 + 3*2^76)
995 * = 61*2^121 + 61*2^76
996 * < 64*2^121 + 64*2^76
997 * = 2^127 + 2^82
998 * < 2^128
999 */
1000 felem_reduce(alpha, tmp);
1001
1002 /* x' = alpha^2 - 8*beta */
1003 felem_square(tmp, alpha);
1004 /* tmp[i] < 17*2^120
1005 * < 2^125 */
1006 felem_assign(ftmp, beta);
1007 felem_scalar64(ftmp, 8);
1008 /* ftmp[i] < 2^62 + 2^17 */
1009 felem_diff_128_64(tmp, ftmp);
1010 /* tmp[i] < 2^125 + 2^63 + 2^62 + 2^17 */
1011 felem_reduce(x_out, tmp);
1012
1013 /* z' = (y + z)^2 - gamma - delta */
1014 felem_sum64(delta, gamma);
1015 /* delta[i] < 2^60 + 2^15 */
1016 felem_assign(ftmp, y_in);
1017 felem_sum64(ftmp, z_in);
1018 /* ftmp[i] < 2^60 + 2^15 */
1019 felem_square(tmp, ftmp);
1020 /* tmp[i] < 17(2^122)
1021 * < 2^127 */
1022 felem_diff_128_64(tmp, delta);
1023 /* tmp[i] < 2^127 + 2^63 */
1024 felem_reduce(z_out, tmp);
1025
1026 /* y' = alpha*(4*beta - x') - 8*gamma^2 */
1027 felem_scalar64(beta, 4);
1028 /* beta[i] < 2^61 + 2^16 */
1029 felem_diff64(beta, x_out);
1030 /* beta[i] < 2^61 + 2^60 + 2^16 */
1031 felem_mul(tmp, alpha, beta);
1032 /*-
1033 * tmp[i] < 17*((2^59 + 2^14)(2^61 + 2^60 + 2^16))
1034 * = 17*(2^120 + 2^75 + 2^119 + 2^74 + 2^75 + 2^30)
1035 * = 17*(2^120 + 2^119 + 2^76 + 2^74 + 2^30)
1036 * < 2^128
1037 */
1038 felem_square(tmp2, gamma);
1039 /*-
1040 * tmp2[i] < 17*(2^59 + 2^14)^2
1041 * = 17*(2^118 + 2^74 + 2^28)
1042 */
1043 felem_scalar128(tmp2, 8);
1044 /*-
1045 * tmp2[i] < 8*17*(2^118 + 2^74 + 2^28)
1046 * = 2^125 + 2^121 + 2^81 + 2^77 + 2^35 + 2^31
1047 * < 2^126
1048 */
1049 felem_diff128(tmp, tmp2);
1050 /*-
1051 * tmp[i] < 2^127 - 2^69 + 17(2^120 + 2^119 + 2^76 + 2^74 + 2^30)
1052 * = 2^127 + 2^124 + 2^122 + 2^120 + 2^118 + 2^80 + 2^78 + 2^76 +
1053 * 2^74 + 2^69 + 2^34 + 2^30
1054 * < 2^128
1055 */
1056 felem_reduce(y_out, tmp);
1057 }
1058
1059 /* copy_conditional copies in to out iff mask is all ones. */
1060 static void
1061 copy_conditional(felem out, const felem in, limb mask)
1062 {
1063 unsigned i;
1064 for (i = 0; i < NLIMBS; ++i)
1065 {
1066 const limb tmp = mask & (in[i] ^ out[i]);
1067 out[i] ^= tmp;
1068 }
1069 }
1070
1071 /*-
1072 * point_add calcuates (x1, y1, z1) + (x2, y2, z2)
1073 *
1074 * The method is taken from
1075 * http://hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-3.html#addition-add-2007-bl,
1076 * adapted for mixed addition (z2 = 1, or z2 = 0 for the point at infinity).
1077 *
1078 * This function includes a branch for checking whether the two input points
1079 * are equal (while not equal to the point at infinity). This case never
1080 * happens during single point multiplication, so there is no timing leak for
1081 * ECDH or ECDSA signing. */
1082 static void point_add(felem x3, felem y3, felem z3,
1083 const felem x1, const felem y1, const felem z1,
1084 const int mixed, const felem x2, const felem y2, const felem z2)
1085 {
1086 felem ftmp, ftmp2, ftmp3, ftmp4, ftmp5, ftmp6, x_out, y_out, z_out;
1087 largefelem tmp, tmp2;
1088 limb x_equal, y_equal, z1_is_zero, z2_is_zero;
1089
1090 z1_is_zero = felem_is_zero(z1);
1091 z2_is_zero = felem_is_zero(z2);
1092
1093 /* ftmp = z1z1 = z1**2 */
1094 felem_square(tmp, z1);
1095 felem_reduce(ftmp, tmp);
1096
1097 if (!mixed)
1098 {
1099 /* ftmp2 = z2z2 = z2**2 */
1100 felem_square(tmp, z2);
1101 felem_reduce(ftmp2, tmp);
1102
1103 /* u1 = ftmp3 = x1*z2z2 */
1104 felem_mul(tmp, x1, ftmp2);
1105 felem_reduce(ftmp3, tmp);
1106
1107 /* ftmp5 = z1 + z2 */
1108 felem_assign(ftmp5, z1);
1109 felem_sum64(ftmp5, z2);
1110 /* ftmp5[i] < 2^61 */
1111
1112 /* ftmp5 = (z1 + z2)**2 - z1z1 - z2z2 = 2*z1z2 */
1113 felem_square(tmp, ftmp5);
1114 /* tmp[i] < 17*2^122 */
1115 felem_diff_128_64(tmp, ftmp);
1116 /* tmp[i] < 17*2^122 + 2^63 */
1117 felem_diff_128_64(tmp, ftmp2);
1118 /* tmp[i] < 17*2^122 + 2^64 */
1119 felem_reduce(ftmp5, tmp);
1120
1121 /* ftmp2 = z2 * z2z2 */
1122 felem_mul(tmp, ftmp2, z2);
1123 felem_reduce(ftmp2, tmp);
1124
1125 /* s1 = ftmp6 = y1 * z2**3 */
1126 felem_mul(tmp, y1, ftmp2);
1127 felem_reduce(ftmp6, tmp);
1128 }
1129 else
1130 {
1131 /* We'll assume z2 = 1 (special case z2 = 0 is handled later) */
1132
1133 /* u1 = ftmp3 = x1*z2z2 */
1134 felem_assign(ftmp3, x1);
1135
1136 /* ftmp5 = 2*z1z2 */
1137 felem_scalar(ftmp5, z1, 2);
1138
1139 /* s1 = ftmp6 = y1 * z2**3 */
1140 felem_assign(ftmp6, y1);
1141 }
1142
1143 /* u2 = x2*z1z1 */
1144 felem_mul(tmp, x2, ftmp);
1145 /* tmp[i] < 17*2^120 */
1146
1147 /* h = ftmp4 = u2 - u1 */
1148 felem_diff_128_64(tmp, ftmp3);
1149 /* tmp[i] < 17*2^120 + 2^63 */
1150 felem_reduce(ftmp4, tmp);
1151
1152 x_equal = felem_is_zero(ftmp4);
1153
1154 /* z_out = ftmp5 * h */
1155 felem_mul(tmp, ftmp5, ftmp4);
1156 felem_reduce(z_out, tmp);
1157
1158 /* ftmp = z1 * z1z1 */
1159 felem_mul(tmp, ftmp, z1);
1160 felem_reduce(ftmp, tmp);
1161
1162 /* s2 = tmp = y2 * z1**3 */
1163 felem_mul(tmp, y2, ftmp);
1164 /* tmp[i] < 17*2^120 */
1165
1166 /* r = ftmp5 = (s2 - s1)*2 */
1167 felem_diff_128_64(tmp, ftmp6);
1168 /* tmp[i] < 17*2^120 + 2^63 */
1169 felem_reduce(ftmp5, tmp);
1170 y_equal = felem_is_zero(ftmp5);
1171 felem_scalar64(ftmp5, 2);
1172 /* ftmp5[i] < 2^61 */
1173
1174 if (x_equal && y_equal && !z1_is_zero && !z2_is_zero)
1175 {
1176 point_double(x3, y3, z3, x1, y1, z1);
1177 return;
1178 }
1179
1180 /* I = ftmp = (2h)**2 */
1181 felem_assign(ftmp, ftmp4);
1182 felem_scalar64(ftmp, 2);
1183 /* ftmp[i] < 2^61 */
1184 felem_square(tmp, ftmp);
1185 /* tmp[i] < 17*2^122 */
1186 felem_reduce(ftmp, tmp);
1187
1188 /* J = ftmp2 = h * I */
1189 felem_mul(tmp, ftmp4, ftmp);
1190 felem_reduce(ftmp2, tmp);
1191
1192 /* V = ftmp4 = U1 * I */
1193 felem_mul(tmp, ftmp3, ftmp);
1194 felem_reduce(ftmp4, tmp);
1195
1196 /* x_out = r**2 - J - 2V */
1197 felem_square(tmp, ftmp5);
1198 /* tmp[i] < 17*2^122 */
1199 felem_diff_128_64(tmp, ftmp2);
1200 /* tmp[i] < 17*2^122 + 2^63 */
1201 felem_assign(ftmp3, ftmp4);
1202 felem_scalar64(ftmp4, 2);
1203 /* ftmp4[i] < 2^61 */
1204 felem_diff_128_64(tmp, ftmp4);
1205 /* tmp[i] < 17*2^122 + 2^64 */
1206 felem_reduce(x_out, tmp);
1207
1208 /* y_out = r(V-x_out) - 2 * s1 * J */
1209 felem_diff64(ftmp3, x_out);
1210 /* ftmp3[i] < 2^60 + 2^60
1211 * = 2^61 */
1212 felem_mul(tmp, ftmp5, ftmp3);
1213 /* tmp[i] < 17*2^122 */
1214 felem_mul(tmp2, ftmp6, ftmp2);
1215 /* tmp2[i] < 17*2^120 */
1216 felem_scalar128(tmp2, 2);
1217 /* tmp2[i] < 17*2^121 */
1218 felem_diff128(tmp, tmp2);
1219 /* tmp[i] < 2^127 - 2^69 + 17*2^122
1220 * = 2^126 - 2^122 - 2^6 - 2^2 - 1
1221 * < 2^127 */
1222 felem_reduce(y_out, tmp);
1223
1224 copy_conditional(x_out, x2, z1_is_zero);
1225 copy_conditional(x_out, x1, z2_is_zero);
1226 copy_conditional(y_out, y2, z1_is_zero);
1227 copy_conditional(y_out, y1, z2_is_zero);
1228 copy_conditional(z_out, z2, z1_is_zero);
1229 copy_conditional(z_out, z1, z2_is_zero);
1230 felem_assign(x3, x_out);
1231 felem_assign(y3, y_out);
1232 felem_assign(z3, z_out);
1233 }
1234
1235 /*-
1236 * Base point pre computation
1237 * --------------------------
1238 *
1239 * Two different sorts of precomputed tables are used in the following code.
1240 * Each contain various points on the curve, where each point is three field
1241 * elements (x, y, z).
1242 *
1243 * For the base point table, z is usually 1 (0 for the point at infinity).
1244 * This table has 16 elements:
1245 * index | bits | point
1246 * ------+---------+------------------------------
1247 * 0 | 0 0 0 0 | 0G
1248 * 1 | 0 0 0 1 | 1G
1249 * 2 | 0 0 1 0 | 2^130G
1250 * 3 | 0 0 1 1 | (2^130 + 1)G
1251 * 4 | 0 1 0 0 | 2^260G
1252 * 5 | 0 1 0 1 | (2^260 + 1)G
1253 * 6 | 0 1 1 0 | (2^260 + 2^130)G
1254 * 7 | 0 1 1 1 | (2^260 + 2^130 + 1)G
1255 * 8 | 1 0 0 0 | 2^390G
1256 * 9 | 1 0 0 1 | (2^390 + 1)G
1257 * 10 | 1 0 1 0 | (2^390 + 2^130)G
1258 * 11 | 1 0 1 1 | (2^390 + 2^130 + 1)G
1259 * 12 | 1 1 0 0 | (2^390 + 2^260)G
1260 * 13 | 1 1 0 1 | (2^390 + 2^260 + 1)G
1261 * 14 | 1 1 1 0 | (2^390 + 2^260 + 2^130)G
1262 * 15 | 1 1 1 1 | (2^390 + 2^260 + 2^130 + 1)G
1263 *
1264 * The reason for this is so that we can clock bits into four different
1265 * locations when doing simple scalar multiplies against the base point.
1266 *
1267 * Tables for other points have table[i] = iG for i in 0 .. 16. */
1268
1269 /* gmul is the table of precomputed base points */
1270 static const felem gmul[16][3] =
1271 {{{0, 0, 0, 0, 0, 0, 0, 0, 0},
1272 {0, 0, 0, 0, 0, 0, 0, 0, 0},
1273 {0, 0, 0, 0, 0, 0, 0, 0, 0}},
1274 {{0x017e7e31c2e5bd66, 0x022cf0615a90a6fe, 0x00127a2ffa8de334,
1275 0x01dfbf9d64a3f877, 0x006b4d3dbaa14b5e, 0x014fed487e0a2bd8,
1276 0x015b4429c6481390, 0x03a73678fb2d988e, 0x00c6858e06b70404},
1277 {0x00be94769fd16650, 0x031c21a89cb09022, 0x039013fad0761353,
1278 0x02657bd099031542, 0x03273e662c97ee72, 0x01e6d11a05ebef45,
1279 0x03d1bd998f544495, 0x03001172297ed0b1, 0x011839296a789a3b},
1280 {1, 0, 0, 0, 0, 0, 0, 0, 0}},
1281 {{0x0373faacbc875bae, 0x00f325023721c671, 0x00f666fd3dbde5ad,
1282 0x01a6932363f88ea7, 0x01fc6d9e13f9c47b, 0x03bcbffc2bbf734e,
1283 0x013ee3c3647f3a92, 0x029409fefe75d07d, 0x00ef9199963d85e5},
1284 {0x011173743ad5b178, 0x02499c7c21bf7d46, 0x035beaeabb8b1a58,
1285 0x00f989c4752ea0a3, 0x0101e1de48a9c1a3, 0x01a20076be28ba6c,
1286 0x02f8052e5eb2de95, 0x01bfe8f82dea117c, 0x0160074d3c36ddb7},
1287 {1, 0, 0, 0, 0, 0, 0, 0, 0}},
1288 {{0x012f3fc373393b3b, 0x03d3d6172f1419fa, 0x02adc943c0b86873,
1289 0x00d475584177952b, 0x012a4d1673750ee2, 0x00512517a0f13b0c,
1290 0x02b184671a7b1734, 0x0315b84236f1a50a, 0x00a4afc472edbdb9},
1291 {0x00152a7077f385c4, 0x03044007d8d1c2ee, 0x0065829d61d52b52,
1292 0x00494ff6b6631d0d, 0x00a11d94d5f06bcf, 0x02d2f89474d9282e,
1293 0x0241c5727c06eeb9, 0x0386928710fbdb9d, 0x01f883f727b0dfbe},
1294 {1, 0, 0, 0, 0, 0, 0, 0, 0}},
1295 {{0x019b0c3c9185544d, 0x006243a37c9d97db, 0x02ee3cbe030a2ad2,
1296 0x00cfdd946bb51e0d, 0x0271c00932606b91, 0x03f817d1ec68c561,
1297 0x03f37009806a369c, 0x03c1f30baf184fd5, 0x01091022d6d2f065},
1298 {0x0292c583514c45ed, 0x0316fca51f9a286c, 0x00300af507c1489a,
1299 0x0295f69008298cf1, 0x02c0ed8274943d7b, 0x016509b9b47a431e,
1300 0x02bc9de9634868ce, 0x005b34929bffcb09, 0x000c1a0121681524},
1301 {1, 0, 0, 0, 0, 0, 0, 0, 0}},
1302 {{0x0286abc0292fb9f2, 0x02665eee9805b3f7, 0x01ed7455f17f26d6,
1303 0x0346355b83175d13, 0x006284944cd0a097, 0x0191895bcdec5e51,
1304 0x02e288370afda7d9, 0x03b22312bfefa67a, 0x01d104d3fc0613fe},
1305 {0x0092421a12f7e47f, 0x0077a83fa373c501, 0x03bd25c5f696bd0d,
1306 0x035c41e4d5459761, 0x01ca0d1742b24f53, 0x00aaab27863a509c,
1307 0x018b6de47df73917, 0x025c0b771705cd01, 0x01fd51d566d760a7},
1308 {1, 0, 0, 0, 0, 0, 0, 0, 0}},
1309 {{0x01dd92ff6b0d1dbd, 0x039c5e2e8f8afa69, 0x0261ed13242c3b27,
1310 0x0382c6e67026e6a0, 0x01d60b10be2089f9, 0x03c15f3dce86723f,
1311 0x03c764a32d2a062d, 0x017307eac0fad056, 0x018207c0b96c5256},
1312 {0x0196a16d60e13154, 0x03e6ce74c0267030, 0x00ddbf2b4e52a5aa,
1313 0x012738241bbf31c8, 0x00ebe8dc04685a28, 0x024c2ad6d380d4a2,
1314 0x035ee062a6e62d0e, 0x0029ed74af7d3a0f, 0x00eef32aec142ebd},
1315 {1, 0, 0, 0, 0, 0, 0, 0, 0}},
1316 {{0x00c31ec398993b39, 0x03a9f45bcda68253, 0x00ac733c24c70890,
1317 0x00872b111401ff01, 0x01d178c23195eafb, 0x03bca2c816b87f74,
1318 0x0261a9af46fbad7a, 0x0324b2a8dd3d28f9, 0x00918121d8f24e23},
1319 {0x032bc8c1ca983cd7, 0x00d869dfb08fc8c6, 0x01693cb61fce1516,
1320 0x012a5ea68f4e88a8, 0x010869cab88d7ae3, 0x009081ad277ceee1,
1321 0x033a77166d064cdc, 0x03955235a1fb3a95, 0x01251a4a9b25b65e},
1322 {1, 0, 0, 0, 0, 0, 0, 0, 0}},
1323 {{0x00148a3a1b27f40b, 0x0123186df1b31fdc, 0x00026e7beaad34ce,
1324 0x01db446ac1d3dbba, 0x0299c1a33437eaec, 0x024540610183cbb7,
1325 0x0173bb0e9ce92e46, 0x02b937e43921214b, 0x01ab0436a9bf01b5},
1326 {0x0383381640d46948, 0x008dacbf0e7f330f, 0x03602122bcc3f318,
1327 0x01ee596b200620d6, 0x03bd0585fda430b3, 0x014aed77fd123a83,
1328 0x005ace749e52f742, 0x0390fe041da2b842, 0x0189a8ceb3299242},
1329 {1, 0, 0, 0, 0, 0, 0, 0, 0}},
1330 {{0x012a19d6b3282473, 0x00c0915918b423ce, 0x023a954eb94405ae,
1331 0x00529f692be26158, 0x0289fa1b6fa4b2aa, 0x0198ae4ceea346ef,
1332 0x0047d8cdfbdedd49, 0x00cc8c8953f0f6b8, 0x001424abbff49203},
1333 {0x0256732a1115a03a, 0x0351bc38665c6733, 0x03f7b950fb4a6447,
1334 0x000afffa94c22155, 0x025763d0a4dab540, 0x000511e92d4fc283,
1335 0x030a7e9eda0ee96c, 0x004c3cd93a28bf0a, 0x017edb3a8719217f},
1336 {1, 0, 0, 0, 0, 0, 0, 0, 0}},
1337 {{0x011de5675a88e673, 0x031d7d0f5e567fbe, 0x0016b2062c970ae5,
1338 0x03f4a2be49d90aa7, 0x03cef0bd13822866, 0x03f0923dcf774a6c,
1339 0x0284bebc4f322f72, 0x016ab2645302bb2c, 0x01793f95dace0e2a},
1340 {0x010646e13527a28f, 0x01ca1babd59dc5e7, 0x01afedfd9a5595df,
1341 0x01f15785212ea6b1, 0x0324e5d64f6ae3f4, 0x02d680f526d00645,
1342 0x0127920fadf627a7, 0x03b383f75df4f684, 0x0089e0057e783b0a},
1343 {1, 0, 0, 0, 0, 0, 0, 0, 0}},
1344 {{0x00f334b9eb3c26c6, 0x0298fdaa98568dce, 0x01c2d24843a82292,
1345 0x020bcb24fa1b0711, 0x02cbdb3d2b1875e6, 0x0014907598f89422,
1346 0x03abe3aa43b26664, 0x02cbf47f720bc168, 0x0133b5e73014b79b},
1347 {0x034aab5dab05779d, 0x00cdc5d71fee9abb, 0x0399f16bd4bd9d30,
1348 0x03582fa592d82647, 0x02be1cdfb775b0e9, 0x0034f7cea32e94cb,
1349 0x0335a7f08f56f286, 0x03b707e9565d1c8b, 0x0015c946ea5b614f},
1350 {1, 0, 0, 0, 0, 0, 0, 0, 0}},
1351 {{0x024676f6cff72255, 0x00d14625cac96378, 0x00532b6008bc3767,
1352 0x01fc16721b985322, 0x023355ea1b091668, 0x029de7afdc0317c3,
1353 0x02fc8a7ca2da037c, 0x02de1217d74a6f30, 0x013f7173175b73bf},
1354 {0x0344913f441490b5, 0x0200f9e272b61eca, 0x0258a246b1dd55d2,
1355 0x03753db9ea496f36, 0x025e02937a09c5ef, 0x030cbd3d14012692,
1356 0x01793a67e70dc72a, 0x03ec1d37048a662e, 0x006550f700c32a8d},
1357 {1, 0, 0, 0, 0, 0, 0, 0, 0}},
1358 {{0x00d3f48a347eba27, 0x008e636649b61bd8, 0x00d3b93716778fb3,
1359 0x004d1915757bd209, 0x019d5311a3da44e0, 0x016d1afcbbe6aade,
1360 0x0241bf5f73265616, 0x0384672e5d50d39b, 0x005009fee522b684},
1361 {0x029b4fab064435fe, 0x018868ee095bbb07, 0x01ea3d6936cc92b8,
1362 0x000608b00f78a2f3, 0x02db911073d1c20f, 0x018205938470100a,
1363 0x01f1e4964cbe6ff2, 0x021a19a29eed4663, 0x01414485f42afa81},
1364 {1, 0, 0, 0, 0, 0, 0, 0, 0}},
1365 {{0x01612b3a17f63e34, 0x03813992885428e6, 0x022b3c215b5a9608,
1366 0x029b4057e19f2fcb, 0x0384059a587af7e6, 0x02d6400ace6fe610,
1367 0x029354d896e8e331, 0x00c047ee6dfba65e, 0x0037720542e9d49d},
1368 {0x02ce9eed7c5e9278, 0x0374ed703e79643b, 0x01316c54c4072006,
1369 0x005aaa09054b2ee8, 0x002824000c840d57, 0x03d4eba24771ed86,
1370 0x0189c50aabc3bdae, 0x0338c01541e15510, 0x00466d56e38eed42},
1371 {1, 0, 0, 0, 0, 0, 0, 0, 0}},
1372 {{0x007efd8330ad8bd6, 0x02465ed48047710b, 0x0034c6606b215e0c,
1373 0x016ae30c53cbf839, 0x01fa17bd37161216, 0x018ead4e61ce8ab9,
1374 0x005482ed5f5dee46, 0x037543755bba1d7f, 0x005e5ac7e70a9d0f},
1375 {0x0117e1bb2fdcb2a2, 0x03deea36249f40c4, 0x028d09b4a6246cb7,
1376 0x03524b8855bcf756, 0x023d7d109d5ceb58, 0x0178e43e3223ef9c,
1377 0x0154536a0c6e966a, 0x037964d1286ee9fe, 0x0199bcd90e125055},
1378 {1, 0, 0, 0, 0, 0, 0, 0, 0}}};
1379
1380 /* select_point selects the |idx|th point from a precomputation table and
1381 * copies it to out. */
1382 static void select_point(const limb idx, unsigned int size, const felem pre_comp[/* size */][3],
1383 felem out[3])
1384 {
1385 unsigned i, j;
1386 limb *outlimbs = &out[0][0];
1387 memset(outlimbs, 0, 3 * sizeof(felem));
1388
1389 for (i = 0; i < size; i++)
1390 {
1391 const limb *inlimbs = &pre_comp[i][0][0];
1392 limb mask = i ^ idx;
1393 mask |= mask >> 4;
1394 mask |= mask >> 2;
1395 mask |= mask >> 1;
1396 mask &= 1;
1397 mask--;
1398 for (j = 0; j < NLIMBS * 3; j++)
1399 outlimbs[j] |= inlimbs[j] & mask;
1400 }
1401 }
1402
1403 /* get_bit returns the |i|th bit in |in| */
1404 static char get_bit(const felem_bytearray in, int i)
1405 {
1406 if (i < 0)
1407 return 0;
1408 return (in[i >> 3] >> (i & 7)) & 1;
1409 }
1410
1411 /* Interleaved point multiplication using precomputed point multiples:
1412 * The small point multiples 0*P, 1*P, ..., 16*P are in pre_comp[],
1413 * the scalars in scalars[]. If g_scalar is non-NULL, we also add this multiple
1414 * of the generator, using certain (large) precomputed multiples in g_pre_comp.
1415 * Output point (X, Y, Z) is stored in x_out, y_out, z_out */
1416 static void batch_mul(felem x_out, felem y_out, felem z_out,
1417 const felem_bytearray scalars[], const unsigned num_points, const u8 *g_scalar,
1418 const int mixed, const felem pre_comp[][17][3], const felem g_pre_comp[16][3])
1419 {
1420 int i, skip;
1421 unsigned num, gen_mul = (g_scalar != NULL);
1422 felem nq[3], tmp[4];
1423 limb bits;
1424 u8 sign, digit;
1425
1426 /* set nq to the point at infinity */
1427 memset(nq, 0, 3 * sizeof(felem));
1428
1429 /* Loop over all scalars msb-to-lsb, interleaving additions
1430 * of multiples of the generator (last quarter of rounds)
1431 * and additions of other points multiples (every 5th round).
1432 */
1433 skip = 1; /* save two point operations in the first round */
1434 for (i = (num_points ? 520 : 130); i >= 0; --i)
1435 {
1436 /* double */
1437 if (!skip)
1438 point_double(nq[0], nq[1], nq[2], nq[0], nq[1], nq[2]);
1439
1440 /* add multiples of the generator */
1441 if (gen_mul && (i <= 130))
1442 {
1443 bits = get_bit(g_scalar, i + 390) << 3;
1444 if (i < 130)
1445 {
1446 bits |= get_bit(g_scalar, i + 260) << 2;
1447 bits |= get_bit(g_scalar, i + 130) << 1;
1448 bits |= get_bit(g_scalar, i);
1449 }
1450 /* select the point to add, in constant time */
1451 select_point(bits, 16, g_pre_comp, tmp);
1452 if (!skip)
1453 {
1454 point_add(nq[0], nq[1], nq[2],
1455 nq[0], nq[1], nq[2],
1456 1 /* mixed */, tmp[0], tmp[1], tmp[2]);
1457 }
1458 else
1459 {
1460 memcpy(nq, tmp, 3 * sizeof(felem));
1461 skip = 0;
1462 }
1463 }
1464
1465 /* do other additions every 5 doublings */
1466 if (num_points && (i % 5 == 0))
1467 {
1468 /* loop over all scalars */
1469 for (num = 0; num < num_points; ++num)
1470 {
1471 bits = get_bit(scalars[num], i + 4) << 5;
1472 bits |= get_bit(scalars[num], i + 3) << 4;
1473 bits |= get_bit(scalars[num], i + 2) << 3;
1474 bits |= get_bit(scalars[num], i + 1) << 2;
1475 bits |= get_bit(scalars[num], i) << 1;
1476 bits |= get_bit(scalars[num], i - 1);
1477 ec_GFp_nistp_recode_scalar_bits(&sign, &digit, bits);
1478
1479 /* select the point to add or subtract, in constant time */
1480 select_point(digit, 17, pre_comp[num], tmp);
1481 felem_neg(tmp[3], tmp[1]); /* (X, -Y, Z) is the negative point */
1482 copy_conditional(tmp[1], tmp[3], (-(limb) sign));
1483
1484 if (!skip)
1485 {
1486 point_add(nq[0], nq[1], nq[2],
1487 nq[0], nq[1], nq[2],
1488 mixed, tmp[0], tmp[1], tmp[2]);
1489 }
1490 else
1491 {
1492 memcpy(nq, tmp, 3 * sizeof(felem));
1493 skip = 0;
1494 }
1495 }
1496 }
1497 }
1498 felem_assign(x_out, nq[0]);
1499 felem_assign(y_out, nq[1]);
1500 felem_assign(z_out, nq[2]);
1501 }
1502
1503
1504 /* Precomputation for the group generator. */
1505 typedef struct {
1506 felem g_pre_comp[16][3];
1507 int references;
1508 } NISTP521_PRE_COMP;
1509
1510 const EC_METHOD *EC_GFp_nistp521_method(void)
1511 {
1512 static const EC_METHOD ret = {
1513 EC_FLAGS_DEFAULT_OCT,
1514 NID_X9_62_prime_field,
1515 ec_GFp_nistp521_group_init,
1516 ec_GFp_simple_group_finish,
1517 ec_GFp_simple_group_clear_finish,
1518 ec_GFp_nist_group_copy,
1519 ec_GFp_nistp521_group_set_curve,
1520 ec_GFp_simple_group_get_curve,
1521 ec_GFp_simple_group_get_degree,
1522 ec_GFp_simple_group_check_discriminant,
1523 ec_GFp_simple_point_init,
1524 ec_GFp_simple_point_finish,
1525 ec_GFp_simple_point_clear_finish,
1526 ec_GFp_simple_point_copy,
1527 ec_GFp_simple_point_set_to_infinity,
1528 ec_GFp_simple_set_Jprojective_coordinates_GFp,
1529 ec_GFp_simple_get_Jprojective_coordinates_GFp,
1530 ec_GFp_simple_point_set_affine_coordinates,
1531 ec_GFp_nistp521_point_get_affine_coordinates,
1532 0 /* point_set_compressed_coordinates */,
1533 0 /* point2oct */,
1534 0 /* oct2point */,
1535 ec_GFp_simple_add,
1536 ec_GFp_simple_dbl,
1537 ec_GFp_simple_invert,
1538 ec_GFp_simple_is_at_infinity,
1539 ec_GFp_simple_is_on_curve,
1540 ec_GFp_simple_cmp,
1541 ec_GFp_simple_make_affine,
1542 ec_GFp_simple_points_make_affine,
1543 ec_GFp_nistp521_points_mul,
1544 ec_GFp_nistp521_precompute_mult,
1545 ec_GFp_nistp521_have_precompute_mult,
1546 ec_GFp_nist_field_mul,
1547 ec_GFp_nist_field_sqr,
1548 0 /* field_div */,
1549 0 /* field_encode */,
1550 0 /* field_decode */,
1551 0 /* field_set_to_one */ };
1552
1553 return &ret;
1554 }
1555
1556
1557 /******************************************************************************/
1558 /* FUNCTIONS TO MANAGE PRECOMPUTATION
1559 */
1560
1561 static NISTP521_PRE_COMP *nistp521_pre_comp_new()
1562 {
1563 NISTP521_PRE_COMP *ret = NULL;
1564 ret = (NISTP521_PRE_COMP *)OPENSSL_malloc(sizeof(NISTP521_PRE_COMP));
1565 if (!ret)
1566 {
1567 ECerr(EC_F_NISTP521_PRE_COMP_NEW, ERR_R_MALLOC_FAILURE);
1568 return ret;
1569 }
1570 memset(ret->g_pre_comp, 0, sizeof(ret->g_pre_comp));
1571 ret->references = 1;
1572 return ret;
1573 }
1574
1575 static void *nistp521_pre_comp_dup(void *src_)
1576 {
1577 NISTP521_PRE_COMP *src = src_;
1578
1579 /* no need to actually copy, these objects never change! */
1580 CRYPTO_add(&src->references, 1, CRYPTO_LOCK_EC_PRE_COMP);
1581
1582 return src_;
1583 }
1584
1585 static void nistp521_pre_comp_free(void *pre_)
1586 {
1587 int i;
1588 NISTP521_PRE_COMP *pre = pre_;
1589
1590 if (!pre)
1591 return;
1592
1593 i = CRYPTO_add(&pre->references, -1, CRYPTO_LOCK_EC_PRE_COMP);
1594 if (i > 0)
1595 return;
1596
1597 OPENSSL_free(pre);
1598 }
1599
1600 static void nistp521_pre_comp_clear_free(void *pre_)
1601 {
1602 int i;
1603 NISTP521_PRE_COMP *pre = pre_;
1604
1605 if (!pre)
1606 return;
1607
1608 i = CRYPTO_add(&pre->references, -1, CRYPTO_LOCK_EC_PRE_COMP);
1609 if (i > 0)
1610 return;
1611
1612 OPENSSL_cleanse(pre, sizeof(*pre));
1613 OPENSSL_free(pre);
1614 }
1615
1616 /******************************************************************************/
1617 /* OPENSSL EC_METHOD FUNCTIONS
1618 */
1619
1620 int ec_GFp_nistp521_group_init(EC_GROUP *group)
1621 {
1622 int ret;
1623 ret = ec_GFp_simple_group_init(group);
1624 group->a_is_minus3 = 1;
1625 return ret;
1626 }
1627
1628 int ec_GFp_nistp521_group_set_curve(EC_GROUP *group, const BIGNUM *p,
1629 const BIGNUM *a, const BIGNUM *b, BN_CTX *ctx)
1630 {
1631 int ret = 0;
1632 BN_CTX *new_ctx = NULL;
1633 BIGNUM *curve_p, *curve_a, *curve_b;
1634
1635 if (ctx == NULL)
1636 if ((ctx = new_ctx = BN_CTX_new()) == NULL) return 0;
1637 BN_CTX_start(ctx);
1638 if (((curve_p = BN_CTX_get(ctx)) == NULL) ||
1639 ((curve_a = BN_CTX_get(ctx)) == NULL) ||
1640 ((curve_b = BN_CTX_get(ctx)) == NULL)) goto err;
1641 BN_bin2bn(nistp521_curve_params[0], sizeof(felem_bytearray), curve_p);
1642 BN_bin2bn(nistp521_curve_params[1], sizeof(felem_bytearray), curve_a);
1643 BN_bin2bn(nistp521_curve_params[2], sizeof(felem_bytearray), curve_b);
1644 if ((BN_cmp(curve_p, p)) || (BN_cmp(curve_a, a)) ||
1645 (BN_cmp(curve_b, b)))
1646 {
1647 ECerr(EC_F_EC_GFP_NISTP521_GROUP_SET_CURVE,
1648 EC_R_WRONG_CURVE_PARAMETERS);
1649 goto err;
1650 }
1651 group->field_mod_func = BN_nist_mod_521;
1652 ret = ec_GFp_simple_group_set_curve(group, p, a, b, ctx);
1653 err:
1654 BN_CTX_end(ctx);
1655 if (new_ctx != NULL)
1656 BN_CTX_free(new_ctx);
1657 return ret;
1658 }
1659
1660 /* Takes the Jacobian coordinates (X, Y, Z) of a point and returns
1661 * (X', Y') = (X/Z^2, Y/Z^3) */
1662 int ec_GFp_nistp521_point_get_affine_coordinates(const EC_GROUP *group,
1663 const EC_POINT *point, BIGNUM *x, BIGNUM *y, BN_CTX *ctx)
1664 {
1665 felem z1, z2, x_in, y_in, x_out, y_out;
1666 largefelem tmp;
1667
1668 if (EC_POINT_is_at_infinity(group, point))
1669 {
1670 ECerr(EC_F_EC_GFP_NISTP521_POINT_GET_AFFINE_COORDINATES,
1671 EC_R_POINT_AT_INFINITY);
1672 return 0;
1673 }
1674 if ((!BN_to_felem(x_in, &point->X)) || (!BN_to_felem(y_in, &point->Y)) ||
1675 (!BN_to_felem(z1, &point->Z))) return 0;
1676 felem_inv(z2, z1);
1677 felem_square(tmp, z2); felem_reduce(z1, tmp);
1678 felem_mul(tmp, x_in, z1); felem_reduce(x_in, tmp);
1679 felem_contract(x_out, x_in);
1680 if (x != NULL)
1681 {
1682 if (!felem_to_BN(x, x_out))
1683 {
1684 ECerr(EC_F_EC_GFP_NISTP521_POINT_GET_AFFINE_COORDINATES, ERR_R_BN_LIB);
1685 return 0;
1686 }
1687 }
1688 felem_mul(tmp, z1, z2); felem_reduce(z1, tmp);
1689 felem_mul(tmp, y_in, z1); felem_reduce(y_in, tmp);
1690 felem_contract(y_out, y_in);
1691 if (y != NULL)
1692 {
1693 if (!felem_to_BN(y, y_out))
1694 {
1695 ECerr(EC_F_EC_GFP_NISTP521_POINT_GET_AFFINE_COORDINATES, ERR_R_BN_LIB);
1696 return 0;
1697 }
1698 }
1699 return 1;
1700 }
1701
1702 static void make_points_affine(size_t num, felem points[/* num */][3], felem tmp_felems[/* num+1 */])
1703 {
1704 /* Runs in constant time, unless an input is the point at infinity
1705 * (which normally shouldn't happen). */
1706 ec_GFp_nistp_points_make_affine_internal(
1707 num,
1708 points,
1709 sizeof(felem),
1710 tmp_felems,
1711 (void (*)(void *)) felem_one,
1712 (int (*)(const void *)) felem_is_zero_int,
1713 (void (*)(void *, const void *)) felem_assign,
1714 (void (*)(void *, const void *)) felem_square_reduce,
1715 (void (*)(void *, const void *, const void *)) felem_mul_reduce,
1716 (void (*)(void *, const void *)) felem_inv,
1717 (void (*)(void *, const void *)) felem_contract);
1718 }
1719
1720 /* Computes scalar*generator + \sum scalars[i]*points[i], ignoring NULL values
1721 * Result is stored in r (r can equal one of the inputs). */
1722 int ec_GFp_nistp521_points_mul(const EC_GROUP *group, EC_POINT *r,
1723 const BIGNUM *scalar, size_t num, const EC_POINT *points[],
1724 const BIGNUM *scalars[], BN_CTX *ctx)
1725 {
1726 int ret = 0;
1727 int j;
1728 int mixed = 0;
1729 BN_CTX *new_ctx = NULL;
1730 BIGNUM *x, *y, *z, *tmp_scalar;
1731 felem_bytearray g_secret;
1732 felem_bytearray *secrets = NULL;
1733 felem (*pre_comp)[17][3] = NULL;
1734 felem *tmp_felems = NULL;
1735 felem_bytearray tmp;
1736 unsigned i, num_bytes;
1737 int have_pre_comp = 0;
1738 size_t num_points = num;
1739 felem x_in, y_in, z_in, x_out, y_out, z_out;
1740 NISTP521_PRE_COMP *pre = NULL;
1741 felem (*g_pre_comp)[3] = NULL;
1742 EC_POINT *generator = NULL;
1743 const EC_POINT *p = NULL;
1744 const BIGNUM *p_scalar = NULL;
1745
1746 if (ctx == NULL)
1747 if ((ctx = new_ctx = BN_CTX_new()) == NULL) return 0;
1748 BN_CTX_start(ctx);
1749 if (((x = BN_CTX_get(ctx)) == NULL) ||
1750 ((y = BN_CTX_get(ctx)) == NULL) ||
1751 ((z = BN_CTX_get(ctx)) == NULL) ||
1752 ((tmp_scalar = BN_CTX_get(ctx)) == NULL))
1753 goto err;
1754
1755 if (scalar != NULL)
1756 {
1757 pre = EC_EX_DATA_get_data(group->extra_data,
1758 nistp521_pre_comp_dup, nistp521_pre_comp_free,
1759 nistp521_pre_comp_clear_free);
1760 if (pre)
1761 /* we have precomputation, try to use it */
1762 g_pre_comp = &pre->g_pre_comp[0];
1763 else
1764 /* try to use the standard precomputation */
1765 g_pre_comp = (felem (*)[3]) gmul;
1766 generator = EC_POINT_new(group);
1767 if (generator == NULL)
1768 goto err;
1769 /* get the generator from precomputation */
1770 if (!felem_to_BN(x, g_pre_comp[1][0]) ||
1771 !felem_to_BN(y, g_pre_comp[1][1]) ||
1772 !felem_to_BN(z, g_pre_comp[1][2]))
1773 {
1774 ECerr(EC_F_EC_GFP_NISTP521_POINTS_MUL, ERR_R_BN_LIB);
1775 goto err;
1776 }
1777 if (!EC_POINT_set_Jprojective_coordinates_GFp(group,
1778 generator, x, y, z, ctx))
1779 goto err;
1780 if (0 == EC_POINT_cmp(group, generator, group->generator, ctx))
1781 /* precomputation matches generator */
1782 have_pre_comp = 1;
1783 else
1784 /* we don't have valid precomputation:
1785 * treat the generator as a random point */
1786 num_points++;
1787 }
1788
1789 if (num_points > 0)
1790 {
1791 if (num_points >= 2)
1792 {
1793 /* unless we precompute multiples for just one point,
1794 * converting those into affine form is time well spent */
1795 mixed = 1;
1796 }
1797 secrets = OPENSSL_malloc(num_points * sizeof(felem_bytearray));
1798 pre_comp = OPENSSL_malloc(num_points * 17 * 3 * sizeof(felem));
1799 if (mixed)
1800 tmp_felems = OPENSSL_malloc((num_points * 17 + 1) * sizeof(felem));
1801 if ((secrets == NULL) || (pre_comp == NULL) || (mixed && (tmp_felems == NULL)))
1802 {
1803 ECerr(EC_F_EC_GFP_NISTP521_POINTS_MUL, ERR_R_MALLOC_FAILURE);
1804 goto err;
1805 }
1806
1807 /* we treat NULL scalars as 0, and NULL points as points at infinity,
1808 * i.e., they contribute nothing to the linear combination */
1809 memset(secrets, 0, num_points * sizeof(felem_bytearray));
1810 memset(pre_comp, 0, num_points * 17 * 3 * sizeof(felem));
1811 for (i = 0; i < num_points; ++i)
1812 {
1813 if (i == num)
1814 /* we didn't have a valid precomputation, so we pick
1815 * the generator */
1816 {
1817 p = EC_GROUP_get0_generator(group);
1818 p_scalar = scalar;
1819 }
1820 else
1821 /* the i^th point */
1822 {
1823 p = points[i];
1824 p_scalar = scalars[i];
1825 }
1826 if ((p_scalar != NULL) && (p != NULL))
1827 {
1828 /* reduce scalar to 0 <= scalar < 2^521 */
1829 if ((BN_num_bits(p_scalar) > 521) || (BN_is_negative(p_scalar)))
1830 {
1831 /* this is an unusual input, and we don't guarantee
1832 * constant-timeness */
1833 if (!BN_nnmod(tmp_scalar, p_scalar, &group->order, ctx))
1834 {
1835 ECerr(EC_F_EC_GFP_NISTP521_POINTS_MUL, ERR_R_BN_LIB);
1836 goto err;
1837 }
1838 num_bytes = BN_bn2bin(tmp_scalar, tmp);
1839 }
1840 else
1841 num_bytes = BN_bn2bin(p_scalar, tmp);
1842 flip_endian(secrets[i], tmp, num_bytes);
1843 /* precompute multiples */
1844 if ((!BN_to_felem(x_out, &p->X)) ||
1845 (!BN_to_felem(y_out, &p->Y)) ||
1846 (!BN_to_felem(z_out, &p->Z))) goto err;
1847 memcpy(pre_comp[i][1][0], x_out, sizeof(felem));
1848 memcpy(pre_comp[i][1][1], y_out, sizeof(felem));
1849 memcpy(pre_comp[i][1][2], z_out, sizeof(felem));
1850 for (j = 2; j <= 16; ++j)
1851 {
1852 if (j & 1)
1853 {
1854 point_add(
1855 pre_comp[i][j][0], pre_comp[i][j][1], pre_comp[i][j][2],
1856 pre_comp[i][1][0], pre_comp[i][1][1], pre_comp[i][1][2],
1857 0, pre_comp[i][j-1][0], pre_comp[i][j-1][1], pre_comp[i][j-1][2]);
1858 }
1859 else
1860 {
1861 point_double(
1862 pre_comp[i][j][0], pre_comp[i][j][1], pre_comp[i][j][2],
1863 pre_comp[i][j/2][0], pre_comp[i][j/2][1], pre_comp[i][j/2][2]);
1864 }
1865 }
1866 }
1867 }
1868 if (mixed)
1869 make_points_affine(num_points * 17, pre_comp[0], tmp_felems);
1870 }
1871
1872 /* the scalar for the generator */
1873 if ((scalar != NULL) && (have_pre_comp))
1874 {
1875 memset(g_secret, 0, sizeof(g_secret));
1876 /* reduce scalar to 0 <= scalar < 2^521 */
1877 if ((BN_num_bits(scalar) > 521) || (BN_is_negative(scalar)))
1878 {
1879 /* this is an unusual input, and we don't guarantee
1880 * constant-timeness */
1881 if (!BN_nnmod(tmp_scalar, scalar, &group->order, ctx))
1882 {
1883 ECerr(EC_F_EC_GFP_NISTP521_POINTS_MUL, ERR_R_BN_LIB);
1884 goto err;
1885 }
1886 num_bytes = BN_bn2bin(tmp_scalar, tmp);
1887 }
1888 else
1889 num_bytes = BN_bn2bin(scalar, tmp);
1890 flip_endian(g_secret, tmp, num_bytes);
1891 /* do the multiplication with generator precomputation*/
1892 batch_mul(x_out, y_out, z_out,
1893 (const felem_bytearray (*)) secrets, num_points,
1894 g_secret,
1895 mixed, (const felem (*)[17][3]) pre_comp,
1896 (const felem (*)[3]) g_pre_comp);
1897 }
1898 else
1899 /* do the multiplication without generator precomputation */
1900 batch_mul(x_out, y_out, z_out,
1901 (const felem_bytearray (*)) secrets, num_points,
1902 NULL, mixed, (const felem (*)[17][3]) pre_comp, NULL);
1903 /* reduce the output to its unique minimal representation */
1904 felem_contract(x_in, x_out);
1905 felem_contract(y_in, y_out);
1906 felem_contract(z_in, z_out);
1907 if ((!felem_to_BN(x, x_in)) || (!felem_to_BN(y, y_in)) ||
1908 (!felem_to_BN(z, z_in)))
1909 {
1910 ECerr(EC_F_EC_GFP_NISTP521_POINTS_MUL, ERR_R_BN_LIB);
1911 goto err;
1912 }
1913 ret = EC_POINT_set_Jprojective_coordinates_GFp(group, r, x, y, z, ctx);
1914
1915 err:
1916 BN_CTX_end(ctx);
1917 if (generator != NULL)
1918 EC_POINT_free(generator);
1919 if (new_ctx != NULL)
1920 BN_CTX_free(new_ctx);
1921 if (secrets != NULL)
1922 OPENSSL_free(secrets);
1923 if (pre_comp != NULL)
1924 OPENSSL_free(pre_comp);
1925 if (tmp_felems != NULL)
1926 OPENSSL_free(tmp_felems);
1927 return ret;
1928 }
1929
1930 int ec_GFp_nistp521_precompute_mult(EC_GROUP *group, BN_CTX *ctx)
1931 {
1932 int ret = 0;
1933 NISTP521_PRE_COMP *pre = NULL;
1934 int i, j;
1935 BN_CTX *new_ctx = NULL;
1936 BIGNUM *x, *y;
1937 EC_POINT *generator = NULL;
1938 felem tmp_felems[16];
1939
1940 /* throw away old precomputation */
1941 EC_EX_DATA_free_data(&group->extra_data, nistp521_pre_comp_dup,
1942 nistp521_pre_comp_free, nistp521_pre_comp_clear_free);
1943 if (ctx == NULL)
1944 if ((ctx = new_ctx = BN_CTX_new()) == NULL) return 0;
1945 BN_CTX_start(ctx);
1946 if (((x = BN_CTX_get(ctx)) == NULL) ||
1947 ((y = BN_CTX_get(ctx)) == NULL))
1948 goto err;
1949 /* get the generator */
1950 if (group->generator == NULL) goto err;
1951 generator = EC_POINT_new(group);
1952 if (generator == NULL)
1953 goto err;
1954 BN_bin2bn(nistp521_curve_params[3], sizeof (felem_bytearray), x);
1955 BN_bin2bn(nistp521_curve_params[4], sizeof (felem_bytearray), y);
1956 if (!EC_POINT_set_affine_coordinates_GFp(group, generator, x, y, ctx))
1957 goto err;
1958 if ((pre = nistp521_pre_comp_new()) == NULL)
1959 goto err;
1960 /* if the generator is the standard one, use built-in precomputation */
1961 if (0 == EC_POINT_cmp(group, generator, group->generator, ctx))
1962 {
1963 memcpy(pre->g_pre_comp, gmul, sizeof(pre->g_pre_comp));
1964 ret = 1;
1965 goto err;
1966 }
1967 if ((!BN_to_felem(pre->g_pre_comp[1][0], &group->generator->X)) ||
1968 (!BN_to_felem(pre->g_pre_comp[1][1], &group->generator->Y)) ||
1969 (!BN_to_felem(pre->g_pre_comp[1][2], &group->generator->Z)))
1970 goto err;
1971 /* compute 2^130*G, 2^260*G, 2^390*G */
1972 for (i = 1; i <= 4; i <<= 1)
1973 {
1974 point_double(pre->g_pre_comp[2*i][0], pre->g_pre_comp[2*i][1],
1975 pre->g_pre_comp[2*i][2], pre->g_pre_comp[i][0],
1976 pre->g_pre_comp[i][1], pre->g_pre_comp[i][2]);
1977 for (j = 0; j < 129; ++j)
1978 {
1979 point_double(pre->g_pre_comp[2*i][0],
1980 pre->g_pre_comp[2*i][1],
1981 pre->g_pre_comp[2*i][2],
1982 pre->g_pre_comp[2*i][0],
1983 pre->g_pre_comp[2*i][1],
1984 pre->g_pre_comp[2*i][2]);
1985 }
1986 }
1987 /* g_pre_comp[0] is the point at infinity */
1988 memset(pre->g_pre_comp[0], 0, sizeof(pre->g_pre_comp[0]));
1989 /* the remaining multiples */
1990 /* 2^130*G + 2^260*G */
1991 point_add(pre->g_pre_comp[6][0], pre->g_pre_comp[6][1],
1992 pre->g_pre_comp[6][2], pre->g_pre_comp[4][0],
1993 pre->g_pre_comp[4][1], pre->g_pre_comp[4][2],
1994 0, pre->g_pre_comp[2][0], pre->g_pre_comp[2][1],
1995 pre->g_pre_comp[2][2]);
1996 /* 2^130*G + 2^390*G */
1997 point_add(pre->g_pre_comp[10][0], pre->g_pre_comp[10][1],
1998 pre->g_pre_comp[10][2], pre->g_pre_comp[8][0],
1999 pre->g_pre_comp[8][1], pre->g_pre_comp[8][2],
2000 0, pre->g_pre_comp[2][0], pre->g_pre_comp[2][1],
2001 pre->g_pre_comp[2][2]);
2002 /* 2^260*G + 2^390*G */
2003 point_add(pre->g_pre_comp[12][0], pre->g_pre_comp[12][1],
2004 pre->g_pre_comp[12][2], pre->g_pre_comp[8][0],
2005 pre->g_pre_comp[8][1], pre->g_pre_comp[8][2],
2006 0, pre->g_pre_comp[4][0], pre->g_pre_comp[4][1],
2007 pre->g_pre_comp[4][2]);
2008 /* 2^130*G + 2^260*G + 2^390*G */
2009 point_add(pre->g_pre_comp[14][0], pre->g_pre_comp[14][1],
2010 pre->g_pre_comp[14][2], pre->g_pre_comp[12][0],
2011 pre->g_pre_comp[12][1], pre->g_pre_comp[12][2],
2012 0, pre->g_pre_comp[2][0], pre->g_pre_comp[2][1],
2013 pre->g_pre_comp[2][2]);
2014 for (i = 1; i < 8; ++i)
2015 {
2016 /* odd multiples: add G */
2017 point_add(pre->g_pre_comp[2*i+1][0], pre->g_pre_comp[2*i+1][1],
2018 pre->g_pre_comp[2*i+1][2], pre->g_pre_comp[2*i][0],
2019 pre->g_pre_comp[2*i][1], pre->g_pre_comp[2*i][2],
2020 0, pre->g_pre_comp[1][0], pre->g_pre_comp[1][1],
2021 pre->g_pre_comp[1][2]);
2022 }
2023 make_points_affine(15, &(pre->g_pre_comp[1]), tmp_felems);
2024
2025 if (!EC_EX_DATA_set_data(&group->extra_data, pre, nistp521_pre_comp_dup,
2026 nistp521_pre_comp_free, nistp521_pre_comp_clear_free))
2027 goto err;
2028 ret = 1;
2029 pre = NULL;
2030 err:
2031 BN_CTX_end(ctx);
2032 if (generator != NULL)
2033 EC_POINT_free(generator);
2034 if (new_ctx != NULL)
2035 BN_CTX_free(new_ctx);
2036 if (pre)
2037 nistp521_pre_comp_free(pre);
2038 return ret;
2039 }
2040
2041 int ec_GFp_nistp521_have_precompute_mult(const EC_GROUP *group)
2042 {
2043 if (EC_EX_DATA_get_data(group->extra_data, nistp521_pre_comp_dup,
2044 nistp521_pre_comp_free, nistp521_pre_comp_clear_free)
2045 != NULL)
2046 return 1;
2047 else
2048 return 0;
2049 }
2050
2051 #else
2052 static void *dummy=&dummy;
2053 #endif