/* crypto/ec/ecp_nistp521.c -- OpenSSL source (web-viewer navigation header removed) */
1 /* crypto/ec/ecp_nistp521.c */
2 /*
3 * Written by Adam Langley (Google) for the OpenSSL project
4 */
5 /* Copyright 2011 Google Inc.
6 *
7 * Licensed under the Apache License, Version 2.0 (the "License");
8 *
9 * you may not use this file except in compliance with the License.
10 * You may obtain a copy of the License at
11 *
12 * http://www.apache.org/licenses/LICENSE-2.0
13 *
14 * Unless required by applicable law or agreed to in writing, software
15 * distributed under the License is distributed on an "AS IS" BASIS,
16 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 * See the License for the specific language governing permissions and
18 * limitations under the License.
19 */
20
21 /*
22 * A 64-bit implementation of the NIST P-521 elliptic curve point multiplication
23 *
24 * OpenSSL integration was taken from Emilia Kasper's work in ecp_nistp224.c.
25 * Otherwise based on Emilia's P224 work, which was inspired by my curve25519
26 * work which got its smarts from Daniel J. Bernstein's work on the same.
27 */
28
29 #include <openssl/opensslconf.h>
30 #ifndef OPENSSL_NO_EC_NISTP_64_GCC_128
31
32 #ifndef OPENSSL_SYS_VMS
33 #include <stdint.h>
34 #else
35 #include <inttypes.h>
36 #endif
37
38 #include <string.h>
39 #include <openssl/err.h>
40 #include "ec_lcl.h"
41
42 #if defined(__GNUC__) && (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 1))
43 /* even with gcc, the typedef won't work for 32-bit platforms */
44 typedef __uint128_t uint128_t; /* nonstandard; implemented by gcc on 64-bit platforms */
45 #else
46 #error "Need GCC 3.1 or later to define type uint128_t"
47 #endif
48
49 typedef uint8_t u8;
50 typedef uint64_t u64;
51 typedef int64_t s64;
52
53 /* The underlying field.
54 *
55 * P521 operates over GF(2^521-1). We can serialise an element of this field
56 * into 66 bytes where the most significant byte contains only a single bit. We
57 * call this an felem_bytearray. */
58
59 typedef u8 felem_bytearray[66];
60
61 /* These are the parameters of P521, taken from FIPS 186-3, section D.1.2.5.
62 * These values are big-endian. */
63 static const felem_bytearray nistp521_curve_params[5] =
64 {
65 {0x01, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* p */
66 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
67 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
68 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
69 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
70 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
71 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
72 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
73 0xff, 0xff},
74 {0x01, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* a = -3 */
75 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
76 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
77 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
78 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
79 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
80 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
81 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
82 0xff, 0xfc},
83 {0x00, 0x51, 0x95, 0x3e, 0xb9, 0x61, 0x8e, 0x1c, /* b */
84 0x9a, 0x1f, 0x92, 0x9a, 0x21, 0xa0, 0xb6, 0x85,
85 0x40, 0xee, 0xa2, 0xda, 0x72, 0x5b, 0x99, 0xb3,
86 0x15, 0xf3, 0xb8, 0xb4, 0x89, 0x91, 0x8e, 0xf1,
87 0x09, 0xe1, 0x56, 0x19, 0x39, 0x51, 0xec, 0x7e,
88 0x93, 0x7b, 0x16, 0x52, 0xc0, 0xbd, 0x3b, 0xb1,
89 0xbf, 0x07, 0x35, 0x73, 0xdf, 0x88, 0x3d, 0x2c,
90 0x34, 0xf1, 0xef, 0x45, 0x1f, 0xd4, 0x6b, 0x50,
91 0x3f, 0x00},
92 {0x00, 0xc6, 0x85, 0x8e, 0x06, 0xb7, 0x04, 0x04, /* x */
93 0xe9, 0xcd, 0x9e, 0x3e, 0xcb, 0x66, 0x23, 0x95,
94 0xb4, 0x42, 0x9c, 0x64, 0x81, 0x39, 0x05, 0x3f,
95 0xb5, 0x21, 0xf8, 0x28, 0xaf, 0x60, 0x6b, 0x4d,
96 0x3d, 0xba, 0xa1, 0x4b, 0x5e, 0x77, 0xef, 0xe7,
97 0x59, 0x28, 0xfe, 0x1d, 0xc1, 0x27, 0xa2, 0xff,
98 0xa8, 0xde, 0x33, 0x48, 0xb3, 0xc1, 0x85, 0x6a,
99 0x42, 0x9b, 0xf9, 0x7e, 0x7e, 0x31, 0xc2, 0xe5,
100 0xbd, 0x66},
101 {0x01, 0x18, 0x39, 0x29, 0x6a, 0x78, 0x9a, 0x3b, /* y */
102 0xc0, 0x04, 0x5c, 0x8a, 0x5f, 0xb4, 0x2c, 0x7d,
103 0x1b, 0xd9, 0x98, 0xf5, 0x44, 0x49, 0x57, 0x9b,
104 0x44, 0x68, 0x17, 0xaf, 0xbd, 0x17, 0x27, 0x3e,
105 0x66, 0x2c, 0x97, 0xee, 0x72, 0x99, 0x5e, 0xf4,
106 0x26, 0x40, 0xc5, 0x50, 0xb9, 0x01, 0x3f, 0xad,
107 0x07, 0x61, 0x35, 0x3c, 0x70, 0x86, 0xa2, 0x72,
108 0xc2, 0x40, 0x88, 0xbe, 0x94, 0x76, 0x9f, 0xd1,
109 0x66, 0x50}
110 };
111
112 /*-
113 * The representation of field elements.
114 * ------------------------------------
115 *
116 * We represent field elements with nine values. These values are either 64 or
117 * 128 bits and the field element represented is:
118 * v[0]*2^0 + v[1]*2^58 + v[2]*2^116 + ... + v[8]*2^464 (mod p)
119 * Each of the nine values is called a 'limb'. Since the limbs are spaced only
120 * 58 bits apart, but are greater than 58 bits in length, the most significant
121 * bits of each limb overlap with the least significant bits of the next.
122 *
123 * A field element with 64-bit limbs is an 'felem'. One with 128-bit limbs is a
124 * 'largefelem' */
125
126 #define NLIMBS 9
127
128 typedef uint64_t limb;
129 typedef limb felem[NLIMBS];
130 typedef uint128_t largefelem[NLIMBS];
131
132 static const limb bottom57bits = 0x1ffffffffffffff;
133 static const limb bottom58bits = 0x3ffffffffffffff;
134
135 /* bin66_to_felem takes a little-endian byte array and converts it into felem
136 * form. This assumes that the CPU is little-endian. */
137 static void bin66_to_felem(felem out, const u8 in[66])
138 {
139 out[0] = (*((limb*) &in[0])) & bottom58bits;
140 out[1] = (*((limb*) &in[7]) >> 2) & bottom58bits;
141 out[2] = (*((limb*) &in[14]) >> 4) & bottom58bits;
142 out[3] = (*((limb*) &in[21]) >> 6) & bottom58bits;
143 out[4] = (*((limb*) &in[29])) & bottom58bits;
144 out[5] = (*((limb*) &in[36]) >> 2) & bottom58bits;
145 out[6] = (*((limb*) &in[43]) >> 4) & bottom58bits;
146 out[7] = (*((limb*) &in[50]) >> 6) & bottom58bits;
147 out[8] = (*((limb*) &in[58])) & bottom57bits;
148 }
149
150 /* felem_to_bin66 takes an felem and serialises into a little endian, 66 byte
151 * array. This assumes that the CPU is little-endian. */
152 static void felem_to_bin66(u8 out[66], const felem in)
153 {
154 memset(out, 0, 66);
155 (*((limb*) &out[0])) = in[0];
156 (*((limb*) &out[7])) |= in[1] << 2;
157 (*((limb*) &out[14])) |= in[2] << 4;
158 (*((limb*) &out[21])) |= in[3] << 6;
159 (*((limb*) &out[29])) = in[4];
160 (*((limb*) &out[36])) |= in[5] << 2;
161 (*((limb*) &out[43])) |= in[6] << 4;
162 (*((limb*) &out[50])) |= in[7] << 6;
163 (*((limb*) &out[58])) = in[8];
164 }
165
166 /* To preserve endianness when using BN_bn2bin and BN_bin2bn */
167 static void flip_endian(u8 *out, const u8 *in, unsigned len)
168 {
169 unsigned i;
170 for (i = 0; i < len; ++i)
171 out[i] = in[len-1-i];
172 }
173
174 /* BN_to_felem converts an OpenSSL BIGNUM into an felem */
175 static int BN_to_felem(felem out, const BIGNUM *bn)
176 {
177 felem_bytearray b_in;
178 felem_bytearray b_out;
179 unsigned num_bytes;
180
181 /* BN_bn2bin eats leading zeroes */
182 memset(b_out, 0, sizeof b_out);
183 num_bytes = BN_num_bytes(bn);
184 if (num_bytes > sizeof b_out)
185 {
186 ECerr(EC_F_BN_TO_FELEM, EC_R_BIGNUM_OUT_OF_RANGE);
187 return 0;
188 }
189 if (BN_is_negative(bn))
190 {
191 ECerr(EC_F_BN_TO_FELEM, EC_R_BIGNUM_OUT_OF_RANGE);
192 return 0;
193 }
194 num_bytes = BN_bn2bin(bn, b_in);
195 flip_endian(b_out, b_in, num_bytes);
196 bin66_to_felem(out, b_out);
197 return 1;
198 }
199
200 /* felem_to_BN converts an felem into an OpenSSL BIGNUM */
201 static BIGNUM *felem_to_BN(BIGNUM *out, const felem in)
202 {
203 felem_bytearray b_in, b_out;
204 felem_to_bin66(b_in, in);
205 flip_endian(b_out, b_in, sizeof b_out);
206 return BN_bin2bn(b_out, sizeof b_out, out);
207 }
208
209
210 /* Field operations
211 * ---------------- */
212
213 static void felem_one(felem out)
214 {
215 out[0] = 1;
216 out[1] = 0;
217 out[2] = 0;
218 out[3] = 0;
219 out[4] = 0;
220 out[5] = 0;
221 out[6] = 0;
222 out[7] = 0;
223 out[8] = 0;
224 }
225
226 static void felem_assign(felem out, const felem in)
227 {
228 out[0] = in[0];
229 out[1] = in[1];
230 out[2] = in[2];
231 out[3] = in[3];
232 out[4] = in[4];
233 out[5] = in[5];
234 out[6] = in[6];
235 out[7] = in[7];
236 out[8] = in[8];
237 }
238
239 /* felem_sum64 sets out = out + in. */
240 static void felem_sum64(felem out, const felem in)
241 {
242 out[0] += in[0];
243 out[1] += in[1];
244 out[2] += in[2];
245 out[3] += in[3];
246 out[4] += in[4];
247 out[5] += in[5];
248 out[6] += in[6];
249 out[7] += in[7];
250 out[8] += in[8];
251 }
252
253 /* felem_scalar sets out = in * scalar */
254 static void felem_scalar(felem out, const felem in, limb scalar)
255 {
256 out[0] = in[0] * scalar;
257 out[1] = in[1] * scalar;
258 out[2] = in[2] * scalar;
259 out[3] = in[3] * scalar;
260 out[4] = in[4] * scalar;
261 out[5] = in[5] * scalar;
262 out[6] = in[6] * scalar;
263 out[7] = in[7] * scalar;
264 out[8] = in[8] * scalar;
265 }
266
267 /* felem_scalar64 sets out = out * scalar */
268 static void felem_scalar64(felem out, limb scalar)
269 {
270 out[0] *= scalar;
271 out[1] *= scalar;
272 out[2] *= scalar;
273 out[3] *= scalar;
274 out[4] *= scalar;
275 out[5] *= scalar;
276 out[6] *= scalar;
277 out[7] *= scalar;
278 out[8] *= scalar;
279 }
280
281 /* felem_scalar128 sets out = out * scalar */
282 static void felem_scalar128(largefelem out, limb scalar)
283 {
284 out[0] *= scalar;
285 out[1] *= scalar;
286 out[2] *= scalar;
287 out[3] *= scalar;
288 out[4] *= scalar;
289 out[5] *= scalar;
290 out[6] *= scalar;
291 out[7] *= scalar;
292 out[8] *= scalar;
293 }
294
295 /*-
296 * felem_neg sets |out| to |-in|
297 * On entry:
298 * in[i] < 2^59 + 2^14
299 * On exit:
300 * out[i] < 2^62
301 */
302 static void felem_neg(felem out, const felem in)
303 {
304 /* In order to prevent underflow, we subtract from 0 mod p. */
305 static const limb two62m3 = (((limb)1) << 62) - (((limb)1) << 5);
306 static const limb two62m2 = (((limb)1) << 62) - (((limb)1) << 4);
307
308 out[0] = two62m3 - in[0];
309 out[1] = two62m2 - in[1];
310 out[2] = two62m2 - in[2];
311 out[3] = two62m2 - in[3];
312 out[4] = two62m2 - in[4];
313 out[5] = two62m2 - in[5];
314 out[6] = two62m2 - in[6];
315 out[7] = two62m2 - in[7];
316 out[8] = two62m2 - in[8];
317 }
318
319 /*-
320 * felem_diff64 subtracts |in| from |out|
321 * On entry:
322 * in[i] < 2^59 + 2^14
323 * On exit:
324 * out[i] < out[i] + 2^62
325 */
326 static void felem_diff64(felem out, const felem in)
327 {
328 /* In order to prevent underflow, we add 0 mod p before subtracting. */
329 static const limb two62m3 = (((limb)1) << 62) - (((limb)1) << 5);
330 static const limb two62m2 = (((limb)1) << 62) - (((limb)1) << 4);
331
332 out[0] += two62m3 - in[0];
333 out[1] += two62m2 - in[1];
334 out[2] += two62m2 - in[2];
335 out[3] += two62m2 - in[3];
336 out[4] += two62m2 - in[4];
337 out[5] += two62m2 - in[5];
338 out[6] += two62m2 - in[6];
339 out[7] += two62m2 - in[7];
340 out[8] += two62m2 - in[8];
341 }
342
343 /*-
344 * felem_diff_128_64 subtracts |in| from |out|
345 * On entry:
346 * in[i] < 2^62 + 2^17
347 * On exit:
348 * out[i] < out[i] + 2^63
349 */
350 static void felem_diff_128_64(largefelem out, const felem in)
351 {
352 /* In order to prevent underflow, we add 0 mod p before subtracting. */
353 static const limb two63m6 = (((limb)1) << 62) - (((limb)1) << 5);
354 static const limb two63m5 = (((limb)1) << 62) - (((limb)1) << 4);
355
356 out[0] += two63m6 - in[0];
357 out[1] += two63m5 - in[1];
358 out[2] += two63m5 - in[2];
359 out[3] += two63m5 - in[3];
360 out[4] += two63m5 - in[4];
361 out[5] += two63m5 - in[5];
362 out[6] += two63m5 - in[6];
363 out[7] += two63m5 - in[7];
364 out[8] += two63m5 - in[8];
365 }
366
367 /*-
368 * felem_diff_128_64 subtracts |in| from |out|
369 * On entry:
370 * in[i] < 2^126
371 * On exit:
372 * out[i] < out[i] + 2^127 - 2^69
373 */
374 static void felem_diff128(largefelem out, const largefelem in)
375 {
376 /* In order to prevent underflow, we add 0 mod p before subtracting. */
377 static const uint128_t two127m70 = (((uint128_t)1) << 127) - (((uint128_t)1) << 70);
378 static const uint128_t two127m69 = (((uint128_t)1) << 127) - (((uint128_t)1) << 69);
379
380 out[0] += (two127m70 - in[0]);
381 out[1] += (two127m69 - in[1]);
382 out[2] += (two127m69 - in[2]);
383 out[3] += (two127m69 - in[3]);
384 out[4] += (two127m69 - in[4]);
385 out[5] += (two127m69 - in[5]);
386 out[6] += (two127m69 - in[6]);
387 out[7] += (two127m69 - in[7]);
388 out[8] += (two127m69 - in[8]);
389 }
390
/*-
 * felem_square sets |out| = |in|^2
 * On entry:
 *   in[i] < 2^62
 * On exit:
 *   out[i] < 17 * max(in[i]) * max(in[i])
 */
static void felem_square(largefelem out, const felem in)
	{
	/* Pre-scaled copies of the input; see the comments below for why
	 * doubling/quadrupling an input is cheaper than scaling the 128-bit
	 * products. */
	felem inx2, inx4;
	felem_scalar(inx2, in, 2);
	felem_scalar(inx4, in, 4);

	/*-
	 * We have many cases were we want to do
	 *   in[x] * in[y] +
	 *   in[y] * in[x]
	 * This is obviously just
	 *   2 * in[x] * in[y]
	 * However, rather than do the doubling on the 128 bit result, we
	 * double one of the inputs to the multiplication by reading from
	 * |inx2| */

	/* Schoolbook squaring: out[k] accumulates every product in[i]*in[j]
	 * with i + j == k, for k = 0..8, using |inx2| for the off-diagonal
	 * pairs as explained above. */
	out[0] = ((uint128_t) in[0]) * in[0];
	out[1] = ((uint128_t) in[0]) * inx2[1];
	out[2] = ((uint128_t) in[0]) * inx2[2] +
		 ((uint128_t) in[1]) * in[1];
	out[3] = ((uint128_t) in[0]) * inx2[3] +
		 ((uint128_t) in[1]) * inx2[2];
	out[4] = ((uint128_t) in[0]) * inx2[4] +
		 ((uint128_t) in[1]) * inx2[3] +
		 ((uint128_t) in[2]) * in[2];
	out[5] = ((uint128_t) in[0]) * inx2[5] +
		 ((uint128_t) in[1]) * inx2[4] +
		 ((uint128_t) in[2]) * inx2[3];
	out[6] = ((uint128_t) in[0]) * inx2[6] +
		 ((uint128_t) in[1]) * inx2[5] +
		 ((uint128_t) in[2]) * inx2[4] +
		 ((uint128_t) in[3]) * in[3];
	out[7] = ((uint128_t) in[0]) * inx2[7] +
		 ((uint128_t) in[1]) * inx2[6] +
		 ((uint128_t) in[2]) * inx2[5] +
		 ((uint128_t) in[3]) * inx2[4];
	out[8] = ((uint128_t) in[0]) * inx2[8] +
		 ((uint128_t) in[1]) * inx2[7] +
		 ((uint128_t) in[2]) * inx2[6] +
		 ((uint128_t) in[3]) * inx2[5] +
		 ((uint128_t) in[4]) * in[4];

	/* The remaining limbs fall above 2^521, with the first falling at
	 * 2^522. They correspond to locations one bit up from the limbs
	 * produced above so we would have to multiply by two to align them.
	 * Again, rather than operate on the 128-bit result, we double one of
	 * the inputs to the multiplication. If we want to double for both this
	 * reason, and the reason above, then we end up multiplying by four. */

	/* The /* k */ labels below give the virtual limb index (i + j = k)
	 * that each group of products would occupy before being folded back
	 * into limb k - 9. */

	/* 9 */
	out[0] += ((uint128_t) in[1]) * inx4[8] +
		  ((uint128_t) in[2]) * inx4[7] +
		  ((uint128_t) in[3]) * inx4[6] +
		  ((uint128_t) in[4]) * inx4[5];

	/* 10 */
	out[1] += ((uint128_t) in[2]) * inx4[8] +
		  ((uint128_t) in[3]) * inx4[7] +
		  ((uint128_t) in[4]) * inx4[6] +
		  ((uint128_t) in[5]) * inx2[5];

	/* 11 */
	out[2] += ((uint128_t) in[3]) * inx4[8] +
		  ((uint128_t) in[4]) * inx4[7] +
		  ((uint128_t) in[5]) * inx4[6];

	/* 12 */
	out[3] += ((uint128_t) in[4]) * inx4[8] +
		  ((uint128_t) in[5]) * inx4[7] +
		  ((uint128_t) in[6]) * inx2[6];

	/* 13 */
	out[4] += ((uint128_t) in[5]) * inx4[8] +
		  ((uint128_t) in[6]) * inx4[7];

	/* 14 */
	out[5] += ((uint128_t) in[6]) * inx4[8] +
		  ((uint128_t) in[7]) * inx2[7];

	/* 15 */
	out[6] += ((uint128_t) in[7]) * inx4[8];

	/* 16 */
	out[7] += ((uint128_t) in[8]) * inx2[8];
	}
483
/*-
 * felem_mul sets |out| = |in1| * |in2|
 * On entry:
 *   in1[i] < 2^64
 *   in2[i] < 2^63
 * On exit:
 *   out[i] < 17 * max(in1[i]) * max(in2[i])
 */
static void felem_mul(largefelem out, const felem in1, const felem in2)
	{
	/* Doubled copy of in2, used for the products that land above 2^521
	 * and need a factor of two to align when folded back down. */
	felem in2x2;
	felem_scalar(in2x2, in2, 2);

	/* Schoolbook multiplication: out[k] collects in1[i]*in2[j] for all
	 * i + j == k, k = 0..8. */
	out[0] = ((uint128_t) in1[0]) * in2[0];

	out[1] = ((uint128_t) in1[0]) * in2[1] +
		 ((uint128_t) in1[1]) * in2[0];

	out[2] = ((uint128_t) in1[0]) * in2[2] +
		 ((uint128_t) in1[1]) * in2[1] +
		 ((uint128_t) in1[2]) * in2[0];

	out[3] = ((uint128_t) in1[0]) * in2[3] +
		 ((uint128_t) in1[1]) * in2[2] +
		 ((uint128_t) in1[2]) * in2[1] +
		 ((uint128_t) in1[3]) * in2[0];

	out[4] = ((uint128_t) in1[0]) * in2[4] +
		 ((uint128_t) in1[1]) * in2[3] +
		 ((uint128_t) in1[2]) * in2[2] +
		 ((uint128_t) in1[3]) * in2[1] +
		 ((uint128_t) in1[4]) * in2[0];

	out[5] = ((uint128_t) in1[0]) * in2[5] +
		 ((uint128_t) in1[1]) * in2[4] +
		 ((uint128_t) in1[2]) * in2[3] +
		 ((uint128_t) in1[3]) * in2[2] +
		 ((uint128_t) in1[4]) * in2[1] +
		 ((uint128_t) in1[5]) * in2[0];

	out[6] = ((uint128_t) in1[0]) * in2[6] +
		 ((uint128_t) in1[1]) * in2[5] +
		 ((uint128_t) in1[2]) * in2[4] +
		 ((uint128_t) in1[3]) * in2[3] +
		 ((uint128_t) in1[4]) * in2[2] +
		 ((uint128_t) in1[5]) * in2[1] +
		 ((uint128_t) in1[6]) * in2[0];

	out[7] = ((uint128_t) in1[0]) * in2[7] +
		 ((uint128_t) in1[1]) * in2[6] +
		 ((uint128_t) in1[2]) * in2[5] +
		 ((uint128_t) in1[3]) * in2[4] +
		 ((uint128_t) in1[4]) * in2[3] +
		 ((uint128_t) in1[5]) * in2[2] +
		 ((uint128_t) in1[6]) * in2[1] +
		 ((uint128_t) in1[7]) * in2[0];

	out[8] = ((uint128_t) in1[0]) * in2[8] +
		 ((uint128_t) in1[1]) * in2[7] +
		 ((uint128_t) in1[2]) * in2[6] +
		 ((uint128_t) in1[3]) * in2[5] +
		 ((uint128_t) in1[4]) * in2[4] +
		 ((uint128_t) in1[5]) * in2[3] +
		 ((uint128_t) in1[6]) * in2[2] +
		 ((uint128_t) in1[7]) * in2[1] +
		 ((uint128_t) in1[8]) * in2[0];

	/* See comment in felem_square about the use of in2x2 here: these are
	 * the products with i + j >= 9, folded back into limb i + j - 9 with
	 * the doubling that the fold requires. */

	out[0] += ((uint128_t) in1[1]) * in2x2[8] +
		  ((uint128_t) in1[2]) * in2x2[7] +
		  ((uint128_t) in1[3]) * in2x2[6] +
		  ((uint128_t) in1[4]) * in2x2[5] +
		  ((uint128_t) in1[5]) * in2x2[4] +
		  ((uint128_t) in1[6]) * in2x2[3] +
		  ((uint128_t) in1[7]) * in2x2[2] +
		  ((uint128_t) in1[8]) * in2x2[1];

	out[1] += ((uint128_t) in1[2]) * in2x2[8] +
		  ((uint128_t) in1[3]) * in2x2[7] +
		  ((uint128_t) in1[4]) * in2x2[6] +
		  ((uint128_t) in1[5]) * in2x2[5] +
		  ((uint128_t) in1[6]) * in2x2[4] +
		  ((uint128_t) in1[7]) * in2x2[3] +
		  ((uint128_t) in1[8]) * in2x2[2];

	out[2] += ((uint128_t) in1[3]) * in2x2[8] +
		  ((uint128_t) in1[4]) * in2x2[7] +
		  ((uint128_t) in1[5]) * in2x2[6] +
		  ((uint128_t) in1[6]) * in2x2[5] +
		  ((uint128_t) in1[7]) * in2x2[4] +
		  ((uint128_t) in1[8]) * in2x2[3];

	out[3] += ((uint128_t) in1[4]) * in2x2[8] +
		  ((uint128_t) in1[5]) * in2x2[7] +
		  ((uint128_t) in1[6]) * in2x2[6] +
		  ((uint128_t) in1[7]) * in2x2[5] +
		  ((uint128_t) in1[8]) * in2x2[4];

	out[4] += ((uint128_t) in1[5]) * in2x2[8] +
		  ((uint128_t) in1[6]) * in2x2[7] +
		  ((uint128_t) in1[7]) * in2x2[6] +
		  ((uint128_t) in1[8]) * in2x2[5];

	out[5] += ((uint128_t) in1[6]) * in2x2[8] +
		  ((uint128_t) in1[7]) * in2x2[7] +
		  ((uint128_t) in1[8]) * in2x2[6];

	out[6] += ((uint128_t) in1[7]) * in2x2[8] +
		  ((uint128_t) in1[8]) * in2x2[7];

	out[7] += ((uint128_t) in1[8]) * in2x2[8];
	}
597
/* Mask for the low 52 bits; used to split the upper 64-bit half of a
 * 128-bit limb at the point where its bits spill into the limb after next. */
static const limb bottom52bits = 0xfffffffffffff;

/*-
 * felem_reduce converts a largefelem to an felem.
 * On entry:
 *   in[i] < 2^128
 * On exit:
 *   out[i] < 2^59 + 2^14
 */
static void felem_reduce(felem out, const largefelem in)
	{
	u64 overflow1, overflow2;

	/* Start with the bottom 58 bits of each 128-bit input limb. */
	out[0] = ((limb) in[0]) & bottom58bits;
	out[1] = ((limb) in[1]) & bottom58bits;
	out[2] = ((limb) in[2]) & bottom58bits;
	out[3] = ((limb) in[3]) & bottom58bits;
	out[4] = ((limb) in[4]) & bottom58bits;
	out[5] = ((limb) in[5]) & bottom58bits;
	out[6] = ((limb) in[6]) & bottom58bits;
	out[7] = ((limb) in[7]) & bottom58bits;
	out[8] = ((limb) in[8]) & bottom58bits;

	/* out[i] < 2^58 */

	/* Fold the high part of each input limb into the next output limbs:
	 * bits [58,64) of in[i] land at bit 0 of out[i+1]; the upper 64-bit
	 * half of in[i] sits at 2^64 = 2^58 * 2^6, so its low 52 bits land
	 * 6 bits up in out[i+1] and its remaining bits at bit 0 of
	 * out[i+2]. */
	out[1] += ((limb) in[0]) >> 58;
	out[1] += (((limb) (in[0] >> 64)) & bottom52bits) << 6;
	/* out[1] < 2^58 + 2^6 + 2^58
	 *        = 2^59 + 2^6 */
	out[2] += ((limb) (in[0] >> 64)) >> 52;

	out[2] += ((limb) in[1]) >> 58;
	out[2] += (((limb) (in[1] >> 64)) & bottom52bits) << 6;
	out[3] += ((limb) (in[1] >> 64)) >> 52;

	out[3] += ((limb) in[2]) >> 58;
	out[3] += (((limb) (in[2] >> 64)) & bottom52bits) << 6;
	out[4] += ((limb) (in[2] >> 64)) >> 52;

	out[4] += ((limb) in[3]) >> 58;
	out[4] += (((limb) (in[3] >> 64)) & bottom52bits) << 6;
	out[5] += ((limb) (in[3] >> 64)) >> 52;

	out[5] += ((limb) in[4]) >> 58;
	out[5] += (((limb) (in[4] >> 64)) & bottom52bits) << 6;
	out[6] += ((limb) (in[4] >> 64)) >> 52;

	out[6] += ((limb) in[5]) >> 58;
	out[6] += (((limb) (in[5] >> 64)) & bottom52bits) << 6;
	out[7] += ((limb) (in[5] >> 64)) >> 52;

	out[7] += ((limb) in[6]) >> 58;
	out[7] += (((limb) (in[6] >> 64)) & bottom52bits) << 6;
	out[8] += ((limb) (in[6] >> 64)) >> 52;

	out[8] += ((limb) in[7]) >> 58;
	out[8] += (((limb) (in[7] >> 64)) & bottom52bits) << 6;
	/* out[x > 1] < 2^58 + 2^6 + 2^58 + 2^12
	 *            < 2^59 + 2^13 */
	/* The contributions that would fall into virtual limbs 9 and 10
	 * (positions 2^522 and 2^580) are collected in overflow1/overflow2. */
	overflow1 = ((limb) (in[7] >> 64)) >> 52;

	overflow1 += ((limb) in[8]) >> 58;
	overflow1 += (((limb) (in[8] >> 64)) & bottom52bits) << 6;
	overflow2 = ((limb) (in[8] >> 64)) >> 52;

	/* Since 2^521 = 1 (mod p), we have 2^522 = 2*2^0 and 2^580 = 2*2^58
	 * (mod p), hence the doubling before adding into out[0]/out[1]. */
	overflow1 <<= 1; /* overflow1 < 2^13 + 2^7 + 2^59 */
	overflow2 <<= 1; /* overflow2 < 2^13 */

	out[0] += overflow1; /* out[0] < 2^60 */
	out[1] += overflow2; /* out[1] < 2^59 + 2^6 + 2^13 */

	out[1] += out[0] >> 58; out[0] &= bottom58bits;
	/* out[0] < 2^58
	 * out[1] < 2^59 + 2^6 + 2^13 + 2^2
	 *        < 2^59 + 2^14 */
	}
674
675 static void felem_square_reduce(felem out, const felem in)
676 {
677 largefelem tmp;
678 felem_square(tmp, in);
679 felem_reduce(out, tmp);
680 }
681
682 static void felem_mul_reduce(felem out, const felem in1, const felem in2)
683 {
684 largefelem tmp;
685 felem_mul(tmp, in1, in2);
686 felem_reduce(out, tmp);
687 }
688
/*-
 * felem_inv calculates |out| = |in|^{-1}
 *
 * Based on Fermat's Little Theorem:
 *   a^p = a (mod p)
 *   a^{p-1} = 1 (mod p)
 *   a^{p-2} = a^{-1} (mod p)
 *
 * Computed as a fixed addition chain for the exponent p-2 = 2^521 - 3;
 * each trailing comment records the exponent held in the temporary after
 * that step, written as a difference of powers of two.
 */
static void felem_inv(felem out, const felem in)
	{
	felem ftmp, ftmp2, ftmp3, ftmp4;
	largefelem tmp;
	unsigned i;

	felem_square(tmp, in); felem_reduce(ftmp, tmp);			/* 2^1 */
	felem_mul(tmp, in, ftmp); felem_reduce(ftmp, tmp);		/* 2^2 - 2^0 */
	felem_assign(ftmp2, ftmp);
	felem_square(tmp, ftmp); felem_reduce(ftmp, tmp);		/* 2^3 - 2^1 */
	felem_mul(tmp, in, ftmp); felem_reduce(ftmp, tmp);		/* 2^3 - 2^0 */
	felem_square(tmp, ftmp); felem_reduce(ftmp, tmp);		/* 2^4 - 2^1 */

	felem_square(tmp, ftmp2); felem_reduce(ftmp3, tmp);		/* 2^3 - 2^1 */
	felem_square(tmp, ftmp3); felem_reduce(ftmp3, tmp);		/* 2^4 - 2^2 */
	felem_mul(tmp, ftmp3, ftmp2); felem_reduce(ftmp3, tmp);		/* 2^4 - 2^0 */

	felem_assign(ftmp2, ftmp3);
	felem_square(tmp, ftmp3); felem_reduce(ftmp3, tmp);		/* 2^5 - 2^1 */
	felem_square(tmp, ftmp3); felem_reduce(ftmp3, tmp);		/* 2^6 - 2^2 */
	felem_square(tmp, ftmp3); felem_reduce(ftmp3, tmp);		/* 2^7 - 2^3 */
	felem_square(tmp, ftmp3); felem_reduce(ftmp3, tmp);		/* 2^8 - 2^4 */
	felem_assign(ftmp4, ftmp3);
	felem_mul(tmp, ftmp3, ftmp); felem_reduce(ftmp4, tmp);		/* 2^8 - 2^1 */
	felem_square(tmp, ftmp4); felem_reduce(ftmp4, tmp);		/* 2^9 - 2^2 */
	felem_mul(tmp, ftmp3, ftmp2); felem_reduce(ftmp3, tmp);		/* 2^8 - 2^0 */
	felem_assign(ftmp2, ftmp3);

	/* Repeated doubling of the exponent's bit-run, multiplying in the
	 * saved copy each time to extend the run: 8 -> 16 -> ... -> 512. */
	for (i = 0; i < 8; i++)
		{
		felem_square(tmp, ftmp3); felem_reduce(ftmp3, tmp);	/* 2^16 - 2^8 */
		}
	felem_mul(tmp, ftmp3, ftmp2); felem_reduce(ftmp3, tmp);		/* 2^16 - 2^0 */
	felem_assign(ftmp2, ftmp3);

	for (i = 0; i < 16; i++)
		{
		felem_square(tmp, ftmp3); felem_reduce(ftmp3, tmp);	/* 2^32 - 2^16 */
		}
	felem_mul(tmp, ftmp3, ftmp2); felem_reduce(ftmp3, tmp);		/* 2^32 - 2^0 */
	felem_assign(ftmp2, ftmp3);

	for (i = 0; i < 32; i++)
		{
		felem_square(tmp, ftmp3); felem_reduce(ftmp3, tmp);	/* 2^64 - 2^32 */
		}
	felem_mul(tmp, ftmp3, ftmp2); felem_reduce(ftmp3, tmp);		/* 2^64 - 2^0 */
	felem_assign(ftmp2, ftmp3);

	for (i = 0; i < 64; i++)
		{
		felem_square(tmp, ftmp3); felem_reduce(ftmp3, tmp);	/* 2^128 - 2^64 */
		}
	felem_mul(tmp, ftmp3, ftmp2); felem_reduce(ftmp3, tmp);		/* 2^128 - 2^0 */
	felem_assign(ftmp2, ftmp3);

	for (i = 0; i < 128; i++)
		{
		felem_square(tmp, ftmp3); felem_reduce(ftmp3, tmp);	/* 2^256 - 2^128 */
		}
	felem_mul(tmp, ftmp3, ftmp2); felem_reduce(ftmp3, tmp);		/* 2^256 - 2^0 */
	felem_assign(ftmp2, ftmp3);

	for (i = 0; i < 256; i++)
		{
		felem_square(tmp, ftmp3); felem_reduce(ftmp3, tmp);	/* 2^512 - 2^256 */
		}
	felem_mul(tmp, ftmp3, ftmp2); felem_reduce(ftmp3, tmp);		/* 2^512 - 2^0 */

	for (i = 0; i < 9; i++)
		{
		felem_square(tmp, ftmp3); felem_reduce(ftmp3, tmp);	/* 2^521 - 2^9 */
		}
	/* NOTE: the two comments below previously read 2^512; the exponents
	 * here are 2^521 - 2^2 and then 2^521 - 3 = p - 2. */
	felem_mul(tmp, ftmp3, ftmp4); felem_reduce(ftmp3, tmp);		/* 2^521 - 2^2 */
	felem_mul(tmp, ftmp3, in); felem_reduce(out, tmp);		/* 2^521 - 3 */
	}
773
/* This is 2^521-1, expressed as an felem: eight full 58-bit limbs followed
 * by one full 57-bit limb (8*58 + 57 = 521). */
static const felem kPrime =
	{
	0x03ffffffffffffff, 0x03ffffffffffffff, 0x03ffffffffffffff,
	0x03ffffffffffffff, 0x03ffffffffffffff, 0x03ffffffffffffff,
	0x03ffffffffffffff, 0x03ffffffffffffff, 0x01ffffffffffffff
	};
781
/*-
 * felem_is_zero returns a limb with all bits set if |in| == 0 (mod p) and 0
 * otherwise. (Constant-time: no data-dependent branches.)
 * On entry:
 *   in[i] < 2^59 + 2^14
 */
static limb felem_is_zero(const felem in)
	{
	felem ftmp;
	limb is_zero, is_p;
	felem_assign(ftmp, in);

	/* Carry-propagate so every limb fits its 58 (57 for ftmp[8]) bits. */
	ftmp[0] += ftmp[8] >> 57; ftmp[8] &= bottom57bits;
	/* ftmp[8] < 2^57 */
	ftmp[1] += ftmp[0] >> 58; ftmp[0] &= bottom58bits;
	ftmp[2] += ftmp[1] >> 58; ftmp[1] &= bottom58bits;
	ftmp[3] += ftmp[2] >> 58; ftmp[2] &= bottom58bits;
	ftmp[4] += ftmp[3] >> 58; ftmp[3] &= bottom58bits;
	ftmp[5] += ftmp[4] >> 58; ftmp[4] &= bottom58bits;
	ftmp[6] += ftmp[5] >> 58; ftmp[5] &= bottom58bits;
	ftmp[7] += ftmp[6] >> 58; ftmp[6] &= bottom58bits;
	ftmp[8] += ftmp[7] >> 58; ftmp[7] &= bottom58bits;
	/* ftmp[8] < 2^57 + 4 */

	/* The ninth limb of 2*(2^521-1) is 0x03ffffffffffffff, which is
	 * greater than our bound for ftmp[8]. Therefore we only have to check
	 * if the value is zero or 2^521-1. */

	/* OR all limbs together; the result is 0 iff every limb is 0. */
	is_zero = 0;
	is_zero |= ftmp[0];
	is_zero |= ftmp[1];
	is_zero |= ftmp[2];
	is_zero |= ftmp[3];
	is_zero |= ftmp[4];
	is_zero |= ftmp[5];
	is_zero |= ftmp[6];
	is_zero |= ftmp[7];
	is_zero |= ftmp[8];

	is_zero--;
	/* We know that ftmp[i] < 2^63, therefore the only way that the top bit
	 * can be set is if is_zero was 0 before the decrement. */
	is_zero = ((s64) is_zero) >> 63;

	/* Same trick against kPrime: is_p becomes 0 iff ftmp == 2^521-1. */
	is_p = ftmp[0] ^ kPrime[0];
	is_p |= ftmp[1] ^ kPrime[1];
	is_p |= ftmp[2] ^ kPrime[2];
	is_p |= ftmp[3] ^ kPrime[3];
	is_p |= ftmp[4] ^ kPrime[4];
	is_p |= ftmp[5] ^ kPrime[5];
	is_p |= ftmp[6] ^ kPrime[6];
	is_p |= ftmp[7] ^ kPrime[7];
	is_p |= ftmp[8] ^ kPrime[8];

	is_p--;
	is_p = ((s64) is_p) >> 63;

	/* All-ones iff the value was 0 or exactly p, i.e. 0 (mod p). */
	is_zero |= is_p;
	return is_zero;
	}
842
843 static int felem_is_zero_int(const felem in)
844 {
845 return (int) (felem_is_zero(in) & ((limb)1));
846 }
847
848 /*-
849 * felem_contract converts |in| to its unique, minimal representation.
850 * On entry:
851 * in[i] < 2^59 + 2^14
852 */
853 static void felem_contract(felem out, const felem in)
854 {
855 limb is_p, is_greater, sign;
856 static const limb two58 = ((limb)1) << 58;
857
858 felem_assign(out, in);
859
860 out[0] += out[8] >> 57; out[8] &= bottom57bits;
861 /* out[8] < 2^57 */
862 out[1] += out[0] >> 58; out[0] &= bottom58bits;
863 out[2] += out[1] >> 58; out[1] &= bottom58bits;
864 out[3] += out[2] >> 58; out[2] &= bottom58bits;
865 out[4] += out[3] >> 58; out[3] &= bottom58bits;
866 out[5] += out[4] >> 58; out[4] &= bottom58bits;
867 out[6] += out[5] >> 58; out[5] &= bottom58bits;
868 out[7] += out[6] >> 58; out[6] &= bottom58bits;
869 out[8] += out[7] >> 58; out[7] &= bottom58bits;
870 /* out[8] < 2^57 + 4 */
871
872 /* If the value is greater than 2^521-1 then we have to subtract
873 * 2^521-1 out. See the comments in felem_is_zero regarding why we
874 * don't test for other multiples of the prime. */
875
876 /* First, if |out| is equal to 2^521-1, we subtract it out to get zero. */
877
878 is_p = out[0] ^ kPrime[0];
879 is_p |= out[1] ^ kPrime[1];
880 is_p |= out[2] ^ kPrime[2];
881 is_p |= out[3] ^ kPrime[3];
882 is_p |= out[4] ^ kPrime[4];
883 is_p |= out[5] ^ kPrime[5];
884 is_p |= out[6] ^ kPrime[6];
885 is_p |= out[7] ^ kPrime[7];
886 is_p |= out[8] ^ kPrime[8];
887
888 is_p--;
889 is_p &= is_p << 32;
890 is_p &= is_p << 16;
891 is_p &= is_p << 8;
892 is_p &= is_p << 4;
893 is_p &= is_p << 2;
894 is_p &= is_p << 1;
895 is_p = ((s64) is_p) >> 63;
896 is_p = ~is_p;
897
898 /* is_p is 0 iff |out| == 2^521-1 and all ones otherwise */
899
900 out[0] &= is_p;
901 out[1] &= is_p;
902 out[2] &= is_p;
903 out[3] &= is_p;
904 out[4] &= is_p;
905 out[5] &= is_p;
906 out[6] &= is_p;
907 out[7] &= is_p;
908 out[8] &= is_p;
909
910 /* In order to test that |out| >= 2^521-1 we need only test if out[8]
911 * >> 57 is greater than zero as (2^521-1) + x >= 2^522 */
912 is_greater = out[8] >> 57;
913 is_greater |= is_greater << 32;
914 is_greater |= is_greater << 16;
915 is_greater |= is_greater << 8;
916 is_greater |= is_greater << 4;
917 is_greater |= is_greater << 2;
918 is_greater |= is_greater << 1;
919 is_greater = ((s64) is_greater) >> 63;
920
921 out[0] -= kPrime[0] & is_greater;
922 out[1] -= kPrime[1] & is_greater;
923 out[2] -= kPrime[2] & is_greater;
924 out[3] -= kPrime[3] & is_greater;
925 out[4] -= kPrime[4] & is_greater;
926 out[5] -= kPrime[5] & is_greater;
927 out[6] -= kPrime[6] & is_greater;
928 out[7] -= kPrime[7] & is_greater;
929 out[8] -= kPrime[8] & is_greater;
930
931 /* Eliminate negative coefficients */
932 sign = -(out[0] >> 63); out[0] += (two58 & sign); out[1] -= (1 & sign);
933 sign = -(out[1] >> 63); out[1] += (two58 & sign); out[2] -= (1 & sign);
934 sign = -(out[2] >> 63); out[2] += (two58 & sign); out[3] -= (1 & sign);
935 sign = -(out[3] >> 63); out[3] += (two58 & sign); out[4] -= (1 & sign);
936 sign = -(out[4] >> 63); out[4] += (two58 & sign); out[5] -= (1 & sign);
937 sign = -(out[0] >> 63); out[5] += (two58 & sign); out[6] -= (1 & sign);
938 sign = -(out[6] >> 63); out[6] += (two58 & sign); out[7] -= (1 & sign);
939 sign = -(out[7] >> 63); out[7] += (two58 & sign); out[8] -= (1 & sign);
940 sign = -(out[5] >> 63); out[5] += (two58 & sign); out[6] -= (1 & sign);
941 sign = -(out[6] >> 63); out[6] += (two58 & sign); out[7] -= (1 & sign);
942 sign = -(out[7] >> 63); out[7] += (two58 & sign); out[8] -= (1 & sign);
943 }
944
945 /*-
946 * Group operations
947 * ----------------
948 *
949 * Building on top of the field operations we have the operations on the
950 * elliptic curve group itself. Points on the curve are represented in Jacobian
951 * coordinates */
952
953 /*-
954 * point_double calculates 2*(x_in, y_in, z_in)
955 *
956 * The method is taken from:
957 * http://hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-3.html#doubling-dbl-2001-b
958 *
959 * Outputs can equal corresponding inputs, i.e., x_out == x_in is allowed.
960 * while x_out == y_in is not (maybe this works, but it's not tested). */
static void
point_double(felem x_out, felem y_out, felem z_out,
	const felem x_in, const felem y_in, const felem z_in)
	{
	largefelem tmp, tmp2;	/* scratch for unreduced 128-bit limb products */
	felem delta, gamma, beta, alpha, ftmp, ftmp2;

	/* Work on copies so that outputs may alias x_in. */
	felem_assign(ftmp, x_in);
	felem_assign(ftmp2, x_in);

	/* delta = z^2 */
	felem_square(tmp, z_in);
	felem_reduce(delta, tmp);	/* delta[i] < 2^59 + 2^14 */

	/* gamma = y^2 */
	felem_square(tmp, y_in);
	felem_reduce(gamma, tmp);	/* gamma[i] < 2^59 + 2^14 */

	/* beta = x*gamma */
	felem_mul(tmp, x_in, gamma);
	felem_reduce(beta, tmp);	/* beta[i] < 2^59 + 2^14 */

	/* alpha = 3*(x-delta)*(x+delta) */
	felem_diff64(ftmp, delta);
	/* ftmp[i] < 2^61 */
	felem_sum64(ftmp2, delta);
	/* ftmp2[i] < 2^60 + 2^15 */
	felem_scalar64(ftmp2, 3);
	/* ftmp2[i] < 3*2^60 + 3*2^15 */
	felem_mul(tmp, ftmp, ftmp2);
	/*-
	 * tmp[i] < 17(3*2^121 + 3*2^76)
	 *        = 61*2^121 + 61*2^76
	 *        < 64*2^121 + 64*2^76
	 *        = 2^127 + 2^82
	 *        < 2^128
	 */
	felem_reduce(alpha, tmp);

	/* x' = alpha^2 - 8*beta */
	felem_square(tmp, alpha);
	/*-
	 * tmp[i] < 17*2^120
	 *        < 2^125
	 */
	felem_assign(ftmp, beta);
	felem_scalar64(ftmp, 8);
	/* ftmp[i] < 2^62 + 2^17 */
	felem_diff_128_64(tmp, ftmp);
	/* tmp[i] < 2^125 + 2^63 + 2^62 + 2^17 */
	felem_reduce(x_out, tmp);

	/* z' = (y + z)^2 - gamma - delta */
	felem_sum64(delta, gamma);
	/* delta[i] < 2^60 + 2^15 */
	felem_assign(ftmp, y_in);
	felem_sum64(ftmp, z_in);
	/* ftmp[i] < 2^60 + 2^15 */
	felem_square(tmp, ftmp);
	/*-
	 * tmp[i] < 17(2^122)
	 *        < 2^127
	 */
	felem_diff_128_64(tmp, delta);
	/* tmp[i] < 2^127 + 2^63 */
	felem_reduce(z_out, tmp);

	/* y' = alpha*(4*beta - x') - 8*gamma^2 */
	felem_scalar64(beta, 4);
	/* beta[i] < 2^61 + 2^16 */
	felem_diff64(beta, x_out);
	/* beta[i] < 2^61 + 2^60 + 2^16 */
	felem_mul(tmp, alpha, beta);
	/*-
	 * tmp[i] < 17*((2^59 + 2^14)(2^61 + 2^60 + 2^16))
	 *        = 17*(2^120 + 2^75 + 2^119 + 2^74 + 2^75 + 2^30)
	 *        = 17*(2^120 + 2^119 + 2^76 + 2^74 + 2^30)
	 *        < 2^128
	 */
	felem_square(tmp2, gamma);
	/*-
	 * tmp2[i] < 17*(2^59 + 2^14)^2
	 *         = 17*(2^118 + 2^74 + 2^28)
	 */
	felem_scalar128(tmp2, 8);
	/*-
	 * tmp2[i] < 8*17*(2^118 + 2^74 + 2^28)
	 *         = 2^125 + 2^121 + 2^81 + 2^77 + 2^35 + 2^31
	 *         < 2^126
	 */
	felem_diff128(tmp, tmp2);
	/*-
	 * tmp[i] < 2^127 - 2^69 + 17(2^120 + 2^119 + 2^76 + 2^74 + 2^30)
	 *        = 2^127 + 2^124 + 2^122 + 2^120 + 2^118 + 2^80 + 2^78 + 2^76 +
	 *          2^74 + 2^69 + 2^34 + 2^30
	 *        < 2^128
	 */
	felem_reduce(y_out, tmp);
	}
1056
1057 /* copy_conditional copies in to out iff mask is all ones. */
1058 static void
1059 copy_conditional(felem out, const felem in, limb mask)
1060 {
1061 unsigned i;
1062 for (i = 0; i < NLIMBS; ++i)
1063 {
1064 const limb tmp = mask & (in[i] ^ out[i]);
1065 out[i] ^= tmp;
1066 }
1067 }
1068
1069 /*-
 * point_add calculates (x1, y1, z1) + (x2, y2, z2)
1071 *
1072 * The method is taken from
1073 * http://hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-3.html#addition-add-2007-bl,
1074 * adapted for mixed addition (z2 = 1, or z2 = 0 for the point at infinity).
1075 *
1076 * This function includes a branch for checking whether the two input points
1077 * are equal (while not equal to the point at infinity). This case never
1078 * happens during single point multiplication, so there is no timing leak for
1079 * ECDH or ECDSA signing. */
static void point_add(felem x3, felem y3, felem z3,
	const felem x1, const felem y1, const felem z1,
	const int mixed, const felem x2, const felem y2, const felem z2)
	{
	felem ftmp, ftmp2, ftmp3, ftmp4, ftmp5, ftmp6, x_out, y_out, z_out;
	largefelem tmp, tmp2;
	limb x_equal, y_equal, z1_is_zero, z2_is_zero;

	z1_is_zero = felem_is_zero(z1);
	z2_is_zero = felem_is_zero(z2);

	/* ftmp = z1z1 = z1**2 */
	felem_square(tmp, z1);
	felem_reduce(ftmp, tmp);

	if (!mixed)
		{
		/* ftmp2 = z2z2 = z2**2 */
		felem_square(tmp, z2);
		felem_reduce(ftmp2, tmp);

		/* u1 = ftmp3 = x1*z2z2 */
		felem_mul(tmp, x1, ftmp2);
		felem_reduce(ftmp3, tmp);

		/* ftmp5 = z1 + z2 */
		felem_assign(ftmp5, z1);
		felem_sum64(ftmp5, z2);
		/* ftmp5[i] < 2^61 */

		/* ftmp5 = (z1 + z2)**2 - z1z1 - z2z2 = 2*z1z2 */
		felem_square(tmp, ftmp5);
		/* tmp[i] < 17*2^122 */
		felem_diff_128_64(tmp, ftmp);
		/* tmp[i] < 17*2^122 + 2^63 */
		felem_diff_128_64(tmp, ftmp2);
		/* tmp[i] < 17*2^122 + 2^64 */
		felem_reduce(ftmp5, tmp);

		/* ftmp2 = z2 * z2z2 */
		felem_mul(tmp, ftmp2, z2);
		felem_reduce(ftmp2, tmp);

		/* s1 = ftmp6 = y1 * z2**3 */
		felem_mul(tmp, y1, ftmp2);
		felem_reduce(ftmp6, tmp);
		}
	else
		{
		/* We'll assume z2 = 1 (special case z2 = 0 is handled later) */

		/* u1 = ftmp3 = x1*z2z2 */
		felem_assign(ftmp3, x1);

		/* ftmp5 = 2*z1z2 */
		felem_scalar(ftmp5, z1, 2);

		/* s1 = ftmp6 = y1 * z2**3 */
		felem_assign(ftmp6, y1);
		}

	/* u2 = x2*z1z1 */
	felem_mul(tmp, x2, ftmp);
	/* tmp[i] < 17*2^120 */

	/* h = ftmp4 = u2 - u1 */
	felem_diff_128_64(tmp, ftmp3);
	/* tmp[i] < 17*2^120 + 2^63 */
	felem_reduce(ftmp4, tmp);

	x_equal = felem_is_zero(ftmp4);

	/* z_out = ftmp5 * h */
	felem_mul(tmp, ftmp5, ftmp4);
	felem_reduce(z_out, tmp);

	/* ftmp = z1 * z1z1 */
	felem_mul(tmp, ftmp, z1);
	felem_reduce(ftmp, tmp);

	/* s2 = tmp = y2 * z1**3 */
	felem_mul(tmp, y2, ftmp);
	/* tmp[i] < 17*2^120 */

	/* r = ftmp5 = (s2 - s1)*2 */
	felem_diff_128_64(tmp, ftmp6);
	/* tmp[i] < 17*2^120 + 2^63 */
	felem_reduce(ftmp5, tmp);
	y_equal = felem_is_zero(ftmp5);
	felem_scalar64(ftmp5, 2);
	/* ftmp5[i] < 2^61 */

	/* The two inputs are the same finite point: the addition formula
	 * degenerates, so fall back to doubling.  NOTE(review): this branch is
	 * data-dependent; per the header comment it cannot be reached during
	 * single-point multiplication, so it leaks no timing there. */
	if (x_equal && y_equal && !z1_is_zero && !z2_is_zero)
		{
		point_double(x3, y3, z3, x1, y1, z1);
		return;
		}

	/* I = ftmp = (2h)**2 */
	felem_assign(ftmp, ftmp4);
	felem_scalar64(ftmp, 2);
	/* ftmp[i] < 2^61 */
	felem_square(tmp, ftmp);
	/* tmp[i] < 17*2^122 */
	felem_reduce(ftmp, tmp);

	/* J = ftmp2 = h * I */
	felem_mul(tmp, ftmp4, ftmp);
	felem_reduce(ftmp2, tmp);

	/* V = ftmp4 = U1 * I */
	felem_mul(tmp, ftmp3, ftmp);
	felem_reduce(ftmp4, tmp);

	/* x_out = r**2 - J - 2V */
	felem_square(tmp, ftmp5);
	/* tmp[i] < 17*2^122 */
	felem_diff_128_64(tmp, ftmp2);
	/* tmp[i] < 17*2^122 + 2^63 */
	felem_assign(ftmp3, ftmp4);
	felem_scalar64(ftmp4, 2);
	/* ftmp4[i] < 2^61 */
	felem_diff_128_64(tmp, ftmp4);
	/* tmp[i] < 17*2^122 + 2^64 */
	felem_reduce(x_out, tmp);

	/* y_out = r(V-x_out) - 2 * s1 * J */
	felem_diff64(ftmp3, x_out);
	/*-
	 * ftmp3[i] < 2^60 + 2^60
	 *          = 2^61
	 */
	felem_mul(tmp, ftmp5, ftmp3);
	/* tmp[i] < 17*2^122 */
	felem_mul(tmp2, ftmp6, ftmp2);
	/* tmp2[i] < 17*2^120 */
	felem_scalar128(tmp2, 2);
	/* tmp2[i] < 17*2^121 */
	felem_diff128(tmp, tmp2);
	/*-
	 * tmp[i] < 2^127 - 2^69 + 17*2^122
	 *        = 2^126 - 2^122 - 2^6 - 2^2 - 1
	 *        < 2^127
	 */
	felem_reduce(y_out, tmp);

	/* If either input was the point at infinity, the result is simply the
	 * other input; the copies below are constant-time. */
	copy_conditional(x_out, x2, z1_is_zero);
	copy_conditional(x_out, x1, z2_is_zero);
	copy_conditional(y_out, y2, z1_is_zero);
	copy_conditional(y_out, y1, z2_is_zero);
	copy_conditional(z_out, z2, z1_is_zero);
	copy_conditional(z_out, z1, z2_is_zero);
	felem_assign(x3, x_out);
	felem_assign(y3, y_out);
	felem_assign(z3, z_out);
	}
1232
1233 /*-
1234 * Base point pre computation
1235 * --------------------------
1236 *
1237 * Two different sorts of precomputed tables are used in the following code.
1238 * Each contain various points on the curve, where each point is three field
1239 * elements (x, y, z).
1240 *
1241 * For the base point table, z is usually 1 (0 for the point at infinity).
1242 * This table has 16 elements:
1243 * index | bits | point
1244 * ------+---------+------------------------------
1245 * 0 | 0 0 0 0 | 0G
1246 * 1 | 0 0 0 1 | 1G
1247 * 2 | 0 0 1 0 | 2^130G
1248 * 3 | 0 0 1 1 | (2^130 + 1)G
1249 * 4 | 0 1 0 0 | 2^260G
1250 * 5 | 0 1 0 1 | (2^260 + 1)G
1251 * 6 | 0 1 1 0 | (2^260 + 2^130)G
1252 * 7 | 0 1 1 1 | (2^260 + 2^130 + 1)G
1253 * 8 | 1 0 0 0 | 2^390G
1254 * 9 | 1 0 0 1 | (2^390 + 1)G
1255 * 10 | 1 0 1 0 | (2^390 + 2^130)G
1256 * 11 | 1 0 1 1 | (2^390 + 2^130 + 1)G
1257 * 12 | 1 1 0 0 | (2^390 + 2^260)G
1258 * 13 | 1 1 0 1 | (2^390 + 2^260 + 1)G
1259 * 14 | 1 1 1 0 | (2^390 + 2^260 + 2^130)G
1260 * 15 | 1 1 1 1 | (2^390 + 2^260 + 2^130 + 1)G
1261 *
1262 * The reason for this is so that we can clock bits into four different
1263 * locations when doing simple scalar multiplies against the base point.
1264 *
1265 * Tables for other points have table[i] = iG for i in 0 .. 16. */
1266
/* gmul is the table of precomputed base points: entry i holds the point
 * described by the bit pattern of i in the index table above.  z = 1 for
 * every entry except entry 0, the point at infinity (z = 0). */
static const felem gmul[16][3] =
	{{{0, 0, 0, 0, 0, 0, 0, 0, 0},
	  {0, 0, 0, 0, 0, 0, 0, 0, 0},
	  {0, 0, 0, 0, 0, 0, 0, 0, 0}},
	 {{0x017e7e31c2e5bd66, 0x022cf0615a90a6fe, 0x00127a2ffa8de334,
	   0x01dfbf9d64a3f877, 0x006b4d3dbaa14b5e, 0x014fed487e0a2bd8,
	   0x015b4429c6481390, 0x03a73678fb2d988e, 0x00c6858e06b70404},
	  {0x00be94769fd16650, 0x031c21a89cb09022, 0x039013fad0761353,
	   0x02657bd099031542, 0x03273e662c97ee72, 0x01e6d11a05ebef45,
	   0x03d1bd998f544495, 0x03001172297ed0b1, 0x011839296a789a3b},
	  {1, 0, 0, 0, 0, 0, 0, 0, 0}},
	 {{0x0373faacbc875bae, 0x00f325023721c671, 0x00f666fd3dbde5ad,
	   0x01a6932363f88ea7, 0x01fc6d9e13f9c47b, 0x03bcbffc2bbf734e,
	   0x013ee3c3647f3a92, 0x029409fefe75d07d, 0x00ef9199963d85e5},
	  {0x011173743ad5b178, 0x02499c7c21bf7d46, 0x035beaeabb8b1a58,
	   0x00f989c4752ea0a3, 0x0101e1de48a9c1a3, 0x01a20076be28ba6c,
	   0x02f8052e5eb2de95, 0x01bfe8f82dea117c, 0x0160074d3c36ddb7},
	  {1, 0, 0, 0, 0, 0, 0, 0, 0}},
	 {{0x012f3fc373393b3b, 0x03d3d6172f1419fa, 0x02adc943c0b86873,
	   0x00d475584177952b, 0x012a4d1673750ee2, 0x00512517a0f13b0c,
	   0x02b184671a7b1734, 0x0315b84236f1a50a, 0x00a4afc472edbdb9},
	  {0x00152a7077f385c4, 0x03044007d8d1c2ee, 0x0065829d61d52b52,
	   0x00494ff6b6631d0d, 0x00a11d94d5f06bcf, 0x02d2f89474d9282e,
	   0x0241c5727c06eeb9, 0x0386928710fbdb9d, 0x01f883f727b0dfbe},
	  {1, 0, 0, 0, 0, 0, 0, 0, 0}},
	 {{0x019b0c3c9185544d, 0x006243a37c9d97db, 0x02ee3cbe030a2ad2,
	   0x00cfdd946bb51e0d, 0x0271c00932606b91, 0x03f817d1ec68c561,
	   0x03f37009806a369c, 0x03c1f30baf184fd5, 0x01091022d6d2f065},
	  {0x0292c583514c45ed, 0x0316fca51f9a286c, 0x00300af507c1489a,
	   0x0295f69008298cf1, 0x02c0ed8274943d7b, 0x016509b9b47a431e,
	   0x02bc9de9634868ce, 0x005b34929bffcb09, 0x000c1a0121681524},
	  {1, 0, 0, 0, 0, 0, 0, 0, 0}},
	 {{0x0286abc0292fb9f2, 0x02665eee9805b3f7, 0x01ed7455f17f26d6,
	   0x0346355b83175d13, 0x006284944cd0a097, 0x0191895bcdec5e51,
	   0x02e288370afda7d9, 0x03b22312bfefa67a, 0x01d104d3fc0613fe},
	  {0x0092421a12f7e47f, 0x0077a83fa373c501, 0x03bd25c5f696bd0d,
	   0x035c41e4d5459761, 0x01ca0d1742b24f53, 0x00aaab27863a509c,
	   0x018b6de47df73917, 0x025c0b771705cd01, 0x01fd51d566d760a7},
	  {1, 0, 0, 0, 0, 0, 0, 0, 0}},
	 {{0x01dd92ff6b0d1dbd, 0x039c5e2e8f8afa69, 0x0261ed13242c3b27,
	   0x0382c6e67026e6a0, 0x01d60b10be2089f9, 0x03c15f3dce86723f,
	   0x03c764a32d2a062d, 0x017307eac0fad056, 0x018207c0b96c5256},
	  {0x0196a16d60e13154, 0x03e6ce74c0267030, 0x00ddbf2b4e52a5aa,
	   0x012738241bbf31c8, 0x00ebe8dc04685a28, 0x024c2ad6d380d4a2,
	   0x035ee062a6e62d0e, 0x0029ed74af7d3a0f, 0x00eef32aec142ebd},
	  {1, 0, 0, 0, 0, 0, 0, 0, 0}},
	 {{0x00c31ec398993b39, 0x03a9f45bcda68253, 0x00ac733c24c70890,
	   0x00872b111401ff01, 0x01d178c23195eafb, 0x03bca2c816b87f74,
	   0x0261a9af46fbad7a, 0x0324b2a8dd3d28f9, 0x00918121d8f24e23},
	  {0x032bc8c1ca983cd7, 0x00d869dfb08fc8c6, 0x01693cb61fce1516,
	   0x012a5ea68f4e88a8, 0x010869cab88d7ae3, 0x009081ad277ceee1,
	   0x033a77166d064cdc, 0x03955235a1fb3a95, 0x01251a4a9b25b65e},
	  {1, 0, 0, 0, 0, 0, 0, 0, 0}},
	 {{0x00148a3a1b27f40b, 0x0123186df1b31fdc, 0x00026e7beaad34ce,
	   0x01db446ac1d3dbba, 0x0299c1a33437eaec, 0x024540610183cbb7,
	   0x0173bb0e9ce92e46, 0x02b937e43921214b, 0x01ab0436a9bf01b5},
	  {0x0383381640d46948, 0x008dacbf0e7f330f, 0x03602122bcc3f318,
	   0x01ee596b200620d6, 0x03bd0585fda430b3, 0x014aed77fd123a83,
	   0x005ace749e52f742, 0x0390fe041da2b842, 0x0189a8ceb3299242},
	  {1, 0, 0, 0, 0, 0, 0, 0, 0}},
	 {{0x012a19d6b3282473, 0x00c0915918b423ce, 0x023a954eb94405ae,
	   0x00529f692be26158, 0x0289fa1b6fa4b2aa, 0x0198ae4ceea346ef,
	   0x0047d8cdfbdedd49, 0x00cc8c8953f0f6b8, 0x001424abbff49203},
	  {0x0256732a1115a03a, 0x0351bc38665c6733, 0x03f7b950fb4a6447,
	   0x000afffa94c22155, 0x025763d0a4dab540, 0x000511e92d4fc283,
	   0x030a7e9eda0ee96c, 0x004c3cd93a28bf0a, 0x017edb3a8719217f},
	  {1, 0, 0, 0, 0, 0, 0, 0, 0}},
	 {{0x011de5675a88e673, 0x031d7d0f5e567fbe, 0x0016b2062c970ae5,
	   0x03f4a2be49d90aa7, 0x03cef0bd13822866, 0x03f0923dcf774a6c,
	   0x0284bebc4f322f72, 0x016ab2645302bb2c, 0x01793f95dace0e2a},
	  {0x010646e13527a28f, 0x01ca1babd59dc5e7, 0x01afedfd9a5595df,
	   0x01f15785212ea6b1, 0x0324e5d64f6ae3f4, 0x02d680f526d00645,
	   0x0127920fadf627a7, 0x03b383f75df4f684, 0x0089e0057e783b0a},
	  {1, 0, 0, 0, 0, 0, 0, 0, 0}},
	 {{0x00f334b9eb3c26c6, 0x0298fdaa98568dce, 0x01c2d24843a82292,
	   0x020bcb24fa1b0711, 0x02cbdb3d2b1875e6, 0x0014907598f89422,
	   0x03abe3aa43b26664, 0x02cbf47f720bc168, 0x0133b5e73014b79b},
	  {0x034aab5dab05779d, 0x00cdc5d71fee9abb, 0x0399f16bd4bd9d30,
	   0x03582fa592d82647, 0x02be1cdfb775b0e9, 0x0034f7cea32e94cb,
	   0x0335a7f08f56f286, 0x03b707e9565d1c8b, 0x0015c946ea5b614f},
	  {1, 0, 0, 0, 0, 0, 0, 0, 0}},
	 {{0x024676f6cff72255, 0x00d14625cac96378, 0x00532b6008bc3767,
	   0x01fc16721b985322, 0x023355ea1b091668, 0x029de7afdc0317c3,
	   0x02fc8a7ca2da037c, 0x02de1217d74a6f30, 0x013f7173175b73bf},
	  {0x0344913f441490b5, 0x0200f9e272b61eca, 0x0258a246b1dd55d2,
	   0x03753db9ea496f36, 0x025e02937a09c5ef, 0x030cbd3d14012692,
	   0x01793a67e70dc72a, 0x03ec1d37048a662e, 0x006550f700c32a8d},
	  {1, 0, 0, 0, 0, 0, 0, 0, 0}},
	 {{0x00d3f48a347eba27, 0x008e636649b61bd8, 0x00d3b93716778fb3,
	   0x004d1915757bd209, 0x019d5311a3da44e0, 0x016d1afcbbe6aade,
	   0x0241bf5f73265616, 0x0384672e5d50d39b, 0x005009fee522b684},
	  {0x029b4fab064435fe, 0x018868ee095bbb07, 0x01ea3d6936cc92b8,
	   0x000608b00f78a2f3, 0x02db911073d1c20f, 0x018205938470100a,
	   0x01f1e4964cbe6ff2, 0x021a19a29eed4663, 0x01414485f42afa81},
	  {1, 0, 0, 0, 0, 0, 0, 0, 0}},
	 {{0x01612b3a17f63e34, 0x03813992885428e6, 0x022b3c215b5a9608,
	   0x029b4057e19f2fcb, 0x0384059a587af7e6, 0x02d6400ace6fe610,
	   0x029354d896e8e331, 0x00c047ee6dfba65e, 0x0037720542e9d49d},
	  {0x02ce9eed7c5e9278, 0x0374ed703e79643b, 0x01316c54c4072006,
	   0x005aaa09054b2ee8, 0x002824000c840d57, 0x03d4eba24771ed86,
	   0x0189c50aabc3bdae, 0x0338c01541e15510, 0x00466d56e38eed42},
	  {1, 0, 0, 0, 0, 0, 0, 0, 0}},
	 {{0x007efd8330ad8bd6, 0x02465ed48047710b, 0x0034c6606b215e0c,
	   0x016ae30c53cbf839, 0x01fa17bd37161216, 0x018ead4e61ce8ab9,
	   0x005482ed5f5dee46, 0x037543755bba1d7f, 0x005e5ac7e70a9d0f},
	  {0x0117e1bb2fdcb2a2, 0x03deea36249f40c4, 0x028d09b4a6246cb7,
	   0x03524b8855bcf756, 0x023d7d109d5ceb58, 0x0178e43e3223ef9c,
	   0x0154536a0c6e966a, 0x037964d1286ee9fe, 0x0199bcd90e125055},
	  {1, 0, 0, 0, 0, 0, 0, 0, 0}}};
1377
1378 /* select_point selects the |idx|th point from a precomputation table and
1379 * copies it to out. */
1380 static void select_point(const limb idx, unsigned int size, const felem pre_comp[/* size */][3],
1381 felem out[3])
1382 {
1383 unsigned i, j;
1384 limb *outlimbs = &out[0][0];
1385 memset(outlimbs, 0, 3 * sizeof(felem));
1386
1387 for (i = 0; i < size; i++)
1388 {
1389 const limb *inlimbs = &pre_comp[i][0][0];
1390 limb mask = i ^ idx;
1391 mask |= mask >> 4;
1392 mask |= mask >> 2;
1393 mask |= mask >> 1;
1394 mask &= 1;
1395 mask--;
1396 for (j = 0; j < NLIMBS * 3; j++)
1397 outlimbs[j] |= inlimbs[j] & mask;
1398 }
1399 }
1400
1401 /* get_bit returns the |i|th bit in |in| */
1402 static char get_bit(const felem_bytearray in, int i)
1403 {
1404 if (i < 0)
1405 return 0;
1406 return (in[i >> 3] >> (i & 7)) & 1;
1407 }
1408
/* Interleaved point multiplication using precomputed point multiples:
 * The small point multiples 0*P, 1*P, ..., 16*P are in pre_comp[],
 * the scalars in scalars[]. If g_scalar is non-NULL, we also add this multiple
 * of the generator, using certain (large) precomputed multiples in g_pre_comp.
 * Output point (X, Y, Z) is stored in x_out, y_out, z_out.
 * All table lookups are performed in constant time via select_point. */
static void batch_mul(felem x_out, felem y_out, felem z_out,
	const felem_bytearray scalars[], const unsigned num_points, const u8 *g_scalar,
	const int mixed, const felem pre_comp[][17][3], const felem g_pre_comp[16][3])
	{
	int i, skip;
	unsigned num, gen_mul = (g_scalar != NULL);
	felem nq[3], tmp[4];	/* nq is the accumulator; tmp[3] holds -Y */
	limb bits;
	u8 sign, digit;

	/* set nq to the point at infinity */
	memset(nq, 0, 3 * sizeof(felem));

	/* Loop over all scalars msb-to-lsb, interleaving additions
	 * of multiples of the generator (last quarter of rounds)
	 * and additions of other points multiples (every 5th round).
	 */
	skip = 1;	/* save two point operations in the first round */
	for (i = (num_points ? 520 : 130); i >= 0; --i)
		{
		/* double */
		if (!skip)
			point_double(nq[0], nq[1], nq[2], nq[0], nq[1], nq[2]);

		/* add multiples of the generator */
		if (gen_mul && (i <= 130))
			{
			/* Clock one bit from each 130-bit quarter of g_scalar
			 * into the 4-bit index for the gmul-style table. */
			bits = get_bit(g_scalar, i + 390) << 3;
			if (i < 130)
				{
				bits |= get_bit(g_scalar, i + 260) << 2;
				bits |= get_bit(g_scalar, i + 130) << 1;
				bits |= get_bit(g_scalar, i);
				}
			/* select the point to add, in constant time */
			select_point(bits, 16, g_pre_comp, tmp);
			if (!skip)
				{
				point_add(nq[0], nq[1], nq[2],
					nq[0], nq[1], nq[2],
					1 /* mixed */, tmp[0], tmp[1], tmp[2]);
				}
			else
				{
				/* first addition: just take the selected point */
				memcpy(nq, tmp, 3 * sizeof(felem));
				skip = 0;
				}
			}

		/* do other additions every 5 doublings */
		if (num_points && (i % 5 == 0))
			{
			/* loop over all scalars */
			for (num = 0; num < num_points; ++num)
				{
				/* 6-bit signed window (bits i-1 .. i+4) */
				bits = get_bit(scalars[num], i + 4) << 5;
				bits |= get_bit(scalars[num], i + 3) << 4;
				bits |= get_bit(scalars[num], i + 2) << 3;
				bits |= get_bit(scalars[num], i + 1) << 2;
				bits |= get_bit(scalars[num], i) << 1;
				bits |= get_bit(scalars[num], i - 1);
				ec_GFp_nistp_recode_scalar_bits(&sign, &digit, bits);

				/* select the point to add or subtract, in constant time */
				select_point(digit, 17, pre_comp[num], tmp);
				felem_neg(tmp[3], tmp[1]);	/* (X, -Y, Z) is the negative point */
				copy_conditional(tmp[1], tmp[3], (-(limb) sign));

				if (!skip)
					{
					point_add(nq[0], nq[1], nq[2],
						nq[0], nq[1], nq[2],
						mixed, tmp[0], tmp[1], tmp[2]);
					}
				else
					{
					memcpy(nq, tmp, 3 * sizeof(felem));
					skip = 0;
					}
				}
			}
		}
	felem_assign(x_out, nq[0]);
	felem_assign(y_out, nq[1]);
	felem_assign(z_out, nq[2]);
	}
1500
1501
/* Precomputation for the group generator. */
typedef struct {
	felem g_pre_comp[16][3];	/* generator multiples, laid out like gmul */
	int references;			/* refcount, updated via CRYPTO_add under
					 * CRYPTO_LOCK_EC_PRE_COMP */
} NISTP521_PRE_COMP;
1507
const EC_METHOD *EC_GFp_nistp521_method(void)
	{
	/* Method table wiring the constant-time nistp521 routines into the
	 * generic EC machinery; the remaining slots fall back to the
	 * ec_GFp_simple_* / ec_GFp_nist_* defaults. */
	static const EC_METHOD ret = {
		EC_FLAGS_DEFAULT_OCT,
		NID_X9_62_prime_field,
		ec_GFp_nistp521_group_init,
		ec_GFp_simple_group_finish,
		ec_GFp_simple_group_clear_finish,
		ec_GFp_nist_group_copy,
		ec_GFp_nistp521_group_set_curve,
		ec_GFp_simple_group_get_curve,
		ec_GFp_simple_group_get_degree,
		ec_GFp_simple_group_check_discriminant,
		ec_GFp_simple_point_init,
		ec_GFp_simple_point_finish,
		ec_GFp_simple_point_clear_finish,
		ec_GFp_simple_point_copy,
		ec_GFp_simple_point_set_to_infinity,
		ec_GFp_simple_set_Jprojective_coordinates_GFp,
		ec_GFp_simple_get_Jprojective_coordinates_GFp,
		ec_GFp_simple_point_set_affine_coordinates,
		ec_GFp_nistp521_point_get_affine_coordinates,
		0 /* point_set_compressed_coordinates */,
		0 /* point2oct */,
		0 /* oct2point */,
		ec_GFp_simple_add,
		ec_GFp_simple_dbl,
		ec_GFp_simple_invert,
		ec_GFp_simple_is_at_infinity,
		ec_GFp_simple_is_on_curve,
		ec_GFp_simple_cmp,
		ec_GFp_simple_make_affine,
		ec_GFp_simple_points_make_affine,
		ec_GFp_nistp521_points_mul,
		ec_GFp_nistp521_precompute_mult,
		ec_GFp_nistp521_have_precompute_mult,
		ec_GFp_nist_field_mul,
		ec_GFp_nist_field_sqr,
		0 /* field_div */,
		0 /* field_encode */,
		0 /* field_decode */,
		0 /* field_set_to_one */ };

	return &ret;
	}
1553
1554
1555 /******************************************************************************/
1556 /* FUNCTIONS TO MANAGE PRECOMPUTATION
1557 */
1558
1559 static NISTP521_PRE_COMP *nistp521_pre_comp_new()
1560 {
1561 NISTP521_PRE_COMP *ret = NULL;
1562 ret = (NISTP521_PRE_COMP *)OPENSSL_malloc(sizeof(NISTP521_PRE_COMP));
1563 if (!ret)
1564 {
1565 ECerr(EC_F_NISTP521_PRE_COMP_NEW, ERR_R_MALLOC_FAILURE);
1566 return ret;
1567 }
1568 memset(ret->g_pre_comp, 0, sizeof(ret->g_pre_comp));
1569 ret->references = 1;
1570 return ret;
1571 }
1572
1573 static void *nistp521_pre_comp_dup(void *src_)
1574 {
1575 NISTP521_PRE_COMP *src = src_;
1576
1577 /* no need to actually copy, these objects never change! */
1578 CRYPTO_add(&src->references, 1, CRYPTO_LOCK_EC_PRE_COMP);
1579
1580 return src_;
1581 }
1582
1583 static void nistp521_pre_comp_free(void *pre_)
1584 {
1585 int i;
1586 NISTP521_PRE_COMP *pre = pre_;
1587
1588 if (!pre)
1589 return;
1590
1591 i = CRYPTO_add(&pre->references, -1, CRYPTO_LOCK_EC_PRE_COMP);
1592 if (i > 0)
1593 return;
1594
1595 OPENSSL_free(pre);
1596 }
1597
1598 static void nistp521_pre_comp_clear_free(void *pre_)
1599 {
1600 int i;
1601 NISTP521_PRE_COMP *pre = pre_;
1602
1603 if (!pre)
1604 return;
1605
1606 i = CRYPTO_add(&pre->references, -1, CRYPTO_LOCK_EC_PRE_COMP);
1607 if (i > 0)
1608 return;
1609
1610 OPENSSL_cleanse(pre, sizeof(*pre));
1611 OPENSSL_free(pre);
1612 }
1613
1614 /******************************************************************************/
1615 /* OPENSSL EC_METHOD FUNCTIONS
1616 */
1617
1618 int ec_GFp_nistp521_group_init(EC_GROUP *group)
1619 {
1620 int ret;
1621 ret = ec_GFp_simple_group_init(group);
1622 group->a_is_minus3 = 1;
1623 return ret;
1624 }
1625
/* Installs the curve parameters, but only after verifying that they really
 * are the NIST P-521 constants; any other curve is rejected with
 * EC_R_WRONG_CURVE_PARAMETERS.  Returns 1 on success, 0 on error. */
int ec_GFp_nistp521_group_set_curve(EC_GROUP *group, const BIGNUM *p,
	const BIGNUM *a, const BIGNUM *b, BN_CTX *ctx)
	{
	int ret = 0;
	BN_CTX *new_ctx = NULL;
	BIGNUM *curve_p, *curve_a, *curve_b;

	if (ctx == NULL)
		if ((ctx = new_ctx = BN_CTX_new()) == NULL) return 0;
	BN_CTX_start(ctx);
	if (((curve_p = BN_CTX_get(ctx)) == NULL) ||
		((curve_a = BN_CTX_get(ctx)) == NULL) ||
		((curve_b = BN_CTX_get(ctx)) == NULL)) goto err;
	/* Load the canonical P-521 p, a, b for comparison. */
	BN_bin2bn(nistp521_curve_params[0], sizeof(felem_bytearray), curve_p);
	BN_bin2bn(nistp521_curve_params[1], sizeof(felem_bytearray), curve_a);
	BN_bin2bn(nistp521_curve_params[2], sizeof(felem_bytearray), curve_b);
	if ((BN_cmp(curve_p, p)) || (BN_cmp(curve_a, a)) ||
		(BN_cmp(curve_b, b)))
		{
		ECerr(EC_F_EC_GFP_NISTP521_GROUP_SET_CURVE,
			EC_R_WRONG_CURVE_PARAMETERS);
		goto err;
		}
	group->field_mod_func = BN_nist_mod_521;
	ret = ec_GFp_simple_group_set_curve(group, p, a, b, ctx);
err:
	/* BN_CTX_start was called unconditionally above, so always end. */
	BN_CTX_end(ctx);
	if (new_ctx != NULL)
		BN_CTX_free(new_ctx);
	return ret;
	}
1657
/* Takes the Jacobian coordinates (X, Y, Z) of a point and returns
 * (X', Y') = (X/Z^2, Y/Z^3).  Either of x and y may be NULL if the caller
 * does not need that coordinate.  Returns 1 on success, 0 on error
 * (point at infinity, or a BN/felem conversion failure). */
int ec_GFp_nistp521_point_get_affine_coordinates(const EC_GROUP *group,
	const EC_POINT *point, BIGNUM *x, BIGNUM *y, BN_CTX *ctx)
	{
	felem z1, z2, x_in, y_in, x_out, y_out;
	largefelem tmp;

	if (EC_POINT_is_at_infinity(group, point))
		{
		ECerr(EC_F_EC_GFP_NISTP521_POINT_GET_AFFINE_COORDINATES,
			EC_R_POINT_AT_INFINITY);
		return 0;
		}
	if ((!BN_to_felem(x_in, &point->X)) || (!BN_to_felem(y_in, &point->Y)) ||
		(!BN_to_felem(z1, &point->Z))) return 0;
	felem_inv(z2, z1);			/* z2 = 1/Z */
	felem_square(tmp, z2); felem_reduce(z1, tmp);	/* z1 = 1/Z^2 */
	felem_mul(tmp, x_in, z1); felem_reduce(x_in, tmp);	/* x_in = X/Z^2 */
	felem_contract(x_out, x_in);
	if (x != NULL)
		{
		if (!felem_to_BN(x, x_out))
			{
			ECerr(EC_F_EC_GFP_NISTP521_POINT_GET_AFFINE_COORDINATES, ERR_R_BN_LIB);
			return 0;
			}
		}
	felem_mul(tmp, z1, z2); felem_reduce(z1, tmp);	/* z1 = 1/Z^3 */
	felem_mul(tmp, y_in, z1); felem_reduce(y_in, tmp);	/* y_in = Y/Z^3 */
	felem_contract(y_out, y_in);
	if (y != NULL)
		{
		if (!felem_to_BN(y, y_out))
			{
			ECerr(EC_F_EC_GFP_NISTP521_POINT_GET_AFFINE_COORDINATES, ERR_R_BN_LIB);
			return 0;
			}
		}
	return 1;
	}
1699
/* make_points_affine converts num Jacobian points to affine form (Z = 1)
 * in place, delegating to the shared nistp batch-inversion helper with
 * this file's felem primitives supplied as callbacks. */
static void make_points_affine(size_t num, felem points[/* num */][3], felem tmp_felems[/* num+1 */])
	{
	/* Runs in constant time, unless an input is the point at infinity
	 * (which normally shouldn't happen). */
	ec_GFp_nistp_points_make_affine_internal(
		num,
		points,
		sizeof(felem),
		tmp_felems,
		(void (*)(void *)) felem_one,
		(int (*)(const void *)) felem_is_zero_int,
		(void (*)(void *, const void *)) felem_assign,
		(void (*)(void *, const void *)) felem_square_reduce,
		(void (*)(void *, const void *, const void *)) felem_mul_reduce,
		(void (*)(void *, const void *)) felem_inv,
		(void (*)(void *, const void *)) felem_contract);
	}
1717
/* Computes scalar*generator + \sum scalars[i]*points[i], ignoring NULL values
 * Result is stored in r (r can equal one of the inputs).
 * Returns 1 on success, 0 on error.  Scalars already in [0, 2^521) are
 * processed in constant time; out-of-range scalars are first reduced mod the
 * group order via BN_nnmod, which is not constant-time. */
int ec_GFp_nistp521_points_mul(const EC_GROUP *group, EC_POINT *r,
	const BIGNUM *scalar, size_t num, const EC_POINT *points[],
	const BIGNUM *scalars[], BN_CTX *ctx)
	{
	int ret = 0;
	int j;
	int mixed = 0;
	BN_CTX *new_ctx = NULL;
	BIGNUM *x, *y, *z, *tmp_scalar;
	felem_bytearray g_secret;
	felem_bytearray *secrets = NULL;	/* little-endian copies of the scalars */
	felem (*pre_comp)[17][3] = NULL;	/* per-point tables of 0*P .. 16*P */
	felem *tmp_felems = NULL;
	felem_bytearray tmp;			/* scratch for BN_bn2bin output */
	unsigned i, num_bytes;
	int have_pre_comp = 0;
	size_t num_points = num;
	felem x_in, y_in, z_in, x_out, y_out, z_out;
	NISTP521_PRE_COMP *pre = NULL;
	felem (*g_pre_comp)[3] = NULL;
	EC_POINT *generator = NULL;
	const EC_POINT *p = NULL;
	const BIGNUM *p_scalar = NULL;

	if (ctx == NULL)
		if ((ctx = new_ctx = BN_CTX_new()) == NULL) return 0;
	BN_CTX_start(ctx);
	if (((x = BN_CTX_get(ctx)) == NULL) ||
		((y = BN_CTX_get(ctx)) == NULL) ||
		((z = BN_CTX_get(ctx)) == NULL) ||
		((tmp_scalar = BN_CTX_get(ctx)) == NULL))
		goto err;

	if (scalar != NULL)
		{
		pre = EC_EX_DATA_get_data(group->extra_data,
			nistp521_pre_comp_dup, nistp521_pre_comp_free,
			nistp521_pre_comp_clear_free);
		if (pre)
			/* we have precomputation, try to use it */
			g_pre_comp = &pre->g_pre_comp[0];
		else
			/* try to use the standard precomputation */
			g_pre_comp = (felem (*)[3]) gmul;
		generator = EC_POINT_new(group);
		if (generator == NULL)
			goto err;
		/* get the generator from precomputation */
		if (!felem_to_BN(x, g_pre_comp[1][0]) ||
			!felem_to_BN(y, g_pre_comp[1][1]) ||
			!felem_to_BN(z, g_pre_comp[1][2]))
			{
			ECerr(EC_F_EC_GFP_NISTP521_POINTS_MUL, ERR_R_BN_LIB);
			goto err;
			}
		if (!EC_POINT_set_Jprojective_coordinates_GFp(group,
				generator, x, y, z, ctx))
			goto err;
		if (0 == EC_POINT_cmp(group, generator, group->generator, ctx))
			/* precomputation matches generator */
			have_pre_comp = 1;
		else
			/* we don't have valid precomputation:
			 * treat the generator as a random point */
			num_points++;
		}

	if (num_points > 0)
		{
		if (num_points >= 2)
			{
			/* unless we precompute multiples for just one point,
			 * converting those into affine form is time well spent */
			mixed = 1;
			}
		secrets = OPENSSL_malloc(num_points * sizeof(felem_bytearray));
		pre_comp = OPENSSL_malloc(num_points * 17 * 3 * sizeof(felem));
		if (mixed)
			tmp_felems = OPENSSL_malloc((num_points * 17 + 1) * sizeof(felem));
		if ((secrets == NULL) || (pre_comp == NULL) || (mixed && (tmp_felems == NULL)))
			{
			ECerr(EC_F_EC_GFP_NISTP521_POINTS_MUL, ERR_R_MALLOC_FAILURE);
			goto err;
			}

		/* we treat NULL scalars as 0, and NULL points as points at infinity,
		 * i.e., they contribute nothing to the linear combination */
		memset(secrets, 0, num_points * sizeof(felem_bytearray));
		memset(pre_comp, 0, num_points * 17 * 3 * sizeof(felem));
		for (i = 0; i < num_points; ++i)
			{
			if (i == num)
				/* we didn't have a valid precomputation, so we pick
				 * the generator */
				{
				p = EC_GROUP_get0_generator(group);
				p_scalar = scalar;
				}
			else
				/* the i^th point */
				{
				p = points[i];
				p_scalar = scalars[i];
				}
			if ((p_scalar != NULL) && (p != NULL))
				{
				/* reduce scalar to 0 <= scalar < 2^521 */
				if ((BN_num_bits(p_scalar) > 521) || (BN_is_negative(p_scalar)))
					{
					/* this is an unusual input, and we don't guarantee
					 * constant-timeness */
					if (!BN_nnmod(tmp_scalar, p_scalar, &group->order, ctx))
						{
						ECerr(EC_F_EC_GFP_NISTP521_POINTS_MUL, ERR_R_BN_LIB);
						goto err;
						}
					num_bytes = BN_bn2bin(tmp_scalar, tmp);
					}
				else
					num_bytes = BN_bn2bin(p_scalar, tmp);
				flip_endian(secrets[i], tmp, num_bytes);
				/* precompute multiples */
				if ((!BN_to_felem(x_out, &p->X)) ||
					(!BN_to_felem(y_out, &p->Y)) ||
					(!BN_to_felem(z_out, &p->Z))) goto err;
				memcpy(pre_comp[i][1][0], x_out, sizeof(felem));
				memcpy(pre_comp[i][1][1], y_out, sizeof(felem));
				memcpy(pre_comp[i][1][2], z_out, sizeof(felem));
				/* odd multiples by adding P, even ones by doubling j/2 */
				for (j = 2; j <= 16; ++j)
					{
					if (j & 1)
						{
						point_add(
							pre_comp[i][j][0], pre_comp[i][j][1], pre_comp[i][j][2],
							pre_comp[i][1][0], pre_comp[i][1][1], pre_comp[i][1][2],
							0, pre_comp[i][j-1][0], pre_comp[i][j-1][1], pre_comp[i][j-1][2]);
						}
					else
						{
						point_double(
							pre_comp[i][j][0], pre_comp[i][j][1], pre_comp[i][j][2],
							pre_comp[i][j/2][0], pre_comp[i][j/2][1], pre_comp[i][j/2][2]);
						}
					}
				}
			}
		if (mixed)
			make_points_affine(num_points * 17, pre_comp[0], tmp_felems);
		}

	/* the scalar for the generator */
	if ((scalar != NULL) && (have_pre_comp))
		{
		memset(g_secret, 0, sizeof(g_secret));
		/* reduce scalar to 0 <= scalar < 2^521 */
		if ((BN_num_bits(scalar) > 521) || (BN_is_negative(scalar)))
			{
			/* this is an unusual input, and we don't guarantee
			 * constant-timeness */
			if (!BN_nnmod(tmp_scalar, scalar, &group->order, ctx))
				{
				ECerr(EC_F_EC_GFP_NISTP521_POINTS_MUL, ERR_R_BN_LIB);
				goto err;
				}
			num_bytes = BN_bn2bin(tmp_scalar, tmp);
			}
		else
			num_bytes = BN_bn2bin(scalar, tmp);
		flip_endian(g_secret, tmp, num_bytes);
		/* do the multiplication with generator precomputation */
		batch_mul(x_out, y_out, z_out,
			(const felem_bytearray (*)) secrets, num_points,
			g_secret,
			mixed, (const felem (*)[17][3]) pre_comp,
			(const felem (*)[3]) g_pre_comp);
		}
	else
		/* do the multiplication without generator precomputation */
		batch_mul(x_out, y_out, z_out,
			(const felem_bytearray (*)) secrets, num_points,
			NULL, mixed, (const felem (*)[17][3]) pre_comp, NULL);
	/* reduce the output to its unique minimal representation */
	felem_contract(x_in, x_out);
	felem_contract(y_in, y_out);
	felem_contract(z_in, z_out);
	if ((!felem_to_BN(x, x_in)) || (!felem_to_BN(y, y_in)) ||
		(!felem_to_BN(z, z_in)))
		{
		ECerr(EC_F_EC_GFP_NISTP521_POINTS_MUL, ERR_R_BN_LIB);
		goto err;
		}
	ret = EC_POINT_set_Jprojective_coordinates_GFp(group, r, x, y, z, ctx);

err:
	/* unified cleanup: every allocation above is NULL until assigned */
	BN_CTX_end(ctx);
	if (generator != NULL)
		EC_POINT_free(generator);
	if (new_ctx != NULL)
		BN_CTX_free(new_ctx);
	if (secrets != NULL)
		OPENSSL_free(secrets);
	if (pre_comp != NULL)
		OPENSSL_free(pre_comp);
	if (tmp_felems != NULL)
		OPENSSL_free(tmp_felems);
	return ret;
	}
1927
/*
 * Build and cache the generator-multiples table used by the points_mul
 * path above. Table layout (established by the code below): for i in
 * [0,15], g_pre_comp[i] holds the Jacobian point
 *     sum over set bits b of i of 2^(130*b) * G
 * i.e. g_pre_comp[1] = G, [2] = 2^130*G, [4] = 2^260*G, [8] = 2^390*G,
 * and composite indices are the corresponding sums; [0] is infinity.
 * Returns 1 on success, 0 on error.  On success the table is owned by
 * group->extra_data.
 */
1928 int ec_GFp_nistp521_precompute_mult(EC_GROUP *group, BN_CTX *ctx)
1929 {
1930 int ret = 0;
1931 NISTP521_PRE_COMP *pre = NULL;
1932 int i, j;
1933 BN_CTX *new_ctx = NULL;
1934 BIGNUM *x, *y;
1935 EC_POINT *generator = NULL;
/* scratch buffer handed to make_points_affine() at the end */
1936 felem tmp_felems[16];
1937
1938 /* throw away old precomputation */
1939 EC_EX_DATA_free_data(&group->extra_data, nistp521_pre_comp_dup,
1940 nistp521_pre_comp_free, nistp521_pre_comp_clear_free);
/* caller may pass ctx == NULL; then we own (and later free) new_ctx */
1941 if (ctx == NULL)
1942 if ((ctx = new_ctx = BN_CTX_new()) == NULL) return 0;
1943 BN_CTX_start(ctx);
1944 if (((x = BN_CTX_get(ctx)) == NULL) ||
1945 ((y = BN_CTX_get(ctx)) == NULL))
1946 goto err;
/* get the generator: build the standard P-521 base point from the
 * built-in curve parameters so it can be compared against the
 * group's configured generator below */
1947 /* get the generator */
1948 if (group->generator == NULL) goto err;
1949 generator = EC_POINT_new(group);
1950 if (generator == NULL)
1951 goto err;
1952 BN_bin2bn(nistp521_curve_params[3], sizeof (felem_bytearray), x);
1953 BN_bin2bn(nistp521_curve_params[4], sizeof (felem_bytearray), y);
1954 if (!EC_POINT_set_affine_coordinates_GFp(group, generator, x, y, ctx))
1955 goto err;
1956 if ((pre = nistp521_pre_comp_new()) == NULL)
1957 goto err;
/* EC_POINT_cmp() returns 0 iff equal; its -1 error return is nonzero
 * and therefore falls through to the generic computation path */
1958 /* if the generator is the standard one, use built-in precomputation */
1959 if (0 == EC_POINT_cmp(group, generator, group->generator, ctx))
1960 {
1961 memcpy(pre->g_pre_comp, gmul, sizeof(pre->g_pre_comp));
1962 ret = 1;
/* NB: the err label doubles as the success exit; ret is already 1 */
1963 goto err;
1964 }
/* generic generator: seed slot 1 with G itself */
1965 if ((!BN_to_felem(pre->g_pre_comp[1][0], &group->generator->X)) ||
1966 (!BN_to_felem(pre->g_pre_comp[1][1], &group->generator->Y)) ||
1967 (!BN_to_felem(pre->g_pre_comp[1][2], &group->generator->Z)))
1968 goto err;
/* each pass doubles slot i a total of 130 times (1 + 129) into slot
 * 2*i, giving 2^130*G, 2^260*G, 2^390*G in slots 2, 4, 8 */
1969 /* compute 2^130*G, 2^260*G, 2^390*G */
1970 for (i = 1; i <= 4; i <<= 1)
1971 {
1972 point_double(pre->g_pre_comp[2*i][0], pre->g_pre_comp[2*i][1],
1973 pre->g_pre_comp[2*i][2], pre->g_pre_comp[i][0],
1974 pre->g_pre_comp[i][1], pre->g_pre_comp[i][2]);
1975 for (j = 0; j < 129; ++j)
1976 {
1977 point_double(pre->g_pre_comp[2*i][0],
1978 pre->g_pre_comp[2*i][1],
1979 pre->g_pre_comp[2*i][2],
1980 pre->g_pre_comp[2*i][0],
1981 pre->g_pre_comp[2*i][1],
1982 pre->g_pre_comp[2*i][2]);
1983 }
1984 }
/* all-zero felems encode the point at infinity here */
1985 /* g_pre_comp[0] is the point at infinity */
1986 memset(pre->g_pre_comp[0], 0, sizeof(pre->g_pre_comp[0]));
/* fill the even composite slots (6, 10, 12, 14) by adding the
 * power-of-two entries computed above */
1987 /* the remaining multiples */
1988 /* 2^130*G + 2^260*G */
1989 point_add(pre->g_pre_comp[6][0], pre->g_pre_comp[6][1],
1990 pre->g_pre_comp[6][2], pre->g_pre_comp[4][0],
1991 pre->g_pre_comp[4][1], pre->g_pre_comp[4][2],
1992 0, pre->g_pre_comp[2][0], pre->g_pre_comp[2][1],
1993 pre->g_pre_comp[2][2]);
1994 /* 2^130*G + 2^390*G */
1995 point_add(pre->g_pre_comp[10][0], pre->g_pre_comp[10][1],
1996 pre->g_pre_comp[10][2], pre->g_pre_comp[8][0],
1997 pre->g_pre_comp[8][1], pre->g_pre_comp[8][2],
1998 0, pre->g_pre_comp[2][0], pre->g_pre_comp[2][1],
1999 pre->g_pre_comp[2][2]);
2000 /* 2^260*G + 2^390*G */
2001 point_add(pre->g_pre_comp[12][0], pre->g_pre_comp[12][1],
2002 pre->g_pre_comp[12][2], pre->g_pre_comp[8][0],
2003 pre->g_pre_comp[8][1], pre->g_pre_comp[8][2],
2004 0, pre->g_pre_comp[4][0], pre->g_pre_comp[4][1],
2005 pre->g_pre_comp[4][2]);
2006 /* 2^130*G + 2^260*G + 2^390*G */
2007 point_add(pre->g_pre_comp[14][0], pre->g_pre_comp[14][1],
2008 pre->g_pre_comp[14][2], pre->g_pre_comp[12][0],
2009 pre->g_pre_comp[12][1], pre->g_pre_comp[12][2],
2010 0, pre->g_pre_comp[2][0], pre->g_pre_comp[2][1],
2011 pre->g_pre_comp[2][2]);
/* odd slots 3,5,...,15 are the preceding even slot plus G */
2012 for (i = 1; i < 8; ++i)
2013 {
2014 /* odd multiples: add G */
2015 point_add(pre->g_pre_comp[2*i+1][0], pre->g_pre_comp[2*i+1][1],
2016 pre->g_pre_comp[2*i+1][2], pre->g_pre_comp[2*i][0],
2017 pre->g_pre_comp[2*i][1], pre->g_pre_comp[2*i][2],
2018 0, pre->g_pre_comp[1][0], pre->g_pre_comp[1][1],
2019 pre->g_pre_comp[1][2]);
2020 }
/* convert slots 1..15 to affine (slot 0, infinity, stays as-is) */
2021 make_points_affine(15, &(pre->g_pre_comp[1]), tmp_felems);
2022
2023 if (!EC_EX_DATA_set_data(&group->extra_data, pre, nistp521_pre_comp_dup,
2024 nistp521_pre_comp_free, nistp521_pre_comp_clear_free))
2025 goto err;
2026 ret = 1;
/* ownership of pre transferred to group->extra_data; clear the local
 * pointer so the cleanup below does not free it */
2027 pre = NULL;
2028 err:
2029 BN_CTX_end(ctx);
2030 if (generator != NULL)
2031 EC_POINT_free(generator);
2032 if (new_ctx != NULL)
2033 BN_CTX_free(new_ctx);
2034 if (pre)
2035 nistp521_pre_comp_free(pre);
2036 return ret;
2037 }
2038
2039 int ec_GFp_nistp521_have_precompute_mult(const EC_GROUP *group)
2040 {
2041 if (EC_EX_DATA_get_data(group->extra_data, nistp521_pre_comp_dup,
2042 nistp521_pre_comp_free, nistp521_pre_comp_clear_free)
2043 != NULL)
2044 return 1;
2045 else
2046 return 0;
2047 }
2048
2049 #else
/* self-referential placeholder: keeps this translation unit non-empty
 * (some compilers warn/error on empty files) when the 64-bit nistp
 * implementation is compiled out */
2050 static void *dummy=&dummy;
2051 #endif