1 /* crypto/ec/ecp_nistp521.c */
2 /*
3 * Written by Adam Langley (Google) for the OpenSSL project
4 */
5 /* Copyright 2011 Google Inc.
6 *
7 * Licensed under the Apache License, Version 2.0 (the "License");
8 *
9 * you may not use this file except in compliance with the License.
10 * You may obtain a copy of the License at
11 *
12 * http://www.apache.org/licenses/LICENSE-2.0
13 *
14 * Unless required by applicable law or agreed to in writing, software
15 * distributed under the License is distributed on an "AS IS" BASIS,
16 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 * See the License for the specific language governing permissions and
18 * limitations under the License.
19 */
20
21 /*
22 * A 64-bit implementation of the NIST P-521 elliptic curve point multiplication
23 *
24 * OpenSSL integration was taken from Emilia Kasper's work in ecp_nistp224.c.
25 * Otherwise based on Emilia's P224 work, which was inspired by my curve25519
26 * work which got its smarts from Daniel J. Bernstein's work on the same.
27 */
28
29 #include <openssl/opensslconf.h>
30 #ifndef OPENSSL_NO_EC_NISTP_64_GCC_128
31
32 #include <stdint.h>
33 #include <string.h>
34 #include <openssl/err.h>
35 #include "ec_lcl.h"
36
37 #if defined(__GNUC__) && (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 1))
38 /* even with gcc, the typedef won't work for 32-bit platforms */
39 typedef __uint128_t uint128_t; /* nonstandard; implemented by gcc on 64-bit platforms */
40 #else
41 #error "Need GCC 3.1 or later to define type uint128_t"
42 #endif
43
44 typedef uint8_t u8;
45 typedef uint64_t u64;
46 typedef int64_t s64;
47
48 /* The underlying field.
49 *
50 * P521 operates over GF(2^521-1). We can serialise an element of this field
51 * into 66 bytes where the most significant byte contains only a single bit. We
52 * call this an felem_bytearray. */
53
54 typedef u8 felem_bytearray[66];
55
56 /* These are the parameters of P521, taken from FIPS 186-3, section D.1.2.5.
57 * These values are big-endian. */
58 static const felem_bytearray nistp521_curve_params[5] =
59 {
60 {0x01, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* p */
61 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
62 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
63 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
64 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
65 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
66 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
67 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
68 0xff, 0xff},
69 {0x01, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* a = -3 */
70 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
71 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
72 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
73 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
74 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
75 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
76 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
77 0xff, 0xfc},
78 {0x00, 0x51, 0x95, 0x3e, 0xb9, 0x61, 0x8e, 0x1c, /* b */
79 0x9a, 0x1f, 0x92, 0x9a, 0x21, 0xa0, 0xb6, 0x85,
80 0x40, 0xee, 0xa2, 0xda, 0x72, 0x5b, 0x99, 0xb3,
81 0x15, 0xf3, 0xb8, 0xb4, 0x89, 0x91, 0x8e, 0xf1,
82 0x09, 0xe1, 0x56, 0x19, 0x39, 0x51, 0xec, 0x7e,
83 0x93, 0x7b, 0x16, 0x52, 0xc0, 0xbd, 0x3b, 0xb1,
84 0xbf, 0x07, 0x35, 0x73, 0xdf, 0x88, 0x3d, 0x2c,
85 0x34, 0xf1, 0xef, 0x45, 0x1f, 0xd4, 0x6b, 0x50,
86 0x3f, 0x00},
87 {0x00, 0xc6, 0x85, 0x8e, 0x06, 0xb7, 0x04, 0x04, /* x */
88 0xe9, 0xcd, 0x9e, 0x3e, 0xcb, 0x66, 0x23, 0x95,
89 0xb4, 0x42, 0x9c, 0x64, 0x81, 0x39, 0x05, 0x3f,
90 0xb5, 0x21, 0xf8, 0x28, 0xaf, 0x60, 0x6b, 0x4d,
91 0x3d, 0xba, 0xa1, 0x4b, 0x5e, 0x77, 0xef, 0xe7,
92 0x59, 0x28, 0xfe, 0x1d, 0xc1, 0x27, 0xa2, 0xff,
93 0xa8, 0xde, 0x33, 0x48, 0xb3, 0xc1, 0x85, 0x6a,
94 0x42, 0x9b, 0xf9, 0x7e, 0x7e, 0x31, 0xc2, 0xe5,
95 0xbd, 0x66},
96 {0x01, 0x18, 0x39, 0x29, 0x6a, 0x78, 0x9a, 0x3b, /* y */
97 0xc0, 0x04, 0x5c, 0x8a, 0x5f, 0xb4, 0x2c, 0x7d,
98 0x1b, 0xd9, 0x98, 0xf5, 0x44, 0x49, 0x57, 0x9b,
99 0x44, 0x68, 0x17, 0xaf, 0xbd, 0x17, 0x27, 0x3e,
100 0x66, 0x2c, 0x97, 0xee, 0x72, 0x99, 0x5e, 0xf4,
101 0x26, 0x40, 0xc5, 0x50, 0xb9, 0x01, 0x3f, 0xad,
102 0x07, 0x61, 0x35, 0x3c, 0x70, 0x86, 0xa2, 0x72,
103 0xc2, 0x40, 0x88, 0xbe, 0x94, 0x76, 0x9f, 0xd1,
104 0x66, 0x50}
105 };
106
107 /* The representation of field elements.
108 * ------------------------------------
109 *
110 * We represent field elements with nine values. These values are either 64 or
111 * 128 bits and the field element represented is:
112 * v[0]*2^0 + v[1]*2^58 + v[2]*2^116 + ... + v[8]*2^464 (mod p)
113 * Each of the nine values is called a 'limb'. Since the limbs are spaced only
114 * 58 bits apart, but are greater than 58 bits in length, the most significant
115 * bits of each limb overlap with the least significant bits of the next.
116 *
117 * A field element with 64-bit limbs is an 'felem'. One with 128-bit limbs is a
118 * 'largefelem' */
119
120 #define NLIMBS 9
121
122 typedef uint64_t limb;
123 typedef limb felem[NLIMBS];
124 typedef uint128_t largefelem[NLIMBS];
125
126 static const limb bottom57bits = 0x1ffffffffffffff;
127 static const limb bottom58bits = 0x3ffffffffffffff;
128
129 /* bin66_to_felem takes a little-endian byte array and converts it into felem
130 * form. This assumes that the CPU is little-endian. */
131 static void bin66_to_felem(felem out, const u8 in[66])
132 {
133 out[0] = (*((limb*) &in[0])) & bottom58bits;
134 out[1] = (*((limb*) &in[7]) >> 2) & bottom58bits;
135 out[2] = (*((limb*) &in[14]) >> 4) & bottom58bits;
136 out[3] = (*((limb*) &in[21]) >> 6) & bottom58bits;
137 out[4] = (*((limb*) &in[29])) & bottom58bits;
138 out[5] = (*((limb*) &in[36]) >> 2) & bottom58bits;
139 out[6] = (*((limb*) &in[43]) >> 4) & bottom58bits;
140 out[7] = (*((limb*) &in[50]) >> 6) & bottom58bits;
141 out[8] = (*((limb*) &in[58])) & bottom57bits;
142 }
143
144 /* felem_to_bin66 takes an felem and serialises into a little endian, 66 byte
145 * array. This assumes that the CPU is little-endian. */
146 static void felem_to_bin66(u8 out[66], const felem in)
147 {
148 memset(out, 0, 66);
149 (*((limb*) &out[0])) = in[0];
150 (*((limb*) &out[7])) |= in[1] << 2;
151 (*((limb*) &out[14])) |= in[2] << 4;
152 (*((limb*) &out[21])) |= in[3] << 6;
153 (*((limb*) &out[29])) = in[4];
154 (*((limb*) &out[36])) |= in[5] << 2;
155 (*((limb*) &out[43])) |= in[6] << 4;
156 (*((limb*) &out[50])) |= in[7] << 6;
157 (*((limb*) &out[58])) = in[8];
158 }
159
160 /* To preserve endianness when using BN_bn2bin and BN_bin2bn */
161 static void flip_endian(u8 *out, const u8 *in, unsigned len)
162 {
163 unsigned i;
164 for (i = 0; i < len; ++i)
165 out[i] = in[len-1-i];
166 }
167
168 /* BN_to_felem converts an OpenSSL BIGNUM into an felem */
169 static int BN_to_felem(felem out, const BIGNUM *bn)
170 {
171 felem_bytearray b_in;
172 felem_bytearray b_out;
173 unsigned num_bytes;
174
175 /* BN_bn2bin eats leading zeroes */
176 memset(b_out, 0, sizeof b_out);
177 num_bytes = BN_num_bytes(bn);
178 if (num_bytes > sizeof b_out)
179 {
180 ECerr(EC_F_BN_TO_FELEM, EC_R_BIGNUM_OUT_OF_RANGE);
181 return 0;
182 }
183 if (BN_is_negative(bn))
184 {
185 ECerr(EC_F_BN_TO_FELEM, EC_R_BIGNUM_OUT_OF_RANGE);
186 return 0;
187 }
188 num_bytes = BN_bn2bin(bn, b_in);
189 flip_endian(b_out, b_in, num_bytes);
190 bin66_to_felem(out, b_out);
191 return 1;
192 }
193
194 /* felem_to_BN converts an felem into an OpenSSL BIGNUM */
195 static BIGNUM *felem_to_BN(BIGNUM *out, const felem in)
196 {
197 felem_bytearray b_in, b_out;
198 felem_to_bin66(b_in, in);
199 flip_endian(b_out, b_in, sizeof b_out);
200 return BN_bin2bn(b_out, sizeof b_out, out);
201 }
202
203
204 /* Field operations
205 * ---------------- */
206
207 static void felem_one(felem out)
208 {
209 out[0] = 1;
210 out[1] = 0;
211 out[2] = 0;
212 out[3] = 0;
213 out[4] = 0;
214 out[5] = 0;
215 out[6] = 0;
216 out[7] = 0;
217 out[8] = 0;
218 }
219
220 static void felem_assign(felem out, const felem in)
221 {
222 out[0] = in[0];
223 out[1] = in[1];
224 out[2] = in[2];
225 out[3] = in[3];
226 out[4] = in[4];
227 out[5] = in[5];
228 out[6] = in[6];
229 out[7] = in[7];
230 out[8] = in[8];
231 }
232
233 /* felem_sum64 sets out = out + in. */
234 static void felem_sum64(felem out, const felem in)
235 {
236 out[0] += in[0];
237 out[1] += in[1];
238 out[2] += in[2];
239 out[3] += in[3];
240 out[4] += in[4];
241 out[5] += in[5];
242 out[6] += in[6];
243 out[7] += in[7];
244 out[8] += in[8];
245 }
246
247 /* felem_scalar sets out = in * scalar */
248 static void felem_scalar(felem out, const felem in, limb scalar)
249 {
250 out[0] = in[0] * scalar;
251 out[1] = in[1] * scalar;
252 out[2] = in[2] * scalar;
253 out[3] = in[3] * scalar;
254 out[4] = in[4] * scalar;
255 out[5] = in[5] * scalar;
256 out[6] = in[6] * scalar;
257 out[7] = in[7] * scalar;
258 out[8] = in[8] * scalar;
259 }
260
261 /* felem_scalar64 sets out = out * scalar */
262 static void felem_scalar64(felem out, limb scalar)
263 {
264 out[0] *= scalar;
265 out[1] *= scalar;
266 out[2] *= scalar;
267 out[3] *= scalar;
268 out[4] *= scalar;
269 out[5] *= scalar;
270 out[6] *= scalar;
271 out[7] *= scalar;
272 out[8] *= scalar;
273 }
274
275 /* felem_scalar128 sets out = out * scalar */
276 static void felem_scalar128(largefelem out, limb scalar)
277 {
278 out[0] *= scalar;
279 out[1] *= scalar;
280 out[2] *= scalar;
281 out[3] *= scalar;
282 out[4] *= scalar;
283 out[5] *= scalar;
284 out[6] *= scalar;
285 out[7] *= scalar;
286 out[8] *= scalar;
287 }
288
289 /* felem_neg sets |out| to |-in|
290 * On entry:
291 * in[i] < 2^59 + 2^14
292 * On exit:
293 * out[i] < 2^62
294 */
295 static void felem_neg(felem out, const felem in)
296 {
297 /* In order to prevent underflow, we subtract from 0 mod p. */
298 static const limb two62m3 = (((limb)1) << 62) - (((limb)1) << 5);
299 static const limb two62m2 = (((limb)1) << 62) - (((limb)1) << 4);
300
301 out[0] = two62m3 - in[0];
302 out[1] = two62m2 - in[1];
303 out[2] = two62m2 - in[2];
304 out[3] = two62m2 - in[3];
305 out[4] = two62m2 - in[4];
306 out[5] = two62m2 - in[5];
307 out[6] = two62m2 - in[6];
308 out[7] = two62m2 - in[7];
309 out[8] = two62m2 - in[8];
310 }
311
312 /* felem_diff64 subtracts |in| from |out|
313 * On entry:
314 * in[i] < 2^59 + 2^14
315 * On exit:
316 * out[i] < out[i] + 2^62
317 */
318 static void felem_diff64(felem out, const felem in)
319 {
320 /* In order to prevent underflow, we add 0 mod p before subtracting. */
321 static const limb two62m3 = (((limb)1) << 62) - (((limb)1) << 5);
322 static const limb two62m2 = (((limb)1) << 62) - (((limb)1) << 4);
323
324 out[0] += two62m3 - in[0];
325 out[1] += two62m2 - in[1];
326 out[2] += two62m2 - in[2];
327 out[3] += two62m2 - in[3];
328 out[4] += two62m2 - in[4];
329 out[5] += two62m2 - in[5];
330 out[6] += two62m2 - in[6];
331 out[7] += two62m2 - in[7];
332 out[8] += two62m2 - in[8];
333 }
334
335 /* felem_diff_128_64 subtracts |in| from |out|
336 * On entry:
337 * in[i] < 2^62 + 2^17
338 * On exit:
339 * out[i] < out[i] + 2^63
340 */
341 static void felem_diff_128_64(largefelem out, const felem in)
342 {
343 /* In order to prevent underflow, we add 0 mod p before subtracting. */
344 static const limb two63m6 = (((limb)1) << 62) - (((limb)1) << 5);
345 static const limb two63m5 = (((limb)1) << 62) - (((limb)1) << 4);
346
347 out[0] += two63m6 - in[0];
348 out[1] += two63m5 - in[1];
349 out[2] += two63m5 - in[2];
350 out[3] += two63m5 - in[3];
351 out[4] += two63m5 - in[4];
352 out[5] += two63m5 - in[5];
353 out[6] += two63m5 - in[6];
354 out[7] += two63m5 - in[7];
355 out[8] += two63m5 - in[8];
356 }
357
358 /* felem_diff_128_64 subtracts |in| from |out|
359 * On entry:
360 * in[i] < 2^126
361 * On exit:
362 * out[i] < out[i] + 2^127 - 2^69
363 */
364 static void felem_diff128(largefelem out, const largefelem in)
365 {
366 /* In order to prevent underflow, we add 0 mod p before subtracting. */
367 static const uint128_t two127m70 = (((uint128_t)1) << 127) - (((uint128_t)1) << 70);
368 static const uint128_t two127m69 = (((uint128_t)1) << 127) - (((uint128_t)1) << 69);
369
370 out[0] += (two127m70 - in[0]);
371 out[1] += (two127m69 - in[1]);
372 out[2] += (two127m69 - in[2]);
373 out[3] += (two127m69 - in[3]);
374 out[4] += (two127m69 - in[4]);
375 out[5] += (two127m69 - in[5]);
376 out[6] += (two127m69 - in[6]);
377 out[7] += (two127m69 - in[7]);
378 out[8] += (two127m69 - in[8]);
379 }
380
/* felem_square sets |out| = |in|^2
 * On entry:
 *   in[i] < 2^62
 * On exit:
 *   out[i] < 17 * max(in[i]) * max(in[i])
 */
static void felem_square(largefelem out, const felem in)
{
    felem inx2, inx4;
    felem_scalar(inx2, in, 2);
    felem_scalar(inx4, in, 4);

    /* We have many cases where we want to do
     *   in[x] * in[y] +
     *   in[y] * in[x]
     * This is obviously just
     *   2 * in[x] * in[y]
     * However, rather than do the doubling on the 128 bit result, we
     * double one of the inputs to the multiplication by reading from
     * |inx2| */

    /* Low half of the square: out[i] = sum of in[j]*in[i-j] for j <= i,
     * with the cross terms doubled via |inx2| and the diagonal terms
     * (in[k]*in[k]) left single. */
    out[0] = ((uint128_t) in[0]) * in[0];
    out[1] = ((uint128_t) in[0]) * inx2[1];
    out[2] = ((uint128_t) in[0]) * inx2[2] +
             ((uint128_t) in[1]) * in[1];
    out[3] = ((uint128_t) in[0]) * inx2[3] +
             ((uint128_t) in[1]) * inx2[2];
    out[4] = ((uint128_t) in[0]) * inx2[4] +
             ((uint128_t) in[1]) * inx2[3] +
             ((uint128_t) in[2]) * in[2];
    out[5] = ((uint128_t) in[0]) * inx2[5] +
             ((uint128_t) in[1]) * inx2[4] +
             ((uint128_t) in[2]) * inx2[3];
    out[6] = ((uint128_t) in[0]) * inx2[6] +
             ((uint128_t) in[1]) * inx2[5] +
             ((uint128_t) in[2]) * inx2[4] +
             ((uint128_t) in[3]) * in[3];
    out[7] = ((uint128_t) in[0]) * inx2[7] +
             ((uint128_t) in[1]) * inx2[6] +
             ((uint128_t) in[2]) * inx2[5] +
             ((uint128_t) in[3]) * inx2[4];
    out[8] = ((uint128_t) in[0]) * inx2[8] +
             ((uint128_t) in[1]) * inx2[7] +
             ((uint128_t) in[2]) * inx2[6] +
             ((uint128_t) in[3]) * inx2[5] +
             ((uint128_t) in[4]) * in[4];

    /* The remaining limbs fall above 2^521, with the first falling at
     * 2^522. They correspond to locations one bit up from the limbs
     * produced above so we would have to multiply by two to align them.
     * Again, rather than operate on the 128-bit result, we double one of
     * the inputs to the multiplication. If we want to double for both this
     * reason, and the reason above, then we end up multiplying by four. */

    /* 9 */
    out[0] += ((uint128_t) in[1]) * inx4[8] +
              ((uint128_t) in[2]) * inx4[7] +
              ((uint128_t) in[3]) * inx4[6] +
              ((uint128_t) in[4]) * inx4[5];

    /* 10 */
    out[1] += ((uint128_t) in[2]) * inx4[8] +
              ((uint128_t) in[3]) * inx4[7] +
              ((uint128_t) in[4]) * inx4[6] +
              ((uint128_t) in[5]) * inx2[5];     /* diagonal term: x2 only */

    /* 11 */
    out[2] += ((uint128_t) in[3]) * inx4[8] +
              ((uint128_t) in[4]) * inx4[7] +
              ((uint128_t) in[5]) * inx4[6];

    /* 12 */
    out[3] += ((uint128_t) in[4]) * inx4[8] +
              ((uint128_t) in[5]) * inx4[7] +
              ((uint128_t) in[6]) * inx2[6];     /* diagonal term: x2 only */

    /* 13 */
    out[4] += ((uint128_t) in[5]) * inx4[8] +
              ((uint128_t) in[6]) * inx4[7];

    /* 14 */
    out[5] += ((uint128_t) in[6]) * inx4[8] +
              ((uint128_t) in[7]) * inx2[7];     /* diagonal term: x2 only */

    /* 15 */
    out[6] += ((uint128_t) in[7]) * inx4[8];

    /* 16 */
    out[7] += ((uint128_t) in[8]) * inx2[8];     /* diagonal term: x2 only */
}
471
472 /* felem_mul sets |out| = |in1| * |in2|
473 * On entry:
474 * in1[i] < 2^64
475 * in2[i] < 2^63
476 * On exit:
477 * out[i] < 17 * max(in1[i]) * max(in2[i])
478 */
479 static void felem_mul(largefelem out, const felem in1, const felem in2)
480 {
481 felem in2x2;
482 felem_scalar(in2x2, in2, 2);
483
484 out[0] = ((uint128_t) in1[0]) * in2[0];
485
486 out[1] = ((uint128_t) in1[0]) * in2[1] +
487 ((uint128_t) in1[1]) * in2[0];
488
489 out[2] = ((uint128_t) in1[0]) * in2[2] +
490 ((uint128_t) in1[1]) * in2[1] +
491 ((uint128_t) in1[2]) * in2[0];
492
493 out[3] = ((uint128_t) in1[0]) * in2[3] +
494 ((uint128_t) in1[1]) * in2[2] +
495 ((uint128_t) in1[2]) * in2[1] +
496 ((uint128_t) in1[3]) * in2[0];
497
498 out[4] = ((uint128_t) in1[0]) * in2[4] +
499 ((uint128_t) in1[1]) * in2[3] +
500 ((uint128_t) in1[2]) * in2[2] +
501 ((uint128_t) in1[3]) * in2[1] +
502 ((uint128_t) in1[4]) * in2[0];
503
504 out[5] = ((uint128_t) in1[0]) * in2[5] +
505 ((uint128_t) in1[1]) * in2[4] +
506 ((uint128_t) in1[2]) * in2[3] +
507 ((uint128_t) in1[3]) * in2[2] +
508 ((uint128_t) in1[4]) * in2[1] +
509 ((uint128_t) in1[5]) * in2[0];
510
511 out[6] = ((uint128_t) in1[0]) * in2[6] +
512 ((uint128_t) in1[1]) * in2[5] +
513 ((uint128_t) in1[2]) * in2[4] +
514 ((uint128_t) in1[3]) * in2[3] +
515 ((uint128_t) in1[4]) * in2[2] +
516 ((uint128_t) in1[5]) * in2[1] +
517 ((uint128_t) in1[6]) * in2[0];
518
519 out[7] = ((uint128_t) in1[0]) * in2[7] +
520 ((uint128_t) in1[1]) * in2[6] +
521 ((uint128_t) in1[2]) * in2[5] +
522 ((uint128_t) in1[3]) * in2[4] +
523 ((uint128_t) in1[4]) * in2[3] +
524 ((uint128_t) in1[5]) * in2[2] +
525 ((uint128_t) in1[6]) * in2[1] +
526 ((uint128_t) in1[7]) * in2[0];
527
528 out[8] = ((uint128_t) in1[0]) * in2[8] +
529 ((uint128_t) in1[1]) * in2[7] +
530 ((uint128_t) in1[2]) * in2[6] +
531 ((uint128_t) in1[3]) * in2[5] +
532 ((uint128_t) in1[4]) * in2[4] +
533 ((uint128_t) in1[5]) * in2[3] +
534 ((uint128_t) in1[6]) * in2[2] +
535 ((uint128_t) in1[7]) * in2[1] +
536 ((uint128_t) in1[8]) * in2[0];
537
538 /* See comment in felem_square about the use of in2x2 here */
539
540 out[0] += ((uint128_t) in1[1]) * in2x2[8] +
541 ((uint128_t) in1[2]) * in2x2[7] +
542 ((uint128_t) in1[3]) * in2x2[6] +
543 ((uint128_t) in1[4]) * in2x2[5] +
544 ((uint128_t) in1[5]) * in2x2[4] +
545 ((uint128_t) in1[6]) * in2x2[3] +
546 ((uint128_t) in1[7]) * in2x2[2] +
547 ((uint128_t) in1[8]) * in2x2[1];
548
549 out[1] += ((uint128_t) in1[2]) * in2x2[8] +
550 ((uint128_t) in1[3]) * in2x2[7] +
551 ((uint128_t) in1[4]) * in2x2[6] +
552 ((uint128_t) in1[5]) * in2x2[5] +
553 ((uint128_t) in1[6]) * in2x2[4] +
554 ((uint128_t) in1[7]) * in2x2[3] +
555 ((uint128_t) in1[8]) * in2x2[2];
556
557 out[2] += ((uint128_t) in1[3]) * in2x2[8] +
558 ((uint128_t) in1[4]) * in2x2[7] +
559 ((uint128_t) in1[5]) * in2x2[6] +
560 ((uint128_t) in1[6]) * in2x2[5] +
561 ((uint128_t) in1[7]) * in2x2[4] +
562 ((uint128_t) in1[8]) * in2x2[3];
563
564 out[3] += ((uint128_t) in1[4]) * in2x2[8] +
565 ((uint128_t) in1[5]) * in2x2[7] +
566 ((uint128_t) in1[6]) * in2x2[6] +
567 ((uint128_t) in1[7]) * in2x2[5] +
568 ((uint128_t) in1[8]) * in2x2[4];
569
570 out[4] += ((uint128_t) in1[5]) * in2x2[8] +
571 ((uint128_t) in1[6]) * in2x2[7] +
572 ((uint128_t) in1[7]) * in2x2[6] +
573 ((uint128_t) in1[8]) * in2x2[5];
574
575 out[5] += ((uint128_t) in1[6]) * in2x2[8] +
576 ((uint128_t) in1[7]) * in2x2[7] +
577 ((uint128_t) in1[8]) * in2x2[6];
578
579 out[6] += ((uint128_t) in1[7]) * in2x2[8] +
580 ((uint128_t) in1[8]) * in2x2[7];
581
582 out[7] += ((uint128_t) in1[8]) * in2x2[8];
583 }
584
585 static const limb bottom52bits = 0xfffffffffffff;
586
/* felem_reduce converts a largefelem to an felem.
 * On entry:
 *   in[i] < 2^128
 * On exit:
 *   out[i] < 2^59 + 2^14
 */
static void felem_reduce(felem out, const largefelem in)
{
    u64 overflow1, overflow2;

    /* Start from the bottom 58 bits of each 128-bit limb. */
    out[0] = ((limb) in[0]) & bottom58bits;
    out[1] = ((limb) in[1]) & bottom58bits;
    out[2] = ((limb) in[2]) & bottom58bits;
    out[3] = ((limb) in[3]) & bottom58bits;
    out[4] = ((limb) in[4]) & bottom58bits;
    out[5] = ((limb) in[5]) & bottom58bits;
    out[6] = ((limb) in[6]) & bottom58bits;
    out[7] = ((limb) in[7]) & bottom58bits;
    out[8] = ((limb) in[8]) & bottom58bits;

    /* out[i] < 2^58 */

    /* Propagate the carries: each 128-bit limb contributes its bits
     * [58,64) to the next limb, bits [64,116) (shifted up by 6) also to
     * the next limb, and bits [116,128) to the limb after that. */
    out[1] += ((limb) in[0]) >> 58;
    out[1] += (((limb) (in[0] >> 64)) & bottom52bits) << 6;
    /* out[1] < 2^58 + 2^6 + 2^58
     *        = 2^59 + 2^6 */
    out[2] += ((limb) (in[0] >> 64)) >> 52;

    out[2] += ((limb) in[1]) >> 58;
    out[2] += (((limb) (in[1] >> 64)) & bottom52bits) << 6;
    out[3] += ((limb) (in[1] >> 64)) >> 52;

    out[3] += ((limb) in[2]) >> 58;
    out[3] += (((limb) (in[2] >> 64)) & bottom52bits) << 6;
    out[4] += ((limb) (in[2] >> 64)) >> 52;

    out[4] += ((limb) in[3]) >> 58;
    out[4] += (((limb) (in[3] >> 64)) & bottom52bits) << 6;
    out[5] += ((limb) (in[3] >> 64)) >> 52;

    out[5] += ((limb) in[4]) >> 58;
    out[5] += (((limb) (in[4] >> 64)) & bottom52bits) << 6;
    out[6] += ((limb) (in[4] >> 64)) >> 52;

    out[6] += ((limb) in[5]) >> 58;
    out[6] += (((limb) (in[5] >> 64)) & bottom52bits) << 6;
    out[7] += ((limb) (in[5] >> 64)) >> 52;

    out[7] += ((limb) in[6]) >> 58;
    out[7] += (((limb) (in[6] >> 64)) & bottom52bits) << 6;
    out[8] += ((limb) (in[6] >> 64)) >> 52;

    out[8] += ((limb) in[7]) >> 58;
    out[8] += (((limb) (in[7] >> 64)) & bottom52bits) << 6;
    /* out[x > 1] < 2^58 + 2^6 + 2^58 + 2^12
     *            < 2^59 + 2^13 */
    overflow1 = ((limb) (in[7] >> 64)) >> 52;

    /* Carries past limb 8 wrap around: 2^521 == 1 mod p, and the wrapped
     * bits land one position up, hence the doubling (<<= 1) below. */
    overflow1 += ((limb) in[8]) >> 58;
    overflow1 += (((limb) (in[8] >> 64)) & bottom52bits) << 6;
    overflow2 = ((limb) (in[8] >> 64)) >> 52;

    overflow1 <<= 1;            /* overflow1 < 2^13 + 2^7 + 2^59 */
    overflow2 <<= 1;            /* overflow2 < 2^13 */

    out[0] += overflow1;        /* out[0] < 2^60 */
    out[1] += overflow2;        /* out[1] < 2^59 + 2^6 + 2^13 */

    out[1] += out[0] >> 58;
    out[0] &= bottom58bits;
    /* out[0] < 2^58
     * out[1] < 2^59 + 2^6 + 2^13 + 2^2
     *        < 2^59 + 2^14 */
}
660
661 static void felem_square_reduce(felem out, const felem in)
662 {
663 largefelem tmp;
664 felem_square(tmp, in);
665 felem_reduce(out, tmp);
666 }
667
668 static void felem_mul_reduce(felem out, const felem in1, const felem in2)
669 {
670 largefelem tmp;
671 felem_mul(tmp, in1, in2);
672 felem_reduce(out, tmp);
673 }
674
/* felem_inv calculates |out| = |in|^{-1}
 *
 * Based on Fermat's Little Theorem:
 *   a^p = a (mod p)
 *   a^{p-1} = 1 (mod p)
 *   a^{p-2} = a^{-1} (mod p)
 *
 * The comments track the exponent accumulated in each temporary, written
 * as sums/differences of powers of two; the chain ends at p-2 = 2^521 - 3.
 */
static void felem_inv(felem out, const felem in)
{
    felem ftmp, ftmp2, ftmp3, ftmp4;
    largefelem tmp;
    unsigned i;

    felem_square(tmp, in); felem_reduce(ftmp, tmp);          /* 2^1 */
    felem_mul(tmp, in, ftmp); felem_reduce(ftmp, tmp);       /* 2^2 - 2^0 */
    felem_assign(ftmp2, ftmp);
    felem_square(tmp, ftmp); felem_reduce(ftmp, tmp);        /* 2^3 - 2^1 */
    felem_mul(tmp, in, ftmp); felem_reduce(ftmp, tmp);       /* 2^3 - 2^0 */
    felem_square(tmp, ftmp); felem_reduce(ftmp, tmp);        /* 2^4 - 2^1 */

    felem_square(tmp, ftmp2); felem_reduce(ftmp3, tmp);      /* 2^3 - 2^1 */
    felem_square(tmp, ftmp3); felem_reduce(ftmp3, tmp);      /* 2^4 - 2^2 */
    felem_mul(tmp, ftmp3, ftmp2); felem_reduce(ftmp3, tmp);  /* 2^4 - 2^0 */

    felem_assign(ftmp2, ftmp3);
    felem_square(tmp, ftmp3); felem_reduce(ftmp3, tmp);      /* 2^5 - 2^1 */
    felem_square(tmp, ftmp3); felem_reduce(ftmp3, tmp);      /* 2^6 - 2^2 */
    felem_square(tmp, ftmp3); felem_reduce(ftmp3, tmp);      /* 2^7 - 2^3 */
    felem_square(tmp, ftmp3); felem_reduce(ftmp3, tmp);      /* 2^8 - 2^4 */
    /* This felem_assign is immediately overwritten by the next line's
     * felem_reduce into ftmp4 — it appears to be dead code but is
     * harmless; left untouched. */
    felem_assign(ftmp4, ftmp3);
    felem_mul(tmp, ftmp3, ftmp); felem_reduce(ftmp4, tmp);   /* 2^8 - 2^1 */
    felem_square(tmp, ftmp4); felem_reduce(ftmp4, tmp);      /* 2^9 - 2^2 */
    felem_mul(tmp, ftmp3, ftmp2); felem_reduce(ftmp3, tmp);  /* 2^8 - 2^0 */
    felem_assign(ftmp2, ftmp3);

    /* Repeated square-and-multiply: each loop doubles the exponent width,
     * then the multiply by the saved ftmp2 fills in the low bits. */
    for (i = 0; i < 8; i++)
        {
        felem_square(tmp, ftmp3); felem_reduce(ftmp3, tmp);  /* 2^16 - 2^8 */
        }
    felem_mul(tmp, ftmp3, ftmp2); felem_reduce(ftmp3, tmp);  /* 2^16 - 2^0 */
    felem_assign(ftmp2, ftmp3);

    for (i = 0; i < 16; i++)
        {
        felem_square(tmp, ftmp3); felem_reduce(ftmp3, tmp);  /* 2^32 - 2^16 */
        }
    felem_mul(tmp, ftmp3, ftmp2); felem_reduce(ftmp3, tmp);  /* 2^32 - 2^0 */
    felem_assign(ftmp2, ftmp3);

    for (i = 0; i < 32; i++)
        {
        felem_square(tmp, ftmp3); felem_reduce(ftmp3, tmp);  /* 2^64 - 2^32 */
        }
    felem_mul(tmp, ftmp3, ftmp2); felem_reduce(ftmp3, tmp);  /* 2^64 - 2^0 */
    felem_assign(ftmp2, ftmp3);

    for (i = 0; i < 64; i++)
        {
        felem_square(tmp, ftmp3); felem_reduce(ftmp3, tmp);  /* 2^128 - 2^64 */
        }
    felem_mul(tmp, ftmp3, ftmp2); felem_reduce(ftmp3, tmp);  /* 2^128 - 2^0 */
    felem_assign(ftmp2, ftmp3);

    for (i = 0; i < 128; i++)
        {
        felem_square(tmp, ftmp3); felem_reduce(ftmp3, tmp);  /* 2^256 - 2^128 */
        }
    felem_mul(tmp, ftmp3, ftmp2); felem_reduce(ftmp3, tmp);  /* 2^256 - 2^0 */
    felem_assign(ftmp2, ftmp3);

    for (i = 0; i < 256; i++)
        {
        felem_square(tmp, ftmp3); felem_reduce(ftmp3, tmp);  /* 2^512 - 2^256 */
        }
    felem_mul(tmp, ftmp3, ftmp2); felem_reduce(ftmp3, tmp);  /* 2^512 - 2^0 */

    for (i = 0; i < 9; i++)
        {
        felem_square(tmp, ftmp3); felem_reduce(ftmp3, tmp);  /* 2^521 - 2^9 */
        }
    /* ftmp4 holds 2^9 - 2^2, so this gives 2^521 - 2^2 (the original
     * comment said 2^512, a typo). */
    felem_mul(tmp, ftmp3, ftmp4); felem_reduce(ftmp3, tmp);  /* 2^521 - 2^2 */
    felem_mul(tmp, ftmp3, in); felem_reduce(out, tmp);       /* 2^521 - 3 = p - 2 */
}
758
759 /* This is 2^521-1, expressed as an felem */
760 static const felem kPrime =
761 {
762 0x03ffffffffffffff, 0x03ffffffffffffff, 0x03ffffffffffffff,
763 0x03ffffffffffffff, 0x03ffffffffffffff, 0x03ffffffffffffff,
764 0x03ffffffffffffff, 0x03ffffffffffffff, 0x01ffffffffffffff
765 };
766
/* felem_is_zero returns a limb with all bits set if |in| == 0 (mod p) and 0
 * otherwise. Runs in constant time (no data-dependent branches).
 * On entry:
 *   in[i] < 2^59 + 2^14
 */
static limb felem_is_zero(const felem in)
{
    felem ftmp;
    limb is_zero, is_p;
    felem_assign(ftmp, in);

    /* First normalise: fold the top carry around (2^521 == 1 mod p) and
     * propagate carries so that every limb fits its 58/57-bit slot. */
    ftmp[0] += ftmp[8] >> 57; ftmp[8] &= bottom57bits;
    /* ftmp[8] < 2^57 */
    ftmp[1] += ftmp[0] >> 58; ftmp[0] &= bottom58bits;
    ftmp[2] += ftmp[1] >> 58; ftmp[1] &= bottom58bits;
    ftmp[3] += ftmp[2] >> 58; ftmp[2] &= bottom58bits;
    ftmp[4] += ftmp[3] >> 58; ftmp[3] &= bottom58bits;
    ftmp[5] += ftmp[4] >> 58; ftmp[4] &= bottom58bits;
    ftmp[6] += ftmp[5] >> 58; ftmp[5] &= bottom58bits;
    ftmp[7] += ftmp[6] >> 58; ftmp[6] &= bottom58bits;
    ftmp[8] += ftmp[7] >> 58; ftmp[7] &= bottom58bits;
    /* ftmp[8] < 2^57 + 4 */

    /* The ninth limb of 2*(2^521-1) is 0x03ffffffffffffff, which is
     * greater than our bound for ftmp[8]. Therefore we only have to check
     * whether the value is zero or 2^521-1 — no higher multiple of p is
     * representable here. */

    /* OR all limbs together: the result is 0 iff the value is 0. */
    is_zero = 0;
    is_zero |= ftmp[0];
    is_zero |= ftmp[1];
    is_zero |= ftmp[2];
    is_zero |= ftmp[3];
    is_zero |= ftmp[4];
    is_zero |= ftmp[5];
    is_zero |= ftmp[6];
    is_zero |= ftmp[7];
    is_zero |= ftmp[8];

    is_zero--;
    /* We know that ftmp[i] < 2^63, therefore the only way that the top bit
     * can be set is if is_zero was 0 before the decrement. */
    is_zero = ((s64) is_zero) >> 63;     /* arithmetic shift smears the top bit */

    /* XOR against kPrime: is_p is 0 iff the value equals 2^521-1. */
    is_p = ftmp[0] ^ kPrime[0];
    is_p |= ftmp[1] ^ kPrime[1];
    is_p |= ftmp[2] ^ kPrime[2];
    is_p |= ftmp[3] ^ kPrime[3];
    is_p |= ftmp[4] ^ kPrime[4];
    is_p |= ftmp[5] ^ kPrime[5];
    is_p |= ftmp[6] ^ kPrime[6];
    is_p |= ftmp[7] ^ kPrime[7];
    is_p |= ftmp[8] ^ kPrime[8];

    is_p--;
    is_p = ((s64) is_p) >> 63;

    /* Zero mod p means the value is either 0 or p itself. */
    is_zero |= is_p;
    return is_zero;
}
826
827 static int felem_is_zero_int(const felem in)
828 {
829 return (int) (felem_is_zero(in) & ((limb)1));
830 }
831
/* felem_contract converts |in| to its unique, minimal representation,
 * i.e. the canonical value in [0, p). Runs in constant time.
 * On entry:
 *   in[i] < 2^59 + 2^14
 */
static void felem_contract(felem out, const felem in)
{
    limb is_p, is_greater, sign;
    static const limb two58 = ((limb)1) << 58;

    felem_assign(out, in);

    /* Normalise: fold the top carry around (2^521 == 1 mod p) and
     * propagate carries so every limb fits its 58/57-bit slot. */
    out[0] += out[8] >> 57; out[8] &= bottom57bits;
    /* out[8] < 2^57 */
    out[1] += out[0] >> 58; out[0] &= bottom58bits;
    out[2] += out[1] >> 58; out[1] &= bottom58bits;
    out[3] += out[2] >> 58; out[2] &= bottom58bits;
    out[4] += out[3] >> 58; out[3] &= bottom58bits;
    out[5] += out[4] >> 58; out[4] &= bottom58bits;
    out[6] += out[5] >> 58; out[5] &= bottom58bits;
    out[7] += out[6] >> 58; out[6] &= bottom58bits;
    out[8] += out[7] >> 58; out[7] &= bottom58bits;
    /* out[8] < 2^57 + 4 */

    /* If the value is greater than 2^521-1 then we have to subtract
     * 2^521-1 out. See the comments in felem_is_zero regarding why we
     * don't test for other multiples of the prime. */

    /* First, if |out| is equal to 2^521-1, we subtract it out to get zero. */

    is_p = out[0] ^ kPrime[0];
    is_p |= out[1] ^ kPrime[1];
    is_p |= out[2] ^ kPrime[2];
    is_p |= out[3] ^ kPrime[3];
    is_p |= out[4] ^ kPrime[4];
    is_p |= out[5] ^ kPrime[5];
    is_p |= out[6] ^ kPrime[6];
    is_p |= out[7] ^ kPrime[7];
    is_p |= out[8] ^ kPrime[8];

    /* Smear any set bit of is_p across the whole word, then invert:
     * constant-time "is_p == 0" test without a branch. */
    is_p--;
    is_p &= is_p << 32;
    is_p &= is_p << 16;
    is_p &= is_p << 8;
    is_p &= is_p << 4;
    is_p &= is_p << 2;
    is_p &= is_p << 1;
    is_p = ((s64) is_p) >> 63;
    is_p = ~is_p;

    /* is_p is 0 iff |out| == 2^521-1 and all ones otherwise */

    out[0] &= is_p;
    out[1] &= is_p;
    out[2] &= is_p;
    out[3] &= is_p;
    out[4] &= is_p;
    out[5] &= is_p;
    out[6] &= is_p;
    out[7] &= is_p;
    out[8] &= is_p;

    /* In order to test that |out| >= 2^521-1 we need only test if out[8]
     * >> 57 is greater than zero as (2^521-1) + x >= 2^522 */
    is_greater = out[8] >> 57;
    is_greater |= is_greater << 32;
    is_greater |= is_greater << 16;
    is_greater |= is_greater << 8;
    is_greater |= is_greater << 4;
    is_greater |= is_greater << 2;
    is_greater |= is_greater << 1;
    is_greater = ((s64) is_greater) >> 63;

    /* Conditionally subtract p (mask is all-ones iff |out| >= p). */
    out[0] -= kPrime[0] & is_greater;
    out[1] -= kPrime[1] & is_greater;
    out[2] -= kPrime[2] & is_greater;
    out[3] -= kPrime[3] & is_greater;
    out[4] -= kPrime[4] & is_greater;
    out[5] -= kPrime[5] & is_greater;
    out[6] -= kPrime[6] & is_greater;
    out[7] -= kPrime[7] & is_greater;
    out[8] -= kPrime[8] & is_greater;

    /* Eliminate negative coefficients: if a limb went negative (top bit
     * set), add 2^58 back to it and borrow 1 from the next limb. */
    sign = -(out[0] >> 63); out[0] += (two58 & sign); out[1] -= (1 & sign);
    sign = -(out[1] >> 63); out[1] += (two58 & sign); out[2] -= (1 & sign);
    sign = -(out[2] >> 63); out[2] += (two58 & sign); out[3] -= (1 & sign);
    sign = -(out[3] >> 63); out[3] += (two58 & sign); out[4] -= (1 & sign);
    sign = -(out[4] >> 63); out[4] += (two58 & sign); out[5] -= (1 & sign);
    /* NOTE(review): |out[0]| below looks like it should be |out[5]|. It
     * appears benign: out[0] is already non-negative at this point, so the
     * line is a no-op, and limbs 5-7 are re-normalised again below —
     * confirm against upstream before changing. */
    sign = -(out[0] >> 63); out[5] += (two58 & sign); out[6] -= (1 & sign);
    sign = -(out[6] >> 63); out[6] += (two58 & sign); out[7] -= (1 & sign);
    sign = -(out[7] >> 63); out[7] += (two58 & sign); out[8] -= (1 & sign);
    sign = -(out[5] >> 63); out[5] += (two58 & sign); out[6] -= (1 & sign);
    sign = -(out[6] >> 63); out[6] += (two58 & sign); out[7] -= (1 & sign);
    sign = -(out[7] >> 63); out[7] += (two58 & sign); out[8] -= (1 & sign);
}
927
928 /* Group operations
929 * ----------------
930 *
931 * Building on top of the field operations we have the operations on the
932 * elliptic curve group itself. Points on the curve are represented in Jacobian
933 * coordinates */
934
/* point_double calculates 2*(x_in, y_in, z_in)
936 *
937 * The method is taken from:
938 * http://hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-3.html#doubling-dbl-2001-b
939 *
940 * Outputs can equal corresponding inputs, i.e., x_out == x_in is allowed.
941 * while x_out == y_in is not (maybe this works, but it's not tested). */
static void
point_double(felem x_out, felem y_out, felem z_out,
	const felem x_in, const felem y_in, const felem z_in)
	{
	largefelem tmp, tmp2;
	felem delta, gamma, beta, alpha, ftmp, ftmp2;

	/* The bracketed comments below track worst-case limb magnitudes, to
	 * show that every value fed to felem_mul/felem_square/felem_reduce
	 * stays inside the range those functions can accept (< 2^128 for the
	 * 128-bit intermediates). */
	felem_assign(ftmp, x_in);
	felem_assign(ftmp2, x_in);

	/* delta = z^2 */
	felem_square(tmp, z_in);
	felem_reduce(delta, tmp);	/* delta[i] < 2^59 + 2^14 */

	/* gamma = y^2 */
	felem_square(tmp, y_in);
	felem_reduce(gamma, tmp);	/* gamma[i] < 2^59 + 2^14 */

	/* beta = x*gamma */
	felem_mul(tmp, x_in, gamma);
	felem_reduce(beta, tmp);	/* beta[i] < 2^59 + 2^14 */

	/* alpha = 3*(x-delta)*(x+delta) */
	felem_diff64(ftmp, delta);
	/* ftmp[i] < 2^61 */
	felem_sum64(ftmp2, delta);
	/* ftmp2[i] < 2^60 + 2^15 */
	felem_scalar64(ftmp2, 3);
	/* ftmp2[i] < 3*2^60 + 3*2^15 */
	felem_mul(tmp, ftmp, ftmp2);
	/* tmp[i] < 17(3*2^121 + 3*2^76)
	 *        = 61*2^121 + 61*2^76
	 *        < 64*2^121 + 64*2^76
	 *        = 2^127 + 2^82
	 *        < 2^128 */
	felem_reduce(alpha, tmp);

	/* x' = alpha^2 - 8*beta */
	felem_square(tmp, alpha);
	/* tmp[i] < 17*2^120
	 *        < 2^125 */
	felem_assign(ftmp, beta);
	felem_scalar64(ftmp, 8);
	/* ftmp[i] < 2^62 + 2^17 */
	felem_diff_128_64(tmp, ftmp);
	/* tmp[i] < 2^125 + 2^63 + 2^62 + 2^17 */
	felem_reduce(x_out, tmp);

	/* z' = (y + z)^2 - gamma - delta */
	felem_sum64(delta, gamma);
	/* delta[i] < 2^60 + 2^15 */
	felem_assign(ftmp, y_in);
	felem_sum64(ftmp, z_in);
	/* ftmp[i] < 2^60 + 2^15 */
	felem_square(tmp, ftmp);
	/* tmp[i] < 17(2^122)
	 *        < 2^127 */
	felem_diff_128_64(tmp, delta);
	/* tmp[i] < 2^127 + 2^63 */
	felem_reduce(z_out, tmp);

	/* y' = alpha*(4*beta - x') - 8*gamma^2 */
	felem_scalar64(beta, 4);
	/* beta[i] < 2^61 + 2^16 */
	felem_diff64(beta, x_out);
	/* beta[i] < 2^61 + 2^60 + 2^16 */
	felem_mul(tmp, alpha, beta);
	/* tmp[i] < 17*((2^59 + 2^14)(2^61 + 2^60 + 2^16))
	 *        = 17*(2^120 + 2^75 + 2^119 + 2^74 + 2^75 + 2^30)
	 *        = 17*(2^120 + 2^119 + 2^76 + 2^74 + 2^30)
	 *        < 2^128 */
	felem_square(tmp2, gamma);
	/* tmp2[i] < 17*(2^59 + 2^14)^2
	 *         = 17*(2^118 + 2^74 + 2^28) */
	felem_scalar128(tmp2, 8);
	/* tmp2[i] < 8*17*(2^118 + 2^74 + 2^28)
	 *         = 2^125 + 2^121 + 2^81 + 2^77 + 2^35 + 2^31
	 *         < 2^126 */
	felem_diff128(tmp, tmp2);
	/* tmp[i] < 2^127 - 2^69 + 17(2^120 + 2^119 + 2^76 + 2^74 + 2^30)
	 *        = 2^127 + 2^124 + 2^122 + 2^120 + 2^118 + 2^80 + 2^78 + 2^76 +
	 *          2^74 + 2^69 + 2^34 + 2^30
	 *        < 2^128 */
	felem_reduce(y_out, tmp);
	}
1027
1028 /* copy_conditional copies in to out iff mask is all ones. */
1029 static void
1030 copy_conditional(felem out, const felem in, limb mask)
1031 {
1032 unsigned i;
1033 for (i = 0; i < NLIMBS; ++i)
1034 {
1035 const limb tmp = mask & (in[i] ^ out[i]);
1036 out[i] ^= tmp;
1037 }
1038 }
1039
/* point_add calculates (x1, y1, z1) + (x2, y2, z2)
1041 *
1042 * The method is taken from
1043 * http://hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-3.html#addition-add-2007-bl,
1044 * adapted for mixed addition (z2 = 1, or z2 = 0 for the point at infinity).
1045 *
1046 * This function includes a branch for checking whether the two input points
1047 * are equal (while not equal to the point at infinity). This case never
1048 * happens during single point multiplication, so there is no timing leak for
1049 * ECDH or ECDSA signing. */
static void point_add(felem x3, felem y3, felem z3,
	const felem x1, const felem y1, const felem z1,
	const int mixed, const felem x2, const felem y2, const felem z2)
	{
	felem ftmp, ftmp2, ftmp3, ftmp4, ftmp5, ftmp6, x_out, y_out, z_out;
	largefelem tmp, tmp2;
	limb x_equal, y_equal, z1_is_zero, z2_is_zero;

	/* z == 0 marks the point at infinity; recorded up front so the
	 * result can be patched in constant time at the end */
	z1_is_zero = felem_is_zero(z1);
	z2_is_zero = felem_is_zero(z2);

	/* ftmp = z1z1 = z1**2 */
	felem_square(tmp, z1);
	felem_reduce(ftmp, tmp);

	if (!mixed)
		{
		/* ftmp2 = z2z2 = z2**2 */
		felem_square(tmp, z2);
		felem_reduce(ftmp2, tmp);

		/* u1 = ftmp3 = x1*z2z2 */
		felem_mul(tmp, x1, ftmp2);
		felem_reduce(ftmp3, tmp);

		/* ftmp5 = z1 + z2 */
		felem_assign(ftmp5, z1);
		felem_sum64(ftmp5, z2);
		/* ftmp5[i] < 2^61 */

		/* ftmp5 = (z1 + z2)**2 - z1z1 - z2z2 = 2*z1z2 */
		felem_square(tmp, ftmp5);
		/* tmp[i] < 17*2^122 */
		felem_diff_128_64(tmp, ftmp);
		/* tmp[i] < 17*2^122 + 2^63 */
		felem_diff_128_64(tmp, ftmp2);
		/* tmp[i] < 17*2^122 + 2^64 */
		felem_reduce(ftmp5, tmp);

		/* ftmp2 = z2 * z2z2 */
		felem_mul(tmp, ftmp2, z2);
		felem_reduce(ftmp2, tmp);

		/* s1 = ftmp6 = y1 * z2**3 */
		felem_mul(tmp, y1, ftmp2);
		felem_reduce(ftmp6, tmp);
		}
	else
		{
		/* We'll assume z2 = 1 (special case z2 = 0 is handled later) */

		/* u1 = ftmp3 = x1*z2z2 */
		felem_assign(ftmp3, x1);

		/* ftmp5 = 2*z1z2 */
		felem_scalar(ftmp5, z1, 2);

		/* s1 = ftmp6 = y1 * z2**3 */
		felem_assign(ftmp6, y1);
		}

	/* u2 = x2*z1z1 */
	felem_mul(tmp, x2, ftmp);
	/* tmp[i] < 17*2^120 */

	/* h = ftmp4 = u2 - u1 */
	felem_diff_128_64(tmp, ftmp3);
	/* tmp[i] < 17*2^120 + 2^63 */
	felem_reduce(ftmp4, tmp);

	x_equal = felem_is_zero(ftmp4);

	/* z_out = ftmp5 * h */
	felem_mul(tmp, ftmp5, ftmp4);
	felem_reduce(z_out, tmp);

	/* ftmp = z1 * z1z1 */
	felem_mul(tmp, ftmp, z1);
	felem_reduce(ftmp, tmp);

	/* s2 = tmp = y2 * z1**3 */
	felem_mul(tmp, y2, ftmp);
	/* tmp[i] < 17*2^120 */

	/* r = ftmp5 = (s2 - s1)*2 */
	felem_diff_128_64(tmp, ftmp6);
	/* tmp[i] < 17*2^120 + 2^63 */
	felem_reduce(ftmp5, tmp);
	y_equal = felem_is_zero(ftmp5);
	felem_scalar64(ftmp5, 2);
	/* ftmp5[i] < 2^61 */

	/* the addition formula is degenerate for doubling; fall back to
	 * point_double (see the comment above this function regarding the
	 * timing of this branch) */
	if (x_equal && y_equal && !z1_is_zero && !z2_is_zero)
		{
		point_double(x3, y3, z3, x1, y1, z1);
		return;
		}

	/* I = ftmp = (2h)**2 */
	felem_assign(ftmp, ftmp4);
	felem_scalar64(ftmp, 2);
	/* ftmp[i] < 2^61 */
	felem_square(tmp, ftmp);
	/* tmp[i] < 17*2^122 */
	felem_reduce(ftmp, tmp);

	/* J = ftmp2 = h * I */
	felem_mul(tmp, ftmp4, ftmp);
	felem_reduce(ftmp2, tmp);

	/* V = ftmp4 = U1 * I */
	felem_mul(tmp, ftmp3, ftmp);
	felem_reduce(ftmp4, tmp);

	/* x_out = r**2 - J - 2V */
	felem_square(tmp, ftmp5);
	/* tmp[i] < 17*2^122 */
	felem_diff_128_64(tmp, ftmp2);
	/* tmp[i] < 17*2^122 + 2^63 */
	felem_assign(ftmp3, ftmp4);
	felem_scalar64(ftmp4, 2);
	/* ftmp4[i] < 2^61 */
	felem_diff_128_64(tmp, ftmp4);
	/* tmp[i] < 17*2^122 + 2^64 */
	felem_reduce(x_out, tmp);

	/* y_out = r(V-x_out) - 2 * s1 * J */
	felem_diff64(ftmp3, x_out);
	/* ftmp3[i] < 2^60 + 2^60
	 *          = 2^61 */
	felem_mul(tmp, ftmp5, ftmp3);
	/* tmp[i] < 17*2^122 */
	felem_mul(tmp2, ftmp6, ftmp2);
	/* tmp2[i] < 17*2^120 */
	felem_scalar128(tmp2, 2);
	/* tmp2[i] < 17*2^121 */
	felem_diff128(tmp, tmp2);
	/* tmp[i] < 2^127 - 2^69 + 17*2^122
	 *        = 2^126 - 2^122 - 2^6 - 2^2 - 1
	 *        < 2^127 */
	felem_reduce(y_out, tmp);

	/* the formula breaks down when either input is the point at
	 * infinity; substitute the other input in that case, in constant
	 * time */
	copy_conditional(x_out, x2, z1_is_zero);
	copy_conditional(x_out, x1, z2_is_zero);
	copy_conditional(y_out, y2, z1_is_zero);
	copy_conditional(y_out, y1, z2_is_zero);
	copy_conditional(z_out, z2, z1_is_zero);
	copy_conditional(z_out, z1, z2_is_zero);
	felem_assign(x3, x_out);
	felem_assign(y3, y_out);
	felem_assign(z3, z_out);
	}
1202
1203 /* Base point pre computation
1204 * --------------------------
1205 *
1206 * Two different sorts of precomputed tables are used in the following code.
 * Each contains various points on the curve, where each point is three field
1208 * elements (x, y, z).
1209 *
1210 * For the base point table, z is usually 1 (0 for the point at infinity).
1211 * This table has 16 elements:
1212 * index | bits | point
1213 * ------+---------+------------------------------
1214 * 0 | 0 0 0 0 | 0G
1215 * 1 | 0 0 0 1 | 1G
1216 * 2 | 0 0 1 0 | 2^130G
1217 * 3 | 0 0 1 1 | (2^130 + 1)G
1218 * 4 | 0 1 0 0 | 2^260G
1219 * 5 | 0 1 0 1 | (2^260 + 1)G
1220 * 6 | 0 1 1 0 | (2^260 + 2^130)G
1221 * 7 | 0 1 1 1 | (2^260 + 2^130 + 1)G
1222 * 8 | 1 0 0 0 | 2^390G
1223 * 9 | 1 0 0 1 | (2^390 + 1)G
1224 * 10 | 1 0 1 0 | (2^390 + 2^130)G
1225 * 11 | 1 0 1 1 | (2^390 + 2^130 + 1)G
1226 * 12 | 1 1 0 0 | (2^390 + 2^260)G
1227 * 13 | 1 1 0 1 | (2^390 + 2^260 + 1)G
1228 * 14 | 1 1 1 0 | (2^390 + 2^260 + 2^130)G
1229 * 15 | 1 1 1 1 | (2^390 + 2^260 + 2^130 + 1)G
1230 *
1231 * The reason for this is so that we can clock bits into four different
1232 * locations when doing simple scalar multiplies against the base point.
1233 *
1234 * Tables for other points have table[i] = iG for i in 0 .. 16. */
1235
1236 /* gmul is the table of precomputed base points */
static const felem gmul[16][3] =
	/* index 0: the point at infinity (z = 0) */
	{{{0, 0, 0, 0, 0, 0, 0, 0, 0},
	  {0, 0, 0, 0, 0, 0, 0, 0, 0},
	  {0, 0, 0, 0, 0, 0, 0, 0, 0}},
	/* index 1: 1G */
	 {{0x017e7e31c2e5bd66, 0x022cf0615a90a6fe, 0x00127a2ffa8de334,
	   0x01dfbf9d64a3f877, 0x006b4d3dbaa14b5e, 0x014fed487e0a2bd8,
	   0x015b4429c6481390, 0x03a73678fb2d988e, 0x00c6858e06b70404},
	  {0x00be94769fd16650, 0x031c21a89cb09022, 0x039013fad0761353,
	   0x02657bd099031542, 0x03273e662c97ee72, 0x01e6d11a05ebef45,
	   0x03d1bd998f544495, 0x03001172297ed0b1, 0x011839296a789a3b},
	  {1, 0, 0, 0, 0, 0, 0, 0, 0}},
	/* index 2: 2^130G */
	 {{0x0373faacbc875bae, 0x00f325023721c671, 0x00f666fd3dbde5ad,
	   0x01a6932363f88ea7, 0x01fc6d9e13f9c47b, 0x03bcbffc2bbf734e,
	   0x013ee3c3647f3a92, 0x029409fefe75d07d, 0x00ef9199963d85e5},
	  {0x011173743ad5b178, 0x02499c7c21bf7d46, 0x035beaeabb8b1a58,
	   0x00f989c4752ea0a3, 0x0101e1de48a9c1a3, 0x01a20076be28ba6c,
	   0x02f8052e5eb2de95, 0x01bfe8f82dea117c, 0x0160074d3c36ddb7},
	  {1, 0, 0, 0, 0, 0, 0, 0, 0}},
	/* index 3: (2^130 + 1)G */
	 {{0x012f3fc373393b3b, 0x03d3d6172f1419fa, 0x02adc943c0b86873,
	   0x00d475584177952b, 0x012a4d1673750ee2, 0x00512517a0f13b0c,
	   0x02b184671a7b1734, 0x0315b84236f1a50a, 0x00a4afc472edbdb9},
	  {0x00152a7077f385c4, 0x03044007d8d1c2ee, 0x0065829d61d52b52,
	   0x00494ff6b6631d0d, 0x00a11d94d5f06bcf, 0x02d2f89474d9282e,
	   0x0241c5727c06eeb9, 0x0386928710fbdb9d, 0x01f883f727b0dfbe},
	  {1, 0, 0, 0, 0, 0, 0, 0, 0}},
	/* index 4: 2^260G */
	 {{0x019b0c3c9185544d, 0x006243a37c9d97db, 0x02ee3cbe030a2ad2,
	   0x00cfdd946bb51e0d, 0x0271c00932606b91, 0x03f817d1ec68c561,
	   0x03f37009806a369c, 0x03c1f30baf184fd5, 0x01091022d6d2f065},
	  {0x0292c583514c45ed, 0x0316fca51f9a286c, 0x00300af507c1489a,
	   0x0295f69008298cf1, 0x02c0ed8274943d7b, 0x016509b9b47a431e,
	   0x02bc9de9634868ce, 0x005b34929bffcb09, 0x000c1a0121681524},
	  {1, 0, 0, 0, 0, 0, 0, 0, 0}},
	/* index 5: (2^260 + 1)G */
	 {{0x0286abc0292fb9f2, 0x02665eee9805b3f7, 0x01ed7455f17f26d6,
	   0x0346355b83175d13, 0x006284944cd0a097, 0x0191895bcdec5e51,
	   0x02e288370afda7d9, 0x03b22312bfefa67a, 0x01d104d3fc0613fe},
	  {0x0092421a12f7e47f, 0x0077a83fa373c501, 0x03bd25c5f696bd0d,
	   0x035c41e4d5459761, 0x01ca0d1742b24f53, 0x00aaab27863a509c,
	   0x018b6de47df73917, 0x025c0b771705cd01, 0x01fd51d566d760a7},
	  {1, 0, 0, 0, 0, 0, 0, 0, 0}},
	/* index 6: (2^260 + 2^130)G */
	 {{0x01dd92ff6b0d1dbd, 0x039c5e2e8f8afa69, 0x0261ed13242c3b27,
	   0x0382c6e67026e6a0, 0x01d60b10be2089f9, 0x03c15f3dce86723f,
	   0x03c764a32d2a062d, 0x017307eac0fad056, 0x018207c0b96c5256},
	  {0x0196a16d60e13154, 0x03e6ce74c0267030, 0x00ddbf2b4e52a5aa,
	   0x012738241bbf31c8, 0x00ebe8dc04685a28, 0x024c2ad6d380d4a2,
	   0x035ee062a6e62d0e, 0x0029ed74af7d3a0f, 0x00eef32aec142ebd},
	  {1, 0, 0, 0, 0, 0, 0, 0, 0}},
	/* index 7: (2^260 + 2^130 + 1)G */
	 {{0x00c31ec398993b39, 0x03a9f45bcda68253, 0x00ac733c24c70890,
	   0x00872b111401ff01, 0x01d178c23195eafb, 0x03bca2c816b87f74,
	   0x0261a9af46fbad7a, 0x0324b2a8dd3d28f9, 0x00918121d8f24e23},
	  {0x032bc8c1ca983cd7, 0x00d869dfb08fc8c6, 0x01693cb61fce1516,
	   0x012a5ea68f4e88a8, 0x010869cab88d7ae3, 0x009081ad277ceee1,
	   0x033a77166d064cdc, 0x03955235a1fb3a95, 0x01251a4a9b25b65e},
	  {1, 0, 0, 0, 0, 0, 0, 0, 0}},
	/* index 8: 2^390G */
	 {{0x00148a3a1b27f40b, 0x0123186df1b31fdc, 0x00026e7beaad34ce,
	   0x01db446ac1d3dbba, 0x0299c1a33437eaec, 0x024540610183cbb7,
	   0x0173bb0e9ce92e46, 0x02b937e43921214b, 0x01ab0436a9bf01b5},
	  {0x0383381640d46948, 0x008dacbf0e7f330f, 0x03602122bcc3f318,
	   0x01ee596b200620d6, 0x03bd0585fda430b3, 0x014aed77fd123a83,
	   0x005ace749e52f742, 0x0390fe041da2b842, 0x0189a8ceb3299242},
	  {1, 0, 0, 0, 0, 0, 0, 0, 0}},
	/* index 9: (2^390 + 1)G */
	 {{0x012a19d6b3282473, 0x00c0915918b423ce, 0x023a954eb94405ae,
	   0x00529f692be26158, 0x0289fa1b6fa4b2aa, 0x0198ae4ceea346ef,
	   0x0047d8cdfbdedd49, 0x00cc8c8953f0f6b8, 0x001424abbff49203},
	  {0x0256732a1115a03a, 0x0351bc38665c6733, 0x03f7b950fb4a6447,
	   0x000afffa94c22155, 0x025763d0a4dab540, 0x000511e92d4fc283,
	   0x030a7e9eda0ee96c, 0x004c3cd93a28bf0a, 0x017edb3a8719217f},
	  {1, 0, 0, 0, 0, 0, 0, 0, 0}},
	/* index 10: (2^390 + 2^130)G */
	 {{0x011de5675a88e673, 0x031d7d0f5e567fbe, 0x0016b2062c970ae5,
	   0x03f4a2be49d90aa7, 0x03cef0bd13822866, 0x03f0923dcf774a6c,
	   0x0284bebc4f322f72, 0x016ab2645302bb2c, 0x01793f95dace0e2a},
	  {0x010646e13527a28f, 0x01ca1babd59dc5e7, 0x01afedfd9a5595df,
	   0x01f15785212ea6b1, 0x0324e5d64f6ae3f4, 0x02d680f526d00645,
	   0x0127920fadf627a7, 0x03b383f75df4f684, 0x0089e0057e783b0a},
	  {1, 0, 0, 0, 0, 0, 0, 0, 0}},
	/* index 11: (2^390 + 2^130 + 1)G */
	 {{0x00f334b9eb3c26c6, 0x0298fdaa98568dce, 0x01c2d24843a82292,
	   0x020bcb24fa1b0711, 0x02cbdb3d2b1875e6, 0x0014907598f89422,
	   0x03abe3aa43b26664, 0x02cbf47f720bc168, 0x0133b5e73014b79b},
	  {0x034aab5dab05779d, 0x00cdc5d71fee9abb, 0x0399f16bd4bd9d30,
	   0x03582fa592d82647, 0x02be1cdfb775b0e9, 0x0034f7cea32e94cb,
	   0x0335a7f08f56f286, 0x03b707e9565d1c8b, 0x0015c946ea5b614f},
	  {1, 0, 0, 0, 0, 0, 0, 0, 0}},
	/* index 12: (2^390 + 2^260)G */
	 {{0x024676f6cff72255, 0x00d14625cac96378, 0x00532b6008bc3767,
	   0x01fc16721b985322, 0x023355ea1b091668, 0x029de7afdc0317c3,
	   0x02fc8a7ca2da037c, 0x02de1217d74a6f30, 0x013f7173175b73bf},
	  {0x0344913f441490b5, 0x0200f9e272b61eca, 0x0258a246b1dd55d2,
	   0x03753db9ea496f36, 0x025e02937a09c5ef, 0x030cbd3d14012692,
	   0x01793a67e70dc72a, 0x03ec1d37048a662e, 0x006550f700c32a8d},
	  {1, 0, 0, 0, 0, 0, 0, 0, 0}},
	/* index 13: (2^390 + 2^260 + 1)G */
	 {{0x00d3f48a347eba27, 0x008e636649b61bd8, 0x00d3b93716778fb3,
	   0x004d1915757bd209, 0x019d5311a3da44e0, 0x016d1afcbbe6aade,
	   0x0241bf5f73265616, 0x0384672e5d50d39b, 0x005009fee522b684},
	  {0x029b4fab064435fe, 0x018868ee095bbb07, 0x01ea3d6936cc92b8,
	   0x000608b00f78a2f3, 0x02db911073d1c20f, 0x018205938470100a,
	   0x01f1e4964cbe6ff2, 0x021a19a29eed4663, 0x01414485f42afa81},
	  {1, 0, 0, 0, 0, 0, 0, 0, 0}},
	/* index 14: (2^390 + 2^260 + 2^130)G */
	 {{0x01612b3a17f63e34, 0x03813992885428e6, 0x022b3c215b5a9608,
	   0x029b4057e19f2fcb, 0x0384059a587af7e6, 0x02d6400ace6fe610,
	   0x029354d896e8e331, 0x00c047ee6dfba65e, 0x0037720542e9d49d},
	  {0x02ce9eed7c5e9278, 0x0374ed703e79643b, 0x01316c54c4072006,
	   0x005aaa09054b2ee8, 0x002824000c840d57, 0x03d4eba24771ed86,
	   0x0189c50aabc3bdae, 0x0338c01541e15510, 0x00466d56e38eed42},
	  {1, 0, 0, 0, 0, 0, 0, 0, 0}},
	/* index 15: (2^390 + 2^260 + 2^130 + 1)G */
	 {{0x007efd8330ad8bd6, 0x02465ed48047710b, 0x0034c6606b215e0c,
	   0x016ae30c53cbf839, 0x01fa17bd37161216, 0x018ead4e61ce8ab9,
	   0x005482ed5f5dee46, 0x037543755bba1d7f, 0x005e5ac7e70a9d0f},
	  {0x0117e1bb2fdcb2a2, 0x03deea36249f40c4, 0x028d09b4a6246cb7,
	   0x03524b8855bcf756, 0x023d7d109d5ceb58, 0x0178e43e3223ef9c,
	   0x0154536a0c6e966a, 0x037964d1286ee9fe, 0x0199bcd90e125055},
	  {1, 0, 0, 0, 0, 0, 0, 0, 0}}};
1346
1347 /* select_point selects the |idx|th point from a precomputation table and
1348 * copies it to out. */
1349 static void select_point(const limb idx, unsigned int size, const felem pre_comp[/* size */][3],
1350 felem out[3])
1351 {
1352 unsigned i, j;
1353 limb *outlimbs = &out[0][0];
1354 memset(outlimbs, 0, 3 * sizeof(felem));
1355
1356 for (i = 0; i < size; i++)
1357 {
1358 const limb *inlimbs = &pre_comp[i][0][0];
1359 limb mask = i ^ idx;
1360 mask |= mask >> 4;
1361 mask |= mask >> 2;
1362 mask |= mask >> 1;
1363 mask &= 1;
1364 mask--;
1365 for (j = 0; j < NLIMBS * 3; j++)
1366 outlimbs[j] |= inlimbs[j] & mask;
1367 }
1368 }
1369
1370 /* get_bit returns the |i|th bit in |in| */
1371 static char get_bit(const felem_bytearray in, int i)
1372 {
1373 if (i < 0)
1374 return 0;
1375 return (in[i >> 3] >> (i & 7)) & 1;
1376 }
1377
1378 /* Interleaved point multiplication using precomputed point multiples:
1379 * The small point multiples 0*P, 1*P, ..., 16*P are in pre_comp[],
1380 * the scalars in scalars[]. If g_scalar is non-NULL, we also add this multiple
1381 * of the generator, using certain (large) precomputed multiples in g_pre_comp.
1382 * Output point (X, Y, Z) is stored in x_out, y_out, z_out */
static void batch_mul(felem x_out, felem y_out, felem z_out,
	const felem_bytearray scalars[], const unsigned num_points, const u8 *g_scalar,
	const int mixed, const felem pre_comp[][17][3], const felem g_pre_comp[16][3])
	{
	int i, skip;
	unsigned num, gen_mul = (g_scalar != NULL);
	felem nq[3], tmp[4];
	limb bits;
	u8 sign, digit;

	/* set nq to the point at infinity */
	memset(nq, 0, 3 * sizeof(felem));

	/* Loop over all scalars msb-to-lsb, interleaving additions
	 * of multiples of the generator (last quarter of rounds)
	 * and additions of other points multiples (every 5th round).
	 */
	skip = 1;	/* save two point operations in the first round */
	for (i = (num_points ? 520 : 130); i >= 0; --i)
		{
		/* double */
		if (!skip)
			point_double(nq[0], nq[1], nq[2], nq[0], nq[1], nq[2]);

		/* add multiples of the generator */
		if (gen_mul && (i <= 130))
			{
			/* clock one bit from each 130-bit quarter of the
			 * generator scalar into a 4-bit table index (see the
			 * gmul table layout comment) */
			bits = get_bit(g_scalar, i + 390) << 3;
			if (i < 130)
				{
				bits |= get_bit(g_scalar, i + 260) << 2;
				bits |= get_bit(g_scalar, i + 130) << 1;
				bits |= get_bit(g_scalar, i);
				}
			/* select the point to add, in constant time */
			select_point(bits, 16, g_pre_comp, tmp);
			if (!skip)
				{
				point_add(nq[0], nq[1], nq[2],
					nq[0], nq[1], nq[2],
					1 /* mixed */, tmp[0], tmp[1], tmp[2]);
				}
			else
				{
				/* first addition: just load the selected point */
				memcpy(nq, tmp, 3 * sizeof(felem));
				skip = 0;
				}
			}

		/* do other additions every 5 doublings */
		if (num_points && (i % 5 == 0))
			{
			/* loop over all scalars */
			for (num = 0; num < num_points; ++num)
				{
				/* read a 6-bit window and recode it into a
				 * sign and a digit in 0..16 */
				bits = get_bit(scalars[num], i + 4) << 5;
				bits |= get_bit(scalars[num], i + 3) << 4;
				bits |= get_bit(scalars[num], i + 2) << 3;
				bits |= get_bit(scalars[num], i + 1) << 2;
				bits |= get_bit(scalars[num], i) << 1;
				bits |= get_bit(scalars[num], i - 1);
				ec_GFp_nistp_recode_scalar_bits(&sign, &digit, bits);

				/* select the point to add or subtract, in constant time */
				select_point(digit, 17, pre_comp[num], tmp);
				felem_neg(tmp[3], tmp[1]); /* (X, -Y, Z) is the negative point */
				copy_conditional(tmp[1], tmp[3], (-(limb) sign));

				if (!skip)
					{
					point_add(nq[0], nq[1], nq[2],
						nq[0], nq[1], nq[2],
						mixed, tmp[0], tmp[1], tmp[2]);
					}
				else
					{
					memcpy(nq, tmp, 3 * sizeof(felem));
					skip = 0;
					}
				}
			}
		}
	felem_assign(x_out, nq[0]);
	felem_assign(y_out, nq[1]);
	felem_assign(z_out, nq[2]);
	}
1469
1470
1471 /* Precomputation for the group generator. */
typedef struct {
	/* precomputed generator multiples, laid out exactly like the
	 * built-in gmul table */
	felem g_pre_comp[16][3];
	/* reference count, adjusted via CRYPTO_add */
	int references;
} NISTP521_PRE_COMP;
1476
const EC_METHOD *EC_GFp_nistp521_method(void)
	{
	/* P-521-specific entries cover group setup, point multiplication,
	 * precomputation and affine-coordinate recovery; everything else
	 * delegates to the generic ec_GFp_simple_* / ec_GFp_nist_*
	 * implementations. */
	static const EC_METHOD ret = {
		EC_FLAGS_DEFAULT_OCT,
		NID_X9_62_prime_field,
		ec_GFp_nistp521_group_init,
		ec_GFp_simple_group_finish,
		ec_GFp_simple_group_clear_finish,
		ec_GFp_nist_group_copy,
		ec_GFp_nistp521_group_set_curve,
		ec_GFp_simple_group_get_curve,
		ec_GFp_simple_group_get_degree,
		ec_GFp_simple_group_check_discriminant,
		ec_GFp_simple_point_init,
		ec_GFp_simple_point_finish,
		ec_GFp_simple_point_clear_finish,
		ec_GFp_simple_point_copy,
		ec_GFp_simple_point_set_to_infinity,
		ec_GFp_simple_set_Jprojective_coordinates_GFp,
		ec_GFp_simple_get_Jprojective_coordinates_GFp,
		ec_GFp_simple_point_set_affine_coordinates,
		ec_GFp_nistp521_point_get_affine_coordinates,
		0 /* point_set_compressed_coordinates */,
		0 /* point2oct */,
		0 /* oct2point */,
		ec_GFp_simple_add,
		ec_GFp_simple_dbl,
		ec_GFp_simple_invert,
		ec_GFp_simple_is_at_infinity,
		ec_GFp_simple_is_on_curve,
		ec_GFp_simple_cmp,
		ec_GFp_simple_make_affine,
		ec_GFp_simple_points_make_affine,
		ec_GFp_nistp521_points_mul,
		ec_GFp_nistp521_precompute_mult,
		ec_GFp_nistp521_have_precompute_mult,
		ec_GFp_nist_field_mul,
		ec_GFp_nist_field_sqr,
		0 /* field_div */,
		0 /* field_encode */,
		0 /* field_decode */,
		0 /* field_set_to_one */ };

	return &ret;
	}
1522
1523
1524 /******************************************************************************/
1525 /* FUNCTIONS TO MANAGE PRECOMPUTATION
1526 */
1527
1528 static NISTP521_PRE_COMP *nistp521_pre_comp_new()
1529 {
1530 NISTP521_PRE_COMP *ret = NULL;
1531 ret = (NISTP521_PRE_COMP *)OPENSSL_malloc(sizeof(NISTP521_PRE_COMP));
1532 if (!ret)
1533 {
1534 ECerr(EC_F_NISTP521_PRE_COMP_NEW, ERR_R_MALLOC_FAILURE);
1535 return ret;
1536 }
1537 memset(ret->g_pre_comp, 0, sizeof(ret->g_pre_comp));
1538 ret->references = 1;
1539 return ret;
1540 }
1541
1542 static void *nistp521_pre_comp_dup(void *src_)
1543 {
1544 NISTP521_PRE_COMP *src = src_;
1545
1546 /* no need to actually copy, these objects never change! */
1547 CRYPTO_add(&src->references, 1, CRYPTO_LOCK_EC_PRE_COMP);
1548
1549 return src_;
1550 }
1551
1552 static void nistp521_pre_comp_free(void *pre_)
1553 {
1554 int i;
1555 NISTP521_PRE_COMP *pre = pre_;
1556
1557 if (!pre)
1558 return;
1559
1560 i = CRYPTO_add(&pre->references, -1, CRYPTO_LOCK_EC_PRE_COMP);
1561 if (i > 0)
1562 return;
1563
1564 OPENSSL_free(pre);
1565 }
1566
1567 static void nistp521_pre_comp_clear_free(void *pre_)
1568 {
1569 int i;
1570 NISTP521_PRE_COMP *pre = pre_;
1571
1572 if (!pre)
1573 return;
1574
1575 i = CRYPTO_add(&pre->references, -1, CRYPTO_LOCK_EC_PRE_COMP);
1576 if (i > 0)
1577 return;
1578
1579 OPENSSL_cleanse(pre, sizeof(*pre));
1580 OPENSSL_free(pre);
1581 }
1582
1583 /******************************************************************************/
1584 /* OPENSSL EC_METHOD FUNCTIONS
1585 */
1586
1587 int ec_GFp_nistp521_group_init(EC_GROUP *group)
1588 {
1589 int ret;
1590 ret = ec_GFp_simple_group_init(group);
1591 group->a_is_minus3 = 1;
1592 return ret;
1593 }
1594
/* Sets the curve parameters, insisting that they are exactly the P-521
 * constants this implementation is hard-wired to; any other (p, a, b)
 * triple is rejected with EC_R_WRONG_CURVE_PARAMETERS. */
int ec_GFp_nistp521_group_set_curve(EC_GROUP *group, const BIGNUM *p,
	const BIGNUM *a, const BIGNUM *b, BN_CTX *ctx)
	{
	int ret = 0;
	BN_CTX *new_ctx = NULL;
	BIGNUM *curve_p, *curve_a, *curve_b;

	/* a caller-supplied ctx is borrowed; otherwise create (and later
	 * free) a private one */
	if (ctx == NULL)
		if ((ctx = new_ctx = BN_CTX_new()) == NULL) return 0;
	BN_CTX_start(ctx);
	if (((curve_p = BN_CTX_get(ctx)) == NULL) ||
		((curve_a = BN_CTX_get(ctx)) == NULL) ||
		((curve_b = BN_CTX_get(ctx)) == NULL)) goto err;
	BN_bin2bn(nistp521_curve_params[0], sizeof(felem_bytearray), curve_p);
	BN_bin2bn(nistp521_curve_params[1], sizeof(felem_bytearray), curve_a);
	BN_bin2bn(nistp521_curve_params[2], sizeof(felem_bytearray), curve_b);
	if ((BN_cmp(curve_p, p)) || (BN_cmp(curve_a, a)) ||
		(BN_cmp(curve_b, b)))
		{
		ECerr(EC_F_EC_GFP_NISTP521_GROUP_SET_CURVE,
			EC_R_WRONG_CURVE_PARAMETERS);
		goto err;
		}
	group->field_mod_func = BN_nist_mod_521;
	ret = ec_GFp_simple_group_set_curve(group, p, a, b, ctx);
err:
	BN_CTX_end(ctx);
	if (new_ctx != NULL)
		BN_CTX_free(new_ctx);
	return ret;
	}
1626
1627 /* Takes the Jacobian coordinates (X, Y, Z) of a point and returns
1628 * (X', Y') = (X/Z^2, Y/Z^3) */
int ec_GFp_nistp521_point_get_affine_coordinates(const EC_GROUP *group,
	const EC_POINT *point, BIGNUM *x, BIGNUM *y, BN_CTX *ctx)
	{
	felem z1, z2, x_in, y_in, x_out, y_out;
	largefelem tmp;

	/* the point at infinity has no affine representation */
	if (EC_POINT_is_at_infinity(group, point))
		{
		ECerr(EC_F_EC_GFP_NISTP521_POINT_GET_AFFINE_COORDINATES,
			EC_R_POINT_AT_INFINITY);
		return 0;
		}
	if ((!BN_to_felem(x_in, &point->X)) || (!BN_to_felem(y_in, &point->Y)) ||
		(!BN_to_felem(z1, &point->Z))) return 0;
	/* z2 = Z^-1 */
	felem_inv(z2, z1);
	/* z1 = Z^-2 */
	felem_square(tmp, z2); felem_reduce(z1, tmp);
	/* x_in = X/Z^2 */
	felem_mul(tmp, x_in, z1); felem_reduce(x_in, tmp);
	/* reduce to the unique minimal representation before conversion */
	felem_contract(x_out, x_in);
	if (x != NULL)
		{
		if (!felem_to_BN(x, x_out))
			{
			ECerr(EC_F_EC_GFP_NISTP521_POINT_GET_AFFINE_COORDINATES, ERR_R_BN_LIB);
			return 0;
			}
		}
	/* z1 = Z^-3 */
	felem_mul(tmp, z1, z2); felem_reduce(z1, tmp);
	/* y_in = Y/Z^3 */
	felem_mul(tmp, y_in, z1); felem_reduce(y_in, tmp);
	felem_contract(y_out, y_in);
	if (y != NULL)
		{
		if (!felem_to_BN(y, y_out))
			{
			ECerr(EC_F_EC_GFP_NISTP521_POINT_GET_AFFINE_COORDINATES, ERR_R_BN_LIB);
			return 0;
			}
		}
	return 1;
	}
1668
/* make_points_affine batch-converts num Jacobian points to affine form by
 * delegating to the shared nistp helper, supplying the P-521 field
 * primitives as callbacks. */
static void make_points_affine(size_t num, felem points[/* num */][3], felem tmp_felems[/* num+1 */])
	{
	/* Runs in constant time, unless an input is the point at infinity
	 * (which normally shouldn't happen). */
	ec_GFp_nistp_points_make_affine_internal(
		num,
		points,
		sizeof(felem),
		tmp_felems,
		(void (*)(void *)) felem_one,
		(int (*)(const void *)) felem_is_zero_int,
		(void (*)(void *, const void *)) felem_assign,
		(void (*)(void *, const void *)) felem_square_reduce,
		(void (*)(void *, const void *, const void *)) felem_mul_reduce,
		(void (*)(void *, const void *)) felem_inv,
		(void (*)(void *, const void *)) felem_contract);
	}
1686
1687 /* Computes scalar*generator + \sum scalars[i]*points[i], ignoring NULL values
1688 * Result is stored in r (r can equal one of the inputs). */
int ec_GFp_nistp521_points_mul(const EC_GROUP *group, EC_POINT *r,
	const BIGNUM *scalar, size_t num, const EC_POINT *points[],
	const BIGNUM *scalars[], BN_CTX *ctx)
	{
	int ret = 0;
	int j;
	int mixed = 0;
	BN_CTX *new_ctx = NULL;
	BIGNUM *x, *y, *z, *tmp_scalar;
	felem_bytearray g_secret;
	felem_bytearray *secrets = NULL;
	felem (*pre_comp)[17][3] = NULL;
	felem *tmp_felems = NULL;
	felem_bytearray tmp;
	unsigned i, num_bytes;
	int have_pre_comp = 0;	/* usable generator precomputation found? */
	size_t num_points = num;
	felem x_in, y_in, z_in, x_out, y_out, z_out;
	NISTP521_PRE_COMP *pre = NULL;
	felem (*g_pre_comp)[3] = NULL;
	EC_POINT *generator = NULL;
	const EC_POINT *p = NULL;
	const BIGNUM *p_scalar = NULL;

	if (ctx == NULL)
		if ((ctx = new_ctx = BN_CTX_new()) == NULL) return 0;
	BN_CTX_start(ctx);
	if (((x = BN_CTX_get(ctx)) == NULL) ||
		((y = BN_CTX_get(ctx)) == NULL) ||
		((z = BN_CTX_get(ctx)) == NULL) ||
		((tmp_scalar = BN_CTX_get(ctx)) == NULL))
		goto err;

	if (scalar != NULL)
		{
		pre = EC_EX_DATA_get_data(group->extra_data,
			nistp521_pre_comp_dup, nistp521_pre_comp_free,
			nistp521_pre_comp_clear_free);
		if (pre)
			/* we have precomputation, try to use it */
			g_pre_comp = &pre->g_pre_comp[0];
		else
			/* try to use the standard precomputation */
			g_pre_comp = (felem (*)[3]) gmul;
		generator = EC_POINT_new(group);
		if (generator == NULL)
			goto err;
		/* get the generator from precomputation */
		if (!felem_to_BN(x, g_pre_comp[1][0]) ||
			!felem_to_BN(y, g_pre_comp[1][1]) ||
			!felem_to_BN(z, g_pre_comp[1][2]))
			{
			ECerr(EC_F_EC_GFP_NISTP521_POINTS_MUL, ERR_R_BN_LIB);
			goto err;
			}
		if (!EC_POINT_set_Jprojective_coordinates_GFp(group,
				generator, x, y, z, ctx))
			goto err;
		if (0 == EC_POINT_cmp(group, generator, group->generator, ctx))
			/* precomputation matches generator */
			have_pre_comp = 1;
		else
			/* we don't have valid precomputation:
			 * treat the generator as a random point */
			num_points++;
		}

	if (num_points > 0)
		{
		if (num_points >= 2)
			{
			/* unless we precompute multiples for just one point,
			 * converting those into affine form is time well spent */
			mixed = 1;
			}
		secrets = OPENSSL_malloc(num_points * sizeof(felem_bytearray));
		pre_comp = OPENSSL_malloc(num_points * 17 * 3 * sizeof(felem));
		if (mixed)
			tmp_felems = OPENSSL_malloc((num_points * 17 + 1) * sizeof(felem));
		if ((secrets == NULL) || (pre_comp == NULL) || (mixed && (tmp_felems == NULL)))
			{
			ECerr(EC_F_EC_GFP_NISTP521_POINTS_MUL, ERR_R_MALLOC_FAILURE);
			goto err;
			}

		/* we treat NULL scalars as 0, and NULL points as points at infinity,
		 * i.e., they contribute nothing to the linear combination */
		memset(secrets, 0, num_points * sizeof(felem_bytearray));
		memset(pre_comp, 0, num_points * 17 * 3 * sizeof(felem));
		for (i = 0; i < num_points; ++i)
			{
			if (i == num)
				/* we didn't have a valid precomputation, so we pick
				 * the generator */
				{
				p = EC_GROUP_get0_generator(group);
				p_scalar = scalar;
				}
			else
				/* the i^th point */
				{
				p = points[i];
				p_scalar = scalars[i];
				}
			if ((p_scalar != NULL) && (p != NULL))
				{
				/* reduce scalar to 0 <= scalar < 2^521 */
				if ((BN_num_bits(p_scalar) > 521) || (BN_is_negative(p_scalar)))
					{
					/* this is an unusual input, and we don't guarantee
					 * constant-timeness */
					if (!BN_nnmod(tmp_scalar, p_scalar, &group->order, ctx))
						{
						ECerr(EC_F_EC_GFP_NISTP521_POINTS_MUL, ERR_R_BN_LIB);
						goto err;
						}
					num_bytes = BN_bn2bin(tmp_scalar, tmp);
					}
				else
					num_bytes = BN_bn2bin(p_scalar, tmp);
				flip_endian(secrets[i], tmp, num_bytes);
				/* precompute multiples: table[j] = j*P, built by
				 * doubling for even j and adding P for odd j */
				if ((!BN_to_felem(x_out, &p->X)) ||
					(!BN_to_felem(y_out, &p->Y)) ||
					(!BN_to_felem(z_out, &p->Z))) goto err;
				memcpy(pre_comp[i][1][0], x_out, sizeof(felem));
				memcpy(pre_comp[i][1][1], y_out, sizeof(felem));
				memcpy(pre_comp[i][1][2], z_out, sizeof(felem));
				for (j = 2; j <= 16; ++j)
					{
					if (j & 1)
						{
						point_add(
							pre_comp[i][j][0], pre_comp[i][j][1], pre_comp[i][j][2],
							pre_comp[i][1][0], pre_comp[i][1][1], pre_comp[i][1][2],
							0, pre_comp[i][j-1][0], pre_comp[i][j-1][1], pre_comp[i][j-1][2]);
						}
					else
						{
						point_double(
							pre_comp[i][j][0], pre_comp[i][j][1], pre_comp[i][j][2],
							pre_comp[i][j/2][0], pre_comp[i][j/2][1], pre_comp[i][j/2][2]);
						}
					}
				}
			}
		if (mixed)
			make_points_affine(num_points * 17, pre_comp[0], tmp_felems);
		}

	/* the scalar for the generator */
	if ((scalar != NULL) && (have_pre_comp))
		{
		memset(g_secret, 0, sizeof(g_secret));
		/* reduce scalar to 0 <= scalar < 2^521 */
		if ((BN_num_bits(scalar) > 521) || (BN_is_negative(scalar)))
			{
			/* this is an unusual input, and we don't guarantee
			 * constant-timeness */
			if (!BN_nnmod(tmp_scalar, scalar, &group->order, ctx))
				{
				ECerr(EC_F_EC_GFP_NISTP521_POINTS_MUL, ERR_R_BN_LIB);
				goto err;
				}
			num_bytes = BN_bn2bin(tmp_scalar, tmp);
			}
		else
			num_bytes = BN_bn2bin(scalar, tmp);
		flip_endian(g_secret, tmp, num_bytes);
		/* do the multiplication with generator precomputation */
		batch_mul(x_out, y_out, z_out,
			(const felem_bytearray (*)) secrets, num_points,
			g_secret,
			mixed, (const felem (*)[17][3]) pre_comp,
			(const felem (*)[3]) g_pre_comp);
		}
	else
		/* do the multiplication without generator precomputation */
		batch_mul(x_out, y_out, z_out,
			(const felem_bytearray (*)) secrets, num_points,
			NULL, mixed, (const felem (*)[17][3]) pre_comp, NULL);
	/* reduce the output to its unique minimal representation */
	felem_contract(x_in, x_out);
	felem_contract(y_in, y_out);
	felem_contract(z_in, z_out);
	if ((!felem_to_BN(x, x_in)) || (!felem_to_BN(y, y_in)) ||
		(!felem_to_BN(z, z_in)))
		{
		ECerr(EC_F_EC_GFP_NISTP521_POINTS_MUL, ERR_R_BN_LIB);
		goto err;
		}
	ret = EC_POINT_set_Jprojective_coordinates_GFp(group, r, x, y, z, ctx);

err:
	BN_CTX_end(ctx);
	if (generator != NULL)
		EC_POINT_free(generator);
	if (new_ctx != NULL)
		BN_CTX_free(new_ctx);
	if (secrets != NULL)
		OPENSSL_free(secrets);
	if (pre_comp != NULL)
		OPENSSL_free(pre_comp);
	if (tmp_felems != NULL)
		OPENSSL_free(tmp_felems);
	return ret;
	}
1896
1897 int ec_GFp_nistp521_precompute_mult(EC_GROUP *group, BN_CTX *ctx)
1898 {
1899 int ret = 0;
1900 NISTP521_PRE_COMP *pre = NULL;
1901 int i, j;
1902 BN_CTX *new_ctx = NULL;
1903 BIGNUM *x, *y;
1904 EC_POINT *generator = NULL;
1905 felem tmp_felems[16];
1906
1907 /* throw away old precomputation */
1908 EC_EX_DATA_free_data(&group->extra_data, nistp521_pre_comp_dup,
1909 nistp521_pre_comp_free, nistp521_pre_comp_clear_free);
1910 if (ctx == NULL)
1911 if ((ctx = new_ctx = BN_CTX_new()) == NULL) return 0;
1912 BN_CTX_start(ctx);
1913 if (((x = BN_CTX_get(ctx)) == NULL) ||
1914 ((y = BN_CTX_get(ctx)) == NULL))
1915 goto err;
1916 /* get the generator */
1917 if (group->generator == NULL) goto err;
1918 generator = EC_POINT_new(group);
1919 if (generator == NULL)
1920 goto err;
1921 BN_bin2bn(nistp521_curve_params[3], sizeof (felem_bytearray), x);
1922 BN_bin2bn(nistp521_curve_params[4], sizeof (felem_bytearray), y);
1923 if (!EC_POINT_set_affine_coordinates_GFp(group, generator, x, y, ctx))
1924 goto err;
1925 if ((pre = nistp521_pre_comp_new()) == NULL)
1926 goto err;
1927 /* if the generator is the standard one, use built-in precomputation */
1928 if (0 == EC_POINT_cmp(group, generator, group->generator, ctx))
1929 {
1930 memcpy(pre->g_pre_comp, gmul, sizeof(pre->g_pre_comp));
1931 ret = 1;
1932 goto err;
1933 }
1934 if ((!BN_to_felem(pre->g_pre_comp[1][0], &group->generator->X)) ||
1935 (!BN_to_felem(pre->g_pre_comp[1][1], &group->generator->Y)) ||
1936 (!BN_to_felem(pre->g_pre_comp[1][2], &group->generator->Z)))
1937 goto err;
1938 /* compute 2^130*G, 2^260*G, 2^390*G */
1939 for (i = 1; i <= 4; i <<= 1)
1940 {
1941 point_double(pre->g_pre_comp[2*i][0], pre->g_pre_comp[2*i][1],
1942 pre->g_pre_comp[2*i][2], pre->g_pre_comp[i][0],
1943 pre->g_pre_comp[i][1], pre->g_pre_comp[i][2]);
1944 for (j = 0; j < 129; ++j)
1945 {
1946 point_double(pre->g_pre_comp[2*i][0],
1947 pre->g_pre_comp[2*i][1],
1948 pre->g_pre_comp[2*i][2],
1949 pre->g_pre_comp[2*i][0],
1950 pre->g_pre_comp[2*i][1],
1951 pre->g_pre_comp[2*i][2]);
1952 }
1953 }
1954 /* g_pre_comp[0] is the point at infinity */
1955 memset(pre->g_pre_comp[0], 0, sizeof(pre->g_pre_comp[0]));
1956 /* the remaining multiples */
1957 /* 2^130*G + 2^260*G */
1958 point_add(pre->g_pre_comp[6][0], pre->g_pre_comp[6][1],
1959 pre->g_pre_comp[6][2], pre->g_pre_comp[4][0],
1960 pre->g_pre_comp[4][1], pre->g_pre_comp[4][2],
1961 0, pre->g_pre_comp[2][0], pre->g_pre_comp[2][1],
1962 pre->g_pre_comp[2][2]);
1963 /* 2^130*G + 2^390*G */
1964 point_add(pre->g_pre_comp[10][0], pre->g_pre_comp[10][1],
1965 pre->g_pre_comp[10][2], pre->g_pre_comp[8][0],
1966 pre->g_pre_comp[8][1], pre->g_pre_comp[8][2],
1967 0, pre->g_pre_comp[2][0], pre->g_pre_comp[2][1],
1968 pre->g_pre_comp[2][2]);
1969 /* 2^260*G + 2^390*G */
1970 point_add(pre->g_pre_comp[12][0], pre->g_pre_comp[12][1],
1971 pre->g_pre_comp[12][2], pre->g_pre_comp[8][0],
1972 pre->g_pre_comp[8][1], pre->g_pre_comp[8][2],
1973 0, pre->g_pre_comp[4][0], pre->g_pre_comp[4][1],
1974 pre->g_pre_comp[4][2]);
1975 /* 2^130*G + 2^260*G + 2^390*G */
1976 point_add(pre->g_pre_comp[14][0], pre->g_pre_comp[14][1],
1977 pre->g_pre_comp[14][2], pre->g_pre_comp[12][0],
1978 pre->g_pre_comp[12][1], pre->g_pre_comp[12][2],
1979 0, pre->g_pre_comp[2][0], pre->g_pre_comp[2][1],
1980 pre->g_pre_comp[2][2]);
1981 for (i = 1; i < 8; ++i)
1982 {
1983 /* odd multiples: add G */
1984 point_add(pre->g_pre_comp[2*i+1][0], pre->g_pre_comp[2*i+1][1],
1985 pre->g_pre_comp[2*i+1][2], pre->g_pre_comp[2*i][0],
1986 pre->g_pre_comp[2*i][1], pre->g_pre_comp[2*i][2],
1987 0, pre->g_pre_comp[1][0], pre->g_pre_comp[1][1],
1988 pre->g_pre_comp[1][2]);
1989 }
1990 make_points_affine(15, &(pre->g_pre_comp[1]), tmp_felems);
1991
1992 if (!EC_EX_DATA_set_data(&group->extra_data, pre, nistp521_pre_comp_dup,
1993 nistp521_pre_comp_free, nistp521_pre_comp_clear_free))
1994 goto err;
1995 ret = 1;
1996 pre = NULL;
1997 err:
1998 BN_CTX_end(ctx);
1999 if (generator != NULL)
2000 EC_POINT_free(generator);
2001 if (new_ctx != NULL)
2002 BN_CTX_free(new_ctx);
2003 if (pre)
2004 nistp521_pre_comp_free(pre);
2005 return ret;
2006 }
2007
2008 int ec_GFp_nistp521_have_precompute_mult(const EC_GROUP *group)
2009 {
2010 if (EC_EX_DATA_get_data(group->extra_data, nistp521_pre_comp_dup,
2011 nistp521_pre_comp_free, nistp521_pre_comp_clear_free)
2012 != NULL)
2013 return 1;
2014 else
2015 return 0;
2016 }
2017
#else
/* ISO C forbids an empty translation unit; when the 64-bit NIST-P code is
 * compiled out, provide a dummy file-scope object (self-referential so it
 * is trivially initialized and never flagged as unused). */
static void *dummy=&dummy;
#endif