]> git.ipfire.org Git - thirdparty/openssl.git/blob - crypto/ec/ecp_nistp521.c
Fix strange formatting by indent
[thirdparty/openssl.git] / crypto / ec / ecp_nistp521.c
1 /* crypto/ec/ecp_nistp521.c */
2 /*
3 * Written by Adam Langley (Google) for the OpenSSL project
4 */
5 /* Copyright 2011 Google Inc.
6 *
7 * Licensed under the Apache License, Version 2.0 (the "License");
8 *
9 * you may not use this file except in compliance with the License.
10 * You may obtain a copy of the License at
11 *
12 * http://www.apache.org/licenses/LICENSE-2.0
13 *
14 * Unless required by applicable law or agreed to in writing, software
15 * distributed under the License is distributed on an "AS IS" BASIS,
16 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 * See the License for the specific language governing permissions and
18 * limitations under the License.
19 */
20
21 /*
22 * A 64-bit implementation of the NIST P-521 elliptic curve point multiplication
23 *
24 * OpenSSL integration was taken from Emilia Kasper's work in ecp_nistp224.c.
25 * Otherwise based on Emilia's P224 work, which was inspired by my curve25519
26 * work which got its smarts from Daniel J. Bernstein's work on the same.
27 */
28
29 #include <openssl/opensslconf.h>
30 #ifndef OPENSSL_NO_EC_NISTP_64_GCC_128
31
32 #ifndef OPENSSL_SYS_VMS
33 #include <stdint.h>
34 #else
35 #include <inttypes.h>
36 #endif
37
38 #include <string.h>
39 #include <openssl/err.h>
40 #include "ec_lcl.h"
41
42 #if defined(__GNUC__) && (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 1))
43 /* even with gcc, the typedef won't work for 32-bit platforms */
44 typedef __uint128_t uint128_t; /* nonstandard; implemented by gcc on 64-bit platforms */
45 #else
46 #error "Need GCC 3.1 or later to define type uint128_t"
47 #endif
48
49 typedef uint8_t u8;
50 typedef uint64_t u64;
51 typedef int64_t s64;
52
53 /* The underlying field.
54 *
55 * P521 operates over GF(2^521-1). We can serialise an element of this field
56 * into 66 bytes where the most significant byte contains only a single bit. We
57 * call this an felem_bytearray. */
58
59 typedef u8 felem_bytearray[66];
60
61 /* These are the parameters of P521, taken from FIPS 186-3, section D.1.2.5.
62 * These values are big-endian. */
63 static const felem_bytearray nistp521_curve_params[5] =
64 {
65 {0x01, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* p */
66 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
67 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
68 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
69 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
70 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
71 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
72 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
73 0xff, 0xff},
74 {0x01, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* a = -3 */
75 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
76 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
77 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
78 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
79 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
80 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
81 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
82 0xff, 0xfc},
83 {0x00, 0x51, 0x95, 0x3e, 0xb9, 0x61, 0x8e, 0x1c, /* b */
84 0x9a, 0x1f, 0x92, 0x9a, 0x21, 0xa0, 0xb6, 0x85,
85 0x40, 0xee, 0xa2, 0xda, 0x72, 0x5b, 0x99, 0xb3,
86 0x15, 0xf3, 0xb8, 0xb4, 0x89, 0x91, 0x8e, 0xf1,
87 0x09, 0xe1, 0x56, 0x19, 0x39, 0x51, 0xec, 0x7e,
88 0x93, 0x7b, 0x16, 0x52, 0xc0, 0xbd, 0x3b, 0xb1,
89 0xbf, 0x07, 0x35, 0x73, 0xdf, 0x88, 0x3d, 0x2c,
90 0x34, 0xf1, 0xef, 0x45, 0x1f, 0xd4, 0x6b, 0x50,
91 0x3f, 0x00},
92 {0x00, 0xc6, 0x85, 0x8e, 0x06, 0xb7, 0x04, 0x04, /* x */
93 0xe9, 0xcd, 0x9e, 0x3e, 0xcb, 0x66, 0x23, 0x95,
94 0xb4, 0x42, 0x9c, 0x64, 0x81, 0x39, 0x05, 0x3f,
95 0xb5, 0x21, 0xf8, 0x28, 0xaf, 0x60, 0x6b, 0x4d,
96 0x3d, 0xba, 0xa1, 0x4b, 0x5e, 0x77, 0xef, 0xe7,
97 0x59, 0x28, 0xfe, 0x1d, 0xc1, 0x27, 0xa2, 0xff,
98 0xa8, 0xde, 0x33, 0x48, 0xb3, 0xc1, 0x85, 0x6a,
99 0x42, 0x9b, 0xf9, 0x7e, 0x7e, 0x31, 0xc2, 0xe5,
100 0xbd, 0x66},
101 {0x01, 0x18, 0x39, 0x29, 0x6a, 0x78, 0x9a, 0x3b, /* y */
102 0xc0, 0x04, 0x5c, 0x8a, 0x5f, 0xb4, 0x2c, 0x7d,
103 0x1b, 0xd9, 0x98, 0xf5, 0x44, 0x49, 0x57, 0x9b,
104 0x44, 0x68, 0x17, 0xaf, 0xbd, 0x17, 0x27, 0x3e,
105 0x66, 0x2c, 0x97, 0xee, 0x72, 0x99, 0x5e, 0xf4,
106 0x26, 0x40, 0xc5, 0x50, 0xb9, 0x01, 0x3f, 0xad,
107 0x07, 0x61, 0x35, 0x3c, 0x70, 0x86, 0xa2, 0x72,
108 0xc2, 0x40, 0x88, 0xbe, 0x94, 0x76, 0x9f, 0xd1,
109 0x66, 0x50}
110 };
111
112 /*-
113 * The representation of field elements.
114 * ------------------------------------
115 *
116 * We represent field elements with nine values. These values are either 64 or
117 * 128 bits and the field element represented is:
118 * v[0]*2^0 + v[1]*2^58 + v[2]*2^116 + ... + v[8]*2^464 (mod p)
119 * Each of the nine values is called a 'limb'. Since the limbs are spaced only
120 * 58 bits apart, but are greater than 58 bits in length, the most significant
121 * bits of each limb overlap with the least significant bits of the next.
122 *
123 * A field element with 64-bit limbs is an 'felem'. One with 128-bit limbs is a
124 * 'largefelem' */
125
126 #define NLIMBS 9
127
128 typedef uint64_t limb;
129 typedef limb felem[NLIMBS];
130 typedef uint128_t largefelem[NLIMBS];
131
132 static const limb bottom57bits = 0x1ffffffffffffff;
133 static const limb bottom58bits = 0x3ffffffffffffff;
134
135 /* bin66_to_felem takes a little-endian byte array and converts it into felem
136 * form. This assumes that the CPU is little-endian. */
137 static void bin66_to_felem(felem out, const u8 in[66])
138 {
139 out[0] = (*((limb*) &in[0])) & bottom58bits;
140 out[1] = (*((limb*) &in[7]) >> 2) & bottom58bits;
141 out[2] = (*((limb*) &in[14]) >> 4) & bottom58bits;
142 out[3] = (*((limb*) &in[21]) >> 6) & bottom58bits;
143 out[4] = (*((limb*) &in[29])) & bottom58bits;
144 out[5] = (*((limb*) &in[36]) >> 2) & bottom58bits;
145 out[6] = (*((limb*) &in[43]) >> 4) & bottom58bits;
146 out[7] = (*((limb*) &in[50]) >> 6) & bottom58bits;
147 out[8] = (*((limb*) &in[58])) & bottom57bits;
148 }
149
150 /* felem_to_bin66 takes an felem and serialises into a little endian, 66 byte
151 * array. This assumes that the CPU is little-endian. */
152 static void felem_to_bin66(u8 out[66], const felem in)
153 {
154 memset(out, 0, 66);
155 (*((limb*) &out[0])) = in[0];
156 (*((limb*) &out[7])) |= in[1] << 2;
157 (*((limb*) &out[14])) |= in[2] << 4;
158 (*((limb*) &out[21])) |= in[3] << 6;
159 (*((limb*) &out[29])) = in[4];
160 (*((limb*) &out[36])) |= in[5] << 2;
161 (*((limb*) &out[43])) |= in[6] << 4;
162 (*((limb*) &out[50])) |= in[7] << 6;
163 (*((limb*) &out[58])) = in[8];
164 }
165
166 /* To preserve endianness when using BN_bn2bin and BN_bin2bn */
167 static void flip_endian(u8 *out, const u8 *in, unsigned len)
168 {
169 unsigned i;
170 for (i = 0; i < len; ++i)
171 out[i] = in[len-1-i];
172 }
173
174 /* BN_to_felem converts an OpenSSL BIGNUM into an felem */
175 static int BN_to_felem(felem out, const BIGNUM *bn)
176 {
177 felem_bytearray b_in;
178 felem_bytearray b_out;
179 unsigned num_bytes;
180
181 /* BN_bn2bin eats leading zeroes */
182 memset(b_out, 0, sizeof b_out);
183 num_bytes = BN_num_bytes(bn);
184 if (num_bytes > sizeof b_out)
185 {
186 ECerr(EC_F_BN_TO_FELEM, EC_R_BIGNUM_OUT_OF_RANGE);
187 return 0;
188 }
189 if (BN_is_negative(bn))
190 {
191 ECerr(EC_F_BN_TO_FELEM, EC_R_BIGNUM_OUT_OF_RANGE);
192 return 0;
193 }
194 num_bytes = BN_bn2bin(bn, b_in);
195 flip_endian(b_out, b_in, num_bytes);
196 bin66_to_felem(out, b_out);
197 return 1;
198 }
199
200 /* felem_to_BN converts an felem into an OpenSSL BIGNUM */
201 static BIGNUM *felem_to_BN(BIGNUM *out, const felem in)
202 {
203 felem_bytearray b_in, b_out;
204 felem_to_bin66(b_in, in);
205 flip_endian(b_out, b_in, sizeof b_out);
206 return BN_bin2bn(b_out, sizeof b_out, out);
207 }
208
209
210 /*-
211 * Field operations
212 * ----------------
213 */
214
215 static void felem_one(felem out)
216 {
217 out[0] = 1;
218 out[1] = 0;
219 out[2] = 0;
220 out[3] = 0;
221 out[4] = 0;
222 out[5] = 0;
223 out[6] = 0;
224 out[7] = 0;
225 out[8] = 0;
226 }
227
228 static void felem_assign(felem out, const felem in)
229 {
230 out[0] = in[0];
231 out[1] = in[1];
232 out[2] = in[2];
233 out[3] = in[3];
234 out[4] = in[4];
235 out[5] = in[5];
236 out[6] = in[6];
237 out[7] = in[7];
238 out[8] = in[8];
239 }
240
241 /* felem_sum64 sets out = out + in. */
242 static void felem_sum64(felem out, const felem in)
243 {
244 out[0] += in[0];
245 out[1] += in[1];
246 out[2] += in[2];
247 out[3] += in[3];
248 out[4] += in[4];
249 out[5] += in[5];
250 out[6] += in[6];
251 out[7] += in[7];
252 out[8] += in[8];
253 }
254
255 /* felem_scalar sets out = in * scalar */
256 static void felem_scalar(felem out, const felem in, limb scalar)
257 {
258 out[0] = in[0] * scalar;
259 out[1] = in[1] * scalar;
260 out[2] = in[2] * scalar;
261 out[3] = in[3] * scalar;
262 out[4] = in[4] * scalar;
263 out[5] = in[5] * scalar;
264 out[6] = in[6] * scalar;
265 out[7] = in[7] * scalar;
266 out[8] = in[8] * scalar;
267 }
268
269 /* felem_scalar64 sets out = out * scalar */
270 static void felem_scalar64(felem out, limb scalar)
271 {
272 out[0] *= scalar;
273 out[1] *= scalar;
274 out[2] *= scalar;
275 out[3] *= scalar;
276 out[4] *= scalar;
277 out[5] *= scalar;
278 out[6] *= scalar;
279 out[7] *= scalar;
280 out[8] *= scalar;
281 }
282
283 /* felem_scalar128 sets out = out * scalar */
284 static void felem_scalar128(largefelem out, limb scalar)
285 {
286 out[0] *= scalar;
287 out[1] *= scalar;
288 out[2] *= scalar;
289 out[3] *= scalar;
290 out[4] *= scalar;
291 out[5] *= scalar;
292 out[6] *= scalar;
293 out[7] *= scalar;
294 out[8] *= scalar;
295 }
296
297 /*-
298 * felem_neg sets |out| to |-in|
299 * On entry:
300 * in[i] < 2^59 + 2^14
301 * On exit:
302 * out[i] < 2^62
303 */
304 static void felem_neg(felem out, const felem in)
305 {
306 /* In order to prevent underflow, we subtract from 0 mod p. */
307 static const limb two62m3 = (((limb)1) << 62) - (((limb)1) << 5);
308 static const limb two62m2 = (((limb)1) << 62) - (((limb)1) << 4);
309
310 out[0] = two62m3 - in[0];
311 out[1] = two62m2 - in[1];
312 out[2] = two62m2 - in[2];
313 out[3] = two62m2 - in[3];
314 out[4] = two62m2 - in[4];
315 out[5] = two62m2 - in[5];
316 out[6] = two62m2 - in[6];
317 out[7] = two62m2 - in[7];
318 out[8] = two62m2 - in[8];
319 }
320
321 /*-
322 * felem_diff64 subtracts |in| from |out|
323 * On entry:
324 * in[i] < 2^59 + 2^14
325 * On exit:
326 * out[i] < out[i] + 2^62
327 */
328 static void felem_diff64(felem out, const felem in)
329 {
330 /* In order to prevent underflow, we add 0 mod p before subtracting. */
331 static const limb two62m3 = (((limb)1) << 62) - (((limb)1) << 5);
332 static const limb two62m2 = (((limb)1) << 62) - (((limb)1) << 4);
333
334 out[0] += two62m3 - in[0];
335 out[1] += two62m2 - in[1];
336 out[2] += two62m2 - in[2];
337 out[3] += two62m2 - in[3];
338 out[4] += two62m2 - in[4];
339 out[5] += two62m2 - in[5];
340 out[6] += two62m2 - in[6];
341 out[7] += two62m2 - in[7];
342 out[8] += two62m2 - in[8];
343 }
344
345 /*-
346 * felem_diff_128_64 subtracts |in| from |out|
347 * On entry:
348 * in[i] < 2^62 + 2^17
349 * On exit:
350 * out[i] < out[i] + 2^63
351 */
352 static void felem_diff_128_64(largefelem out, const felem in)
353 {
354 /* In order to prevent underflow, we add 0 mod p before subtracting. */
355 static const limb two63m6 = (((limb)1) << 62) - (((limb)1) << 5);
356 static const limb two63m5 = (((limb)1) << 62) - (((limb)1) << 4);
357
358 out[0] += two63m6 - in[0];
359 out[1] += two63m5 - in[1];
360 out[2] += two63m5 - in[2];
361 out[3] += two63m5 - in[3];
362 out[4] += two63m5 - in[4];
363 out[5] += two63m5 - in[5];
364 out[6] += two63m5 - in[6];
365 out[7] += two63m5 - in[7];
366 out[8] += two63m5 - in[8];
367 }
368
369 /*-
370 * felem_diff_128_64 subtracts |in| from |out|
371 * On entry:
372 * in[i] < 2^126
373 * On exit:
374 * out[i] < out[i] + 2^127 - 2^69
375 */
376 static void felem_diff128(largefelem out, const largefelem in)
377 {
378 /* In order to prevent underflow, we add 0 mod p before subtracting. */
379 static const uint128_t two127m70 = (((uint128_t)1) << 127) - (((uint128_t)1) << 70);
380 static const uint128_t two127m69 = (((uint128_t)1) << 127) - (((uint128_t)1) << 69);
381
382 out[0] += (two127m70 - in[0]);
383 out[1] += (two127m69 - in[1]);
384 out[2] += (two127m69 - in[2]);
385 out[3] += (two127m69 - in[3]);
386 out[4] += (two127m69 - in[4]);
387 out[5] += (two127m69 - in[5]);
388 out[6] += (two127m69 - in[6]);
389 out[7] += (two127m69 - in[7]);
390 out[8] += (two127m69 - in[8]);
391 }
392
393 /*-
394 * felem_square sets |out| = |in|^2
395 * On entry:
396 * in[i] < 2^62
397 * On exit:
398 * out[i] < 17 * max(in[i]) * max(in[i])
399 */
400 static void felem_square(largefelem out, const felem in)
401 {
402 felem inx2, inx4;
403 felem_scalar(inx2, in, 2);
404 felem_scalar(inx4, in, 4);
405
406 /*-
407 * We have many cases were we want to do
408 * in[x] * in[y] +
409 * in[y] * in[x]
410 * This is obviously just
411 * 2 * in[x] * in[y]
412 * However, rather than do the doubling on the 128 bit result, we
413 * double one of the inputs to the multiplication by reading from
414 * |inx2| */
415
416 out[0] = ((uint128_t) in[0]) * in[0];
417 out[1] = ((uint128_t) in[0]) * inx2[1];
418 out[2] = ((uint128_t) in[0]) * inx2[2] +
419 ((uint128_t) in[1]) * in[1];
420 out[3] = ((uint128_t) in[0]) * inx2[3] +
421 ((uint128_t) in[1]) * inx2[2];
422 out[4] = ((uint128_t) in[0]) * inx2[4] +
423 ((uint128_t) in[1]) * inx2[3] +
424 ((uint128_t) in[2]) * in[2];
425 out[5] = ((uint128_t) in[0]) * inx2[5] +
426 ((uint128_t) in[1]) * inx2[4] +
427 ((uint128_t) in[2]) * inx2[3];
428 out[6] = ((uint128_t) in[0]) * inx2[6] +
429 ((uint128_t) in[1]) * inx2[5] +
430 ((uint128_t) in[2]) * inx2[4] +
431 ((uint128_t) in[3]) * in[3];
432 out[7] = ((uint128_t) in[0]) * inx2[7] +
433 ((uint128_t) in[1]) * inx2[6] +
434 ((uint128_t) in[2]) * inx2[5] +
435 ((uint128_t) in[3]) * inx2[4];
436 out[8] = ((uint128_t) in[0]) * inx2[8] +
437 ((uint128_t) in[1]) * inx2[7] +
438 ((uint128_t) in[2]) * inx2[6] +
439 ((uint128_t) in[3]) * inx2[5] +
440 ((uint128_t) in[4]) * in[4];
441
442 /* The remaining limbs fall above 2^521, with the first falling at
443 * 2^522. They correspond to locations one bit up from the limbs
444 * produced above so we would have to multiply by two to align them.
445 * Again, rather than operate on the 128-bit result, we double one of
446 * the inputs to the multiplication. If we want to double for both this
447 * reason, and the reason above, then we end up multiplying by four. */
448
449 /* 9 */
450 out[0] += ((uint128_t) in[1]) * inx4[8] +
451 ((uint128_t) in[2]) * inx4[7] +
452 ((uint128_t) in[3]) * inx4[6] +
453 ((uint128_t) in[4]) * inx4[5];
454
455 /* 10 */
456 out[1] += ((uint128_t) in[2]) * inx4[8] +
457 ((uint128_t) in[3]) * inx4[7] +
458 ((uint128_t) in[4]) * inx4[6] +
459 ((uint128_t) in[5]) * inx2[5];
460
461 /* 11 */
462 out[2] += ((uint128_t) in[3]) * inx4[8] +
463 ((uint128_t) in[4]) * inx4[7] +
464 ((uint128_t) in[5]) * inx4[6];
465
466 /* 12 */
467 out[3] += ((uint128_t) in[4]) * inx4[8] +
468 ((uint128_t) in[5]) * inx4[7] +
469 ((uint128_t) in[6]) * inx2[6];
470
471 /* 13 */
472 out[4] += ((uint128_t) in[5]) * inx4[8] +
473 ((uint128_t) in[6]) * inx4[7];
474
475 /* 14 */
476 out[5] += ((uint128_t) in[6]) * inx4[8] +
477 ((uint128_t) in[7]) * inx2[7];
478
479 /* 15 */
480 out[6] += ((uint128_t) in[7]) * inx4[8];
481
482 /* 16 */
483 out[7] += ((uint128_t) in[8]) * inx2[8];
484 }
485
486 /*-
487 * felem_mul sets |out| = |in1| * |in2|
488 * On entry:
489 * in1[i] < 2^64
490 * in2[i] < 2^63
491 * On exit:
492 * out[i] < 17 * max(in1[i]) * max(in2[i])
493 */
494 static void felem_mul(largefelem out, const felem in1, const felem in2)
495 {
496 felem in2x2;
497 felem_scalar(in2x2, in2, 2);
498
499 out[0] = ((uint128_t) in1[0]) * in2[0];
500
501 out[1] = ((uint128_t) in1[0]) * in2[1] +
502 ((uint128_t) in1[1]) * in2[0];
503
504 out[2] = ((uint128_t) in1[0]) * in2[2] +
505 ((uint128_t) in1[1]) * in2[1] +
506 ((uint128_t) in1[2]) * in2[0];
507
508 out[3] = ((uint128_t) in1[0]) * in2[3] +
509 ((uint128_t) in1[1]) * in2[2] +
510 ((uint128_t) in1[2]) * in2[1] +
511 ((uint128_t) in1[3]) * in2[0];
512
513 out[4] = ((uint128_t) in1[0]) * in2[4] +
514 ((uint128_t) in1[1]) * in2[3] +
515 ((uint128_t) in1[2]) * in2[2] +
516 ((uint128_t) in1[3]) * in2[1] +
517 ((uint128_t) in1[4]) * in2[0];
518
519 out[5] = ((uint128_t) in1[0]) * in2[5] +
520 ((uint128_t) in1[1]) * in2[4] +
521 ((uint128_t) in1[2]) * in2[3] +
522 ((uint128_t) in1[3]) * in2[2] +
523 ((uint128_t) in1[4]) * in2[1] +
524 ((uint128_t) in1[5]) * in2[0];
525
526 out[6] = ((uint128_t) in1[0]) * in2[6] +
527 ((uint128_t) in1[1]) * in2[5] +
528 ((uint128_t) in1[2]) * in2[4] +
529 ((uint128_t) in1[3]) * in2[3] +
530 ((uint128_t) in1[4]) * in2[2] +
531 ((uint128_t) in1[5]) * in2[1] +
532 ((uint128_t) in1[6]) * in2[0];
533
534 out[7] = ((uint128_t) in1[0]) * in2[7] +
535 ((uint128_t) in1[1]) * in2[6] +
536 ((uint128_t) in1[2]) * in2[5] +
537 ((uint128_t) in1[3]) * in2[4] +
538 ((uint128_t) in1[4]) * in2[3] +
539 ((uint128_t) in1[5]) * in2[2] +
540 ((uint128_t) in1[6]) * in2[1] +
541 ((uint128_t) in1[7]) * in2[0];
542
543 out[8] = ((uint128_t) in1[0]) * in2[8] +
544 ((uint128_t) in1[1]) * in2[7] +
545 ((uint128_t) in1[2]) * in2[6] +
546 ((uint128_t) in1[3]) * in2[5] +
547 ((uint128_t) in1[4]) * in2[4] +
548 ((uint128_t) in1[5]) * in2[3] +
549 ((uint128_t) in1[6]) * in2[2] +
550 ((uint128_t) in1[7]) * in2[1] +
551 ((uint128_t) in1[8]) * in2[0];
552
553 /* See comment in felem_square about the use of in2x2 here */
554
555 out[0] += ((uint128_t) in1[1]) * in2x2[8] +
556 ((uint128_t) in1[2]) * in2x2[7] +
557 ((uint128_t) in1[3]) * in2x2[6] +
558 ((uint128_t) in1[4]) * in2x2[5] +
559 ((uint128_t) in1[5]) * in2x2[4] +
560 ((uint128_t) in1[6]) * in2x2[3] +
561 ((uint128_t) in1[7]) * in2x2[2] +
562 ((uint128_t) in1[8]) * in2x2[1];
563
564 out[1] += ((uint128_t) in1[2]) * in2x2[8] +
565 ((uint128_t) in1[3]) * in2x2[7] +
566 ((uint128_t) in1[4]) * in2x2[6] +
567 ((uint128_t) in1[5]) * in2x2[5] +
568 ((uint128_t) in1[6]) * in2x2[4] +
569 ((uint128_t) in1[7]) * in2x2[3] +
570 ((uint128_t) in1[8]) * in2x2[2];
571
572 out[2] += ((uint128_t) in1[3]) * in2x2[8] +
573 ((uint128_t) in1[4]) * in2x2[7] +
574 ((uint128_t) in1[5]) * in2x2[6] +
575 ((uint128_t) in1[6]) * in2x2[5] +
576 ((uint128_t) in1[7]) * in2x2[4] +
577 ((uint128_t) in1[8]) * in2x2[3];
578
579 out[3] += ((uint128_t) in1[4]) * in2x2[8] +
580 ((uint128_t) in1[5]) * in2x2[7] +
581 ((uint128_t) in1[6]) * in2x2[6] +
582 ((uint128_t) in1[7]) * in2x2[5] +
583 ((uint128_t) in1[8]) * in2x2[4];
584
585 out[4] += ((uint128_t) in1[5]) * in2x2[8] +
586 ((uint128_t) in1[6]) * in2x2[7] +
587 ((uint128_t) in1[7]) * in2x2[6] +
588 ((uint128_t) in1[8]) * in2x2[5];
589
590 out[5] += ((uint128_t) in1[6]) * in2x2[8] +
591 ((uint128_t) in1[7]) * in2x2[7] +
592 ((uint128_t) in1[8]) * in2x2[6];
593
594 out[6] += ((uint128_t) in1[7]) * in2x2[8] +
595 ((uint128_t) in1[8]) * in2x2[7];
596
597 out[7] += ((uint128_t) in1[8]) * in2x2[8];
598 }
599
600 static const limb bottom52bits = 0xfffffffffffff;
601
602 /*-
603 * felem_reduce converts a largefelem to an felem.
604 * On entry:
605 * in[i] < 2^128
606 * On exit:
607 * out[i] < 2^59 + 2^14
608 */
609 static void felem_reduce(felem out, const largefelem in)
610 {
611 u64 overflow1, overflow2;
612
613 out[0] = ((limb) in[0]) & bottom58bits;
614 out[1] = ((limb) in[1]) & bottom58bits;
615 out[2] = ((limb) in[2]) & bottom58bits;
616 out[3] = ((limb) in[3]) & bottom58bits;
617 out[4] = ((limb) in[4]) & bottom58bits;
618 out[5] = ((limb) in[5]) & bottom58bits;
619 out[6] = ((limb) in[6]) & bottom58bits;
620 out[7] = ((limb) in[7]) & bottom58bits;
621 out[8] = ((limb) in[8]) & bottom58bits;
622
623 /* out[i] < 2^58 */
624
625 out[1] += ((limb) in[0]) >> 58;
626 out[1] += (((limb) (in[0] >> 64)) & bottom52bits) << 6;
627 /*-
628 * out[1] < 2^58 + 2^6 + 2^58
629 * = 2^59 + 2^6
630 */
631 out[2] += ((limb) (in[0] >> 64)) >> 52;
632
633 out[2] += ((limb) in[1]) >> 58;
634 out[2] += (((limb) (in[1] >> 64)) & bottom52bits) << 6;
635 out[3] += ((limb) (in[1] >> 64)) >> 52;
636
637 out[3] += ((limb) in[2]) >> 58;
638 out[3] += (((limb) (in[2] >> 64)) & bottom52bits) << 6;
639 out[4] += ((limb) (in[2] >> 64)) >> 52;
640
641 out[4] += ((limb) in[3]) >> 58;
642 out[4] += (((limb) (in[3] >> 64)) & bottom52bits) << 6;
643 out[5] += ((limb) (in[3] >> 64)) >> 52;
644
645 out[5] += ((limb) in[4]) >> 58;
646 out[5] += (((limb) (in[4] >> 64)) & bottom52bits) << 6;
647 out[6] += ((limb) (in[4] >> 64)) >> 52;
648
649 out[6] += ((limb) in[5]) >> 58;
650 out[6] += (((limb) (in[5] >> 64)) & bottom52bits) << 6;
651 out[7] += ((limb) (in[5] >> 64)) >> 52;
652
653 out[7] += ((limb) in[6]) >> 58;
654 out[7] += (((limb) (in[6] >> 64)) & bottom52bits) << 6;
655 out[8] += ((limb) (in[6] >> 64)) >> 52;
656
657 out[8] += ((limb) in[7]) >> 58;
658 out[8] += (((limb) (in[7] >> 64)) & bottom52bits) << 6;
659 /*-
660 * out[x > 1] < 2^58 + 2^6 + 2^58 + 2^12
661 * < 2^59 + 2^13
662 */
663 overflow1 = ((limb) (in[7] >> 64)) >> 52;
664
665 overflow1 += ((limb) in[8]) >> 58;
666 overflow1 += (((limb) (in[8] >> 64)) & bottom52bits) << 6;
667 overflow2 = ((limb) (in[8] >> 64)) >> 52;
668
669 overflow1 <<= 1; /* overflow1 < 2^13 + 2^7 + 2^59 */
670 overflow2 <<= 1; /* overflow2 < 2^13 */
671
672 out[0] += overflow1; /* out[0] < 2^60 */
673 out[1] += overflow2; /* out[1] < 2^59 + 2^6 + 2^13 */
674
675 out[1] += out[0] >> 58; out[0] &= bottom58bits;
676 /*-
677 * out[0] < 2^58
678 * out[1] < 2^59 + 2^6 + 2^13 + 2^2
679 * < 2^59 + 2^14
680 */
681 }
682
683 static void felem_square_reduce(felem out, const felem in)
684 {
685 largefelem tmp;
686 felem_square(tmp, in);
687 felem_reduce(out, tmp);
688 }
689
690 static void felem_mul_reduce(felem out, const felem in1, const felem in2)
691 {
692 largefelem tmp;
693 felem_mul(tmp, in1, in2);
694 felem_reduce(out, tmp);
695 }
696
697 /*-
698 * felem_inv calculates |out| = |in|^{-1}
699 *
700 * Based on Fermat's Little Theorem:
701 * a^p = a (mod p)
702 * a^{p-1} = 1 (mod p)
703 * a^{p-2} = a^{-1} (mod p)
704 */
705 static void felem_inv(felem out, const felem in)
706 {
707 felem ftmp, ftmp2, ftmp3, ftmp4;
708 largefelem tmp;
709 unsigned i;
710
711 felem_square(tmp, in); felem_reduce(ftmp, tmp); /* 2^1 */
712 felem_mul(tmp, in, ftmp); felem_reduce(ftmp, tmp); /* 2^2 - 2^0 */
713 felem_assign(ftmp2, ftmp);
714 felem_square(tmp, ftmp); felem_reduce(ftmp, tmp); /* 2^3 - 2^1 */
715 felem_mul(tmp, in, ftmp); felem_reduce(ftmp, tmp); /* 2^3 - 2^0 */
716 felem_square(tmp, ftmp); felem_reduce(ftmp, tmp); /* 2^4 - 2^1 */
717
718 felem_square(tmp, ftmp2); felem_reduce(ftmp3, tmp); /* 2^3 - 2^1 */
719 felem_square(tmp, ftmp3); felem_reduce(ftmp3, tmp); /* 2^4 - 2^2 */
720 felem_mul(tmp, ftmp3, ftmp2); felem_reduce(ftmp3, tmp); /* 2^4 - 2^0 */
721
722 felem_assign(ftmp2, ftmp3);
723 felem_square(tmp, ftmp3); felem_reduce(ftmp3, tmp); /* 2^5 - 2^1 */
724 felem_square(tmp, ftmp3); felem_reduce(ftmp3, tmp); /* 2^6 - 2^2 */
725 felem_square(tmp, ftmp3); felem_reduce(ftmp3, tmp); /* 2^7 - 2^3 */
726 felem_square(tmp, ftmp3); felem_reduce(ftmp3, tmp); /* 2^8 - 2^4 */
727 felem_assign(ftmp4, ftmp3);
728 felem_mul(tmp, ftmp3, ftmp); felem_reduce(ftmp4, tmp); /* 2^8 - 2^1 */
729 felem_square(tmp, ftmp4); felem_reduce(ftmp4, tmp); /* 2^9 - 2^2 */
730 felem_mul(tmp, ftmp3, ftmp2); felem_reduce(ftmp3, tmp); /* 2^8 - 2^0 */
731 felem_assign(ftmp2, ftmp3);
732
733 for (i = 0; i < 8; i++)
734 {
735 felem_square(tmp, ftmp3); felem_reduce(ftmp3, tmp); /* 2^16 - 2^8 */
736 }
737 felem_mul(tmp, ftmp3, ftmp2); felem_reduce(ftmp3, tmp); /* 2^16 - 2^0 */
738 felem_assign(ftmp2, ftmp3);
739
740 for (i = 0; i < 16; i++)
741 {
742 felem_square(tmp, ftmp3); felem_reduce(ftmp3, tmp); /* 2^32 - 2^16 */
743 }
744 felem_mul(tmp, ftmp3, ftmp2); felem_reduce(ftmp3, tmp); /* 2^32 - 2^0 */
745 felem_assign(ftmp2, ftmp3);
746
747 for (i = 0; i < 32; i++)
748 {
749 felem_square(tmp, ftmp3); felem_reduce(ftmp3, tmp); /* 2^64 - 2^32 */
750 }
751 felem_mul(tmp, ftmp3, ftmp2); felem_reduce(ftmp3, tmp); /* 2^64 - 2^0 */
752 felem_assign(ftmp2, ftmp3);
753
754 for (i = 0; i < 64; i++)
755 {
756 felem_square(tmp, ftmp3); felem_reduce(ftmp3, tmp); /* 2^128 - 2^64 */
757 }
758 felem_mul(tmp, ftmp3, ftmp2); felem_reduce(ftmp3, tmp); /* 2^128 - 2^0 */
759 felem_assign(ftmp2, ftmp3);
760
761 for (i = 0; i < 128; i++)
762 {
763 felem_square(tmp, ftmp3); felem_reduce(ftmp3, tmp); /* 2^256 - 2^128 */
764 }
765 felem_mul(tmp, ftmp3, ftmp2); felem_reduce(ftmp3, tmp); /* 2^256 - 2^0 */
766 felem_assign(ftmp2, ftmp3);
767
768 for (i = 0; i < 256; i++)
769 {
770 felem_square(tmp, ftmp3); felem_reduce(ftmp3, tmp); /* 2^512 - 2^256 */
771 }
772 felem_mul(tmp, ftmp3, ftmp2); felem_reduce(ftmp3, tmp); /* 2^512 - 2^0 */
773
774 for (i = 0; i < 9; i++)
775 {
776 felem_square(tmp, ftmp3); felem_reduce(ftmp3, tmp); /* 2^521 - 2^9 */
777 }
778 felem_mul(tmp, ftmp3, ftmp4); felem_reduce(ftmp3, tmp); /* 2^512 - 2^2 */
779 felem_mul(tmp, ftmp3, in); felem_reduce(out, tmp); /* 2^512 - 3 */
780 }
781
782 /* This is 2^521-1, expressed as an felem */
783 static const felem kPrime =
784 {
785 0x03ffffffffffffff, 0x03ffffffffffffff, 0x03ffffffffffffff,
786 0x03ffffffffffffff, 0x03ffffffffffffff, 0x03ffffffffffffff,
787 0x03ffffffffffffff, 0x03ffffffffffffff, 0x01ffffffffffffff
788 };
789
790 /*-
791 * felem_is_zero returns a limb with all bits set if |in| == 0 (mod p) and 0
792 * otherwise.
793 * On entry:
794 * in[i] < 2^59 + 2^14
795 */
796 static limb felem_is_zero(const felem in)
797 {
798 felem ftmp;
799 limb is_zero, is_p;
800 felem_assign(ftmp, in);
801
802 ftmp[0] += ftmp[8] >> 57; ftmp[8] &= bottom57bits;
803 /* ftmp[8] < 2^57 */
804 ftmp[1] += ftmp[0] >> 58; ftmp[0] &= bottom58bits;
805 ftmp[2] += ftmp[1] >> 58; ftmp[1] &= bottom58bits;
806 ftmp[3] += ftmp[2] >> 58; ftmp[2] &= bottom58bits;
807 ftmp[4] += ftmp[3] >> 58; ftmp[3] &= bottom58bits;
808 ftmp[5] += ftmp[4] >> 58; ftmp[4] &= bottom58bits;
809 ftmp[6] += ftmp[5] >> 58; ftmp[5] &= bottom58bits;
810 ftmp[7] += ftmp[6] >> 58; ftmp[6] &= bottom58bits;
811 ftmp[8] += ftmp[7] >> 58; ftmp[7] &= bottom58bits;
812 /* ftmp[8] < 2^57 + 4 */
813
814 /* The ninth limb of 2*(2^521-1) is 0x03ffffffffffffff, which is
815 * greater than our bound for ftmp[8]. Therefore we only have to check
816 * if the zero is zero or 2^521-1. */
817
818 is_zero = 0;
819 is_zero |= ftmp[0];
820 is_zero |= ftmp[1];
821 is_zero |= ftmp[2];
822 is_zero |= ftmp[3];
823 is_zero |= ftmp[4];
824 is_zero |= ftmp[5];
825 is_zero |= ftmp[6];
826 is_zero |= ftmp[7];
827 is_zero |= ftmp[8];
828
829 is_zero--;
830 /* We know that ftmp[i] < 2^63, therefore the only way that the top bit
831 * can be set is if is_zero was 0 before the decrement. */
832 is_zero = ((s64) is_zero) >> 63;
833
834 is_p = ftmp[0] ^ kPrime[0];
835 is_p |= ftmp[1] ^ kPrime[1];
836 is_p |= ftmp[2] ^ kPrime[2];
837 is_p |= ftmp[3] ^ kPrime[3];
838 is_p |= ftmp[4] ^ kPrime[4];
839 is_p |= ftmp[5] ^ kPrime[5];
840 is_p |= ftmp[6] ^ kPrime[6];
841 is_p |= ftmp[7] ^ kPrime[7];
842 is_p |= ftmp[8] ^ kPrime[8];
843
844 is_p--;
845 is_p = ((s64) is_p) >> 63;
846
847 is_zero |= is_p;
848 return is_zero;
849 }
850
851 static int felem_is_zero_int(const felem in)
852 {
853 return (int) (felem_is_zero(in) & ((limb)1));
854 }
855
856 /*-
857 * felem_contract converts |in| to its unique, minimal representation.
858 * On entry:
859 * in[i] < 2^59 + 2^14
860 */
861 static void felem_contract(felem out, const felem in)
862 {
863 limb is_p, is_greater, sign;
864 static const limb two58 = ((limb)1) << 58;
865
866 felem_assign(out, in);
867
868 out[0] += out[8] >> 57; out[8] &= bottom57bits;
869 /* out[8] < 2^57 */
870 out[1] += out[0] >> 58; out[0] &= bottom58bits;
871 out[2] += out[1] >> 58; out[1] &= bottom58bits;
872 out[3] += out[2] >> 58; out[2] &= bottom58bits;
873 out[4] += out[3] >> 58; out[3] &= bottom58bits;
874 out[5] += out[4] >> 58; out[4] &= bottom58bits;
875 out[6] += out[5] >> 58; out[5] &= bottom58bits;
876 out[7] += out[6] >> 58; out[6] &= bottom58bits;
877 out[8] += out[7] >> 58; out[7] &= bottom58bits;
878 /* out[8] < 2^57 + 4 */
879
880 /* If the value is greater than 2^521-1 then we have to subtract
881 * 2^521-1 out. See the comments in felem_is_zero regarding why we
882 * don't test for other multiples of the prime. */
883
884 /* First, if |out| is equal to 2^521-1, we subtract it out to get zero. */
885
886 is_p = out[0] ^ kPrime[0];
887 is_p |= out[1] ^ kPrime[1];
888 is_p |= out[2] ^ kPrime[2];
889 is_p |= out[3] ^ kPrime[3];
890 is_p |= out[4] ^ kPrime[4];
891 is_p |= out[5] ^ kPrime[5];
892 is_p |= out[6] ^ kPrime[6];
893 is_p |= out[7] ^ kPrime[7];
894 is_p |= out[8] ^ kPrime[8];
895
896 is_p--;
897 is_p &= is_p << 32;
898 is_p &= is_p << 16;
899 is_p &= is_p << 8;
900 is_p &= is_p << 4;
901 is_p &= is_p << 2;
902 is_p &= is_p << 1;
903 is_p = ((s64) is_p) >> 63;
904 is_p = ~is_p;
905
906 /* is_p is 0 iff |out| == 2^521-1 and all ones otherwise */
907
908 out[0] &= is_p;
909 out[1] &= is_p;
910 out[2] &= is_p;
911 out[3] &= is_p;
912 out[4] &= is_p;
913 out[5] &= is_p;
914 out[6] &= is_p;
915 out[7] &= is_p;
916 out[8] &= is_p;
917
918 /* In order to test that |out| >= 2^521-1 we need only test if out[8]
919 * >> 57 is greater than zero as (2^521-1) + x >= 2^522 */
920 is_greater = out[8] >> 57;
921 is_greater |= is_greater << 32;
922 is_greater |= is_greater << 16;
923 is_greater |= is_greater << 8;
924 is_greater |= is_greater << 4;
925 is_greater |= is_greater << 2;
926 is_greater |= is_greater << 1;
927 is_greater = ((s64) is_greater) >> 63;
928
929 out[0] -= kPrime[0] & is_greater;
930 out[1] -= kPrime[1] & is_greater;
931 out[2] -= kPrime[2] & is_greater;
932 out[3] -= kPrime[3] & is_greater;
933 out[4] -= kPrime[4] & is_greater;
934 out[5] -= kPrime[5] & is_greater;
935 out[6] -= kPrime[6] & is_greater;
936 out[7] -= kPrime[7] & is_greater;
937 out[8] -= kPrime[8] & is_greater;
938
939 /* Eliminate negative coefficients */
940 sign = -(out[0] >> 63); out[0] += (two58 & sign); out[1] -= (1 & sign);
941 sign = -(out[1] >> 63); out[1] += (two58 & sign); out[2] -= (1 & sign);
942 sign = -(out[2] >> 63); out[2] += (two58 & sign); out[3] -= (1 & sign);
943 sign = -(out[3] >> 63); out[3] += (two58 & sign); out[4] -= (1 & sign);
944 sign = -(out[4] >> 63); out[4] += (two58 & sign); out[5] -= (1 & sign);
945 sign = -(out[0] >> 63); out[5] += (two58 & sign); out[6] -= (1 & sign);
946 sign = -(out[6] >> 63); out[6] += (two58 & sign); out[7] -= (1 & sign);
947 sign = -(out[7] >> 63); out[7] += (two58 & sign); out[8] -= (1 & sign);
948 sign = -(out[5] >> 63); out[5] += (two58 & sign); out[6] -= (1 & sign);
949 sign = -(out[6] >> 63); out[6] += (two58 & sign); out[7] -= (1 & sign);
950 sign = -(out[7] >> 63); out[7] += (two58 & sign); out[8] -= (1 & sign);
951 }
952
953 /*-
954 * Group operations
955 * ----------------
956 *
957 * Building on top of the field operations we have the operations on the
958 * elliptic curve group itself. Points on the curve are represented in Jacobian
959 * coordinates */
960
961 /*-
962 * point_double calcuates 2*(x_in, y_in, z_in)
963 *
964 * The method is taken from:
965 * http://hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-3.html#doubling-dbl-2001-b
966 *
967 * Outputs can equal corresponding inputs, i.e., x_out == x_in is allowed.
968 * while x_out == y_in is not (maybe this works, but it's not tested). */
969 static void
970 point_double(felem x_out, felem y_out, felem z_out,
971 const felem x_in, const felem y_in, const felem z_in)
972 {
973 largefelem tmp, tmp2;
974 felem delta, gamma, beta, alpha, ftmp, ftmp2;
975
976 felem_assign(ftmp, x_in);
977 felem_assign(ftmp2, x_in);
978
979 /* delta = z^2 */
980 felem_square(tmp, z_in);
981 felem_reduce(delta, tmp); /* delta[i] < 2^59 + 2^14 */
982
983 /* gamma = y^2 */
984 felem_square(tmp, y_in);
985 felem_reduce(gamma, tmp); /* gamma[i] < 2^59 + 2^14 */
986
987 /* beta = x*gamma */
988 felem_mul(tmp, x_in, gamma);
989 felem_reduce(beta, tmp); /* beta[i] < 2^59 + 2^14 */
990
991 /* alpha = 3*(x-delta)*(x+delta) */
992 felem_diff64(ftmp, delta);
993 /* ftmp[i] < 2^61 */
994 felem_sum64(ftmp2, delta);
995 /* ftmp2[i] < 2^60 + 2^15 */
996 felem_scalar64(ftmp2, 3);
997 /* ftmp2[i] < 3*2^60 + 3*2^15 */
998 felem_mul(tmp, ftmp, ftmp2);
999 /*-
1000 * tmp[i] < 17(3*2^121 + 3*2^76)
1001 * = 61*2^121 + 61*2^76
1002 * < 64*2^121 + 64*2^76
1003 * = 2^127 + 2^82
1004 * < 2^128
1005 */
1006 felem_reduce(alpha, tmp);
1007
1008 /* x' = alpha^2 - 8*beta */
1009 felem_square(tmp, alpha);
1010 /* tmp[i] < 17*2^120
1011 * < 2^125 */
1012 felem_assign(ftmp, beta);
1013 felem_scalar64(ftmp, 8);
1014 /* ftmp[i] < 2^62 + 2^17 */
1015 felem_diff_128_64(tmp, ftmp);
1016 /* tmp[i] < 2^125 + 2^63 + 2^62 + 2^17 */
1017 felem_reduce(x_out, tmp);
1018
1019 /* z' = (y + z)^2 - gamma - delta */
1020 felem_sum64(delta, gamma);
1021 /* delta[i] < 2^60 + 2^15 */
1022 felem_assign(ftmp, y_in);
1023 felem_sum64(ftmp, z_in);
1024 /* ftmp[i] < 2^60 + 2^15 */
1025 felem_square(tmp, ftmp);
1026 /* tmp[i] < 17(2^122)
1027 * < 2^127 */
1028 felem_diff_128_64(tmp, delta);
1029 /* tmp[i] < 2^127 + 2^63 */
1030 felem_reduce(z_out, tmp);
1031
1032 /* y' = alpha*(4*beta - x') - 8*gamma^2 */
1033 felem_scalar64(beta, 4);
1034 /* beta[i] < 2^61 + 2^16 */
1035 felem_diff64(beta, x_out);
1036 /* beta[i] < 2^61 + 2^60 + 2^16 */
1037 felem_mul(tmp, alpha, beta);
1038 /*-
1039 * tmp[i] < 17*((2^59 + 2^14)(2^61 + 2^60 + 2^16))
1040 * = 17*(2^120 + 2^75 + 2^119 + 2^74 + 2^75 + 2^30)
1041 * = 17*(2^120 + 2^119 + 2^76 + 2^74 + 2^30)
1042 * < 2^128
1043 */
1044 felem_square(tmp2, gamma);
1045 /*-
1046 * tmp2[i] < 17*(2^59 + 2^14)^2
1047 * = 17*(2^118 + 2^74 + 2^28)
1048 */
1049 felem_scalar128(tmp2, 8);
1050 /*-
1051 * tmp2[i] < 8*17*(2^118 + 2^74 + 2^28)
1052 * = 2^125 + 2^121 + 2^81 + 2^77 + 2^35 + 2^31
1053 * < 2^126
1054 */
1055 felem_diff128(tmp, tmp2);
1056 /*-
1057 * tmp[i] < 2^127 - 2^69 + 17(2^120 + 2^119 + 2^76 + 2^74 + 2^30)
1058 * = 2^127 + 2^124 + 2^122 + 2^120 + 2^118 + 2^80 + 2^78 + 2^76 +
1059 * 2^74 + 2^69 + 2^34 + 2^30
1060 * < 2^128
1061 */
1062 felem_reduce(y_out, tmp);
1063 }
1064
1065 /* copy_conditional copies in to out iff mask is all ones. */
1066 static void
1067 copy_conditional(felem out, const felem in, limb mask)
1068 {
1069 unsigned i;
1070 for (i = 0; i < NLIMBS; ++i)
1071 {
1072 const limb tmp = mask & (in[i] ^ out[i]);
1073 out[i] ^= tmp;
1074 }
1075 }
1076
1077 /*-
1078 * point_add calcuates (x1, y1, z1) + (x2, y2, z2)
1079 *
1080 * The method is taken from
1081 * http://hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-3.html#addition-add-2007-bl,
1082 * adapted for mixed addition (z2 = 1, or z2 = 0 for the point at infinity).
1083 *
1084 * This function includes a branch for checking whether the two input points
1085 * are equal (while not equal to the point at infinity). This case never
1086 * happens during single point multiplication, so there is no timing leak for
1087 * ECDH or ECDSA signing. */
1088 static void point_add(felem x3, felem y3, felem z3,
1089 const felem x1, const felem y1, const felem z1,
1090 const int mixed, const felem x2, const felem y2, const felem z2)
1091 {
1092 felem ftmp, ftmp2, ftmp3, ftmp4, ftmp5, ftmp6, x_out, y_out, z_out;
1093 largefelem tmp, tmp2;
1094 limb x_equal, y_equal, z1_is_zero, z2_is_zero;
1095
1096 z1_is_zero = felem_is_zero(z1);
1097 z2_is_zero = felem_is_zero(z2);
1098
1099 /* ftmp = z1z1 = z1**2 */
1100 felem_square(tmp, z1);
1101 felem_reduce(ftmp, tmp);
1102
1103 if (!mixed)
1104 {
1105 /* ftmp2 = z2z2 = z2**2 */
1106 felem_square(tmp, z2);
1107 felem_reduce(ftmp2, tmp);
1108
1109 /* u1 = ftmp3 = x1*z2z2 */
1110 felem_mul(tmp, x1, ftmp2);
1111 felem_reduce(ftmp3, tmp);
1112
1113 /* ftmp5 = z1 + z2 */
1114 felem_assign(ftmp5, z1);
1115 felem_sum64(ftmp5, z2);
1116 /* ftmp5[i] < 2^61 */
1117
1118 /* ftmp5 = (z1 + z2)**2 - z1z1 - z2z2 = 2*z1z2 */
1119 felem_square(tmp, ftmp5);
1120 /* tmp[i] < 17*2^122 */
1121 felem_diff_128_64(tmp, ftmp);
1122 /* tmp[i] < 17*2^122 + 2^63 */
1123 felem_diff_128_64(tmp, ftmp2);
1124 /* tmp[i] < 17*2^122 + 2^64 */
1125 felem_reduce(ftmp5, tmp);
1126
1127 /* ftmp2 = z2 * z2z2 */
1128 felem_mul(tmp, ftmp2, z2);
1129 felem_reduce(ftmp2, tmp);
1130
1131 /* s1 = ftmp6 = y1 * z2**3 */
1132 felem_mul(tmp, y1, ftmp2);
1133 felem_reduce(ftmp6, tmp);
1134 }
1135 else
1136 {
1137 /* We'll assume z2 = 1 (special case z2 = 0 is handled later) */
1138
1139 /* u1 = ftmp3 = x1*z2z2 */
1140 felem_assign(ftmp3, x1);
1141
1142 /* ftmp5 = 2*z1z2 */
1143 felem_scalar(ftmp5, z1, 2);
1144
1145 /* s1 = ftmp6 = y1 * z2**3 */
1146 felem_assign(ftmp6, y1);
1147 }
1148
1149 /* u2 = x2*z1z1 */
1150 felem_mul(tmp, x2, ftmp);
1151 /* tmp[i] < 17*2^120 */
1152
1153 /* h = ftmp4 = u2 - u1 */
1154 felem_diff_128_64(tmp, ftmp3);
1155 /* tmp[i] < 17*2^120 + 2^63 */
1156 felem_reduce(ftmp4, tmp);
1157
1158 x_equal = felem_is_zero(ftmp4);
1159
1160 /* z_out = ftmp5 * h */
1161 felem_mul(tmp, ftmp5, ftmp4);
1162 felem_reduce(z_out, tmp);
1163
1164 /* ftmp = z1 * z1z1 */
1165 felem_mul(tmp, ftmp, z1);
1166 felem_reduce(ftmp, tmp);
1167
1168 /* s2 = tmp = y2 * z1**3 */
1169 felem_mul(tmp, y2, ftmp);
1170 /* tmp[i] < 17*2^120 */
1171
1172 /* r = ftmp5 = (s2 - s1)*2 */
1173 felem_diff_128_64(tmp, ftmp6);
1174 /* tmp[i] < 17*2^120 + 2^63 */
1175 felem_reduce(ftmp5, tmp);
1176 y_equal = felem_is_zero(ftmp5);
1177 felem_scalar64(ftmp5, 2);
1178 /* ftmp5[i] < 2^61 */
1179
1180 if (x_equal && y_equal && !z1_is_zero && !z2_is_zero)
1181 {
1182 point_double(x3, y3, z3, x1, y1, z1);
1183 return;
1184 }
1185
1186 /* I = ftmp = (2h)**2 */
1187 felem_assign(ftmp, ftmp4);
1188 felem_scalar64(ftmp, 2);
1189 /* ftmp[i] < 2^61 */
1190 felem_square(tmp, ftmp);
1191 /* tmp[i] < 17*2^122 */
1192 felem_reduce(ftmp, tmp);
1193
1194 /* J = ftmp2 = h * I */
1195 felem_mul(tmp, ftmp4, ftmp);
1196 felem_reduce(ftmp2, tmp);
1197
1198 /* V = ftmp4 = U1 * I */
1199 felem_mul(tmp, ftmp3, ftmp);
1200 felem_reduce(ftmp4, tmp);
1201
1202 /* x_out = r**2 - J - 2V */
1203 felem_square(tmp, ftmp5);
1204 /* tmp[i] < 17*2^122 */
1205 felem_diff_128_64(tmp, ftmp2);
1206 /* tmp[i] < 17*2^122 + 2^63 */
1207 felem_assign(ftmp3, ftmp4);
1208 felem_scalar64(ftmp4, 2);
1209 /* ftmp4[i] < 2^61 */
1210 felem_diff_128_64(tmp, ftmp4);
1211 /* tmp[i] < 17*2^122 + 2^64 */
1212 felem_reduce(x_out, tmp);
1213
1214 /* y_out = r(V-x_out) - 2 * s1 * J */
1215 felem_diff64(ftmp3, x_out);
1216 /* ftmp3[i] < 2^60 + 2^60
1217 * = 2^61 */
1218 felem_mul(tmp, ftmp5, ftmp3);
1219 /* tmp[i] < 17*2^122 */
1220 felem_mul(tmp2, ftmp6, ftmp2);
1221 /* tmp2[i] < 17*2^120 */
1222 felem_scalar128(tmp2, 2);
1223 /* tmp2[i] < 17*2^121 */
1224 felem_diff128(tmp, tmp2);
1225 /*-
1226 * tmp[i] < 2^127 - 2^69 + 17*2^122
1227 * = 2^126 - 2^122 - 2^6 - 2^2 - 1
1228 * < 2^127
1229 */
1230 felem_reduce(y_out, tmp);
1231
1232 copy_conditional(x_out, x2, z1_is_zero);
1233 copy_conditional(x_out, x1, z2_is_zero);
1234 copy_conditional(y_out, y2, z1_is_zero);
1235 copy_conditional(y_out, y1, z2_is_zero);
1236 copy_conditional(z_out, z2, z1_is_zero);
1237 copy_conditional(z_out, z1, z2_is_zero);
1238 felem_assign(x3, x_out);
1239 felem_assign(y3, y_out);
1240 felem_assign(z3, z_out);
1241 }
1242
1243 /*-
1244 * Base point pre computation
1245 * --------------------------
1246 *
1247 * Two different sorts of precomputed tables are used in the following code.
1248 * Each contain various points on the curve, where each point is three field
1249 * elements (x, y, z).
1250 *
1251 * For the base point table, z is usually 1 (0 for the point at infinity).
1252 * This table has 16 elements:
1253 * index | bits | point
1254 * ------+---------+------------------------------
1255 * 0 | 0 0 0 0 | 0G
1256 * 1 | 0 0 0 1 | 1G
1257 * 2 | 0 0 1 0 | 2^130G
1258 * 3 | 0 0 1 1 | (2^130 + 1)G
1259 * 4 | 0 1 0 0 | 2^260G
1260 * 5 | 0 1 0 1 | (2^260 + 1)G
1261 * 6 | 0 1 1 0 | (2^260 + 2^130)G
1262 * 7 | 0 1 1 1 | (2^260 + 2^130 + 1)G
1263 * 8 | 1 0 0 0 | 2^390G
1264 * 9 | 1 0 0 1 | (2^390 + 1)G
1265 * 10 | 1 0 1 0 | (2^390 + 2^130)G
1266 * 11 | 1 0 1 1 | (2^390 + 2^130 + 1)G
1267 * 12 | 1 1 0 0 | (2^390 + 2^260)G
1268 * 13 | 1 1 0 1 | (2^390 + 2^260 + 1)G
1269 * 14 | 1 1 1 0 | (2^390 + 2^260 + 2^130)G
1270 * 15 | 1 1 1 1 | (2^390 + 2^260 + 2^130 + 1)G
1271 *
1272 * The reason for this is so that we can clock bits into four different
1273 * locations when doing simple scalar multiplies against the base point.
1274 *
1275 * Tables for other points have table[i] = iG for i in 0 .. 16. */
1276
1277 /* gmul is the table of precomputed base points */
1278 static const felem gmul[16][3] =
1279 {{{0, 0, 0, 0, 0, 0, 0, 0, 0},
1280 {0, 0, 0, 0, 0, 0, 0, 0, 0},
1281 {0, 0, 0, 0, 0, 0, 0, 0, 0}},
1282 {{0x017e7e31c2e5bd66, 0x022cf0615a90a6fe, 0x00127a2ffa8de334,
1283 0x01dfbf9d64a3f877, 0x006b4d3dbaa14b5e, 0x014fed487e0a2bd8,
1284 0x015b4429c6481390, 0x03a73678fb2d988e, 0x00c6858e06b70404},
1285 {0x00be94769fd16650, 0x031c21a89cb09022, 0x039013fad0761353,
1286 0x02657bd099031542, 0x03273e662c97ee72, 0x01e6d11a05ebef45,
1287 0x03d1bd998f544495, 0x03001172297ed0b1, 0x011839296a789a3b},
1288 {1, 0, 0, 0, 0, 0, 0, 0, 0}},
1289 {{0x0373faacbc875bae, 0x00f325023721c671, 0x00f666fd3dbde5ad,
1290 0x01a6932363f88ea7, 0x01fc6d9e13f9c47b, 0x03bcbffc2bbf734e,
1291 0x013ee3c3647f3a92, 0x029409fefe75d07d, 0x00ef9199963d85e5},
1292 {0x011173743ad5b178, 0x02499c7c21bf7d46, 0x035beaeabb8b1a58,
1293 0x00f989c4752ea0a3, 0x0101e1de48a9c1a3, 0x01a20076be28ba6c,
1294 0x02f8052e5eb2de95, 0x01bfe8f82dea117c, 0x0160074d3c36ddb7},
1295 {1, 0, 0, 0, 0, 0, 0, 0, 0}},
1296 {{0x012f3fc373393b3b, 0x03d3d6172f1419fa, 0x02adc943c0b86873,
1297 0x00d475584177952b, 0x012a4d1673750ee2, 0x00512517a0f13b0c,
1298 0x02b184671a7b1734, 0x0315b84236f1a50a, 0x00a4afc472edbdb9},
1299 {0x00152a7077f385c4, 0x03044007d8d1c2ee, 0x0065829d61d52b52,
1300 0x00494ff6b6631d0d, 0x00a11d94d5f06bcf, 0x02d2f89474d9282e,
1301 0x0241c5727c06eeb9, 0x0386928710fbdb9d, 0x01f883f727b0dfbe},
1302 {1, 0, 0, 0, 0, 0, 0, 0, 0}},
1303 {{0x019b0c3c9185544d, 0x006243a37c9d97db, 0x02ee3cbe030a2ad2,
1304 0x00cfdd946bb51e0d, 0x0271c00932606b91, 0x03f817d1ec68c561,
1305 0x03f37009806a369c, 0x03c1f30baf184fd5, 0x01091022d6d2f065},
1306 {0x0292c583514c45ed, 0x0316fca51f9a286c, 0x00300af507c1489a,
1307 0x0295f69008298cf1, 0x02c0ed8274943d7b, 0x016509b9b47a431e,
1308 0x02bc9de9634868ce, 0x005b34929bffcb09, 0x000c1a0121681524},
1309 {1, 0, 0, 0, 0, 0, 0, 0, 0}},
1310 {{0x0286abc0292fb9f2, 0x02665eee9805b3f7, 0x01ed7455f17f26d6,
1311 0x0346355b83175d13, 0x006284944cd0a097, 0x0191895bcdec5e51,
1312 0x02e288370afda7d9, 0x03b22312bfefa67a, 0x01d104d3fc0613fe},
1313 {0x0092421a12f7e47f, 0x0077a83fa373c501, 0x03bd25c5f696bd0d,
1314 0x035c41e4d5459761, 0x01ca0d1742b24f53, 0x00aaab27863a509c,
1315 0x018b6de47df73917, 0x025c0b771705cd01, 0x01fd51d566d760a7},
1316 {1, 0, 0, 0, 0, 0, 0, 0, 0}},
1317 {{0x01dd92ff6b0d1dbd, 0x039c5e2e8f8afa69, 0x0261ed13242c3b27,
1318 0x0382c6e67026e6a0, 0x01d60b10be2089f9, 0x03c15f3dce86723f,
1319 0x03c764a32d2a062d, 0x017307eac0fad056, 0x018207c0b96c5256},
1320 {0x0196a16d60e13154, 0x03e6ce74c0267030, 0x00ddbf2b4e52a5aa,
1321 0x012738241bbf31c8, 0x00ebe8dc04685a28, 0x024c2ad6d380d4a2,
1322 0x035ee062a6e62d0e, 0x0029ed74af7d3a0f, 0x00eef32aec142ebd},
1323 {1, 0, 0, 0, 0, 0, 0, 0, 0}},
1324 {{0x00c31ec398993b39, 0x03a9f45bcda68253, 0x00ac733c24c70890,
1325 0x00872b111401ff01, 0x01d178c23195eafb, 0x03bca2c816b87f74,
1326 0x0261a9af46fbad7a, 0x0324b2a8dd3d28f9, 0x00918121d8f24e23},
1327 {0x032bc8c1ca983cd7, 0x00d869dfb08fc8c6, 0x01693cb61fce1516,
1328 0x012a5ea68f4e88a8, 0x010869cab88d7ae3, 0x009081ad277ceee1,
1329 0x033a77166d064cdc, 0x03955235a1fb3a95, 0x01251a4a9b25b65e},
1330 {1, 0, 0, 0, 0, 0, 0, 0, 0}},
1331 {{0x00148a3a1b27f40b, 0x0123186df1b31fdc, 0x00026e7beaad34ce,
1332 0x01db446ac1d3dbba, 0x0299c1a33437eaec, 0x024540610183cbb7,
1333 0x0173bb0e9ce92e46, 0x02b937e43921214b, 0x01ab0436a9bf01b5},
1334 {0x0383381640d46948, 0x008dacbf0e7f330f, 0x03602122bcc3f318,
1335 0x01ee596b200620d6, 0x03bd0585fda430b3, 0x014aed77fd123a83,
1336 0x005ace749e52f742, 0x0390fe041da2b842, 0x0189a8ceb3299242},
1337 {1, 0, 0, 0, 0, 0, 0, 0, 0}},
1338 {{0x012a19d6b3282473, 0x00c0915918b423ce, 0x023a954eb94405ae,
1339 0x00529f692be26158, 0x0289fa1b6fa4b2aa, 0x0198ae4ceea346ef,
1340 0x0047d8cdfbdedd49, 0x00cc8c8953f0f6b8, 0x001424abbff49203},
1341 {0x0256732a1115a03a, 0x0351bc38665c6733, 0x03f7b950fb4a6447,
1342 0x000afffa94c22155, 0x025763d0a4dab540, 0x000511e92d4fc283,
1343 0x030a7e9eda0ee96c, 0x004c3cd93a28bf0a, 0x017edb3a8719217f},
1344 {1, 0, 0, 0, 0, 0, 0, 0, 0}},
1345 {{0x011de5675a88e673, 0x031d7d0f5e567fbe, 0x0016b2062c970ae5,
1346 0x03f4a2be49d90aa7, 0x03cef0bd13822866, 0x03f0923dcf774a6c,
1347 0x0284bebc4f322f72, 0x016ab2645302bb2c, 0x01793f95dace0e2a},
1348 {0x010646e13527a28f, 0x01ca1babd59dc5e7, 0x01afedfd9a5595df,
1349 0x01f15785212ea6b1, 0x0324e5d64f6ae3f4, 0x02d680f526d00645,
1350 0x0127920fadf627a7, 0x03b383f75df4f684, 0x0089e0057e783b0a},
1351 {1, 0, 0, 0, 0, 0, 0, 0, 0}},
1352 {{0x00f334b9eb3c26c6, 0x0298fdaa98568dce, 0x01c2d24843a82292,
1353 0x020bcb24fa1b0711, 0x02cbdb3d2b1875e6, 0x0014907598f89422,
1354 0x03abe3aa43b26664, 0x02cbf47f720bc168, 0x0133b5e73014b79b},
1355 {0x034aab5dab05779d, 0x00cdc5d71fee9abb, 0x0399f16bd4bd9d30,
1356 0x03582fa592d82647, 0x02be1cdfb775b0e9, 0x0034f7cea32e94cb,
1357 0x0335a7f08f56f286, 0x03b707e9565d1c8b, 0x0015c946ea5b614f},
1358 {1, 0, 0, 0, 0, 0, 0, 0, 0}},
1359 {{0x024676f6cff72255, 0x00d14625cac96378, 0x00532b6008bc3767,
1360 0x01fc16721b985322, 0x023355ea1b091668, 0x029de7afdc0317c3,
1361 0x02fc8a7ca2da037c, 0x02de1217d74a6f30, 0x013f7173175b73bf},
1362 {0x0344913f441490b5, 0x0200f9e272b61eca, 0x0258a246b1dd55d2,
1363 0x03753db9ea496f36, 0x025e02937a09c5ef, 0x030cbd3d14012692,
1364 0x01793a67e70dc72a, 0x03ec1d37048a662e, 0x006550f700c32a8d},
1365 {1, 0, 0, 0, 0, 0, 0, 0, 0}},
1366 {{0x00d3f48a347eba27, 0x008e636649b61bd8, 0x00d3b93716778fb3,
1367 0x004d1915757bd209, 0x019d5311a3da44e0, 0x016d1afcbbe6aade,
1368 0x0241bf5f73265616, 0x0384672e5d50d39b, 0x005009fee522b684},
1369 {0x029b4fab064435fe, 0x018868ee095bbb07, 0x01ea3d6936cc92b8,
1370 0x000608b00f78a2f3, 0x02db911073d1c20f, 0x018205938470100a,
1371 0x01f1e4964cbe6ff2, 0x021a19a29eed4663, 0x01414485f42afa81},
1372 {1, 0, 0, 0, 0, 0, 0, 0, 0}},
1373 {{0x01612b3a17f63e34, 0x03813992885428e6, 0x022b3c215b5a9608,
1374 0x029b4057e19f2fcb, 0x0384059a587af7e6, 0x02d6400ace6fe610,
1375 0x029354d896e8e331, 0x00c047ee6dfba65e, 0x0037720542e9d49d},
1376 {0x02ce9eed7c5e9278, 0x0374ed703e79643b, 0x01316c54c4072006,
1377 0x005aaa09054b2ee8, 0x002824000c840d57, 0x03d4eba24771ed86,
1378 0x0189c50aabc3bdae, 0x0338c01541e15510, 0x00466d56e38eed42},
1379 {1, 0, 0, 0, 0, 0, 0, 0, 0}},
1380 {{0x007efd8330ad8bd6, 0x02465ed48047710b, 0x0034c6606b215e0c,
1381 0x016ae30c53cbf839, 0x01fa17bd37161216, 0x018ead4e61ce8ab9,
1382 0x005482ed5f5dee46, 0x037543755bba1d7f, 0x005e5ac7e70a9d0f},
1383 {0x0117e1bb2fdcb2a2, 0x03deea36249f40c4, 0x028d09b4a6246cb7,
1384 0x03524b8855bcf756, 0x023d7d109d5ceb58, 0x0178e43e3223ef9c,
1385 0x0154536a0c6e966a, 0x037964d1286ee9fe, 0x0199bcd90e125055},
1386 {1, 0, 0, 0, 0, 0, 0, 0, 0}}};
1387
1388 /* select_point selects the |idx|th point from a precomputation table and
1389 * copies it to out. */
1390 /* pre_comp below is of the size provided in |size| */
1391 static void select_point(const limb idx, unsigned int size, const felem pre_comp[][3],
1392 felem out[3])
1393 {
1394 unsigned i, j;
1395 limb *outlimbs = &out[0][0];
1396 memset(outlimbs, 0, 3 * sizeof(felem));
1397
1398 for (i = 0; i < size; i++)
1399 {
1400 const limb *inlimbs = &pre_comp[i][0][0];
1401 limb mask = i ^ idx;
1402 mask |= mask >> 4;
1403 mask |= mask >> 2;
1404 mask |= mask >> 1;
1405 mask &= 1;
1406 mask--;
1407 for (j = 0; j < NLIMBS * 3; j++)
1408 outlimbs[j] |= inlimbs[j] & mask;
1409 }
1410 }
1411
1412 /* get_bit returns the |i|th bit in |in| */
1413 static char get_bit(const felem_bytearray in, int i)
1414 {
1415 if (i < 0)
1416 return 0;
1417 return (in[i >> 3] >> (i & 7)) & 1;
1418 }
1419
1420 /* Interleaved point multiplication using precomputed point multiples:
1421 * The small point multiples 0*P, 1*P, ..., 16*P are in pre_comp[],
1422 * the scalars in scalars[]. If g_scalar is non-NULL, we also add this multiple
1423 * of the generator, using certain (large) precomputed multiples in g_pre_comp.
1424 * Output point (X, Y, Z) is stored in x_out, y_out, z_out */
1425 static void batch_mul(felem x_out, felem y_out, felem z_out,
1426 const felem_bytearray scalars[], const unsigned num_points, const u8 *g_scalar,
1427 const int mixed, const felem pre_comp[][17][3], const felem g_pre_comp[16][3])
1428 {
1429 int i, skip;
1430 unsigned num, gen_mul = (g_scalar != NULL);
1431 felem nq[3], tmp[4];
1432 limb bits;
1433 u8 sign, digit;
1434
1435 /* set nq to the point at infinity */
1436 memset(nq, 0, 3 * sizeof(felem));
1437
1438 /* Loop over all scalars msb-to-lsb, interleaving additions
1439 * of multiples of the generator (last quarter of rounds)
1440 * and additions of other points multiples (every 5th round).
1441 */
1442 skip = 1; /* save two point operations in the first round */
1443 for (i = (num_points ? 520 : 130); i >= 0; --i)
1444 {
1445 /* double */
1446 if (!skip)
1447 point_double(nq[0], nq[1], nq[2], nq[0], nq[1], nq[2]);
1448
1449 /* add multiples of the generator */
1450 if (gen_mul && (i <= 130))
1451 {
1452 bits = get_bit(g_scalar, i + 390) << 3;
1453 if (i < 130)
1454 {
1455 bits |= get_bit(g_scalar, i + 260) << 2;
1456 bits |= get_bit(g_scalar, i + 130) << 1;
1457 bits |= get_bit(g_scalar, i);
1458 }
1459 /* select the point to add, in constant time */
1460 select_point(bits, 16, g_pre_comp, tmp);
1461 if (!skip)
1462 {
1463 point_add(nq[0], nq[1], nq[2],
1464 nq[0], nq[1], nq[2],
1465 1 /* mixed */, tmp[0], tmp[1], tmp[2]);
1466 }
1467 else
1468 {
1469 memcpy(nq, tmp, 3 * sizeof(felem));
1470 skip = 0;
1471 }
1472 }
1473
1474 /* do other additions every 5 doublings */
1475 if (num_points && (i % 5 == 0))
1476 {
1477 /* loop over all scalars */
1478 for (num = 0; num < num_points; ++num)
1479 {
1480 bits = get_bit(scalars[num], i + 4) << 5;
1481 bits |= get_bit(scalars[num], i + 3) << 4;
1482 bits |= get_bit(scalars[num], i + 2) << 3;
1483 bits |= get_bit(scalars[num], i + 1) << 2;
1484 bits |= get_bit(scalars[num], i) << 1;
1485 bits |= get_bit(scalars[num], i - 1);
1486 ec_GFp_nistp_recode_scalar_bits(&sign, &digit, bits);
1487
1488 /* select the point to add or subtract, in constant time */
1489 select_point(digit, 17, pre_comp[num], tmp);
1490 felem_neg(tmp[3], tmp[1]); /* (X, -Y, Z) is the negative point */
1491 copy_conditional(tmp[1], tmp[3], (-(limb) sign));
1492
1493 if (!skip)
1494 {
1495 point_add(nq[0], nq[1], nq[2],
1496 nq[0], nq[1], nq[2],
1497 mixed, tmp[0], tmp[1], tmp[2]);
1498 }
1499 else
1500 {
1501 memcpy(nq, tmp, 3 * sizeof(felem));
1502 skip = 0;
1503 }
1504 }
1505 }
1506 }
1507 felem_assign(x_out, nq[0]);
1508 felem_assign(y_out, nq[1]);
1509 felem_assign(z_out, nq[2]);
1510 }
1511
1512
1513 /* Precomputation for the group generator. */
1514 typedef struct {
1515 felem g_pre_comp[16][3];
1516 int references;
1517 } NISTP521_PRE_COMP;
1518
1519 const EC_METHOD *EC_GFp_nistp521_method(void)
1520 {
1521 static const EC_METHOD ret = {
1522 EC_FLAGS_DEFAULT_OCT,
1523 NID_X9_62_prime_field,
1524 ec_GFp_nistp521_group_init,
1525 ec_GFp_simple_group_finish,
1526 ec_GFp_simple_group_clear_finish,
1527 ec_GFp_nist_group_copy,
1528 ec_GFp_nistp521_group_set_curve,
1529 ec_GFp_simple_group_get_curve,
1530 ec_GFp_simple_group_get_degree,
1531 ec_GFp_simple_group_check_discriminant,
1532 ec_GFp_simple_point_init,
1533 ec_GFp_simple_point_finish,
1534 ec_GFp_simple_point_clear_finish,
1535 ec_GFp_simple_point_copy,
1536 ec_GFp_simple_point_set_to_infinity,
1537 ec_GFp_simple_set_Jprojective_coordinates_GFp,
1538 ec_GFp_simple_get_Jprojective_coordinates_GFp,
1539 ec_GFp_simple_point_set_affine_coordinates,
1540 ec_GFp_nistp521_point_get_affine_coordinates,
1541 0 /* point_set_compressed_coordinates */,
1542 0 /* point2oct */,
1543 0 /* oct2point */,
1544 ec_GFp_simple_add,
1545 ec_GFp_simple_dbl,
1546 ec_GFp_simple_invert,
1547 ec_GFp_simple_is_at_infinity,
1548 ec_GFp_simple_is_on_curve,
1549 ec_GFp_simple_cmp,
1550 ec_GFp_simple_make_affine,
1551 ec_GFp_simple_points_make_affine,
1552 ec_GFp_nistp521_points_mul,
1553 ec_GFp_nistp521_precompute_mult,
1554 ec_GFp_nistp521_have_precompute_mult,
1555 ec_GFp_nist_field_mul,
1556 ec_GFp_nist_field_sqr,
1557 0 /* field_div */,
1558 0 /* field_encode */,
1559 0 /* field_decode */,
1560 0 /* field_set_to_one */ };
1561
1562 return &ret;
1563 }
1564
1565
1566 /******************************************************************************/
1567 /* FUNCTIONS TO MANAGE PRECOMPUTATION
1568 */
1569
1570 static NISTP521_PRE_COMP *nistp521_pre_comp_new()
1571 {
1572 NISTP521_PRE_COMP *ret = NULL;
1573 ret = (NISTP521_PRE_COMP *)OPENSSL_malloc(sizeof(NISTP521_PRE_COMP));
1574 if (!ret)
1575 {
1576 ECerr(EC_F_NISTP521_PRE_COMP_NEW, ERR_R_MALLOC_FAILURE);
1577 return ret;
1578 }
1579 memset(ret->g_pre_comp, 0, sizeof(ret->g_pre_comp));
1580 ret->references = 1;
1581 return ret;
1582 }
1583
1584 static void *nistp521_pre_comp_dup(void *src_)
1585 {
1586 NISTP521_PRE_COMP *src = src_;
1587
1588 /* no need to actually copy, these objects never change! */
1589 CRYPTO_add(&src->references, 1, CRYPTO_LOCK_EC_PRE_COMP);
1590
1591 return src_;
1592 }
1593
1594 static void nistp521_pre_comp_free(void *pre_)
1595 {
1596 int i;
1597 NISTP521_PRE_COMP *pre = pre_;
1598
1599 if (!pre)
1600 return;
1601
1602 i = CRYPTO_add(&pre->references, -1, CRYPTO_LOCK_EC_PRE_COMP);
1603 if (i > 0)
1604 return;
1605
1606 OPENSSL_free(pre);
1607 }
1608
1609 static void nistp521_pre_comp_clear_free(void *pre_)
1610 {
1611 int i;
1612 NISTP521_PRE_COMP *pre = pre_;
1613
1614 if (!pre)
1615 return;
1616
1617 i = CRYPTO_add(&pre->references, -1, CRYPTO_LOCK_EC_PRE_COMP);
1618 if (i > 0)
1619 return;
1620
1621 OPENSSL_cleanse(pre, sizeof(*pre));
1622 OPENSSL_free(pre);
1623 }
1624
1625 /******************************************************************************/
1626 /* OPENSSL EC_METHOD FUNCTIONS
1627 */
1628
1629 int ec_GFp_nistp521_group_init(EC_GROUP *group)
1630 {
1631 int ret;
1632 ret = ec_GFp_simple_group_init(group);
1633 group->a_is_minus3 = 1;
1634 return ret;
1635 }
1636
1637 int ec_GFp_nistp521_group_set_curve(EC_GROUP *group, const BIGNUM *p,
1638 const BIGNUM *a, const BIGNUM *b, BN_CTX *ctx)
1639 {
1640 int ret = 0;
1641 BN_CTX *new_ctx = NULL;
1642 BIGNUM *curve_p, *curve_a, *curve_b;
1643
1644 if (ctx == NULL)
1645 if ((ctx = new_ctx = BN_CTX_new()) == NULL) return 0;
1646 BN_CTX_start(ctx);
1647 if (((curve_p = BN_CTX_get(ctx)) == NULL) ||
1648 ((curve_a = BN_CTX_get(ctx)) == NULL) ||
1649 ((curve_b = BN_CTX_get(ctx)) == NULL)) goto err;
1650 BN_bin2bn(nistp521_curve_params[0], sizeof(felem_bytearray), curve_p);
1651 BN_bin2bn(nistp521_curve_params[1], sizeof(felem_bytearray), curve_a);
1652 BN_bin2bn(nistp521_curve_params[2], sizeof(felem_bytearray), curve_b);
1653 if ((BN_cmp(curve_p, p)) || (BN_cmp(curve_a, a)) ||
1654 (BN_cmp(curve_b, b)))
1655 {
1656 ECerr(EC_F_EC_GFP_NISTP521_GROUP_SET_CURVE,
1657 EC_R_WRONG_CURVE_PARAMETERS);
1658 goto err;
1659 }
1660 group->field_mod_func = BN_nist_mod_521;
1661 ret = ec_GFp_simple_group_set_curve(group, p, a, b, ctx);
1662 err:
1663 BN_CTX_end(ctx);
1664 if (new_ctx != NULL)
1665 BN_CTX_free(new_ctx);
1666 return ret;
1667 }
1668
1669 /* Takes the Jacobian coordinates (X, Y, Z) of a point and returns
1670 * (X', Y') = (X/Z^2, Y/Z^3) */
1671 int ec_GFp_nistp521_point_get_affine_coordinates(const EC_GROUP *group,
1672 const EC_POINT *point, BIGNUM *x, BIGNUM *y, BN_CTX *ctx)
1673 {
1674 felem z1, z2, x_in, y_in, x_out, y_out;
1675 largefelem tmp;
1676
1677 if (EC_POINT_is_at_infinity(group, point))
1678 {
1679 ECerr(EC_F_EC_GFP_NISTP521_POINT_GET_AFFINE_COORDINATES,
1680 EC_R_POINT_AT_INFINITY);
1681 return 0;
1682 }
1683 if ((!BN_to_felem(x_in, &point->X)) || (!BN_to_felem(y_in, &point->Y)) ||
1684 (!BN_to_felem(z1, &point->Z))) return 0;
1685 felem_inv(z2, z1);
1686 felem_square(tmp, z2); felem_reduce(z1, tmp);
1687 felem_mul(tmp, x_in, z1); felem_reduce(x_in, tmp);
1688 felem_contract(x_out, x_in);
1689 if (x != NULL)
1690 {
1691 if (!felem_to_BN(x, x_out))
1692 {
1693 ECerr(EC_F_EC_GFP_NISTP521_POINT_GET_AFFINE_COORDINATES, ERR_R_BN_LIB);
1694 return 0;
1695 }
1696 }
1697 felem_mul(tmp, z1, z2); felem_reduce(z1, tmp);
1698 felem_mul(tmp, y_in, z1); felem_reduce(y_in, tmp);
1699 felem_contract(y_out, y_in);
1700 if (y != NULL)
1701 {
1702 if (!felem_to_BN(y, y_out))
1703 {
1704 ECerr(EC_F_EC_GFP_NISTP521_POINT_GET_AFFINE_COORDINATES, ERR_R_BN_LIB);
1705 return 0;
1706 }
1707 }
1708 return 1;
1709 }
1710
1711 /* points below is of size |num|, and tmp_felems is of size |num+1/ */
1712 static void make_points_affine(size_t num, felem points[][3], felem tmp_felems[])
1713 {
1714 /* Runs in constant time, unless an input is the point at infinity
1715 * (which normally shouldn't happen). */
1716 ec_GFp_nistp_points_make_affine_internal(
1717 num,
1718 points,
1719 sizeof(felem),
1720 tmp_felems,
1721 (void (*)(void *)) felem_one,
1722 (int (*)(const void *)) felem_is_zero_int,
1723 (void (*)(void *, const void *)) felem_assign,
1724 (void (*)(void *, const void *)) felem_square_reduce,
1725 (void (*)(void *, const void *, const void *)) felem_mul_reduce,
1726 (void (*)(void *, const void *)) felem_inv,
1727 (void (*)(void *, const void *)) felem_contract);
1728 }
1729
1730 /* Computes scalar*generator + \sum scalars[i]*points[i], ignoring NULL values
1731 * Result is stored in r (r can equal one of the inputs). */
1732 int ec_GFp_nistp521_points_mul(const EC_GROUP *group, EC_POINT *r,
1733 const BIGNUM *scalar, size_t num, const EC_POINT *points[],
1734 const BIGNUM *scalars[], BN_CTX *ctx)
1735 {
1736 int ret = 0;
1737 int j;
1738 int mixed = 0;
1739 BN_CTX *new_ctx = NULL;
1740 BIGNUM *x, *y, *z, *tmp_scalar;
1741 felem_bytearray g_secret;
1742 felem_bytearray *secrets = NULL;
1743 felem (*pre_comp)[17][3] = NULL;
1744 felem *tmp_felems = NULL;
1745 felem_bytearray tmp;
1746 unsigned i, num_bytes;
1747 int have_pre_comp = 0;
1748 size_t num_points = num;
1749 felem x_in, y_in, z_in, x_out, y_out, z_out;
1750 NISTP521_PRE_COMP *pre = NULL;
1751 felem (*g_pre_comp)[3] = NULL;
1752 EC_POINT *generator = NULL;
1753 const EC_POINT *p = NULL;
1754 const BIGNUM *p_scalar = NULL;
1755
1756 if (ctx == NULL)
1757 if ((ctx = new_ctx = BN_CTX_new()) == NULL) return 0;
1758 BN_CTX_start(ctx);
1759 if (((x = BN_CTX_get(ctx)) == NULL) ||
1760 ((y = BN_CTX_get(ctx)) == NULL) ||
1761 ((z = BN_CTX_get(ctx)) == NULL) ||
1762 ((tmp_scalar = BN_CTX_get(ctx)) == NULL))
1763 goto err;
1764
1765 if (scalar != NULL)
1766 {
1767 pre = EC_EX_DATA_get_data(group->extra_data,
1768 nistp521_pre_comp_dup, nistp521_pre_comp_free,
1769 nistp521_pre_comp_clear_free);
1770 if (pre)
1771 /* we have precomputation, try to use it */
1772 g_pre_comp = &pre->g_pre_comp[0];
1773 else
1774 /* try to use the standard precomputation */
1775 g_pre_comp = (felem (*)[3]) gmul;
1776 generator = EC_POINT_new(group);
1777 if (generator == NULL)
1778 goto err;
1779 /* get the generator from precomputation */
1780 if (!felem_to_BN(x, g_pre_comp[1][0]) ||
1781 !felem_to_BN(y, g_pre_comp[1][1]) ||
1782 !felem_to_BN(z, g_pre_comp[1][2]))
1783 {
1784 ECerr(EC_F_EC_GFP_NISTP521_POINTS_MUL, ERR_R_BN_LIB);
1785 goto err;
1786 }
1787 if (!EC_POINT_set_Jprojective_coordinates_GFp(group,
1788 generator, x, y, z, ctx))
1789 goto err;
1790 if (0 == EC_POINT_cmp(group, generator, group->generator, ctx))
1791 /* precomputation matches generator */
1792 have_pre_comp = 1;
1793 else
1794 /* we don't have valid precomputation:
1795 * treat the generator as a random point */
1796 num_points++;
1797 }
1798
1799 if (num_points > 0)
1800 {
1801 if (num_points >= 2)
1802 {
1803 /* unless we precompute multiples for just one point,
1804 * converting those into affine form is time well spent */
1805 mixed = 1;
1806 }
1807 secrets = OPENSSL_malloc(num_points * sizeof(felem_bytearray));
1808 pre_comp = OPENSSL_malloc(num_points * 17 * 3 * sizeof(felem));
1809 if (mixed)
1810 tmp_felems = OPENSSL_malloc((num_points * 17 + 1) * sizeof(felem));
1811 if ((secrets == NULL) || (pre_comp == NULL) || (mixed && (tmp_felems == NULL)))
1812 {
1813 ECerr(EC_F_EC_GFP_NISTP521_POINTS_MUL, ERR_R_MALLOC_FAILURE);
1814 goto err;
1815 }
1816
1817 /* we treat NULL scalars as 0, and NULL points as points at infinity,
1818 * i.e., they contribute nothing to the linear combination */
1819 memset(secrets, 0, num_points * sizeof(felem_bytearray));
1820 memset(pre_comp, 0, num_points * 17 * 3 * sizeof(felem));
1821 for (i = 0; i < num_points; ++i)
1822 {
1823 if (i == num)
1824 /* we didn't have a valid precomputation, so we pick
1825 * the generator */
1826 {
1827 p = EC_GROUP_get0_generator(group);
1828 p_scalar = scalar;
1829 }
1830 else
1831 /* the i^th point */
1832 {
1833 p = points[i];
1834 p_scalar = scalars[i];
1835 }
1836 if ((p_scalar != NULL) && (p != NULL))
1837 {
1838 /* reduce scalar to 0 <= scalar < 2^521 */
1839 if ((BN_num_bits(p_scalar) > 521) || (BN_is_negative(p_scalar)))
1840 {
1841 /* this is an unusual input, and we don't guarantee
1842 * constant-timeness */
1843 if (!BN_nnmod(tmp_scalar, p_scalar, &group->order, ctx))
1844 {
1845 ECerr(EC_F_EC_GFP_NISTP521_POINTS_MUL, ERR_R_BN_LIB);
1846 goto err;
1847 }
1848 num_bytes = BN_bn2bin(tmp_scalar, tmp);
1849 }
1850 else
1851 num_bytes = BN_bn2bin(p_scalar, tmp);
1852 flip_endian(secrets[i], tmp, num_bytes);
1853 /* precompute multiples */
1854 if ((!BN_to_felem(x_out, &p->X)) ||
1855 (!BN_to_felem(y_out, &p->Y)) ||
1856 (!BN_to_felem(z_out, &p->Z))) goto err;
1857 memcpy(pre_comp[i][1][0], x_out, sizeof(felem));
1858 memcpy(pre_comp[i][1][1], y_out, sizeof(felem));
1859 memcpy(pre_comp[i][1][2], z_out, sizeof(felem));
1860 for (j = 2; j <= 16; ++j)
1861 {
1862 if (j & 1)
1863 {
1864 point_add(
1865 pre_comp[i][j][0], pre_comp[i][j][1], pre_comp[i][j][2],
1866 pre_comp[i][1][0], pre_comp[i][1][1], pre_comp[i][1][2],
1867 0, pre_comp[i][j-1][0], pre_comp[i][j-1][1], pre_comp[i][j-1][2]);
1868 }
1869 else
1870 {
1871 point_double(
1872 pre_comp[i][j][0], pre_comp[i][j][1], pre_comp[i][j][2],
1873 pre_comp[i][j/2][0], pre_comp[i][j/2][1], pre_comp[i][j/2][2]);
1874 }
1875 }
1876 }
1877 }
1878 if (mixed)
1879 make_points_affine(num_points * 17, pre_comp[0], tmp_felems);
1880 }
1881
1882 /* the scalar for the generator */
1883 if ((scalar != NULL) && (have_pre_comp))
1884 {
1885 memset(g_secret, 0, sizeof(g_secret));
1886 /* reduce scalar to 0 <= scalar < 2^521 */
1887 if ((BN_num_bits(scalar) > 521) || (BN_is_negative(scalar)))
1888 {
1889 /* this is an unusual input, and we don't guarantee
1890 * constant-timeness */
1891 if (!BN_nnmod(tmp_scalar, scalar, &group->order, ctx))
1892 {
1893 ECerr(EC_F_EC_GFP_NISTP521_POINTS_MUL, ERR_R_BN_LIB);
1894 goto err;
1895 }
1896 num_bytes = BN_bn2bin(tmp_scalar, tmp);
1897 }
1898 else
1899 num_bytes = BN_bn2bin(scalar, tmp);
1900 flip_endian(g_secret, tmp, num_bytes);
1901 /* do the multiplication with generator precomputation*/
1902 batch_mul(x_out, y_out, z_out,
1903 (const felem_bytearray (*)) secrets, num_points,
1904 g_secret,
1905 mixed, (const felem (*)[17][3]) pre_comp,
1906 (const felem (*)[3]) g_pre_comp);
1907 }
1908 else
1909 /* do the multiplication without generator precomputation */
1910 batch_mul(x_out, y_out, z_out,
1911 (const felem_bytearray (*)) secrets, num_points,
1912 NULL, mixed, (const felem (*)[17][3]) pre_comp, NULL);
1913 /* reduce the output to its unique minimal representation */
1914 felem_contract(x_in, x_out);
1915 felem_contract(y_in, y_out);
1916 felem_contract(z_in, z_out);
1917 if ((!felem_to_BN(x, x_in)) || (!felem_to_BN(y, y_in)) ||
1918 (!felem_to_BN(z, z_in)))
1919 {
1920 ECerr(EC_F_EC_GFP_NISTP521_POINTS_MUL, ERR_R_BN_LIB);
1921 goto err;
1922 }
1923 ret = EC_POINT_set_Jprojective_coordinates_GFp(group, r, x, y, z, ctx);
1924
1925 err:
1926 BN_CTX_end(ctx);
1927 if (generator != NULL)
1928 EC_POINT_free(generator);
1929 if (new_ctx != NULL)
1930 BN_CTX_free(new_ctx);
1931 if (secrets != NULL)
1932 OPENSSL_free(secrets);
1933 if (pre_comp != NULL)
1934 OPENSSL_free(pre_comp);
1935 if (tmp_felems != NULL)
1936 OPENSSL_free(tmp_felems);
1937 return ret;
1938 }
1939
1940 int ec_GFp_nistp521_precompute_mult(EC_GROUP *group, BN_CTX *ctx)
1941 {
1942 int ret = 0;
1943 NISTP521_PRE_COMP *pre = NULL;
1944 int i, j;
1945 BN_CTX *new_ctx = NULL;
1946 BIGNUM *x, *y;
1947 EC_POINT *generator = NULL;
1948 felem tmp_felems[16];
1949
1950 /* throw away old precomputation */
1951 EC_EX_DATA_free_data(&group->extra_data, nistp521_pre_comp_dup,
1952 nistp521_pre_comp_free, nistp521_pre_comp_clear_free);
1953 if (ctx == NULL)
1954 if ((ctx = new_ctx = BN_CTX_new()) == NULL) return 0;
1955 BN_CTX_start(ctx);
1956 if (((x = BN_CTX_get(ctx)) == NULL) ||
1957 ((y = BN_CTX_get(ctx)) == NULL))
1958 goto err;
1959 /* get the generator */
1960 if (group->generator == NULL) goto err;
1961 generator = EC_POINT_new(group);
1962 if (generator == NULL)
1963 goto err;
1964 BN_bin2bn(nistp521_curve_params[3], sizeof (felem_bytearray), x);
1965 BN_bin2bn(nistp521_curve_params[4], sizeof (felem_bytearray), y);
1966 if (!EC_POINT_set_affine_coordinates_GFp(group, generator, x, y, ctx))
1967 goto err;
1968 if ((pre = nistp521_pre_comp_new()) == NULL)
1969 goto err;
1970 /* if the generator is the standard one, use built-in precomputation */
1971 if (0 == EC_POINT_cmp(group, generator, group->generator, ctx))
1972 {
1973 memcpy(pre->g_pre_comp, gmul, sizeof(pre->g_pre_comp));
1974 ret = 1;
1975 goto err;
1976 }
1977 if ((!BN_to_felem(pre->g_pre_comp[1][0], &group->generator->X)) ||
1978 (!BN_to_felem(pre->g_pre_comp[1][1], &group->generator->Y)) ||
1979 (!BN_to_felem(pre->g_pre_comp[1][2], &group->generator->Z)))
1980 goto err;
1981 /* compute 2^130*G, 2^260*G, 2^390*G */
1982 for (i = 1; i <= 4; i <<= 1)
1983 {
1984 point_double(pre->g_pre_comp[2*i][0], pre->g_pre_comp[2*i][1],
1985 pre->g_pre_comp[2*i][2], pre->g_pre_comp[i][0],
1986 pre->g_pre_comp[i][1], pre->g_pre_comp[i][2]);
1987 for (j = 0; j < 129; ++j)
1988 {
1989 point_double(pre->g_pre_comp[2*i][0],
1990 pre->g_pre_comp[2*i][1],
1991 pre->g_pre_comp[2*i][2],
1992 pre->g_pre_comp[2*i][0],
1993 pre->g_pre_comp[2*i][1],
1994 pre->g_pre_comp[2*i][2]);
1995 }
1996 }
1997 /* g_pre_comp[0] is the point at infinity */
1998 memset(pre->g_pre_comp[0], 0, sizeof(pre->g_pre_comp[0]));
1999 /* the remaining multiples */
2000 /* 2^130*G + 2^260*G */
2001 point_add(pre->g_pre_comp[6][0], pre->g_pre_comp[6][1],
2002 pre->g_pre_comp[6][2], pre->g_pre_comp[4][0],
2003 pre->g_pre_comp[4][1], pre->g_pre_comp[4][2],
2004 0, pre->g_pre_comp[2][0], pre->g_pre_comp[2][1],
2005 pre->g_pre_comp[2][2]);
2006 /* 2^130*G + 2^390*G */
2007 point_add(pre->g_pre_comp[10][0], pre->g_pre_comp[10][1],
2008 pre->g_pre_comp[10][2], pre->g_pre_comp[8][0],
2009 pre->g_pre_comp[8][1], pre->g_pre_comp[8][2],
2010 0, pre->g_pre_comp[2][0], pre->g_pre_comp[2][1],
2011 pre->g_pre_comp[2][2]);
2012 /* 2^260*G + 2^390*G */
2013 point_add(pre->g_pre_comp[12][0], pre->g_pre_comp[12][1],
2014 pre->g_pre_comp[12][2], pre->g_pre_comp[8][0],
2015 pre->g_pre_comp[8][1], pre->g_pre_comp[8][2],
2016 0, pre->g_pre_comp[4][0], pre->g_pre_comp[4][1],
2017 pre->g_pre_comp[4][2]);
2018 /* 2^130*G + 2^260*G + 2^390*G */
2019 point_add(pre->g_pre_comp[14][0], pre->g_pre_comp[14][1],
2020 pre->g_pre_comp[14][2], pre->g_pre_comp[12][0],
2021 pre->g_pre_comp[12][1], pre->g_pre_comp[12][2],
2022 0, pre->g_pre_comp[2][0], pre->g_pre_comp[2][1],
2023 pre->g_pre_comp[2][2]);
2024 for (i = 1; i < 8; ++i)
2025 {
2026 /* odd multiples: add G */
2027 point_add(pre->g_pre_comp[2*i+1][0], pre->g_pre_comp[2*i+1][1],
2028 pre->g_pre_comp[2*i+1][2], pre->g_pre_comp[2*i][0],
2029 pre->g_pre_comp[2*i][1], pre->g_pre_comp[2*i][2],
2030 0, pre->g_pre_comp[1][0], pre->g_pre_comp[1][1],
2031 pre->g_pre_comp[1][2]);
2032 }
2033 make_points_affine(15, &(pre->g_pre_comp[1]), tmp_felems);
2034
2035 if (!EC_EX_DATA_set_data(&group->extra_data, pre, nistp521_pre_comp_dup,
2036 nistp521_pre_comp_free, nistp521_pre_comp_clear_free))
2037 goto err;
2038 ret = 1;
2039 pre = NULL;
2040 err:
2041 BN_CTX_end(ctx);
2042 if (generator != NULL)
2043 EC_POINT_free(generator);
2044 if (new_ctx != NULL)
2045 BN_CTX_free(new_ctx);
2046 if (pre)
2047 nistp521_pre_comp_free(pre);
2048 return ret;
2049 }
2050
2051 int ec_GFp_nistp521_have_precompute_mult(const EC_GROUP *group)
2052 {
2053 if (EC_EX_DATA_get_data(group->extra_data, nistp521_pre_comp_dup,
2054 nistp521_pre_comp_free, nistp521_pre_comp_clear_free)
2055 != NULL)
2056 return 1;
2057 else
2058 return 0;
2059 }
2060
2061 #else
2062 static void *dummy=&dummy;
2063 #endif