]> git.ipfire.org Git - thirdparty/openssl.git/blame - crypto/ec/ecp_nistp521.c
GH601: Various spelling fixes.
[thirdparty/openssl.git] / crypto / ec / ecp_nistp521.c
CommitLineData
3e00b4c9
BM
1/*
2 * Written by Adam Langley (Google) for the OpenSSL project
3 */
4/* Copyright 2011 Google Inc.
5 *
6 * Licensed under the Apache License, Version 2.0 (the "License");
7 *
8 * you may not use this file except in compliance with the License.
9 * You may obtain a copy of the License at
10 *
11 * http://www.apache.org/licenses/LICENSE-2.0
12 *
13 * Unless required by applicable law or agreed to in writing, software
14 * distributed under the License is distributed on an "AS IS" BASIS,
15 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 * See the License for the specific language governing permissions and
17 * limitations under the License.
18 */
19
20/*
21 * A 64-bit implementation of the NIST P-521 elliptic curve point multiplication
22 *
23 * OpenSSL integration was taken from Emilia Kasper's work in ecp_nistp224.c.
24 * Otherwise based on Emilia's P224 work, which was inspired by my curve25519
25 * work which got its smarts from Daniel J. Bernstein's work on the same.
26 */
27
e0d6132b
BM
28#include <openssl/opensslconf.h>
29#ifndef OPENSSL_NO_EC_NISTP_64_GCC_128
3e00b4c9 30
0f113f3e
MC
31# ifndef OPENSSL_SYS_VMS
32# include <stdint.h>
33# else
34# include <inttypes.h>
35# endif
71fa4513 36
0f113f3e
MC
37# include <string.h>
38# include <openssl/err.h>
39# include "ec_lcl.h"
3e00b4c9 40
0f113f3e 41# if defined(__GNUC__) && (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 1))
3e00b4c9 42 /* even with gcc, the typedef won't work for 32-bit platforms */
0f113f3e
MC
43typedef __uint128_t uint128_t; /* nonstandard; implemented by gcc on 64-bit
44 * platforms */
45# else
46# error "Need GCC 3.1 or later to define type uint128_t"
47# endif
3e00b4c9
BM
48
49typedef uint8_t u8;
50typedef uint64_t u64;
51typedef int64_t s64;
52
0f113f3e
MC
53/*
54 * The underlying field. P521 operates over GF(2^521-1). We can serialise an
55 * element of this field into 66 bytes where the most significant byte
56 * contains only a single bit. We call this an felem_bytearray.
57 */
3e00b4c9
BM
58
59typedef u8 felem_bytearray[66];
60
0f113f3e
MC
61/*
62 * These are the parameters of P521, taken from FIPS 186-3, section D.1.2.5.
63 * These values are big-endian.
64 */
65static const felem_bytearray nistp521_curve_params[5] = {
66 {0x01, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* p */
67 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
68 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
69 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
70 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
71 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
72 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
73 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
74 0xff, 0xff},
75 {0x01, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* a = -3 */
76 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
77 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
78 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
79 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
80 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
81 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
82 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
83 0xff, 0xfc},
84 {0x00, 0x51, 0x95, 0x3e, 0xb9, 0x61, 0x8e, 0x1c, /* b */
85 0x9a, 0x1f, 0x92, 0x9a, 0x21, 0xa0, 0xb6, 0x85,
86 0x40, 0xee, 0xa2, 0xda, 0x72, 0x5b, 0x99, 0xb3,
87 0x15, 0xf3, 0xb8, 0xb4, 0x89, 0x91, 0x8e, 0xf1,
88 0x09, 0xe1, 0x56, 0x19, 0x39, 0x51, 0xec, 0x7e,
89 0x93, 0x7b, 0x16, 0x52, 0xc0, 0xbd, 0x3b, 0xb1,
90 0xbf, 0x07, 0x35, 0x73, 0xdf, 0x88, 0x3d, 0x2c,
91 0x34, 0xf1, 0xef, 0x45, 0x1f, 0xd4, 0x6b, 0x50,
92 0x3f, 0x00},
93 {0x00, 0xc6, 0x85, 0x8e, 0x06, 0xb7, 0x04, 0x04, /* x */
94 0xe9, 0xcd, 0x9e, 0x3e, 0xcb, 0x66, 0x23, 0x95,
95 0xb4, 0x42, 0x9c, 0x64, 0x81, 0x39, 0x05, 0x3f,
96 0xb5, 0x21, 0xf8, 0x28, 0xaf, 0x60, 0x6b, 0x4d,
97 0x3d, 0xba, 0xa1, 0x4b, 0x5e, 0x77, 0xef, 0xe7,
98 0x59, 0x28, 0xfe, 0x1d, 0xc1, 0x27, 0xa2, 0xff,
99 0xa8, 0xde, 0x33, 0x48, 0xb3, 0xc1, 0x85, 0x6a,
100 0x42, 0x9b, 0xf9, 0x7e, 0x7e, 0x31, 0xc2, 0xe5,
101 0xbd, 0x66},
102 {0x01, 0x18, 0x39, 0x29, 0x6a, 0x78, 0x9a, 0x3b, /* y */
103 0xc0, 0x04, 0x5c, 0x8a, 0x5f, 0xb4, 0x2c, 0x7d,
104 0x1b, 0xd9, 0x98, 0xf5, 0x44, 0x49, 0x57, 0x9b,
105 0x44, 0x68, 0x17, 0xaf, 0xbd, 0x17, 0x27, 0x3e,
106 0x66, 0x2c, 0x97, 0xee, 0x72, 0x99, 0x5e, 0xf4,
107 0x26, 0x40, 0xc5, 0x50, 0xb9, 0x01, 0x3f, 0xad,
108 0x07, 0x61, 0x35, 0x3c, 0x70, 0x86, 0xa2, 0x72,
109 0xc2, 0x40, 0x88, 0xbe, 0x94, 0x76, 0x9f, 0xd1,
110 0x66, 0x50}
111};
3e00b4c9 112
1d97c843
TH
113/*-
114 * The representation of field elements.
3e00b4c9
BM
115 * ------------------------------------
116 *
117 * We represent field elements with nine values. These values are either 64 or
118 * 128 bits and the field element represented is:
119 * v[0]*2^0 + v[1]*2^58 + v[2]*2^116 + ... + v[8]*2^464 (mod p)
120 * Each of the nine values is called a 'limb'. Since the limbs are spaced only
121 * 58 bits apart, but are greater than 58 bits in length, the most significant
122 * bits of each limb overlap with the least significant bits of the next.
123 *
124 * A field element with 64-bit limbs is an 'felem'. One with 128-bit limbs is a
125 * 'largefelem' */
126
0f113f3e 127# define NLIMBS 9
3e00b4c9
BM
128
129typedef uint64_t limb;
130typedef limb felem[NLIMBS];
131typedef uint128_t largefelem[NLIMBS];
132
133static const limb bottom57bits = 0x1ffffffffffffff;
134static const limb bottom58bits = 0x3ffffffffffffff;
135
0f113f3e
MC
136/*
137 * bin66_to_felem takes a little-endian byte array and converts it into felem
138 * form. This assumes that the CPU is little-endian.
139 */
3e00b4c9 140static void bin66_to_felem(felem out, const u8 in[66])
0f113f3e
MC
141{
142 out[0] = (*((limb *) & in[0])) & bottom58bits;
143 out[1] = (*((limb *) & in[7]) >> 2) & bottom58bits;
144 out[2] = (*((limb *) & in[14]) >> 4) & bottom58bits;
145 out[3] = (*((limb *) & in[21]) >> 6) & bottom58bits;
146 out[4] = (*((limb *) & in[29])) & bottom58bits;
147 out[5] = (*((limb *) & in[36]) >> 2) & bottom58bits;
148 out[6] = (*((limb *) & in[43]) >> 4) & bottom58bits;
149 out[7] = (*((limb *) & in[50]) >> 6) & bottom58bits;
150 out[8] = (*((limb *) & in[58])) & bottom57bits;
151}
152
153/*
154 * felem_to_bin66 takes an felem and serialises into a little endian, 66 byte
155 * array. This assumes that the CPU is little-endian.
156 */
3e00b4c9 157static void felem_to_bin66(u8 out[66], const felem in)
0f113f3e
MC
158{
159 memset(out, 0, 66);
160 (*((limb *) & out[0])) = in[0];
161 (*((limb *) & out[7])) |= in[1] << 2;
162 (*((limb *) & out[14])) |= in[2] << 4;
163 (*((limb *) & out[21])) |= in[3] << 6;
164 (*((limb *) & out[29])) = in[4];
165 (*((limb *) & out[36])) |= in[5] << 2;
166 (*((limb *) & out[43])) |= in[6] << 4;
167 (*((limb *) & out[50])) |= in[7] << 6;
168 (*((limb *) & out[58])) = in[8];
169}
3e00b4c9
BM
170
171/* To preserve endianness when using BN_bn2bin and BN_bin2bn */
172static void flip_endian(u8 *out, const u8 *in, unsigned len)
0f113f3e
MC
173{
174 unsigned i;
175 for (i = 0; i < len; ++i)
176 out[i] = in[len - 1 - i];
177}
3e00b4c9
BM
178
179/* BN_to_felem converts an OpenSSL BIGNUM into an felem */
180static int BN_to_felem(felem out, const BIGNUM *bn)
0f113f3e
MC
181{
182 felem_bytearray b_in;
183 felem_bytearray b_out;
184 unsigned num_bytes;
185
186 /* BN_bn2bin eats leading zeroes */
16f8d4eb 187 memset(b_out, 0, sizeof(b_out));
0f113f3e
MC
188 num_bytes = BN_num_bytes(bn);
189 if (num_bytes > sizeof b_out) {
190 ECerr(EC_F_BN_TO_FELEM, EC_R_BIGNUM_OUT_OF_RANGE);
191 return 0;
192 }
193 if (BN_is_negative(bn)) {
194 ECerr(EC_F_BN_TO_FELEM, EC_R_BIGNUM_OUT_OF_RANGE);
195 return 0;
196 }
197 num_bytes = BN_bn2bin(bn, b_in);
198 flip_endian(b_out, b_in, num_bytes);
199 bin66_to_felem(out, b_out);
200 return 1;
201}
3e00b4c9
BM
202
203/* felem_to_BN converts an felem into an OpenSSL BIGNUM */
204static BIGNUM *felem_to_BN(BIGNUM *out, const felem in)
0f113f3e
MC
205{
206 felem_bytearray b_in, b_out;
207 felem_to_bin66(b_in, in);
208 flip_endian(b_out, b_in, sizeof b_out);
209 return BN_bin2bn(b_out, sizeof b_out, out);
210}
3e00b4c9 211
3a83462d
MC
212/*-
213 * Field operations
214 * ----------------
215 */
3e00b4c9
BM
216
217static void felem_one(felem out)
0f113f3e
MC
218{
219 out[0] = 1;
220 out[1] = 0;
221 out[2] = 0;
222 out[3] = 0;
223 out[4] = 0;
224 out[5] = 0;
225 out[6] = 0;
226 out[7] = 0;
227 out[8] = 0;
228}
3e00b4c9
BM
229
230static void felem_assign(felem out, const felem in)
0f113f3e
MC
231{
232 out[0] = in[0];
233 out[1] = in[1];
234 out[2] = in[2];
235 out[3] = in[3];
236 out[4] = in[4];
237 out[5] = in[5];
238 out[6] = in[6];
239 out[7] = in[7];
240 out[8] = in[8];
241}
3e00b4c9
BM
242
243/* felem_sum64 sets out = out + in. */
244static void felem_sum64(felem out, const felem in)
0f113f3e
MC
245{
246 out[0] += in[0];
247 out[1] += in[1];
248 out[2] += in[2];
249 out[3] += in[3];
250 out[4] += in[4];
251 out[5] += in[5];
252 out[6] += in[6];
253 out[7] += in[7];
254 out[8] += in[8];
255}
3e00b4c9
BM
256
257/* felem_scalar sets out = in * scalar */
258static void felem_scalar(felem out, const felem in, limb scalar)
0f113f3e
MC
259{
260 out[0] = in[0] * scalar;
261 out[1] = in[1] * scalar;
262 out[2] = in[2] * scalar;
263 out[3] = in[3] * scalar;
264 out[4] = in[4] * scalar;
265 out[5] = in[5] * scalar;
266 out[6] = in[6] * scalar;
267 out[7] = in[7] * scalar;
268 out[8] = in[8] * scalar;
269}
3e00b4c9
BM
270
271/* felem_scalar64 sets out = out * scalar */
272static void felem_scalar64(felem out, limb scalar)
0f113f3e
MC
273{
274 out[0] *= scalar;
275 out[1] *= scalar;
276 out[2] *= scalar;
277 out[3] *= scalar;
278 out[4] *= scalar;
279 out[5] *= scalar;
280 out[6] *= scalar;
281 out[7] *= scalar;
282 out[8] *= scalar;
283}
3e00b4c9
BM
284
285/* felem_scalar128 sets out = out * scalar */
286static void felem_scalar128(largefelem out, limb scalar)
0f113f3e
MC
287{
288 out[0] *= scalar;
289 out[1] *= scalar;
290 out[2] *= scalar;
291 out[3] *= scalar;
292 out[4] *= scalar;
293 out[5] *= scalar;
294 out[6] *= scalar;
295 out[7] *= scalar;
296 out[8] *= scalar;
297}
3e00b4c9 298
1d97c843
TH
299/*-
300 * felem_neg sets |out| to |-in|
3e00b4c9
BM
301 * On entry:
302 * in[i] < 2^59 + 2^14
303 * On exit:
304 * out[i] < 2^62
305 */
306static void felem_neg(felem out, const felem in)
0f113f3e
MC
307{
308 /* In order to prevent underflow, we subtract from 0 mod p. */
309 static const limb two62m3 = (((limb) 1) << 62) - (((limb) 1) << 5);
310 static const limb two62m2 = (((limb) 1) << 62) - (((limb) 1) << 4);
311
312 out[0] = two62m3 - in[0];
313 out[1] = two62m2 - in[1];
314 out[2] = two62m2 - in[2];
315 out[3] = two62m2 - in[3];
316 out[4] = two62m2 - in[4];
317 out[5] = two62m2 - in[5];
318 out[6] = two62m2 - in[6];
319 out[7] = two62m2 - in[7];
320 out[8] = two62m2 - in[8];
321}
3e00b4c9 322
1d97c843
TH
323/*-
324 * felem_diff64 subtracts |in| from |out|
3e00b4c9
BM
325 * On entry:
326 * in[i] < 2^59 + 2^14
327 * On exit:
328 * out[i] < out[i] + 2^62
329 */
330static void felem_diff64(felem out, const felem in)
0f113f3e
MC
331{
332 /*
333 * In order to prevent underflow, we add 0 mod p before subtracting.
334 */
335 static const limb two62m3 = (((limb) 1) << 62) - (((limb) 1) << 5);
336 static const limb two62m2 = (((limb) 1) << 62) - (((limb) 1) << 4);
337
338 out[0] += two62m3 - in[0];
339 out[1] += two62m2 - in[1];
340 out[2] += two62m2 - in[2];
341 out[3] += two62m2 - in[3];
342 out[4] += two62m2 - in[4];
343 out[5] += two62m2 - in[5];
344 out[6] += two62m2 - in[6];
345 out[7] += two62m2 - in[7];
346 out[8] += two62m2 - in[8];
347}
3e00b4c9 348
1d97c843
TH
349/*-
350 * felem_diff_128_64 subtracts |in| from |out|
3e00b4c9
BM
351 * On entry:
352 * in[i] < 2^62 + 2^17
353 * On exit:
354 * out[i] < out[i] + 2^63
355 */
356static void felem_diff_128_64(largefelem out, const felem in)
0f113f3e
MC
357{
358 /*
359 * In order to prevent underflow, we add 0 mod p before subtracting.
360 */
361 static const limb two63m6 = (((limb) 1) << 62) - (((limb) 1) << 5);
362 static const limb two63m5 = (((limb) 1) << 62) - (((limb) 1) << 4);
363
364 out[0] += two63m6 - in[0];
365 out[1] += two63m5 - in[1];
366 out[2] += two63m5 - in[2];
367 out[3] += two63m5 - in[3];
368 out[4] += two63m5 - in[4];
369 out[5] += two63m5 - in[5];
370 out[6] += two63m5 - in[6];
371 out[7] += two63m5 - in[7];
372 out[8] += two63m5 - in[8];
373}
3e00b4c9 374
1d97c843
TH
375/*-
376 * felem_diff_128_64 subtracts |in| from |out|
3e00b4c9
BM
377 * On entry:
378 * in[i] < 2^126
379 * On exit:
380 * out[i] < out[i] + 2^127 - 2^69
381 */
382static void felem_diff128(largefelem out, const largefelem in)
0f113f3e
MC
383{
384 /*
385 * In order to prevent underflow, we add 0 mod p before subtracting.
386 */
387 static const uint128_t two127m70 =
388 (((uint128_t) 1) << 127) - (((uint128_t) 1) << 70);
389 static const uint128_t two127m69 =
390 (((uint128_t) 1) << 127) - (((uint128_t) 1) << 69);
391
392 out[0] += (two127m70 - in[0]);
393 out[1] += (two127m69 - in[1]);
394 out[2] += (two127m69 - in[2]);
395 out[3] += (two127m69 - in[3]);
396 out[4] += (two127m69 - in[4]);
397 out[5] += (two127m69 - in[5]);
398 out[6] += (two127m69 - in[6]);
399 out[7] += (two127m69 - in[7]);
400 out[8] += (two127m69 - in[8]);
401}
3e00b4c9 402
1d97c843
TH
403/*-
404 * felem_square sets |out| = |in|^2
3e00b4c9
BM
405 * On entry:
406 * in[i] < 2^62
407 * On exit:
408 * out[i] < 17 * max(in[i]) * max(in[i])
409 */
410static void felem_square(largefelem out, const felem in)
0f113f3e
MC
411{
412 felem inx2, inx4;
413 felem_scalar(inx2, in, 2);
414 felem_scalar(inx4, in, 4);
415
35a1cc90
MC
416 /*-
417 * We have many cases were we want to do
418 * in[x] * in[y] +
419 * in[y] * in[x]
420 * This is obviously just
421 * 2 * in[x] * in[y]
422 * However, rather than do the doubling on the 128 bit result, we
423 * double one of the inputs to the multiplication by reading from
424 * |inx2|
425 */
0f113f3e
MC
426
427 out[0] = ((uint128_t) in[0]) * in[0];
428 out[1] = ((uint128_t) in[0]) * inx2[1];
429 out[2] = ((uint128_t) in[0]) * inx2[2] + ((uint128_t) in[1]) * in[1];
430 out[3] = ((uint128_t) in[0]) * inx2[3] + ((uint128_t) in[1]) * inx2[2];
431 out[4] = ((uint128_t) in[0]) * inx2[4] +
4eb504ae 432 ((uint128_t) in[1]) * inx2[3] + ((uint128_t) in[2]) * in[2];
0f113f3e 433 out[5] = ((uint128_t) in[0]) * inx2[5] +
4eb504ae 434 ((uint128_t) in[1]) * inx2[4] + ((uint128_t) in[2]) * inx2[3];
0f113f3e 435 out[6] = ((uint128_t) in[0]) * inx2[6] +
4eb504ae
AP
436 ((uint128_t) in[1]) * inx2[5] +
437 ((uint128_t) in[2]) * inx2[4] + ((uint128_t) in[3]) * in[3];
0f113f3e 438 out[7] = ((uint128_t) in[0]) * inx2[7] +
4eb504ae
AP
439 ((uint128_t) in[1]) * inx2[6] +
440 ((uint128_t) in[2]) * inx2[5] + ((uint128_t) in[3]) * inx2[4];
0f113f3e 441 out[8] = ((uint128_t) in[0]) * inx2[8] +
4eb504ae
AP
442 ((uint128_t) in[1]) * inx2[7] +
443 ((uint128_t) in[2]) * inx2[6] +
444 ((uint128_t) in[3]) * inx2[5] + ((uint128_t) in[4]) * in[4];
0f113f3e
MC
445
446 /*
447 * The remaining limbs fall above 2^521, with the first falling at 2^522.
448 * They correspond to locations one bit up from the limbs produced above
449 * so we would have to multiply by two to align them. Again, rather than
450 * operate on the 128-bit result, we double one of the inputs to the
451 * multiplication. If we want to double for both this reason, and the
452 * reason above, then we end up multiplying by four.
453 */
454
455 /* 9 */
456 out[0] += ((uint128_t) in[1]) * inx4[8] +
4eb504ae
AP
457 ((uint128_t) in[2]) * inx4[7] +
458 ((uint128_t) in[3]) * inx4[6] + ((uint128_t) in[4]) * inx4[5];
0f113f3e
MC
459
460 /* 10 */
461 out[1] += ((uint128_t) in[2]) * inx4[8] +
4eb504ae
AP
462 ((uint128_t) in[3]) * inx4[7] +
463 ((uint128_t) in[4]) * inx4[6] + ((uint128_t) in[5]) * inx2[5];
0f113f3e
MC
464
465 /* 11 */
466 out[2] += ((uint128_t) in[3]) * inx4[8] +
4eb504ae 467 ((uint128_t) in[4]) * inx4[7] + ((uint128_t) in[5]) * inx4[6];
0f113f3e
MC
468
469 /* 12 */
470 out[3] += ((uint128_t) in[4]) * inx4[8] +
4eb504ae 471 ((uint128_t) in[5]) * inx4[7] + ((uint128_t) in[6]) * inx2[6];
0f113f3e
MC
472
473 /* 13 */
474 out[4] += ((uint128_t) in[5]) * inx4[8] + ((uint128_t) in[6]) * inx4[7];
475
476 /* 14 */
477 out[5] += ((uint128_t) in[6]) * inx4[8] + ((uint128_t) in[7]) * inx2[7];
478
479 /* 15 */
480 out[6] += ((uint128_t) in[7]) * inx4[8];
481
482 /* 16 */
483 out[7] += ((uint128_t) in[8]) * inx2[8];
484}
3e00b4c9 485
1d97c843
TH
486/*-
487 * felem_mul sets |out| = |in1| * |in2|
3e00b4c9
BM
488 * On entry:
489 * in1[i] < 2^64
490 * in2[i] < 2^63
491 * On exit:
492 * out[i] < 17 * max(in1[i]) * max(in2[i])
493 */
494static void felem_mul(largefelem out, const felem in1, const felem in2)
0f113f3e
MC
495{
496 felem in2x2;
497 felem_scalar(in2x2, in2, 2);
498
499 out[0] = ((uint128_t) in1[0]) * in2[0];
500
4eb504ae
AP
501 out[1] = ((uint128_t) in1[0]) * in2[1] +
502 ((uint128_t) in1[1]) * in2[0];
0f113f3e
MC
503
504 out[2] = ((uint128_t) in1[0]) * in2[2] +
4eb504ae
AP
505 ((uint128_t) in1[1]) * in2[1] +
506 ((uint128_t) in1[2]) * in2[0];
0f113f3e
MC
507
508 out[3] = ((uint128_t) in1[0]) * in2[3] +
4eb504ae
AP
509 ((uint128_t) in1[1]) * in2[2] +
510 ((uint128_t) in1[2]) * in2[1] +
511 ((uint128_t) in1[3]) * in2[0];
0f113f3e
MC
512
513 out[4] = ((uint128_t) in1[0]) * in2[4] +
4eb504ae
AP
514 ((uint128_t) in1[1]) * in2[3] +
515 ((uint128_t) in1[2]) * in2[2] +
516 ((uint128_t) in1[3]) * in2[1] +
517 ((uint128_t) in1[4]) * in2[0];
0f113f3e
MC
518
519 out[5] = ((uint128_t) in1[0]) * in2[5] +
4eb504ae
AP
520 ((uint128_t) in1[1]) * in2[4] +
521 ((uint128_t) in1[2]) * in2[3] +
522 ((uint128_t) in1[3]) * in2[2] +
523 ((uint128_t) in1[4]) * in2[1] +
524 ((uint128_t) in1[5]) * in2[0];
0f113f3e
MC
525
526 out[6] = ((uint128_t) in1[0]) * in2[6] +
4eb504ae
AP
527 ((uint128_t) in1[1]) * in2[5] +
528 ((uint128_t) in1[2]) * in2[4] +
529 ((uint128_t) in1[3]) * in2[3] +
530 ((uint128_t) in1[4]) * in2[2] +
531 ((uint128_t) in1[5]) * in2[1] +
532 ((uint128_t) in1[6]) * in2[0];
0f113f3e
MC
533
534 out[7] = ((uint128_t) in1[0]) * in2[7] +
4eb504ae
AP
535 ((uint128_t) in1[1]) * in2[6] +
536 ((uint128_t) in1[2]) * in2[5] +
537 ((uint128_t) in1[3]) * in2[4] +
538 ((uint128_t) in1[4]) * in2[3] +
539 ((uint128_t) in1[5]) * in2[2] +
540 ((uint128_t) in1[6]) * in2[1] +
541 ((uint128_t) in1[7]) * in2[0];
0f113f3e
MC
542
543 out[8] = ((uint128_t) in1[0]) * in2[8] +
4eb504ae
AP
544 ((uint128_t) in1[1]) * in2[7] +
545 ((uint128_t) in1[2]) * in2[6] +
546 ((uint128_t) in1[3]) * in2[5] +
547 ((uint128_t) in1[4]) * in2[4] +
548 ((uint128_t) in1[5]) * in2[3] +
549 ((uint128_t) in1[6]) * in2[2] +
550 ((uint128_t) in1[7]) * in2[1] +
551 ((uint128_t) in1[8]) * in2[0];
0f113f3e
MC
552
553 /* See comment in felem_square about the use of in2x2 here */
554
555 out[0] += ((uint128_t) in1[1]) * in2x2[8] +
4eb504ae
AP
556 ((uint128_t) in1[2]) * in2x2[7] +
557 ((uint128_t) in1[3]) * in2x2[6] +
558 ((uint128_t) in1[4]) * in2x2[5] +
559 ((uint128_t) in1[5]) * in2x2[4] +
560 ((uint128_t) in1[6]) * in2x2[3] +
561 ((uint128_t) in1[7]) * in2x2[2] +
562 ((uint128_t) in1[8]) * in2x2[1];
0f113f3e
MC
563
564 out[1] += ((uint128_t) in1[2]) * in2x2[8] +
4eb504ae
AP
565 ((uint128_t) in1[3]) * in2x2[7] +
566 ((uint128_t) in1[4]) * in2x2[6] +
567 ((uint128_t) in1[5]) * in2x2[5] +
568 ((uint128_t) in1[6]) * in2x2[4] +
569 ((uint128_t) in1[7]) * in2x2[3] +
570 ((uint128_t) in1[8]) * in2x2[2];
0f113f3e
MC
571
572 out[2] += ((uint128_t) in1[3]) * in2x2[8] +
4eb504ae
AP
573 ((uint128_t) in1[4]) * in2x2[7] +
574 ((uint128_t) in1[5]) * in2x2[6] +
575 ((uint128_t) in1[6]) * in2x2[5] +
576 ((uint128_t) in1[7]) * in2x2[4] +
577 ((uint128_t) in1[8]) * in2x2[3];
0f113f3e
MC
578
579 out[3] += ((uint128_t) in1[4]) * in2x2[8] +
4eb504ae
AP
580 ((uint128_t) in1[5]) * in2x2[7] +
581 ((uint128_t) in1[6]) * in2x2[6] +
582 ((uint128_t) in1[7]) * in2x2[5] +
583 ((uint128_t) in1[8]) * in2x2[4];
0f113f3e
MC
584
585 out[4] += ((uint128_t) in1[5]) * in2x2[8] +
4eb504ae
AP
586 ((uint128_t) in1[6]) * in2x2[7] +
587 ((uint128_t) in1[7]) * in2x2[6] +
588 ((uint128_t) in1[8]) * in2x2[5];
0f113f3e
MC
589
590 out[5] += ((uint128_t) in1[6]) * in2x2[8] +
4eb504ae
AP
591 ((uint128_t) in1[7]) * in2x2[7] +
592 ((uint128_t) in1[8]) * in2x2[6];
0f113f3e
MC
593
594 out[6] += ((uint128_t) in1[7]) * in2x2[8] +
4eb504ae 595 ((uint128_t) in1[8]) * in2x2[7];
0f113f3e
MC
596
597 out[7] += ((uint128_t) in1[8]) * in2x2[8];
598}
3e00b4c9
BM
599
600static const limb bottom52bits = 0xfffffffffffff;
601
1d97c843
TH
602/*-
603 * felem_reduce converts a largefelem to an felem.
3e00b4c9
BM
604 * On entry:
605 * in[i] < 2^128
606 * On exit:
607 * out[i] < 2^59 + 2^14
608 */
609static void felem_reduce(felem out, const largefelem in)
0f113f3e
MC
610{
611 u64 overflow1, overflow2;
612
613 out[0] = ((limb) in[0]) & bottom58bits;
614 out[1] = ((limb) in[1]) & bottom58bits;
615 out[2] = ((limb) in[2]) & bottom58bits;
616 out[3] = ((limb) in[3]) & bottom58bits;
617 out[4] = ((limb) in[4]) & bottom58bits;
618 out[5] = ((limb) in[5]) & bottom58bits;
619 out[6] = ((limb) in[6]) & bottom58bits;
620 out[7] = ((limb) in[7]) & bottom58bits;
621 out[8] = ((limb) in[8]) & bottom58bits;
622
623 /* out[i] < 2^58 */
624
625 out[1] += ((limb) in[0]) >> 58;
626 out[1] += (((limb) (in[0] >> 64)) & bottom52bits) << 6;
35a1cc90
MC
627 /*-
628 * out[1] < 2^58 + 2^6 + 2^58
629 * = 2^59 + 2^6
630 */
0f113f3e
MC
631 out[2] += ((limb) (in[0] >> 64)) >> 52;
632
633 out[2] += ((limb) in[1]) >> 58;
634 out[2] += (((limb) (in[1] >> 64)) & bottom52bits) << 6;
635 out[3] += ((limb) (in[1] >> 64)) >> 52;
636
637 out[3] += ((limb) in[2]) >> 58;
638 out[3] += (((limb) (in[2] >> 64)) & bottom52bits) << 6;
639 out[4] += ((limb) (in[2] >> 64)) >> 52;
640
641 out[4] += ((limb) in[3]) >> 58;
642 out[4] += (((limb) (in[3] >> 64)) & bottom52bits) << 6;
643 out[5] += ((limb) (in[3] >> 64)) >> 52;
644
645 out[5] += ((limb) in[4]) >> 58;
646 out[5] += (((limb) (in[4] >> 64)) & bottom52bits) << 6;
647 out[6] += ((limb) (in[4] >> 64)) >> 52;
648
649 out[6] += ((limb) in[5]) >> 58;
650 out[6] += (((limb) (in[5] >> 64)) & bottom52bits) << 6;
651 out[7] += ((limb) (in[5] >> 64)) >> 52;
652
653 out[7] += ((limb) in[6]) >> 58;
654 out[7] += (((limb) (in[6] >> 64)) & bottom52bits) << 6;
655 out[8] += ((limb) (in[6] >> 64)) >> 52;
656
657 out[8] += ((limb) in[7]) >> 58;
658 out[8] += (((limb) (in[7] >> 64)) & bottom52bits) << 6;
35a1cc90
MC
659 /*-
660 * out[x > 1] < 2^58 + 2^6 + 2^58 + 2^12
661 * < 2^59 + 2^13
662 */
0f113f3e
MC
663 overflow1 = ((limb) (in[7] >> 64)) >> 52;
664
665 overflow1 += ((limb) in[8]) >> 58;
666 overflow1 += (((limb) (in[8] >> 64)) & bottom52bits) << 6;
667 overflow2 = ((limb) (in[8] >> 64)) >> 52;
668
669 overflow1 <<= 1; /* overflow1 < 2^13 + 2^7 + 2^59 */
670 overflow2 <<= 1; /* overflow2 < 2^13 */
671
672 out[0] += overflow1; /* out[0] < 2^60 */
673 out[1] += overflow2; /* out[1] < 2^59 + 2^6 + 2^13 */
674
675 out[1] += out[0] >> 58;
676 out[0] &= bottom58bits;
35a1cc90
MC
677 /*-
678 * out[0] < 2^58
679 * out[1] < 2^59 + 2^6 + 2^13 + 2^2
680 * < 2^59 + 2^14
681 */
0f113f3e 682}
3e00b4c9
BM
683
684static void felem_square_reduce(felem out, const felem in)
0f113f3e
MC
685{
686 largefelem tmp;
687 felem_square(tmp, in);
688 felem_reduce(out, tmp);
689}
3e00b4c9
BM
690
691static void felem_mul_reduce(felem out, const felem in1, const felem in2)
0f113f3e
MC
692{
693 largefelem tmp;
694 felem_mul(tmp, in1, in2);
695 felem_reduce(out, tmp);
696}
3e00b4c9 697
1d97c843
TH
698/*-
699 * felem_inv calculates |out| = |in|^{-1}
3e00b4c9
BM
700 *
701 * Based on Fermat's Little Theorem:
702 * a^p = a (mod p)
703 * a^{p-1} = 1 (mod p)
704 * a^{p-2} = a^{-1} (mod p)
705 */
706static void felem_inv(felem out, const felem in)
0f113f3e
MC
707{
708 felem ftmp, ftmp2, ftmp3, ftmp4;
709 largefelem tmp;
710 unsigned i;
711
712 felem_square(tmp, in);
713 felem_reduce(ftmp, tmp); /* 2^1 */
714 felem_mul(tmp, in, ftmp);
715 felem_reduce(ftmp, tmp); /* 2^2 - 2^0 */
716 felem_assign(ftmp2, ftmp);
717 felem_square(tmp, ftmp);
718 felem_reduce(ftmp, tmp); /* 2^3 - 2^1 */
719 felem_mul(tmp, in, ftmp);
720 felem_reduce(ftmp, tmp); /* 2^3 - 2^0 */
721 felem_square(tmp, ftmp);
722 felem_reduce(ftmp, tmp); /* 2^4 - 2^1 */
723
724 felem_square(tmp, ftmp2);
725 felem_reduce(ftmp3, tmp); /* 2^3 - 2^1 */
726 felem_square(tmp, ftmp3);
727 felem_reduce(ftmp3, tmp); /* 2^4 - 2^2 */
728 felem_mul(tmp, ftmp3, ftmp2);
729 felem_reduce(ftmp3, tmp); /* 2^4 - 2^0 */
730
731 felem_assign(ftmp2, ftmp3);
732 felem_square(tmp, ftmp3);
733 felem_reduce(ftmp3, tmp); /* 2^5 - 2^1 */
734 felem_square(tmp, ftmp3);
735 felem_reduce(ftmp3, tmp); /* 2^6 - 2^2 */
736 felem_square(tmp, ftmp3);
737 felem_reduce(ftmp3, tmp); /* 2^7 - 2^3 */
738 felem_square(tmp, ftmp3);
739 felem_reduce(ftmp3, tmp); /* 2^8 - 2^4 */
740 felem_assign(ftmp4, ftmp3);
741 felem_mul(tmp, ftmp3, ftmp);
742 felem_reduce(ftmp4, tmp); /* 2^8 - 2^1 */
743 felem_square(tmp, ftmp4);
744 felem_reduce(ftmp4, tmp); /* 2^9 - 2^2 */
745 felem_mul(tmp, ftmp3, ftmp2);
746 felem_reduce(ftmp3, tmp); /* 2^8 - 2^0 */
747 felem_assign(ftmp2, ftmp3);
748
749 for (i = 0; i < 8; i++) {
750 felem_square(tmp, ftmp3);
751 felem_reduce(ftmp3, tmp); /* 2^16 - 2^8 */
752 }
753 felem_mul(tmp, ftmp3, ftmp2);
754 felem_reduce(ftmp3, tmp); /* 2^16 - 2^0 */
755 felem_assign(ftmp2, ftmp3);
756
757 for (i = 0; i < 16; i++) {
758 felem_square(tmp, ftmp3);
759 felem_reduce(ftmp3, tmp); /* 2^32 - 2^16 */
760 }
761 felem_mul(tmp, ftmp3, ftmp2);
762 felem_reduce(ftmp3, tmp); /* 2^32 - 2^0 */
763 felem_assign(ftmp2, ftmp3);
764
765 for (i = 0; i < 32; i++) {
766 felem_square(tmp, ftmp3);
767 felem_reduce(ftmp3, tmp); /* 2^64 - 2^32 */
768 }
769 felem_mul(tmp, ftmp3, ftmp2);
770 felem_reduce(ftmp3, tmp); /* 2^64 - 2^0 */
771 felem_assign(ftmp2, ftmp3);
772
773 for (i = 0; i < 64; i++) {
774 felem_square(tmp, ftmp3);
775 felem_reduce(ftmp3, tmp); /* 2^128 - 2^64 */
776 }
777 felem_mul(tmp, ftmp3, ftmp2);
778 felem_reduce(ftmp3, tmp); /* 2^128 - 2^0 */
779 felem_assign(ftmp2, ftmp3);
780
781 for (i = 0; i < 128; i++) {
782 felem_square(tmp, ftmp3);
783 felem_reduce(ftmp3, tmp); /* 2^256 - 2^128 */
784 }
785 felem_mul(tmp, ftmp3, ftmp2);
786 felem_reduce(ftmp3, tmp); /* 2^256 - 2^0 */
787 felem_assign(ftmp2, ftmp3);
788
789 for (i = 0; i < 256; i++) {
790 felem_square(tmp, ftmp3);
791 felem_reduce(ftmp3, tmp); /* 2^512 - 2^256 */
792 }
793 felem_mul(tmp, ftmp3, ftmp2);
794 felem_reduce(ftmp3, tmp); /* 2^512 - 2^0 */
795
796 for (i = 0; i < 9; i++) {
797 felem_square(tmp, ftmp3);
798 felem_reduce(ftmp3, tmp); /* 2^521 - 2^9 */
799 }
800 felem_mul(tmp, ftmp3, ftmp4);
801 felem_reduce(ftmp3, tmp); /* 2^512 - 2^2 */
802 felem_mul(tmp, ftmp3, in);
803 felem_reduce(out, tmp); /* 2^512 - 3 */
3e00b4c9
BM
804}
805
806/* This is 2^521-1, expressed as an felem */
0f113f3e
MC
807static const felem kPrime = {
808 0x03ffffffffffffff, 0x03ffffffffffffff, 0x03ffffffffffffff,
809 0x03ffffffffffffff, 0x03ffffffffffffff, 0x03ffffffffffffff,
810 0x03ffffffffffffff, 0x03ffffffffffffff, 0x01ffffffffffffff
811};
3e00b4c9 812
1d97c843
TH
813/*-
814 * felem_is_zero returns a limb with all bits set if |in| == 0 (mod p) and 0
3e00b4c9
BM
815 * otherwise.
816 * On entry:
817 * in[i] < 2^59 + 2^14
818 */
819static limb felem_is_zero(const felem in)
0f113f3e
MC
820{
821 felem ftmp;
822 limb is_zero, is_p;
823 felem_assign(ftmp, in);
824
825 ftmp[0] += ftmp[8] >> 57;
826 ftmp[8] &= bottom57bits;
827 /* ftmp[8] < 2^57 */
828 ftmp[1] += ftmp[0] >> 58;
829 ftmp[0] &= bottom58bits;
830 ftmp[2] += ftmp[1] >> 58;
831 ftmp[1] &= bottom58bits;
832 ftmp[3] += ftmp[2] >> 58;
833 ftmp[2] &= bottom58bits;
834 ftmp[4] += ftmp[3] >> 58;
835 ftmp[3] &= bottom58bits;
836 ftmp[5] += ftmp[4] >> 58;
837 ftmp[4] &= bottom58bits;
838 ftmp[6] += ftmp[5] >> 58;
839 ftmp[5] &= bottom58bits;
840 ftmp[7] += ftmp[6] >> 58;
841 ftmp[6] &= bottom58bits;
842 ftmp[8] += ftmp[7] >> 58;
843 ftmp[7] &= bottom58bits;
844 /* ftmp[8] < 2^57 + 4 */
845
846 /*
847 * The ninth limb of 2*(2^521-1) is 0x03ffffffffffffff, which is greater
848 * than our bound for ftmp[8]. Therefore we only have to check if the
849 * zero is zero or 2^521-1.
850 */
851
852 is_zero = 0;
853 is_zero |= ftmp[0];
854 is_zero |= ftmp[1];
855 is_zero |= ftmp[2];
856 is_zero |= ftmp[3];
857 is_zero |= ftmp[4];
858 is_zero |= ftmp[5];
859 is_zero |= ftmp[6];
860 is_zero |= ftmp[7];
861 is_zero |= ftmp[8];
862
863 is_zero--;
864 /*
865 * We know that ftmp[i] < 2^63, therefore the only way that the top bit
866 * can be set is if is_zero was 0 before the decrement.
867 */
868 is_zero = ((s64) is_zero) >> 63;
869
870 is_p = ftmp[0] ^ kPrime[0];
871 is_p |= ftmp[1] ^ kPrime[1];
872 is_p |= ftmp[2] ^ kPrime[2];
873 is_p |= ftmp[3] ^ kPrime[3];
874 is_p |= ftmp[4] ^ kPrime[4];
875 is_p |= ftmp[5] ^ kPrime[5];
876 is_p |= ftmp[6] ^ kPrime[6];
877 is_p |= ftmp[7] ^ kPrime[7];
878 is_p |= ftmp[8] ^ kPrime[8];
879
880 is_p--;
881 is_p = ((s64) is_p) >> 63;
882
883 is_zero |= is_p;
884 return is_zero;
885}
3e00b4c9
BM
886
887static int felem_is_zero_int(const felem in)
0f113f3e
MC
888{
889 return (int)(felem_is_zero(in) & ((limb) 1));
890}
3e00b4c9 891
1d97c843
TH
892/*-
893 * felem_contract converts |in| to its unique, minimal representation.
3e00b4c9
BM
894 * On entry:
895 * in[i] < 2^59 + 2^14
896 */
897static void felem_contract(felem out, const felem in)
0f113f3e
MC
898{
899 limb is_p, is_greater, sign;
900 static const limb two58 = ((limb) 1) << 58;
901
902 felem_assign(out, in);
903
904 out[0] += out[8] >> 57;
905 out[8] &= bottom57bits;
906 /* out[8] < 2^57 */
907 out[1] += out[0] >> 58;
908 out[0] &= bottom58bits;
909 out[2] += out[1] >> 58;
910 out[1] &= bottom58bits;
911 out[3] += out[2] >> 58;
912 out[2] &= bottom58bits;
913 out[4] += out[3] >> 58;
914 out[3] &= bottom58bits;
915 out[5] += out[4] >> 58;
916 out[4] &= bottom58bits;
917 out[6] += out[5] >> 58;
918 out[5] &= bottom58bits;
919 out[7] += out[6] >> 58;
920 out[6] &= bottom58bits;
921 out[8] += out[7] >> 58;
922 out[7] &= bottom58bits;
923 /* out[8] < 2^57 + 4 */
924
925 /*
926 * If the value is greater than 2^521-1 then we have to subtract 2^521-1
927 * out. See the comments in felem_is_zero regarding why we don't test for
928 * other multiples of the prime.
929 */
930
931 /*
932 * First, if |out| is equal to 2^521-1, we subtract it out to get zero.
933 */
934
935 is_p = out[0] ^ kPrime[0];
936 is_p |= out[1] ^ kPrime[1];
937 is_p |= out[2] ^ kPrime[2];
938 is_p |= out[3] ^ kPrime[3];
939 is_p |= out[4] ^ kPrime[4];
940 is_p |= out[5] ^ kPrime[5];
941 is_p |= out[6] ^ kPrime[6];
942 is_p |= out[7] ^ kPrime[7];
943 is_p |= out[8] ^ kPrime[8];
944
945 is_p--;
946 is_p &= is_p << 32;
947 is_p &= is_p << 16;
948 is_p &= is_p << 8;
949 is_p &= is_p << 4;
950 is_p &= is_p << 2;
951 is_p &= is_p << 1;
952 is_p = ((s64) is_p) >> 63;
953 is_p = ~is_p;
954
955 /* is_p is 0 iff |out| == 2^521-1 and all ones otherwise */
956
957 out[0] &= is_p;
958 out[1] &= is_p;
959 out[2] &= is_p;
960 out[3] &= is_p;
961 out[4] &= is_p;
962 out[5] &= is_p;
963 out[6] &= is_p;
964 out[7] &= is_p;
965 out[8] &= is_p;
966
967 /*
968 * In order to test that |out| >= 2^521-1 we need only test if out[8] >>
969 * 57 is greater than zero as (2^521-1) + x >= 2^522
970 */
971 is_greater = out[8] >> 57;
972 is_greater |= is_greater << 32;
973 is_greater |= is_greater << 16;
974 is_greater |= is_greater << 8;
975 is_greater |= is_greater << 4;
976 is_greater |= is_greater << 2;
977 is_greater |= is_greater << 1;
978 is_greater = ((s64) is_greater) >> 63;
979
980 out[0] -= kPrime[0] & is_greater;
981 out[1] -= kPrime[1] & is_greater;
982 out[2] -= kPrime[2] & is_greater;
983 out[3] -= kPrime[3] & is_greater;
984 out[4] -= kPrime[4] & is_greater;
985 out[5] -= kPrime[5] & is_greater;
986 out[6] -= kPrime[6] & is_greater;
987 out[7] -= kPrime[7] & is_greater;
988 out[8] -= kPrime[8] & is_greater;
989
990 /* Eliminate negative coefficients */
991 sign = -(out[0] >> 63);
992 out[0] += (two58 & sign);
993 out[1] -= (1 & sign);
994 sign = -(out[1] >> 63);
995 out[1] += (two58 & sign);
996 out[2] -= (1 & sign);
997 sign = -(out[2] >> 63);
998 out[2] += (two58 & sign);
999 out[3] -= (1 & sign);
1000 sign = -(out[3] >> 63);
1001 out[3] += (two58 & sign);
1002 out[4] -= (1 & sign);
1003 sign = -(out[4] >> 63);
1004 out[4] += (two58 & sign);
1005 out[5] -= (1 & sign);
1006 sign = -(out[0] >> 63);
1007 out[5] += (two58 & sign);
1008 out[6] -= (1 & sign);
1009 sign = -(out[6] >> 63);
1010 out[6] += (two58 & sign);
1011 out[7] -= (1 & sign);
1012 sign = -(out[7] >> 63);
1013 out[7] += (two58 & sign);
1014 out[8] -= (1 & sign);
1015 sign = -(out[5] >> 63);
1016 out[5] += (two58 & sign);
1017 out[6] -= (1 & sign);
1018 sign = -(out[6] >> 63);
1019 out[6] += (two58 & sign);
1020 out[7] -= (1 & sign);
1021 sign = -(out[7] >> 63);
1022 out[7] += (two58 & sign);
1023 out[8] -= (1 & sign);
1024}
3e00b4c9 1025
1d97c843
TH
1026/*-
1027 * Group operations
3e00b4c9
BM
1028 * ----------------
1029 *
1030 * Building on top of the field operations we have the operations on the
1031 * elliptic curve group itself. Points on the curve are represented in Jacobian
1032 * coordinates */
1033
1d97c843 1034/*-
0d4fb843 1035 * point_double calculates 2*(x_in, y_in, z_in)
3e00b4c9
BM
1036 *
1037 * The method is taken from:
1038 * http://hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-3.html#doubling-dbl-2001-b
1039 *
1040 * Outputs can equal corresponding inputs, i.e., x_out == x_in is allowed.
1041 * while x_out == y_in is not (maybe this works, but it's not tested). */
1042static void
1043point_double(felem x_out, felem y_out, felem z_out,
0f113f3e
MC
1044 const felem x_in, const felem y_in, const felem z_in)
1045{
1046 largefelem tmp, tmp2;
1047 felem delta, gamma, beta, alpha, ftmp, ftmp2;
1048
1049 felem_assign(ftmp, x_in);
1050 felem_assign(ftmp2, x_in);
1051
1052 /* delta = z^2 */
1053 felem_square(tmp, z_in);
1054 felem_reduce(delta, tmp); /* delta[i] < 2^59 + 2^14 */
1055
1056 /* gamma = y^2 */
1057 felem_square(tmp, y_in);
1058 felem_reduce(gamma, tmp); /* gamma[i] < 2^59 + 2^14 */
1059
1060 /* beta = x*gamma */
1061 felem_mul(tmp, x_in, gamma);
1062 felem_reduce(beta, tmp); /* beta[i] < 2^59 + 2^14 */
1063
1064 /* alpha = 3*(x-delta)*(x+delta) */
1065 felem_diff64(ftmp, delta);
1066 /* ftmp[i] < 2^61 */
1067 felem_sum64(ftmp2, delta);
1068 /* ftmp2[i] < 2^60 + 2^15 */
1069 felem_scalar64(ftmp2, 3);
1070 /* ftmp2[i] < 3*2^60 + 3*2^15 */
1071 felem_mul(tmp, ftmp, ftmp2);
50e735f9
MC
1072 /*-
1073 * tmp[i] < 17(3*2^121 + 3*2^76)
1074 * = 61*2^121 + 61*2^76
1075 * < 64*2^121 + 64*2^76
1076 * = 2^127 + 2^82
1077 * < 2^128
1078 */
0f113f3e
MC
1079 felem_reduce(alpha, tmp);
1080
1081 /* x' = alpha^2 - 8*beta */
1082 felem_square(tmp, alpha);
1083 /*
1084 * tmp[i] < 17*2^120 < 2^125
1085 */
1086 felem_assign(ftmp, beta);
1087 felem_scalar64(ftmp, 8);
1088 /* ftmp[i] < 2^62 + 2^17 */
1089 felem_diff_128_64(tmp, ftmp);
1090 /* tmp[i] < 2^125 + 2^63 + 2^62 + 2^17 */
1091 felem_reduce(x_out, tmp);
1092
1093 /* z' = (y + z)^2 - gamma - delta */
1094 felem_sum64(delta, gamma);
1095 /* delta[i] < 2^60 + 2^15 */
1096 felem_assign(ftmp, y_in);
1097 felem_sum64(ftmp, z_in);
1098 /* ftmp[i] < 2^60 + 2^15 */
1099 felem_square(tmp, ftmp);
1100 /*
1101 * tmp[i] < 17(2^122) < 2^127
1102 */
1103 felem_diff_128_64(tmp, delta);
1104 /* tmp[i] < 2^127 + 2^63 */
1105 felem_reduce(z_out, tmp);
1106
1107 /* y' = alpha*(4*beta - x') - 8*gamma^2 */
1108 felem_scalar64(beta, 4);
1109 /* beta[i] < 2^61 + 2^16 */
1110 felem_diff64(beta, x_out);
1111 /* beta[i] < 2^61 + 2^60 + 2^16 */
1112 felem_mul(tmp, alpha, beta);
50e735f9
MC
1113 /*-
1114 * tmp[i] < 17*((2^59 + 2^14)(2^61 + 2^60 + 2^16))
1115 * = 17*(2^120 + 2^75 + 2^119 + 2^74 + 2^75 + 2^30)
1116 * = 17*(2^120 + 2^119 + 2^76 + 2^74 + 2^30)
1117 * < 2^128
1118 */
0f113f3e 1119 felem_square(tmp2, gamma);
50e735f9
MC
1120 /*-
1121 * tmp2[i] < 17*(2^59 + 2^14)^2
1122 * = 17*(2^118 + 2^74 + 2^28)
1123 */
0f113f3e 1124 felem_scalar128(tmp2, 8);
50e735f9
MC
1125 /*-
1126 * tmp2[i] < 8*17*(2^118 + 2^74 + 2^28)
1127 * = 2^125 + 2^121 + 2^81 + 2^77 + 2^35 + 2^31
1128 * < 2^126
1129 */
0f113f3e 1130 felem_diff128(tmp, tmp2);
50e735f9
MC
1131 /*-
1132 * tmp[i] < 2^127 - 2^69 + 17(2^120 + 2^119 + 2^76 + 2^74 + 2^30)
1133 * = 2^127 + 2^124 + 2^122 + 2^120 + 2^118 + 2^80 + 2^78 + 2^76 +
1134 * 2^74 + 2^69 + 2^34 + 2^30
1135 * < 2^128
1136 */
0f113f3e
MC
1137 felem_reduce(y_out, tmp);
1138}
3e00b4c9
BM
1139
1140/* copy_conditional copies in to out iff mask is all ones. */
0f113f3e
MC
1141static void copy_conditional(felem out, const felem in, limb mask)
1142{
1143 unsigned i;
1144 for (i = 0; i < NLIMBS; ++i) {
1145 const limb tmp = mask & (in[i] ^ out[i]);
1146 out[i] ^= tmp;
1147 }
1148}
3e00b4c9 1149
1d97c843 1150/*-
0d4fb843 1151 * point_add calculates (x1, y1, z1) + (x2, y2, z2)
3e00b4c9
BM
1152 *
1153 * The method is taken from
1154 * http://hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-3.html#addition-add-2007-bl,
1155 * adapted for mixed addition (z2 = 1, or z2 = 0 for the point at infinity).
1156 *
1157 * This function includes a branch for checking whether the two input points
1158 * are equal (while not equal to the point at infinity). This case never
1159 * happens during single point multiplication, so there is no timing leak for
1160 * ECDH or ECDSA signing. */
1161static void point_add(felem x3, felem y3, felem z3,
0f113f3e
MC
1162 const felem x1, const felem y1, const felem z1,
1163 const int mixed, const felem x2, const felem y2,
1164 const felem z2)
1165{
1166 felem ftmp, ftmp2, ftmp3, ftmp4, ftmp5, ftmp6, x_out, y_out, z_out;
1167 largefelem tmp, tmp2;
1168 limb x_equal, y_equal, z1_is_zero, z2_is_zero;
1169
1170 z1_is_zero = felem_is_zero(z1);
1171 z2_is_zero = felem_is_zero(z2);
1172
1173 /* ftmp = z1z1 = z1**2 */
1174 felem_square(tmp, z1);
1175 felem_reduce(ftmp, tmp);
1176
1177 if (!mixed) {
1178 /* ftmp2 = z2z2 = z2**2 */
1179 felem_square(tmp, z2);
1180 felem_reduce(ftmp2, tmp);
1181
1182 /* u1 = ftmp3 = x1*z2z2 */
1183 felem_mul(tmp, x1, ftmp2);
1184 felem_reduce(ftmp3, tmp);
1185
1186 /* ftmp5 = z1 + z2 */
1187 felem_assign(ftmp5, z1);
1188 felem_sum64(ftmp5, z2);
1189 /* ftmp5[i] < 2^61 */
1190
1191 /* ftmp5 = (z1 + z2)**2 - z1z1 - z2z2 = 2*z1z2 */
1192 felem_square(tmp, ftmp5);
1193 /* tmp[i] < 17*2^122 */
1194 felem_diff_128_64(tmp, ftmp);
1195 /* tmp[i] < 17*2^122 + 2^63 */
1196 felem_diff_128_64(tmp, ftmp2);
1197 /* tmp[i] < 17*2^122 + 2^64 */
1198 felem_reduce(ftmp5, tmp);
1199
1200 /* ftmp2 = z2 * z2z2 */
1201 felem_mul(tmp, ftmp2, z2);
1202 felem_reduce(ftmp2, tmp);
1203
1204 /* s1 = ftmp6 = y1 * z2**3 */
1205 felem_mul(tmp, y1, ftmp2);
1206 felem_reduce(ftmp6, tmp);
1207 } else {
1208 /*
1209 * We'll assume z2 = 1 (special case z2 = 0 is handled later)
1210 */
1211
1212 /* u1 = ftmp3 = x1*z2z2 */
1213 felem_assign(ftmp3, x1);
1214
1215 /* ftmp5 = 2*z1z2 */
1216 felem_scalar(ftmp5, z1, 2);
1217
1218 /* s1 = ftmp6 = y1 * z2**3 */
1219 felem_assign(ftmp6, y1);
1220 }
1221
1222 /* u2 = x2*z1z1 */
1223 felem_mul(tmp, x2, ftmp);
1224 /* tmp[i] < 17*2^120 */
1225
1226 /* h = ftmp4 = u2 - u1 */
1227 felem_diff_128_64(tmp, ftmp3);
1228 /* tmp[i] < 17*2^120 + 2^63 */
1229 felem_reduce(ftmp4, tmp);
1230
1231 x_equal = felem_is_zero(ftmp4);
1232
1233 /* z_out = ftmp5 * h */
1234 felem_mul(tmp, ftmp5, ftmp4);
1235 felem_reduce(z_out, tmp);
1236
1237 /* ftmp = z1 * z1z1 */
1238 felem_mul(tmp, ftmp, z1);
1239 felem_reduce(ftmp, tmp);
1240
1241 /* s2 = tmp = y2 * z1**3 */
1242 felem_mul(tmp, y2, ftmp);
1243 /* tmp[i] < 17*2^120 */
1244
1245 /* r = ftmp5 = (s2 - s1)*2 */
1246 felem_diff_128_64(tmp, ftmp6);
1247 /* tmp[i] < 17*2^120 + 2^63 */
1248 felem_reduce(ftmp5, tmp);
1249 y_equal = felem_is_zero(ftmp5);
1250 felem_scalar64(ftmp5, 2);
1251 /* ftmp5[i] < 2^61 */
1252
1253 if (x_equal && y_equal && !z1_is_zero && !z2_is_zero) {
1254 point_double(x3, y3, z3, x1, y1, z1);
1255 return;
1256 }
1257
1258 /* I = ftmp = (2h)**2 */
1259 felem_assign(ftmp, ftmp4);
1260 felem_scalar64(ftmp, 2);
1261 /* ftmp[i] < 2^61 */
1262 felem_square(tmp, ftmp);
1263 /* tmp[i] < 17*2^122 */
1264 felem_reduce(ftmp, tmp);
1265
1266 /* J = ftmp2 = h * I */
1267 felem_mul(tmp, ftmp4, ftmp);
1268 felem_reduce(ftmp2, tmp);
1269
1270 /* V = ftmp4 = U1 * I */
1271 felem_mul(tmp, ftmp3, ftmp);
1272 felem_reduce(ftmp4, tmp);
1273
1274 /* x_out = r**2 - J - 2V */
1275 felem_square(tmp, ftmp5);
1276 /* tmp[i] < 17*2^122 */
1277 felem_diff_128_64(tmp, ftmp2);
1278 /* tmp[i] < 17*2^122 + 2^63 */
1279 felem_assign(ftmp3, ftmp4);
1280 felem_scalar64(ftmp4, 2);
1281 /* ftmp4[i] < 2^61 */
1282 felem_diff_128_64(tmp, ftmp4);
1283 /* tmp[i] < 17*2^122 + 2^64 */
1284 felem_reduce(x_out, tmp);
1285
1286 /* y_out = r(V-x_out) - 2 * s1 * J */
1287 felem_diff64(ftmp3, x_out);
1288 /*
1289 * ftmp3[i] < 2^60 + 2^60 = 2^61
1290 */
1291 felem_mul(tmp, ftmp5, ftmp3);
1292 /* tmp[i] < 17*2^122 */
1293 felem_mul(tmp2, ftmp6, ftmp2);
1294 /* tmp2[i] < 17*2^120 */
1295 felem_scalar128(tmp2, 2);
1296 /* tmp2[i] < 17*2^121 */
1297 felem_diff128(tmp, tmp2);
1298 /*-
1299 * tmp[i] < 2^127 - 2^69 + 17*2^122
1300 * = 2^126 - 2^122 - 2^6 - 2^2 - 1
1301 * < 2^127
1302 */
1303 felem_reduce(y_out, tmp);
1304
1305 copy_conditional(x_out, x2, z1_is_zero);
1306 copy_conditional(x_out, x1, z2_is_zero);
1307 copy_conditional(y_out, y2, z1_is_zero);
1308 copy_conditional(y_out, y1, z2_is_zero);
1309 copy_conditional(z_out, z2, z1_is_zero);
1310 copy_conditional(z_out, z1, z2_is_zero);
1311 felem_assign(x3, x_out);
1312 felem_assign(y3, y_out);
1313 felem_assign(z3, z_out);
1314}
3e00b4c9 1315
1d97c843
TH
1316/*-
1317 * Base point pre computation
3e00b4c9
BM
1318 * --------------------------
1319 *
1320 * Two different sorts of precomputed tables are used in the following code.
1321 * Each contain various points on the curve, where each point is three field
1322 * elements (x, y, z).
1323 *
1324 * For the base point table, z is usually 1 (0 for the point at infinity).
1325 * This table has 16 elements:
1326 * index | bits | point
1327 * ------+---------+------------------------------
1328 * 0 | 0 0 0 0 | 0G
1329 * 1 | 0 0 0 1 | 1G
1330 * 2 | 0 0 1 0 | 2^130G
1331 * 3 | 0 0 1 1 | (2^130 + 1)G
1332 * 4 | 0 1 0 0 | 2^260G
1333 * 5 | 0 1 0 1 | (2^260 + 1)G
1334 * 6 | 0 1 1 0 | (2^260 + 2^130)G
1335 * 7 | 0 1 1 1 | (2^260 + 2^130 + 1)G
1336 * 8 | 1 0 0 0 | 2^390G
1337 * 9 | 1 0 0 1 | (2^390 + 1)G
1338 * 10 | 1 0 1 0 | (2^390 + 2^130)G
1339 * 11 | 1 0 1 1 | (2^390 + 2^130 + 1)G
1340 * 12 | 1 1 0 0 | (2^390 + 2^260)G
1341 * 13 | 1 1 0 1 | (2^390 + 2^260 + 1)G
1342 * 14 | 1 1 1 0 | (2^390 + 2^260 + 2^130)G
1343 * 15 | 1 1 1 1 | (2^390 + 2^260 + 2^130 + 1)G
1344 *
1345 * The reason for this is so that we can clock bits into four different
1346 * locations when doing simple scalar multiplies against the base point.
1347 *
1348 * Tables for other points have table[i] = iG for i in 0 .. 16. */
1349
1350/* gmul is the table of precomputed base points */
4eb504ae
AP
1351static const felem gmul[16][3] = {
1352{{0, 0, 0, 0, 0, 0, 0, 0, 0},
1353 {0, 0, 0, 0, 0, 0, 0, 0, 0},
1354 {0, 0, 0, 0, 0, 0, 0, 0, 0}},
0f113f3e
MC
1355{{0x017e7e31c2e5bd66, 0x022cf0615a90a6fe, 0x00127a2ffa8de334,
1356 0x01dfbf9d64a3f877, 0x006b4d3dbaa14b5e, 0x014fed487e0a2bd8,
1357 0x015b4429c6481390, 0x03a73678fb2d988e, 0x00c6858e06b70404},
1358 {0x00be94769fd16650, 0x031c21a89cb09022, 0x039013fad0761353,
1359 0x02657bd099031542, 0x03273e662c97ee72, 0x01e6d11a05ebef45,
1360 0x03d1bd998f544495, 0x03001172297ed0b1, 0x011839296a789a3b},
1361 {1, 0, 0, 0, 0, 0, 0, 0, 0}},
1362{{0x0373faacbc875bae, 0x00f325023721c671, 0x00f666fd3dbde5ad,
1363 0x01a6932363f88ea7, 0x01fc6d9e13f9c47b, 0x03bcbffc2bbf734e,
1364 0x013ee3c3647f3a92, 0x029409fefe75d07d, 0x00ef9199963d85e5},
1365 {0x011173743ad5b178, 0x02499c7c21bf7d46, 0x035beaeabb8b1a58,
1366 0x00f989c4752ea0a3, 0x0101e1de48a9c1a3, 0x01a20076be28ba6c,
1367 0x02f8052e5eb2de95, 0x01bfe8f82dea117c, 0x0160074d3c36ddb7},
1368 {1, 0, 0, 0, 0, 0, 0, 0, 0}},
1369{{0x012f3fc373393b3b, 0x03d3d6172f1419fa, 0x02adc943c0b86873,
1370 0x00d475584177952b, 0x012a4d1673750ee2, 0x00512517a0f13b0c,
1371 0x02b184671a7b1734, 0x0315b84236f1a50a, 0x00a4afc472edbdb9},
1372 {0x00152a7077f385c4, 0x03044007d8d1c2ee, 0x0065829d61d52b52,
1373 0x00494ff6b6631d0d, 0x00a11d94d5f06bcf, 0x02d2f89474d9282e,
1374 0x0241c5727c06eeb9, 0x0386928710fbdb9d, 0x01f883f727b0dfbe},
1375 {1, 0, 0, 0, 0, 0, 0, 0, 0}},
1376{{0x019b0c3c9185544d, 0x006243a37c9d97db, 0x02ee3cbe030a2ad2,
1377 0x00cfdd946bb51e0d, 0x0271c00932606b91, 0x03f817d1ec68c561,
1378 0x03f37009806a369c, 0x03c1f30baf184fd5, 0x01091022d6d2f065},
1379 {0x0292c583514c45ed, 0x0316fca51f9a286c, 0x00300af507c1489a,
1380 0x0295f69008298cf1, 0x02c0ed8274943d7b, 0x016509b9b47a431e,
1381 0x02bc9de9634868ce, 0x005b34929bffcb09, 0x000c1a0121681524},
1382 {1, 0, 0, 0, 0, 0, 0, 0, 0}},
1383{{0x0286abc0292fb9f2, 0x02665eee9805b3f7, 0x01ed7455f17f26d6,
1384 0x0346355b83175d13, 0x006284944cd0a097, 0x0191895bcdec5e51,
1385 0x02e288370afda7d9, 0x03b22312bfefa67a, 0x01d104d3fc0613fe},
1386 {0x0092421a12f7e47f, 0x0077a83fa373c501, 0x03bd25c5f696bd0d,
1387 0x035c41e4d5459761, 0x01ca0d1742b24f53, 0x00aaab27863a509c,
1388 0x018b6de47df73917, 0x025c0b771705cd01, 0x01fd51d566d760a7},
1389 {1, 0, 0, 0, 0, 0, 0, 0, 0}},
1390{{0x01dd92ff6b0d1dbd, 0x039c5e2e8f8afa69, 0x0261ed13242c3b27,
1391 0x0382c6e67026e6a0, 0x01d60b10be2089f9, 0x03c15f3dce86723f,
1392 0x03c764a32d2a062d, 0x017307eac0fad056, 0x018207c0b96c5256},
1393 {0x0196a16d60e13154, 0x03e6ce74c0267030, 0x00ddbf2b4e52a5aa,
1394 0x012738241bbf31c8, 0x00ebe8dc04685a28, 0x024c2ad6d380d4a2,
1395 0x035ee062a6e62d0e, 0x0029ed74af7d3a0f, 0x00eef32aec142ebd},
1396 {1, 0, 0, 0, 0, 0, 0, 0, 0}},
1397{{0x00c31ec398993b39, 0x03a9f45bcda68253, 0x00ac733c24c70890,
1398 0x00872b111401ff01, 0x01d178c23195eafb, 0x03bca2c816b87f74,
1399 0x0261a9af46fbad7a, 0x0324b2a8dd3d28f9, 0x00918121d8f24e23},
1400 {0x032bc8c1ca983cd7, 0x00d869dfb08fc8c6, 0x01693cb61fce1516,
1401 0x012a5ea68f4e88a8, 0x010869cab88d7ae3, 0x009081ad277ceee1,
1402 0x033a77166d064cdc, 0x03955235a1fb3a95, 0x01251a4a9b25b65e},
1403 {1, 0, 0, 0, 0, 0, 0, 0, 0}},
1404{{0x00148a3a1b27f40b, 0x0123186df1b31fdc, 0x00026e7beaad34ce,
1405 0x01db446ac1d3dbba, 0x0299c1a33437eaec, 0x024540610183cbb7,
1406 0x0173bb0e9ce92e46, 0x02b937e43921214b, 0x01ab0436a9bf01b5},
1407 {0x0383381640d46948, 0x008dacbf0e7f330f, 0x03602122bcc3f318,
1408 0x01ee596b200620d6, 0x03bd0585fda430b3, 0x014aed77fd123a83,
1409 0x005ace749e52f742, 0x0390fe041da2b842, 0x0189a8ceb3299242},
1410 {1, 0, 0, 0, 0, 0, 0, 0, 0}},
1411{{0x012a19d6b3282473, 0x00c0915918b423ce, 0x023a954eb94405ae,
1412 0x00529f692be26158, 0x0289fa1b6fa4b2aa, 0x0198ae4ceea346ef,
1413 0x0047d8cdfbdedd49, 0x00cc8c8953f0f6b8, 0x001424abbff49203},
1414 {0x0256732a1115a03a, 0x0351bc38665c6733, 0x03f7b950fb4a6447,
1415 0x000afffa94c22155, 0x025763d0a4dab540, 0x000511e92d4fc283,
1416 0x030a7e9eda0ee96c, 0x004c3cd93a28bf0a, 0x017edb3a8719217f},
1417 {1, 0, 0, 0, 0, 0, 0, 0, 0}},
1418{{0x011de5675a88e673, 0x031d7d0f5e567fbe, 0x0016b2062c970ae5,
1419 0x03f4a2be49d90aa7, 0x03cef0bd13822866, 0x03f0923dcf774a6c,
1420 0x0284bebc4f322f72, 0x016ab2645302bb2c, 0x01793f95dace0e2a},
1421 {0x010646e13527a28f, 0x01ca1babd59dc5e7, 0x01afedfd9a5595df,
1422 0x01f15785212ea6b1, 0x0324e5d64f6ae3f4, 0x02d680f526d00645,
1423 0x0127920fadf627a7, 0x03b383f75df4f684, 0x0089e0057e783b0a},
1424 {1, 0, 0, 0, 0, 0, 0, 0, 0}},
1425{{0x00f334b9eb3c26c6, 0x0298fdaa98568dce, 0x01c2d24843a82292,
1426 0x020bcb24fa1b0711, 0x02cbdb3d2b1875e6, 0x0014907598f89422,
1427 0x03abe3aa43b26664, 0x02cbf47f720bc168, 0x0133b5e73014b79b},
1428 {0x034aab5dab05779d, 0x00cdc5d71fee9abb, 0x0399f16bd4bd9d30,
1429 0x03582fa592d82647, 0x02be1cdfb775b0e9, 0x0034f7cea32e94cb,
1430 0x0335a7f08f56f286, 0x03b707e9565d1c8b, 0x0015c946ea5b614f},
1431 {1, 0, 0, 0, 0, 0, 0, 0, 0}},
1432{{0x024676f6cff72255, 0x00d14625cac96378, 0x00532b6008bc3767,
1433 0x01fc16721b985322, 0x023355ea1b091668, 0x029de7afdc0317c3,
1434 0x02fc8a7ca2da037c, 0x02de1217d74a6f30, 0x013f7173175b73bf},
1435 {0x0344913f441490b5, 0x0200f9e272b61eca, 0x0258a246b1dd55d2,
1436 0x03753db9ea496f36, 0x025e02937a09c5ef, 0x030cbd3d14012692,
1437 0x01793a67e70dc72a, 0x03ec1d37048a662e, 0x006550f700c32a8d},
1438 {1, 0, 0, 0, 0, 0, 0, 0, 0}},
1439{{0x00d3f48a347eba27, 0x008e636649b61bd8, 0x00d3b93716778fb3,
1440 0x004d1915757bd209, 0x019d5311a3da44e0, 0x016d1afcbbe6aade,
1441 0x0241bf5f73265616, 0x0384672e5d50d39b, 0x005009fee522b684},
1442 {0x029b4fab064435fe, 0x018868ee095bbb07, 0x01ea3d6936cc92b8,
1443 0x000608b00f78a2f3, 0x02db911073d1c20f, 0x018205938470100a,
1444 0x01f1e4964cbe6ff2, 0x021a19a29eed4663, 0x01414485f42afa81},
1445 {1, 0, 0, 0, 0, 0, 0, 0, 0}},
1446{{0x01612b3a17f63e34, 0x03813992885428e6, 0x022b3c215b5a9608,
1447 0x029b4057e19f2fcb, 0x0384059a587af7e6, 0x02d6400ace6fe610,
1448 0x029354d896e8e331, 0x00c047ee6dfba65e, 0x0037720542e9d49d},
1449 {0x02ce9eed7c5e9278, 0x0374ed703e79643b, 0x01316c54c4072006,
1450 0x005aaa09054b2ee8, 0x002824000c840d57, 0x03d4eba24771ed86,
1451 0x0189c50aabc3bdae, 0x0338c01541e15510, 0x00466d56e38eed42},
1452 {1, 0, 0, 0, 0, 0, 0, 0, 0}},
1453{{0x007efd8330ad8bd6, 0x02465ed48047710b, 0x0034c6606b215e0c,
1454 0x016ae30c53cbf839, 0x01fa17bd37161216, 0x018ead4e61ce8ab9,
1455 0x005482ed5f5dee46, 0x037543755bba1d7f, 0x005e5ac7e70a9d0f},
1456 {0x0117e1bb2fdcb2a2, 0x03deea36249f40c4, 0x028d09b4a6246cb7,
1457 0x03524b8855bcf756, 0x023d7d109d5ceb58, 0x0178e43e3223ef9c,
1458 0x0154536a0c6e966a, 0x037964d1286ee9fe, 0x0199bcd90e125055},
1459 {1, 0, 0, 0, 0, 0, 0, 0, 0}}
1460};
1461
1462/*
1463 * select_point selects the |idx|th point from a precomputation table and
1464 * copies it to out.
1465 */
b853717f 1466 /* pre_comp below is of the size provided in |size| */
0f113f3e
MC
1467static void select_point(const limb idx, unsigned int size,
1468 const felem pre_comp[][3], felem out[3])
1469{
1470 unsigned i, j;
1471 limb *outlimbs = &out[0][0];
16f8d4eb 1472
88f4c6f3 1473 memset(out, 0, sizeof(*out) * 3);
0f113f3e
MC
1474
1475 for (i = 0; i < size; i++) {
1476 const limb *inlimbs = &pre_comp[i][0][0];
1477 limb mask = i ^ idx;
1478 mask |= mask >> 4;
1479 mask |= mask >> 2;
1480 mask |= mask >> 1;
1481 mask &= 1;
1482 mask--;
1483 for (j = 0; j < NLIMBS * 3; j++)
1484 outlimbs[j] |= inlimbs[j] & mask;
1485 }
1486}
3e00b4c9
BM
1487
1488/* get_bit returns the |i|th bit in |in| */
1489static char get_bit(const felem_bytearray in, int i)
0f113f3e
MC
1490{
1491 if (i < 0)
1492 return 0;
1493 return (in[i >> 3] >> (i & 7)) & 1;
1494}
3e00b4c9 1495
0f113f3e
MC
1496/*
1497 * Interleaved point multiplication using precomputed point multiples: The
1498 * small point multiples 0*P, 1*P, ..., 16*P are in pre_comp[], the scalars
1499 * in scalars[]. If g_scalar is non-NULL, we also add this multiple of the
1500 * generator, using certain (large) precomputed multiples in g_pre_comp.
1501 * Output point (X, Y, Z) is stored in x_out, y_out, z_out
1502 */
1503static void batch_mul(felem x_out, felem y_out, felem z_out,
1504 const felem_bytearray scalars[],
1505 const unsigned num_points, const u8 *g_scalar,
1506 const int mixed, const felem pre_comp[][17][3],
1507 const felem g_pre_comp[16][3])
1508{
1509 int i, skip;
1510 unsigned num, gen_mul = (g_scalar != NULL);
1511 felem nq[3], tmp[4];
1512 limb bits;
1513 u8 sign, digit;
1514
1515 /* set nq to the point at infinity */
16f8d4eb 1516 memset(nq, 0, sizeof(nq));
0f113f3e
MC
1517
1518 /*
1519 * Loop over all scalars msb-to-lsb, interleaving additions of multiples
1520 * of the generator (last quarter of rounds) and additions of other
1521 * points multiples (every 5th round).
1522 */
1523 skip = 1; /* save two point operations in the first
1524 * round */
1525 for (i = (num_points ? 520 : 130); i >= 0; --i) {
1526 /* double */
1527 if (!skip)
1528 point_double(nq[0], nq[1], nq[2], nq[0], nq[1], nq[2]);
1529
1530 /* add multiples of the generator */
1531 if (gen_mul && (i <= 130)) {
1532 bits = get_bit(g_scalar, i + 390) << 3;
1533 if (i < 130) {
1534 bits |= get_bit(g_scalar, i + 260) << 2;
1535 bits |= get_bit(g_scalar, i + 130) << 1;
1536 bits |= get_bit(g_scalar, i);
1537 }
1538 /* select the point to add, in constant time */
1539 select_point(bits, 16, g_pre_comp, tmp);
1540 if (!skip) {
1541 /* The 1 argument below is for "mixed" */
1542 point_add(nq[0], nq[1], nq[2],
1543 nq[0], nq[1], nq[2], 1, tmp[0], tmp[1], tmp[2]);
1544 } else {
1545 memcpy(nq, tmp, 3 * sizeof(felem));
1546 skip = 0;
1547 }
1548 }
1549
1550 /* do other additions every 5 doublings */
1551 if (num_points && (i % 5 == 0)) {
1552 /* loop over all scalars */
1553 for (num = 0; num < num_points; ++num) {
1554 bits = get_bit(scalars[num], i + 4) << 5;
1555 bits |= get_bit(scalars[num], i + 3) << 4;
1556 bits |= get_bit(scalars[num], i + 2) << 3;
1557 bits |= get_bit(scalars[num], i + 1) << 2;
1558 bits |= get_bit(scalars[num], i) << 1;
1559 bits |= get_bit(scalars[num], i - 1);
1560 ec_GFp_nistp_recode_scalar_bits(&sign, &digit, bits);
1561
1562 /*
1563 * select the point to add or subtract, in constant time
1564 */
1565 select_point(digit, 17, pre_comp[num], tmp);
1566 felem_neg(tmp[3], tmp[1]); /* (X, -Y, Z) is the negative
1567 * point */
1568 copy_conditional(tmp[1], tmp[3], (-(limb) sign));
1569
1570 if (!skip) {
1571 point_add(nq[0], nq[1], nq[2],
1572 nq[0], nq[1], nq[2],
1573 mixed, tmp[0], tmp[1], tmp[2]);
1574 } else {
1575 memcpy(nq, tmp, 3 * sizeof(felem));
1576 skip = 0;
1577 }
1578 }
1579 }
1580 }
1581 felem_assign(x_out, nq[0]);
1582 felem_assign(y_out, nq[1]);
1583 felem_assign(z_out, nq[2]);
1584}
3e00b4c9
BM
1585
1586/* Precomputation for the group generator. */
126d6864 1587struct nistp521_pre_comp_st {
0f113f3e
MC
1588 felem g_pre_comp[16][3];
1589 int references;
3aef36ff 1590};
3e00b4c9
BM
1591
1592const EC_METHOD *EC_GFp_nistp521_method(void)
0f113f3e
MC
1593{
1594 static const EC_METHOD ret = {
1595 EC_FLAGS_DEFAULT_OCT,
1596 NID_X9_62_prime_field,
1597 ec_GFp_nistp521_group_init,
1598 ec_GFp_simple_group_finish,
1599 ec_GFp_simple_group_clear_finish,
1600 ec_GFp_nist_group_copy,
1601 ec_GFp_nistp521_group_set_curve,
1602 ec_GFp_simple_group_get_curve,
1603 ec_GFp_simple_group_get_degree,
1604 ec_GFp_simple_group_check_discriminant,
1605 ec_GFp_simple_point_init,
1606 ec_GFp_simple_point_finish,
1607 ec_GFp_simple_point_clear_finish,
1608 ec_GFp_simple_point_copy,
1609 ec_GFp_simple_point_set_to_infinity,
1610 ec_GFp_simple_set_Jprojective_coordinates_GFp,
1611 ec_GFp_simple_get_Jprojective_coordinates_GFp,
1612 ec_GFp_simple_point_set_affine_coordinates,
1613 ec_GFp_nistp521_point_get_affine_coordinates,
1614 0 /* point_set_compressed_coordinates */ ,
1615 0 /* point2oct */ ,
1616 0 /* oct2point */ ,
1617 ec_GFp_simple_add,
1618 ec_GFp_simple_dbl,
1619 ec_GFp_simple_invert,
1620 ec_GFp_simple_is_at_infinity,
1621 ec_GFp_simple_is_on_curve,
1622 ec_GFp_simple_cmp,
1623 ec_GFp_simple_make_affine,
1624 ec_GFp_simple_points_make_affine,
1625 ec_GFp_nistp521_points_mul,
1626 ec_GFp_nistp521_precompute_mult,
1627 ec_GFp_nistp521_have_precompute_mult,
1628 ec_GFp_nist_field_mul,
1629 ec_GFp_nist_field_sqr,
1630 0 /* field_div */ ,
1631 0 /* field_encode */ ,
1632 0 /* field_decode */ ,
1633 0 /* field_set_to_one */
1634 };
1635
1636 return &ret;
1637}
3e00b4c9
BM
1638
1639/******************************************************************************/
0f113f3e
MC
1640/*
1641 * FUNCTIONS TO MANAGE PRECOMPUTATION
3e00b4c9
BM
1642 */
1643
1644static NISTP521_PRE_COMP *nistp521_pre_comp_new()
0f113f3e 1645{
b51bce94 1646 NISTP521_PRE_COMP *ret = OPENSSL_zalloc(sizeof(*ret));
b4faea50 1647
90945fa3 1648 if (ret == NULL) {
0f113f3e
MC
1649 ECerr(EC_F_NISTP521_PRE_COMP_NEW, ERR_R_MALLOC_FAILURE);
1650 return ret;
1651 }
0f113f3e
MC
1652 ret->references = 1;
1653 return ret;
1654}
3e00b4c9 1655
3aef36ff 1656NISTP521_PRE_COMP *EC_nistp521_pre_comp_dup(NISTP521_PRE_COMP *p)
0f113f3e 1657{
3aef36ff
RS
1658 if (p != NULL)
1659 CRYPTO_add(&p->references, 1, CRYPTO_LOCK_EC_PRE_COMP);
1660 return p;
0f113f3e 1661}
3e00b4c9 1662
3aef36ff 1663void EC_nistp521_pre_comp_free(NISTP521_PRE_COMP *p)
0f113f3e 1664{
3aef36ff
RS
1665 if (p == NULL
1666 || CRYPTO_add(&p->references, -1, CRYPTO_LOCK_EC_PRE_COMP) > 0)
0f113f3e 1667 return;
3aef36ff 1668 OPENSSL_free(p);
0f113f3e 1669}
3e00b4c9
BM
1670
1671/******************************************************************************/
0f113f3e
MC
1672/*
1673 * OPENSSL EC_METHOD FUNCTIONS
3e00b4c9
BM
1674 */
1675
1676int ec_GFp_nistp521_group_init(EC_GROUP *group)
0f113f3e
MC
1677{
1678 int ret;
1679 ret = ec_GFp_simple_group_init(group);
1680 group->a_is_minus3 = 1;
1681 return ret;
1682}
3e00b4c9
BM
1683
1684int ec_GFp_nistp521_group_set_curve(EC_GROUP *group, const BIGNUM *p,
0f113f3e
MC
1685 const BIGNUM *a, const BIGNUM *b,
1686 BN_CTX *ctx)
1687{
1688 int ret = 0;
1689 BN_CTX *new_ctx = NULL;
1690 BIGNUM *curve_p, *curve_a, *curve_b;
1691
1692 if (ctx == NULL)
1693 if ((ctx = new_ctx = BN_CTX_new()) == NULL)
1694 return 0;
1695 BN_CTX_start(ctx);
1696 if (((curve_p = BN_CTX_get(ctx)) == NULL) ||
1697 ((curve_a = BN_CTX_get(ctx)) == NULL) ||
1698 ((curve_b = BN_CTX_get(ctx)) == NULL))
1699 goto err;
1700 BN_bin2bn(nistp521_curve_params[0], sizeof(felem_bytearray), curve_p);
1701 BN_bin2bn(nistp521_curve_params[1], sizeof(felem_bytearray), curve_a);
1702 BN_bin2bn(nistp521_curve_params[2], sizeof(felem_bytearray), curve_b);
1703 if ((BN_cmp(curve_p, p)) || (BN_cmp(curve_a, a)) || (BN_cmp(curve_b, b))) {
1704 ECerr(EC_F_EC_GFP_NISTP521_GROUP_SET_CURVE,
1705 EC_R_WRONG_CURVE_PARAMETERS);
1706 goto err;
1707 }
1708 group->field_mod_func = BN_nist_mod_521;
1709 ret = ec_GFp_simple_group_set_curve(group, p, a, b, ctx);
1710 err:
1711 BN_CTX_end(ctx);
23a1d5e9 1712 BN_CTX_free(new_ctx);
0f113f3e
MC
1713 return ret;
1714}
1715
1716/*
1717 * Takes the Jacobian coordinates (X, Y, Z) of a point and returns (X', Y') =
1718 * (X/Z^2, Y/Z^3)
1719 */
3e00b4c9 1720int ec_GFp_nistp521_point_get_affine_coordinates(const EC_GROUP *group,
0f113f3e
MC
1721 const EC_POINT *point,
1722 BIGNUM *x, BIGNUM *y,
1723 BN_CTX *ctx)
1724{
1725 felem z1, z2, x_in, y_in, x_out, y_out;
1726 largefelem tmp;
1727
1728 if (EC_POINT_is_at_infinity(group, point)) {
1729 ECerr(EC_F_EC_GFP_NISTP521_POINT_GET_AFFINE_COORDINATES,
1730 EC_R_POINT_AT_INFINITY);
1731 return 0;
1732 }
ace8f546
AP
1733 if ((!BN_to_felem(x_in, point->X)) || (!BN_to_felem(y_in, point->Y)) ||
1734 (!BN_to_felem(z1, point->Z)))
0f113f3e
MC
1735 return 0;
1736 felem_inv(z2, z1);
1737 felem_square(tmp, z2);
1738 felem_reduce(z1, tmp);
1739 felem_mul(tmp, x_in, z1);
1740 felem_reduce(x_in, tmp);
1741 felem_contract(x_out, x_in);
1742 if (x != NULL) {
1743 if (!felem_to_BN(x, x_out)) {
1744 ECerr(EC_F_EC_GFP_NISTP521_POINT_GET_AFFINE_COORDINATES,
1745 ERR_R_BN_LIB);
1746 return 0;
1747 }
1748 }
1749 felem_mul(tmp, z1, z2);
1750 felem_reduce(z1, tmp);
1751 felem_mul(tmp, y_in, z1);
1752 felem_reduce(y_in, tmp);
1753 felem_contract(y_out, y_in);
1754 if (y != NULL) {
1755 if (!felem_to_BN(y, y_out)) {
1756 ECerr(EC_F_EC_GFP_NISTP521_POINT_GET_AFFINE_COORDINATES,
1757 ERR_R_BN_LIB);
1758 return 0;
1759 }
1760 }
1761 return 1;
1762}
3e00b4c9 1763
b853717f 1764/* points below is of size |num|, and tmp_felems is of size |num+1/ */
0f113f3e
MC
1765static void make_points_affine(size_t num, felem points[][3],
1766 felem tmp_felems[])
1767{
1768 /*
1769 * Runs in constant time, unless an input is the point at infinity (which
1770 * normally shouldn't happen).
1771 */
1772 ec_GFp_nistp_points_make_affine_internal(num,
1773 points,
1774 sizeof(felem),
1775 tmp_felems,
1776 (void (*)(void *))felem_one,
1777 (int (*)(const void *))
1778 felem_is_zero_int,
1779 (void (*)(void *, const void *))
1780 felem_assign,
1781 (void (*)(void *, const void *))
1782 felem_square_reduce, (void (*)
1783 (void *,
1784 const void
1785 *,
1786 const void
1787 *))
1788 felem_mul_reduce,
1789 (void (*)(void *, const void *))
1790 felem_inv,
1791 (void (*)(void *, const void *))
1792 felem_contract);
1793}
1794
1795/*
1796 * Computes scalar*generator + \sum scalars[i]*points[i], ignoring NULL
1797 * values Result is stored in r (r can equal one of the inputs).
1798 */
3e00b4c9 1799int ec_GFp_nistp521_points_mul(const EC_GROUP *group, EC_POINT *r,
0f113f3e
MC
1800 const BIGNUM *scalar, size_t num,
1801 const EC_POINT *points[],
1802 const BIGNUM *scalars[], BN_CTX *ctx)
1803{
1804 int ret = 0;
1805 int j;
1806 int mixed = 0;
1807 BN_CTX *new_ctx = NULL;
1808 BIGNUM *x, *y, *z, *tmp_scalar;
1809 felem_bytearray g_secret;
1810 felem_bytearray *secrets = NULL;
16f8d4eb 1811 felem (*pre_comp)[17][3] = NULL;
0f113f3e
MC
1812 felem *tmp_felems = NULL;
1813 felem_bytearray tmp;
1814 unsigned i, num_bytes;
1815 int have_pre_comp = 0;
1816 size_t num_points = num;
1817 felem x_in, y_in, z_in, x_out, y_out, z_out;
1818 NISTP521_PRE_COMP *pre = NULL;
1819 felem(*g_pre_comp)[3] = NULL;
1820 EC_POINT *generator = NULL;
1821 const EC_POINT *p = NULL;
1822 const BIGNUM *p_scalar = NULL;
1823
1824 if (ctx == NULL)
1825 if ((ctx = new_ctx = BN_CTX_new()) == NULL)
1826 return 0;
1827 BN_CTX_start(ctx);
1828 if (((x = BN_CTX_get(ctx)) == NULL) ||
1829 ((y = BN_CTX_get(ctx)) == NULL) ||
1830 ((z = BN_CTX_get(ctx)) == NULL) ||
1831 ((tmp_scalar = BN_CTX_get(ctx)) == NULL))
1832 goto err;
1833
1834 if (scalar != NULL) {
3aef36ff 1835 pre = group->pre_comp.nistp521;
0f113f3e
MC
1836 if (pre)
1837 /* we have precomputation, try to use it */
1838 g_pre_comp = &pre->g_pre_comp[0];
1839 else
1840 /* try to use the standard precomputation */
1841 g_pre_comp = (felem(*)[3]) gmul;
1842 generator = EC_POINT_new(group);
1843 if (generator == NULL)
1844 goto err;
1845 /* get the generator from precomputation */
1846 if (!felem_to_BN(x, g_pre_comp[1][0]) ||
1847 !felem_to_BN(y, g_pre_comp[1][1]) ||
1848 !felem_to_BN(z, g_pre_comp[1][2])) {
1849 ECerr(EC_F_EC_GFP_NISTP521_POINTS_MUL, ERR_R_BN_LIB);
1850 goto err;
1851 }
1852 if (!EC_POINT_set_Jprojective_coordinates_GFp(group,
1853 generator, x, y, z,
1854 ctx))
1855 goto err;
1856 if (0 == EC_POINT_cmp(group, generator, group->generator, ctx))
1857 /* precomputation matches generator */
1858 have_pre_comp = 1;
1859 else
1860 /*
1861 * we don't have valid precomputation: treat the generator as a
1862 * random point
1863 */
1864 num_points++;
1865 }
1866
1867 if (num_points > 0) {
1868 if (num_points >= 2) {
1869 /*
1870 * unless we precompute multiples for just one point, converting
1871 * those into affine form is time well spent
1872 */
1873 mixed = 1;
1874 }
b51bce94
RS
1875 secrets = OPENSSL_zalloc(sizeof(*secrets) * num_points);
1876 pre_comp = OPENSSL_zalloc(sizeof(*pre_comp) * num_points);
0f113f3e
MC
1877 if (mixed)
1878 tmp_felems =
88f4c6f3 1879 OPENSSL_malloc(sizeof(*tmp_felems) * (num_points * 17 + 1));
0f113f3e
MC
1880 if ((secrets == NULL) || (pre_comp == NULL)
1881 || (mixed && (tmp_felems == NULL))) {
1882 ECerr(EC_F_EC_GFP_NISTP521_POINTS_MUL, ERR_R_MALLOC_FAILURE);
1883 goto err;
1884 }
1885
1886 /*
1887 * we treat NULL scalars as 0, and NULL points as points at infinity,
1888 * i.e., they contribute nothing to the linear combination
1889 */
0f113f3e
MC
1890 for (i = 0; i < num_points; ++i) {
1891 if (i == num)
1892 /*
1893 * we didn't have a valid precomputation, so we pick the
1894 * generator
1895 */
1896 {
1897 p = EC_GROUP_get0_generator(group);
1898 p_scalar = scalar;
1899 } else
1900 /* the i^th point */
1901 {
1902 p = points[i];
1903 p_scalar = scalars[i];
1904 }
1905 if ((p_scalar != NULL) && (p != NULL)) {
1906 /* reduce scalar to 0 <= scalar < 2^521 */
1907 if ((BN_num_bits(p_scalar) > 521)
1908 || (BN_is_negative(p_scalar))) {
1909 /*
1910 * this is an unusual input, and we don't guarantee
1911 * constant-timeness
1912 */
ace8f546 1913 if (!BN_nnmod(tmp_scalar, p_scalar, group->order, ctx)) {
0f113f3e
MC
1914 ECerr(EC_F_EC_GFP_NISTP521_POINTS_MUL, ERR_R_BN_LIB);
1915 goto err;
1916 }
1917 num_bytes = BN_bn2bin(tmp_scalar, tmp);
1918 } else
1919 num_bytes = BN_bn2bin(p_scalar, tmp);
1920 flip_endian(secrets[i], tmp, num_bytes);
1921 /* precompute multiples */
ace8f546
AP
1922 if ((!BN_to_felem(x_out, p->X)) ||
1923 (!BN_to_felem(y_out, p->Y)) ||
1924 (!BN_to_felem(z_out, p->Z)))
0f113f3e
MC
1925 goto err;
1926 memcpy(pre_comp[i][1][0], x_out, sizeof(felem));
1927 memcpy(pre_comp[i][1][1], y_out, sizeof(felem));
1928 memcpy(pre_comp[i][1][2], z_out, sizeof(felem));
1929 for (j = 2; j <= 16; ++j) {
1930 if (j & 1) {
1931 point_add(pre_comp[i][j][0], pre_comp[i][j][1],
1932 pre_comp[i][j][2], pre_comp[i][1][0],
1933 pre_comp[i][1][1], pre_comp[i][1][2], 0,
1934 pre_comp[i][j - 1][0],
1935 pre_comp[i][j - 1][1],
1936 pre_comp[i][j - 1][2]);
1937 } else {
1938 point_double(pre_comp[i][j][0], pre_comp[i][j][1],
1939 pre_comp[i][j][2], pre_comp[i][j / 2][0],
1940 pre_comp[i][j / 2][1],
1941 pre_comp[i][j / 2][2]);
1942 }
1943 }
1944 }
1945 }
1946 if (mixed)
1947 make_points_affine(num_points * 17, pre_comp[0], tmp_felems);
1948 }
1949
1950 /* the scalar for the generator */
1951 if ((scalar != NULL) && (have_pre_comp)) {
1952 memset(g_secret, 0, sizeof(g_secret));
1953 /* reduce scalar to 0 <= scalar < 2^521 */
1954 if ((BN_num_bits(scalar) > 521) || (BN_is_negative(scalar))) {
1955 /*
1956 * this is an unusual input, and we don't guarantee
1957 * constant-timeness
1958 */
ace8f546 1959 if (!BN_nnmod(tmp_scalar, scalar, group->order, ctx)) {
0f113f3e
MC
1960 ECerr(EC_F_EC_GFP_NISTP521_POINTS_MUL, ERR_R_BN_LIB);
1961 goto err;
1962 }
1963 num_bytes = BN_bn2bin(tmp_scalar, tmp);
1964 } else
1965 num_bytes = BN_bn2bin(scalar, tmp);
1966 flip_endian(g_secret, tmp, num_bytes);
1967 /* do the multiplication with generator precomputation */
1968 batch_mul(x_out, y_out, z_out,
1969 (const felem_bytearray(*))secrets, num_points,
1970 g_secret,
1971 mixed, (const felem(*)[17][3])pre_comp,
1972 (const felem(*)[3])g_pre_comp);
1973 } else
1974 /* do the multiplication without generator precomputation */
1975 batch_mul(x_out, y_out, z_out,
1976 (const felem_bytearray(*))secrets, num_points,
1977 NULL, mixed, (const felem(*)[17][3])pre_comp, NULL);
1978 /* reduce the output to its unique minimal representation */
1979 felem_contract(x_in, x_out);
1980 felem_contract(y_in, y_out);
1981 felem_contract(z_in, z_out);
1982 if ((!felem_to_BN(x, x_in)) || (!felem_to_BN(y, y_in)) ||
1983 (!felem_to_BN(z, z_in))) {
1984 ECerr(EC_F_EC_GFP_NISTP521_POINTS_MUL, ERR_R_BN_LIB);
1985 goto err;
1986 }
1987 ret = EC_POINT_set_Jprojective_coordinates_GFp(group, r, x, y, z, ctx);
1988
1989 err:
1990 BN_CTX_end(ctx);
8fdc3734 1991 EC_POINT_free(generator);
23a1d5e9 1992 BN_CTX_free(new_ctx);
b548a1f1
RS
1993 OPENSSL_free(secrets);
1994 OPENSSL_free(pre_comp);
1995 OPENSSL_free(tmp_felems);
0f113f3e
MC
1996 return ret;
1997}
3e00b4c9
BM
1998
1999int ec_GFp_nistp521_precompute_mult(EC_GROUP *group, BN_CTX *ctx)
0f113f3e
MC
2000{
2001 int ret = 0;
2002 NISTP521_PRE_COMP *pre = NULL;
2003 int i, j;
2004 BN_CTX *new_ctx = NULL;
2005 BIGNUM *x, *y;
2006 EC_POINT *generator = NULL;
2007 felem tmp_felems[16];
2008
2009 /* throw away old precomputation */
2c52ac9b 2010 EC_pre_comp_free(group);
0f113f3e
MC
2011 if (ctx == NULL)
2012 if ((ctx = new_ctx = BN_CTX_new()) == NULL)
2013 return 0;
2014 BN_CTX_start(ctx);
2015 if (((x = BN_CTX_get(ctx)) == NULL) || ((y = BN_CTX_get(ctx)) == NULL))
2016 goto err;
2017 /* get the generator */
2018 if (group->generator == NULL)
2019 goto err;
2020 generator = EC_POINT_new(group);
2021 if (generator == NULL)
2022 goto err;
2023 BN_bin2bn(nistp521_curve_params[3], sizeof(felem_bytearray), x);
2024 BN_bin2bn(nistp521_curve_params[4], sizeof(felem_bytearray), y);
2025 if (!EC_POINT_set_affine_coordinates_GFp(group, generator, x, y, ctx))
2026 goto err;
2027 if ((pre = nistp521_pre_comp_new()) == NULL)
2028 goto err;
2029 /*
2030 * if the generator is the standard one, use built-in precomputation
2031 */
2032 if (0 == EC_POINT_cmp(group, generator, group->generator, ctx)) {
2033 memcpy(pre->g_pre_comp, gmul, sizeof(pre->g_pre_comp));
615614c8 2034 goto done;
0f113f3e 2035 }
ace8f546
AP
2036 if ((!BN_to_felem(pre->g_pre_comp[1][0], group->generator->X)) ||
2037 (!BN_to_felem(pre->g_pre_comp[1][1], group->generator->Y)) ||
2038 (!BN_to_felem(pre->g_pre_comp[1][2], group->generator->Z)))
0f113f3e
MC
2039 goto err;
2040 /* compute 2^130*G, 2^260*G, 2^390*G */
2041 for (i = 1; i <= 4; i <<= 1) {
2042 point_double(pre->g_pre_comp[2 * i][0], pre->g_pre_comp[2 * i][1],
2043 pre->g_pre_comp[2 * i][2], pre->g_pre_comp[i][0],
2044 pre->g_pre_comp[i][1], pre->g_pre_comp[i][2]);
2045 for (j = 0; j < 129; ++j) {
2046 point_double(pre->g_pre_comp[2 * i][0],
2047 pre->g_pre_comp[2 * i][1],
2048 pre->g_pre_comp[2 * i][2],
2049 pre->g_pre_comp[2 * i][0],
2050 pre->g_pre_comp[2 * i][1],
2051 pre->g_pre_comp[2 * i][2]);
2052 }
2053 }
2054 /* g_pre_comp[0] is the point at infinity */
2055 memset(pre->g_pre_comp[0], 0, sizeof(pre->g_pre_comp[0]));
2056 /* the remaining multiples */
2057 /* 2^130*G + 2^260*G */
2058 point_add(pre->g_pre_comp[6][0], pre->g_pre_comp[6][1],
2059 pre->g_pre_comp[6][2], pre->g_pre_comp[4][0],
2060 pre->g_pre_comp[4][1], pre->g_pre_comp[4][2],
2061 0, pre->g_pre_comp[2][0], pre->g_pre_comp[2][1],
2062 pre->g_pre_comp[2][2]);
2063 /* 2^130*G + 2^390*G */
2064 point_add(pre->g_pre_comp[10][0], pre->g_pre_comp[10][1],
2065 pre->g_pre_comp[10][2], pre->g_pre_comp[8][0],
2066 pre->g_pre_comp[8][1], pre->g_pre_comp[8][2],
2067 0, pre->g_pre_comp[2][0], pre->g_pre_comp[2][1],
2068 pre->g_pre_comp[2][2]);
2069 /* 2^260*G + 2^390*G */
2070 point_add(pre->g_pre_comp[12][0], pre->g_pre_comp[12][1],
2071 pre->g_pre_comp[12][2], pre->g_pre_comp[8][0],
2072 pre->g_pre_comp[8][1], pre->g_pre_comp[8][2],
2073 0, pre->g_pre_comp[4][0], pre->g_pre_comp[4][1],
2074 pre->g_pre_comp[4][2]);
2075 /* 2^130*G + 2^260*G + 2^390*G */
2076 point_add(pre->g_pre_comp[14][0], pre->g_pre_comp[14][1],
2077 pre->g_pre_comp[14][2], pre->g_pre_comp[12][0],
2078 pre->g_pre_comp[12][1], pre->g_pre_comp[12][2],
2079 0, pre->g_pre_comp[2][0], pre->g_pre_comp[2][1],
2080 pre->g_pre_comp[2][2]);
2081 for (i = 1; i < 8; ++i) {
2082 /* odd multiples: add G */
2083 point_add(pre->g_pre_comp[2 * i + 1][0],
2084 pre->g_pre_comp[2 * i + 1][1],
2085 pre->g_pre_comp[2 * i + 1][2], pre->g_pre_comp[2 * i][0],
2086 pre->g_pre_comp[2 * i][1], pre->g_pre_comp[2 * i][2], 0,
2087 pre->g_pre_comp[1][0], pre->g_pre_comp[1][1],
2088 pre->g_pre_comp[1][2]);
2089 }
2090 make_points_affine(15, &(pre->g_pre_comp[1]), tmp_felems);
2091
615614c8 2092 done:
3aef36ff 2093 SETPRECOMP(group, nistp521, pre);
0f113f3e
MC
2094 ret = 1;
2095 pre = NULL;
3e00b4c9 2096 err:
0f113f3e 2097 BN_CTX_end(ctx);
8fdc3734 2098 EC_POINT_free(generator);
23a1d5e9 2099 BN_CTX_free(new_ctx);
3aef36ff 2100 EC_nistp521_pre_comp_free(pre);
0f113f3e
MC
2101 return ret;
2102}
3e00b4c9
BM
2103
2104int ec_GFp_nistp521_have_precompute_mult(const EC_GROUP *group)
0f113f3e 2105{
126d6864 2106 return HAVEPRECOMP(group, nistp521);
0f113f3e 2107}
3e00b4c9
BM
2108
2109#else
0f113f3e 2110static void *dummy = &dummy;
3e00b4c9 2111#endif