1 /* crypto/ec/ecp_nistp521.c */
3 * Written by Adam Langley (Google) for the OpenSSL project
5 /* Copyright 2011 Google Inc.
7 * Licensed under the Apache License, Version 2.0 (the "License");
9 * you may not use this file except in compliance with the License.
10 * You may obtain a copy of the License at
12 * http://www.apache.org/licenses/LICENSE-2.0
14 * Unless required by applicable law or agreed to in writing, software
15 * distributed under the License is distributed on an "AS IS" BASIS,
16 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 * See the License for the specific language governing permissions and
18 * limitations under the License.
22 * A 64-bit implementation of the NIST P-521 elliptic curve point multiplication
24 * OpenSSL integration was taken from Emilia Kasper's work in ecp_nistp224.c.
25 * Otherwise based on Emilia's P224 work, which was inspired by my curve25519
26 * work which got its smarts from Daniel J. Bernstein's work on the same.
29 #include <openssl/opensslconf.h>
30 #ifndef OPENSSL_NO_EC_NISTP_64_GCC_128
32 #ifndef OPENSSL_SYS_VMS
39 #include <openssl/err.h>
42 #if defined(__GNUC__) && (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 1))
43 /* even with gcc, the typedef won't work for 32-bit platforms */
44 typedef __uint128_t uint128_t
; /* nonstandard; implemented by gcc on 64-bit platforms */
46 #error "Need GCC 3.1 or later to define type uint128_t"
53 /* The underlying field.
55 * P521 operates over GF(2^521-1). We can serialise an element of this field
56 * into 66 bytes where the most significant byte contains only a single bit. We
57 * call this an felem_bytearray. */
59 typedef u8 felem_bytearray
[66];
61 /* These are the parameters of P521, taken from FIPS 186-3, section D.1.2.5.
62 * These values are big-endian. */
63 static const felem_bytearray nistp521_curve_params
[5] =
65 {0x01, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* p */
66 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
67 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
68 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
69 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
70 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
71 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
72 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
74 {0x01, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* a = -3 */
75 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
76 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
77 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
78 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
79 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
80 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
81 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
83 {0x00, 0x51, 0x95, 0x3e, 0xb9, 0x61, 0x8e, 0x1c, /* b */
84 0x9a, 0x1f, 0x92, 0x9a, 0x21, 0xa0, 0xb6, 0x85,
85 0x40, 0xee, 0xa2, 0xda, 0x72, 0x5b, 0x99, 0xb3,
86 0x15, 0xf3, 0xb8, 0xb4, 0x89, 0x91, 0x8e, 0xf1,
87 0x09, 0xe1, 0x56, 0x19, 0x39, 0x51, 0xec, 0x7e,
88 0x93, 0x7b, 0x16, 0x52, 0xc0, 0xbd, 0x3b, 0xb1,
89 0xbf, 0x07, 0x35, 0x73, 0xdf, 0x88, 0x3d, 0x2c,
90 0x34, 0xf1, 0xef, 0x45, 0x1f, 0xd4, 0x6b, 0x50,
92 {0x00, 0xc6, 0x85, 0x8e, 0x06, 0xb7, 0x04, 0x04, /* x */
93 0xe9, 0xcd, 0x9e, 0x3e, 0xcb, 0x66, 0x23, 0x95,
94 0xb4, 0x42, 0x9c, 0x64, 0x81, 0x39, 0x05, 0x3f,
95 0xb5, 0x21, 0xf8, 0x28, 0xaf, 0x60, 0x6b, 0x4d,
96 0x3d, 0xba, 0xa1, 0x4b, 0x5e, 0x77, 0xef, 0xe7,
97 0x59, 0x28, 0xfe, 0x1d, 0xc1, 0x27, 0xa2, 0xff,
98 0xa8, 0xde, 0x33, 0x48, 0xb3, 0xc1, 0x85, 0x6a,
99 0x42, 0x9b, 0xf9, 0x7e, 0x7e, 0x31, 0xc2, 0xe5,
101 {0x01, 0x18, 0x39, 0x29, 0x6a, 0x78, 0x9a, 0x3b, /* y */
102 0xc0, 0x04, 0x5c, 0x8a, 0x5f, 0xb4, 0x2c, 0x7d,
103 0x1b, 0xd9, 0x98, 0xf5, 0x44, 0x49, 0x57, 0x9b,
104 0x44, 0x68, 0x17, 0xaf, 0xbd, 0x17, 0x27, 0x3e,
105 0x66, 0x2c, 0x97, 0xee, 0x72, 0x99, 0x5e, 0xf4,
106 0x26, 0x40, 0xc5, 0x50, 0xb9, 0x01, 0x3f, 0xad,
107 0x07, 0x61, 0x35, 0x3c, 0x70, 0x86, 0xa2, 0x72,
108 0xc2, 0x40, 0x88, 0xbe, 0x94, 0x76, 0x9f, 0xd1,
113 * The representation of field elements.
114 * ------------------------------------
116 * We represent field elements with nine values. These values are either 64 or
117 * 128 bits and the field element represented is:
118 * v[0]*2^0 + v[1]*2^58 + v[2]*2^116 + ... + v[8]*2^464 (mod p)
119 * Each of the nine values is called a 'limb'. Since the limbs are spaced only
120 * 58 bits apart, but are greater than 58 bits in length, the most significant
121 * bits of each limb overlap with the least significant bits of the next.
123 * A field element with 64-bit limbs is an 'felem'. One with 128-bit limbs is a
128 typedef uint64_t limb
;
129 typedef limb felem
[NLIMBS
];
130 typedef uint128_t largefelem
[NLIMBS
];
132 static const limb bottom57bits
= 0x1ffffffffffffff;
133 static const limb bottom58bits
= 0x3ffffffffffffff;
135 /* bin66_to_felem takes a little-endian byte array and converts it into felem
136 * form. This assumes that the CPU is little-endian. */
137 static void bin66_to_felem(felem out
, const u8 in
[66])
139 out
[0] = (*((limb
*) &in
[0])) & bottom58bits
;
140 out
[1] = (*((limb
*) &in
[7]) >> 2) & bottom58bits
;
141 out
[2] = (*((limb
*) &in
[14]) >> 4) & bottom58bits
;
142 out
[3] = (*((limb
*) &in
[21]) >> 6) & bottom58bits
;
143 out
[4] = (*((limb
*) &in
[29])) & bottom58bits
;
144 out
[5] = (*((limb
*) &in
[36]) >> 2) & bottom58bits
;
145 out
[6] = (*((limb
*) &in
[43]) >> 4) & bottom58bits
;
146 out
[7] = (*((limb
*) &in
[50]) >> 6) & bottom58bits
;
147 out
[8] = (*((limb
*) &in
[58])) & bottom57bits
;
150 /* felem_to_bin66 takes an felem and serialises into a little endian, 66 byte
151 * array. This assumes that the CPU is little-endian. */
152 static void felem_to_bin66(u8 out
[66], const felem in
)
155 (*((limb
*) &out
[0])) = in
[0];
156 (*((limb
*) &out
[7])) |= in
[1] << 2;
157 (*((limb
*) &out
[14])) |= in
[2] << 4;
158 (*((limb
*) &out
[21])) |= in
[3] << 6;
159 (*((limb
*) &out
[29])) = in
[4];
160 (*((limb
*) &out
[36])) |= in
[5] << 2;
161 (*((limb
*) &out
[43])) |= in
[6] << 4;
162 (*((limb
*) &out
[50])) |= in
[7] << 6;
163 (*((limb
*) &out
[58])) = in
[8];
166 /* To preserve endianness when using BN_bn2bin and BN_bin2bn */
167 static void flip_endian(u8
*out
, const u8
*in
, unsigned len
)
170 for (i
= 0; i
< len
; ++i
)
171 out
[i
] = in
[len
-1-i
];
174 /* BN_to_felem converts an OpenSSL BIGNUM into an felem */
175 static int BN_to_felem(felem out
, const BIGNUM
*bn
)
177 felem_bytearray b_in
;
178 felem_bytearray b_out
;
181 /* BN_bn2bin eats leading zeroes */
182 memset(b_out
, 0, sizeof b_out
);
183 num_bytes
= BN_num_bytes(bn
);
184 if (num_bytes
> sizeof b_out
)
186 ECerr(EC_F_BN_TO_FELEM
, EC_R_BIGNUM_OUT_OF_RANGE
);
189 if (BN_is_negative(bn
))
191 ECerr(EC_F_BN_TO_FELEM
, EC_R_BIGNUM_OUT_OF_RANGE
);
194 num_bytes
= BN_bn2bin(bn
, b_in
);
195 flip_endian(b_out
, b_in
, num_bytes
);
196 bin66_to_felem(out
, b_out
);
200 /* felem_to_BN converts an felem into an OpenSSL BIGNUM */
201 static BIGNUM
*felem_to_BN(BIGNUM
*out
, const felem in
)
203 felem_bytearray b_in
, b_out
;
204 felem_to_bin66(b_in
, in
);
205 flip_endian(b_out
, b_in
, sizeof b_out
);
206 return BN_bin2bn(b_out
, sizeof b_out
, out
);
211 * ---------------- */
213 static void felem_one(felem out
)
226 static void felem_assign(felem out
, const felem in
)
239 /* felem_sum64 sets out = out + in. */
240 static void felem_sum64(felem out
, const felem in
)
253 /* felem_scalar sets out = in * scalar */
254 static void felem_scalar(felem out
, const felem in
, limb scalar
)
256 out
[0] = in
[0] * scalar
;
257 out
[1] = in
[1] * scalar
;
258 out
[2] = in
[2] * scalar
;
259 out
[3] = in
[3] * scalar
;
260 out
[4] = in
[4] * scalar
;
261 out
[5] = in
[5] * scalar
;
262 out
[6] = in
[6] * scalar
;
263 out
[7] = in
[7] * scalar
;
264 out
[8] = in
[8] * scalar
;
267 /* felem_scalar64 sets out = out * scalar */
268 static void felem_scalar64(felem out
, limb scalar
)
281 /* felem_scalar128 sets out = out * scalar */
282 static void felem_scalar128(largefelem out
, limb scalar
)
296 * felem_neg sets |out| to |-in|
298 * in[i] < 2^59 + 2^14
302 static void felem_neg(felem out
, const felem in
)
304 /* In order to prevent underflow, we subtract from 0 mod p. */
305 static const limb two62m3
= (((limb
)1) << 62) - (((limb
)1) << 5);
306 static const limb two62m2
= (((limb
)1) << 62) - (((limb
)1) << 4);
308 out
[0] = two62m3
- in
[0];
309 out
[1] = two62m2
- in
[1];
310 out
[2] = two62m2
- in
[2];
311 out
[3] = two62m2
- in
[3];
312 out
[4] = two62m2
- in
[4];
313 out
[5] = two62m2
- in
[5];
314 out
[6] = two62m2
- in
[6];
315 out
[7] = two62m2
- in
[7];
316 out
[8] = two62m2
- in
[8];
320 * felem_diff64 subtracts |in| from |out|
322 * in[i] < 2^59 + 2^14
324 * out[i] < out[i] + 2^62
326 static void felem_diff64(felem out
, const felem in
)
328 /* In order to prevent underflow, we add 0 mod p before subtracting. */
329 static const limb two62m3
= (((limb
)1) << 62) - (((limb
)1) << 5);
330 static const limb two62m2
= (((limb
)1) << 62) - (((limb
)1) << 4);
332 out
[0] += two62m3
- in
[0];
333 out
[1] += two62m2
- in
[1];
334 out
[2] += two62m2
- in
[2];
335 out
[3] += two62m2
- in
[3];
336 out
[4] += two62m2
- in
[4];
337 out
[5] += two62m2
- in
[5];
338 out
[6] += two62m2
- in
[6];
339 out
[7] += two62m2
- in
[7];
340 out
[8] += two62m2
- in
[8];
344 * felem_diff_128_64 subtracts |in| from |out|
346 * in[i] < 2^62 + 2^17
348 * out[i] < out[i] + 2^63
350 static void felem_diff_128_64(largefelem out
, const felem in
)
352 /* In order to prevent underflow, we add 0 mod p before subtracting. */
353 static const limb two63m6
= (((limb
)1) << 62) - (((limb
)1) << 5);
354 static const limb two63m5
= (((limb
)1) << 62) - (((limb
)1) << 4);
356 out
[0] += two63m6
- in
[0];
357 out
[1] += two63m5
- in
[1];
358 out
[2] += two63m5
- in
[2];
359 out
[3] += two63m5
- in
[3];
360 out
[4] += two63m5
- in
[4];
361 out
[5] += two63m5
- in
[5];
362 out
[6] += two63m5
- in
[6];
363 out
[7] += two63m5
- in
[7];
364 out
[8] += two63m5
- in
[8];
368 * felem_diff_128_64 subtracts |in| from |out|
372 * out[i] < out[i] + 2^127 - 2^69
374 static void felem_diff128(largefelem out
, const largefelem in
)
376 /* In order to prevent underflow, we add 0 mod p before subtracting. */
377 static const uint128_t two127m70
= (((uint128_t
)1) << 127) - (((uint128_t
)1) << 70);
378 static const uint128_t two127m69
= (((uint128_t
)1) << 127) - (((uint128_t
)1) << 69);
380 out
[0] += (two127m70
- in
[0]);
381 out
[1] += (two127m69
- in
[1]);
382 out
[2] += (two127m69
- in
[2]);
383 out
[3] += (two127m69
- in
[3]);
384 out
[4] += (two127m69
- in
[4]);
385 out
[5] += (two127m69
- in
[5]);
386 out
[6] += (two127m69
- in
[6]);
387 out
[7] += (two127m69
- in
[7]);
388 out
[8] += (two127m69
- in
[8]);
392 * felem_square sets |out| = |in|^2
396 * out[i] < 17 * max(in[i]) * max(in[i])
398 static void felem_square(largefelem out
, const felem in
)
401 felem_scalar(inx2
, in
, 2);
402 felem_scalar(inx4
, in
, 4);
405 * We have many cases were we want to do
408 * This is obviously just
410 * However, rather than do the doubling on the 128 bit result, we
411 * double one of the inputs to the multiplication by reading from
414 out
[0] = ((uint128_t
) in
[0]) * in
[0];
415 out
[1] = ((uint128_t
) in
[0]) * inx2
[1];
416 out
[2] = ((uint128_t
) in
[0]) * inx2
[2] +
417 ((uint128_t
) in
[1]) * in
[1];
418 out
[3] = ((uint128_t
) in
[0]) * inx2
[3] +
419 ((uint128_t
) in
[1]) * inx2
[2];
420 out
[4] = ((uint128_t
) in
[0]) * inx2
[4] +
421 ((uint128_t
) in
[1]) * inx2
[3] +
422 ((uint128_t
) in
[2]) * in
[2];
423 out
[5] = ((uint128_t
) in
[0]) * inx2
[5] +
424 ((uint128_t
) in
[1]) * inx2
[4] +
425 ((uint128_t
) in
[2]) * inx2
[3];
426 out
[6] = ((uint128_t
) in
[0]) * inx2
[6] +
427 ((uint128_t
) in
[1]) * inx2
[5] +
428 ((uint128_t
) in
[2]) * inx2
[4] +
429 ((uint128_t
) in
[3]) * in
[3];
430 out
[7] = ((uint128_t
) in
[0]) * inx2
[7] +
431 ((uint128_t
) in
[1]) * inx2
[6] +
432 ((uint128_t
) in
[2]) * inx2
[5] +
433 ((uint128_t
) in
[3]) * inx2
[4];
434 out
[8] = ((uint128_t
) in
[0]) * inx2
[8] +
435 ((uint128_t
) in
[1]) * inx2
[7] +
436 ((uint128_t
) in
[2]) * inx2
[6] +
437 ((uint128_t
) in
[3]) * inx2
[5] +
438 ((uint128_t
) in
[4]) * in
[4];
440 /* The remaining limbs fall above 2^521, with the first falling at
441 * 2^522. They correspond to locations one bit up from the limbs
442 * produced above so we would have to multiply by two to align them.
443 * Again, rather than operate on the 128-bit result, we double one of
444 * the inputs to the multiplication. If we want to double for both this
445 * reason, and the reason above, then we end up multiplying by four. */
448 out
[0] += ((uint128_t
) in
[1]) * inx4
[8] +
449 ((uint128_t
) in
[2]) * inx4
[7] +
450 ((uint128_t
) in
[3]) * inx4
[6] +
451 ((uint128_t
) in
[4]) * inx4
[5];
454 out
[1] += ((uint128_t
) in
[2]) * inx4
[8] +
455 ((uint128_t
) in
[3]) * inx4
[7] +
456 ((uint128_t
) in
[4]) * inx4
[6] +
457 ((uint128_t
) in
[5]) * inx2
[5];
460 out
[2] += ((uint128_t
) in
[3]) * inx4
[8] +
461 ((uint128_t
) in
[4]) * inx4
[7] +
462 ((uint128_t
) in
[5]) * inx4
[6];
465 out
[3] += ((uint128_t
) in
[4]) * inx4
[8] +
466 ((uint128_t
) in
[5]) * inx4
[7] +
467 ((uint128_t
) in
[6]) * inx2
[6];
470 out
[4] += ((uint128_t
) in
[5]) * inx4
[8] +
471 ((uint128_t
) in
[6]) * inx4
[7];
474 out
[5] += ((uint128_t
) in
[6]) * inx4
[8] +
475 ((uint128_t
) in
[7]) * inx2
[7];
478 out
[6] += ((uint128_t
) in
[7]) * inx4
[8];
481 out
[7] += ((uint128_t
) in
[8]) * inx2
[8];
485 * felem_mul sets |out| = |in1| * |in2|
490 * out[i] < 17 * max(in1[i]) * max(in2[i])
492 static void felem_mul(largefelem out
, const felem in1
, const felem in2
)
495 felem_scalar(in2x2
, in2
, 2);
497 out
[0] = ((uint128_t
) in1
[0]) * in2
[0];
499 out
[1] = ((uint128_t
) in1
[0]) * in2
[1] +
500 ((uint128_t
) in1
[1]) * in2
[0];
502 out
[2] = ((uint128_t
) in1
[0]) * in2
[2] +
503 ((uint128_t
) in1
[1]) * in2
[1] +
504 ((uint128_t
) in1
[2]) * in2
[0];
506 out
[3] = ((uint128_t
) in1
[0]) * in2
[3] +
507 ((uint128_t
) in1
[1]) * in2
[2] +
508 ((uint128_t
) in1
[2]) * in2
[1] +
509 ((uint128_t
) in1
[3]) * in2
[0];
511 out
[4] = ((uint128_t
) in1
[0]) * in2
[4] +
512 ((uint128_t
) in1
[1]) * in2
[3] +
513 ((uint128_t
) in1
[2]) * in2
[2] +
514 ((uint128_t
) in1
[3]) * in2
[1] +
515 ((uint128_t
) in1
[4]) * in2
[0];
517 out
[5] = ((uint128_t
) in1
[0]) * in2
[5] +
518 ((uint128_t
) in1
[1]) * in2
[4] +
519 ((uint128_t
) in1
[2]) * in2
[3] +
520 ((uint128_t
) in1
[3]) * in2
[2] +
521 ((uint128_t
) in1
[4]) * in2
[1] +
522 ((uint128_t
) in1
[5]) * in2
[0];
524 out
[6] = ((uint128_t
) in1
[0]) * in2
[6] +
525 ((uint128_t
) in1
[1]) * in2
[5] +
526 ((uint128_t
) in1
[2]) * in2
[4] +
527 ((uint128_t
) in1
[3]) * in2
[3] +
528 ((uint128_t
) in1
[4]) * in2
[2] +
529 ((uint128_t
) in1
[5]) * in2
[1] +
530 ((uint128_t
) in1
[6]) * in2
[0];
532 out
[7] = ((uint128_t
) in1
[0]) * in2
[7] +
533 ((uint128_t
) in1
[1]) * in2
[6] +
534 ((uint128_t
) in1
[2]) * in2
[5] +
535 ((uint128_t
) in1
[3]) * in2
[4] +
536 ((uint128_t
) in1
[4]) * in2
[3] +
537 ((uint128_t
) in1
[5]) * in2
[2] +
538 ((uint128_t
) in1
[6]) * in2
[1] +
539 ((uint128_t
) in1
[7]) * in2
[0];
541 out
[8] = ((uint128_t
) in1
[0]) * in2
[8] +
542 ((uint128_t
) in1
[1]) * in2
[7] +
543 ((uint128_t
) in1
[2]) * in2
[6] +
544 ((uint128_t
) in1
[3]) * in2
[5] +
545 ((uint128_t
) in1
[4]) * in2
[4] +
546 ((uint128_t
) in1
[5]) * in2
[3] +
547 ((uint128_t
) in1
[6]) * in2
[2] +
548 ((uint128_t
) in1
[7]) * in2
[1] +
549 ((uint128_t
) in1
[8]) * in2
[0];
551 /* See comment in felem_square about the use of in2x2 here */
553 out
[0] += ((uint128_t
) in1
[1]) * in2x2
[8] +
554 ((uint128_t
) in1
[2]) * in2x2
[7] +
555 ((uint128_t
) in1
[3]) * in2x2
[6] +
556 ((uint128_t
) in1
[4]) * in2x2
[5] +
557 ((uint128_t
) in1
[5]) * in2x2
[4] +
558 ((uint128_t
) in1
[6]) * in2x2
[3] +
559 ((uint128_t
) in1
[7]) * in2x2
[2] +
560 ((uint128_t
) in1
[8]) * in2x2
[1];
562 out
[1] += ((uint128_t
) in1
[2]) * in2x2
[8] +
563 ((uint128_t
) in1
[3]) * in2x2
[7] +
564 ((uint128_t
) in1
[4]) * in2x2
[6] +
565 ((uint128_t
) in1
[5]) * in2x2
[5] +
566 ((uint128_t
) in1
[6]) * in2x2
[4] +
567 ((uint128_t
) in1
[7]) * in2x2
[3] +
568 ((uint128_t
) in1
[8]) * in2x2
[2];
570 out
[2] += ((uint128_t
) in1
[3]) * in2x2
[8] +
571 ((uint128_t
) in1
[4]) * in2x2
[7] +
572 ((uint128_t
) in1
[5]) * in2x2
[6] +
573 ((uint128_t
) in1
[6]) * in2x2
[5] +
574 ((uint128_t
) in1
[7]) * in2x2
[4] +
575 ((uint128_t
) in1
[8]) * in2x2
[3];
577 out
[3] += ((uint128_t
) in1
[4]) * in2x2
[8] +
578 ((uint128_t
) in1
[5]) * in2x2
[7] +
579 ((uint128_t
) in1
[6]) * in2x2
[6] +
580 ((uint128_t
) in1
[7]) * in2x2
[5] +
581 ((uint128_t
) in1
[8]) * in2x2
[4];
583 out
[4] += ((uint128_t
) in1
[5]) * in2x2
[8] +
584 ((uint128_t
) in1
[6]) * in2x2
[7] +
585 ((uint128_t
) in1
[7]) * in2x2
[6] +
586 ((uint128_t
) in1
[8]) * in2x2
[5];
588 out
[5] += ((uint128_t
) in1
[6]) * in2x2
[8] +
589 ((uint128_t
) in1
[7]) * in2x2
[7] +
590 ((uint128_t
) in1
[8]) * in2x2
[6];
592 out
[6] += ((uint128_t
) in1
[7]) * in2x2
[8] +
593 ((uint128_t
) in1
[8]) * in2x2
[7];
595 out
[7] += ((uint128_t
) in1
[8]) * in2x2
[8];
598 static const limb bottom52bits
= 0xfffffffffffff;
601 * felem_reduce converts a largefelem to an felem.
605 * out[i] < 2^59 + 2^14
607 static void felem_reduce(felem out
, const largefelem in
)
609 u64 overflow1
, overflow2
;
611 out
[0] = ((limb
) in
[0]) & bottom58bits
;
612 out
[1] = ((limb
) in
[1]) & bottom58bits
;
613 out
[2] = ((limb
) in
[2]) & bottom58bits
;
614 out
[3] = ((limb
) in
[3]) & bottom58bits
;
615 out
[4] = ((limb
) in
[4]) & bottom58bits
;
616 out
[5] = ((limb
) in
[5]) & bottom58bits
;
617 out
[6] = ((limb
) in
[6]) & bottom58bits
;
618 out
[7] = ((limb
) in
[7]) & bottom58bits
;
619 out
[8] = ((limb
) in
[8]) & bottom58bits
;
623 out
[1] += ((limb
) in
[0]) >> 58;
624 out
[1] += (((limb
) (in
[0] >> 64)) & bottom52bits
) << 6;
625 /* out[1] < 2^58 + 2^6 + 2^58
627 out
[2] += ((limb
) (in
[0] >> 64)) >> 52;
629 out
[2] += ((limb
) in
[1]) >> 58;
630 out
[2] += (((limb
) (in
[1] >> 64)) & bottom52bits
) << 6;
631 out
[3] += ((limb
) (in
[1] >> 64)) >> 52;
633 out
[3] += ((limb
) in
[2]) >> 58;
634 out
[3] += (((limb
) (in
[2] >> 64)) & bottom52bits
) << 6;
635 out
[4] += ((limb
) (in
[2] >> 64)) >> 52;
637 out
[4] += ((limb
) in
[3]) >> 58;
638 out
[4] += (((limb
) (in
[3] >> 64)) & bottom52bits
) << 6;
639 out
[5] += ((limb
) (in
[3] >> 64)) >> 52;
641 out
[5] += ((limb
) in
[4]) >> 58;
642 out
[5] += (((limb
) (in
[4] >> 64)) & bottom52bits
) << 6;
643 out
[6] += ((limb
) (in
[4] >> 64)) >> 52;
645 out
[6] += ((limb
) in
[5]) >> 58;
646 out
[6] += (((limb
) (in
[5] >> 64)) & bottom52bits
) << 6;
647 out
[7] += ((limb
) (in
[5] >> 64)) >> 52;
649 out
[7] += ((limb
) in
[6]) >> 58;
650 out
[7] += (((limb
) (in
[6] >> 64)) & bottom52bits
) << 6;
651 out
[8] += ((limb
) (in
[6] >> 64)) >> 52;
653 out
[8] += ((limb
) in
[7]) >> 58;
654 out
[8] += (((limb
) (in
[7] >> 64)) & bottom52bits
) << 6;
655 /* out[x > 1] < 2^58 + 2^6 + 2^58 + 2^12
657 overflow1
= ((limb
) (in
[7] >> 64)) >> 52;
659 overflow1
+= ((limb
) in
[8]) >> 58;
660 overflow1
+= (((limb
) (in
[8] >> 64)) & bottom52bits
) << 6;
661 overflow2
= ((limb
) (in
[8] >> 64)) >> 52;
663 overflow1
<<= 1; /* overflow1 < 2^13 + 2^7 + 2^59 */
664 overflow2
<<= 1; /* overflow2 < 2^13 */
666 out
[0] += overflow1
; /* out[0] < 2^60 */
667 out
[1] += overflow2
; /* out[1] < 2^59 + 2^6 + 2^13 */
669 out
[1] += out
[0] >> 58; out
[0] &= bottom58bits
;
671 * out[1] < 2^59 + 2^6 + 2^13 + 2^2
675 static void felem_square_reduce(felem out
, const felem in
)
678 felem_square(tmp
, in
);
679 felem_reduce(out
, tmp
);
682 static void felem_mul_reduce(felem out
, const felem in1
, const felem in2
)
685 felem_mul(tmp
, in1
, in2
);
686 felem_reduce(out
, tmp
);
690 * felem_inv calculates |out| = |in|^{-1}
692 * Based on Fermat's Little Theorem:
694 * a^{p-1} = 1 (mod p)
695 * a^{p-2} = a^{-1} (mod p)
697 static void felem_inv(felem out
, const felem in
)
699 felem ftmp
, ftmp2
, ftmp3
, ftmp4
;
703 felem_square(tmp
, in
); felem_reduce(ftmp
, tmp
); /* 2^1 */
704 felem_mul(tmp
, in
, ftmp
); felem_reduce(ftmp
, tmp
); /* 2^2 - 2^0 */
705 felem_assign(ftmp2
, ftmp
);
706 felem_square(tmp
, ftmp
); felem_reduce(ftmp
, tmp
); /* 2^3 - 2^1 */
707 felem_mul(tmp
, in
, ftmp
); felem_reduce(ftmp
, tmp
); /* 2^3 - 2^0 */
708 felem_square(tmp
, ftmp
); felem_reduce(ftmp
, tmp
); /* 2^4 - 2^1 */
710 felem_square(tmp
, ftmp2
); felem_reduce(ftmp3
, tmp
); /* 2^3 - 2^1 */
711 felem_square(tmp
, ftmp3
); felem_reduce(ftmp3
, tmp
); /* 2^4 - 2^2 */
712 felem_mul(tmp
, ftmp3
, ftmp2
); felem_reduce(ftmp3
, tmp
); /* 2^4 - 2^0 */
714 felem_assign(ftmp2
, ftmp3
);
715 felem_square(tmp
, ftmp3
); felem_reduce(ftmp3
, tmp
); /* 2^5 - 2^1 */
716 felem_square(tmp
, ftmp3
); felem_reduce(ftmp3
, tmp
); /* 2^6 - 2^2 */
717 felem_square(tmp
, ftmp3
); felem_reduce(ftmp3
, tmp
); /* 2^7 - 2^3 */
718 felem_square(tmp
, ftmp3
); felem_reduce(ftmp3
, tmp
); /* 2^8 - 2^4 */
719 felem_assign(ftmp4
, ftmp3
);
720 felem_mul(tmp
, ftmp3
, ftmp
); felem_reduce(ftmp4
, tmp
); /* 2^8 - 2^1 */
721 felem_square(tmp
, ftmp4
); felem_reduce(ftmp4
, tmp
); /* 2^9 - 2^2 */
722 felem_mul(tmp
, ftmp3
, ftmp2
); felem_reduce(ftmp3
, tmp
); /* 2^8 - 2^0 */
723 felem_assign(ftmp2
, ftmp3
);
725 for (i
= 0; i
< 8; i
++)
727 felem_square(tmp
, ftmp3
); felem_reduce(ftmp3
, tmp
); /* 2^16 - 2^8 */
729 felem_mul(tmp
, ftmp3
, ftmp2
); felem_reduce(ftmp3
, tmp
); /* 2^16 - 2^0 */
730 felem_assign(ftmp2
, ftmp3
);
732 for (i
= 0; i
< 16; i
++)
734 felem_square(tmp
, ftmp3
); felem_reduce(ftmp3
, tmp
); /* 2^32 - 2^16 */
736 felem_mul(tmp
, ftmp3
, ftmp2
); felem_reduce(ftmp3
, tmp
); /* 2^32 - 2^0 */
737 felem_assign(ftmp2
, ftmp3
);
739 for (i
= 0; i
< 32; i
++)
741 felem_square(tmp
, ftmp3
); felem_reduce(ftmp3
, tmp
); /* 2^64 - 2^32 */
743 felem_mul(tmp
, ftmp3
, ftmp2
); felem_reduce(ftmp3
, tmp
); /* 2^64 - 2^0 */
744 felem_assign(ftmp2
, ftmp3
);
746 for (i
= 0; i
< 64; i
++)
748 felem_square(tmp
, ftmp3
); felem_reduce(ftmp3
, tmp
); /* 2^128 - 2^64 */
750 felem_mul(tmp
, ftmp3
, ftmp2
); felem_reduce(ftmp3
, tmp
); /* 2^128 - 2^0 */
751 felem_assign(ftmp2
, ftmp3
);
753 for (i
= 0; i
< 128; i
++)
755 felem_square(tmp
, ftmp3
); felem_reduce(ftmp3
, tmp
); /* 2^256 - 2^128 */
757 felem_mul(tmp
, ftmp3
, ftmp2
); felem_reduce(ftmp3
, tmp
); /* 2^256 - 2^0 */
758 felem_assign(ftmp2
, ftmp3
);
760 for (i
= 0; i
< 256; i
++)
762 felem_square(tmp
, ftmp3
); felem_reduce(ftmp3
, tmp
); /* 2^512 - 2^256 */
764 felem_mul(tmp
, ftmp3
, ftmp2
); felem_reduce(ftmp3
, tmp
); /* 2^512 - 2^0 */
766 for (i
= 0; i
< 9; i
++)
768 felem_square(tmp
, ftmp3
); felem_reduce(ftmp3
, tmp
); /* 2^521 - 2^9 */
770 felem_mul(tmp
, ftmp3
, ftmp4
); felem_reduce(ftmp3
, tmp
); /* 2^512 - 2^2 */
771 felem_mul(tmp
, ftmp3
, in
); felem_reduce(out
, tmp
); /* 2^512 - 3 */
774 /* This is 2^521-1, expressed as an felem */
775 static const felem kPrime
=
777 0x03ffffffffffffff, 0x03ffffffffffffff, 0x03ffffffffffffff,
778 0x03ffffffffffffff, 0x03ffffffffffffff, 0x03ffffffffffffff,
779 0x03ffffffffffffff, 0x03ffffffffffffff, 0x01ffffffffffffff
783 * felem_is_zero returns a limb with all bits set if |in| == 0 (mod p) and 0
786 * in[i] < 2^59 + 2^14
788 static limb
felem_is_zero(const felem in
)
792 felem_assign(ftmp
, in
);
794 ftmp
[0] += ftmp
[8] >> 57; ftmp
[8] &= bottom57bits
;
796 ftmp
[1] += ftmp
[0] >> 58; ftmp
[0] &= bottom58bits
;
797 ftmp
[2] += ftmp
[1] >> 58; ftmp
[1] &= bottom58bits
;
798 ftmp
[3] += ftmp
[2] >> 58; ftmp
[2] &= bottom58bits
;
799 ftmp
[4] += ftmp
[3] >> 58; ftmp
[3] &= bottom58bits
;
800 ftmp
[5] += ftmp
[4] >> 58; ftmp
[4] &= bottom58bits
;
801 ftmp
[6] += ftmp
[5] >> 58; ftmp
[5] &= bottom58bits
;
802 ftmp
[7] += ftmp
[6] >> 58; ftmp
[6] &= bottom58bits
;
803 ftmp
[8] += ftmp
[7] >> 58; ftmp
[7] &= bottom58bits
;
804 /* ftmp[8] < 2^57 + 4 */
806 /* The ninth limb of 2*(2^521-1) is 0x03ffffffffffffff, which is
807 * greater than our bound for ftmp[8]. Therefore we only have to check
808 * if the zero is zero or 2^521-1. */
822 /* We know that ftmp[i] < 2^63, therefore the only way that the top bit
823 * can be set is if is_zero was 0 before the decrement. */
824 is_zero
= ((s64
) is_zero
) >> 63;
826 is_p
= ftmp
[0] ^ kPrime
[0];
827 is_p
|= ftmp
[1] ^ kPrime
[1];
828 is_p
|= ftmp
[2] ^ kPrime
[2];
829 is_p
|= ftmp
[3] ^ kPrime
[3];
830 is_p
|= ftmp
[4] ^ kPrime
[4];
831 is_p
|= ftmp
[5] ^ kPrime
[5];
832 is_p
|= ftmp
[6] ^ kPrime
[6];
833 is_p
|= ftmp
[7] ^ kPrime
[7];
834 is_p
|= ftmp
[8] ^ kPrime
[8];
837 is_p
= ((s64
) is_p
) >> 63;
843 static int felem_is_zero_int(const felem in
)
845 return (int) (felem_is_zero(in
) & ((limb
)1));
849 * felem_contract converts |in| to its unique, minimal representation.
851 * in[i] < 2^59 + 2^14
853 static void felem_contract(felem out
, const felem in
)
855 limb is_p
, is_greater
, sign
;
856 static const limb two58
= ((limb
)1) << 58;
858 felem_assign(out
, in
);
860 out
[0] += out
[8] >> 57; out
[8] &= bottom57bits
;
862 out
[1] += out
[0] >> 58; out
[0] &= bottom58bits
;
863 out
[2] += out
[1] >> 58; out
[1] &= bottom58bits
;
864 out
[3] += out
[2] >> 58; out
[2] &= bottom58bits
;
865 out
[4] += out
[3] >> 58; out
[3] &= bottom58bits
;
866 out
[5] += out
[4] >> 58; out
[4] &= bottom58bits
;
867 out
[6] += out
[5] >> 58; out
[5] &= bottom58bits
;
868 out
[7] += out
[6] >> 58; out
[6] &= bottom58bits
;
869 out
[8] += out
[7] >> 58; out
[7] &= bottom58bits
;
870 /* out[8] < 2^57 + 4 */
872 /* If the value is greater than 2^521-1 then we have to subtract
873 * 2^521-1 out. See the comments in felem_is_zero regarding why we
874 * don't test for other multiples of the prime. */
876 /* First, if |out| is equal to 2^521-1, we subtract it out to get zero. */
878 is_p
= out
[0] ^ kPrime
[0];
879 is_p
|= out
[1] ^ kPrime
[1];
880 is_p
|= out
[2] ^ kPrime
[2];
881 is_p
|= out
[3] ^ kPrime
[3];
882 is_p
|= out
[4] ^ kPrime
[4];
883 is_p
|= out
[5] ^ kPrime
[5];
884 is_p
|= out
[6] ^ kPrime
[6];
885 is_p
|= out
[7] ^ kPrime
[7];
886 is_p
|= out
[8] ^ kPrime
[8];
895 is_p
= ((s64
) is_p
) >> 63;
898 /* is_p is 0 iff |out| == 2^521-1 and all ones otherwise */
910 /* In order to test that |out| >= 2^521-1 we need only test if out[8]
911 * >> 57 is greater than zero as (2^521-1) + x >= 2^522 */
912 is_greater
= out
[8] >> 57;
913 is_greater
|= is_greater
<< 32;
914 is_greater
|= is_greater
<< 16;
915 is_greater
|= is_greater
<< 8;
916 is_greater
|= is_greater
<< 4;
917 is_greater
|= is_greater
<< 2;
918 is_greater
|= is_greater
<< 1;
919 is_greater
= ((s64
) is_greater
) >> 63;
921 out
[0] -= kPrime
[0] & is_greater
;
922 out
[1] -= kPrime
[1] & is_greater
;
923 out
[2] -= kPrime
[2] & is_greater
;
924 out
[3] -= kPrime
[3] & is_greater
;
925 out
[4] -= kPrime
[4] & is_greater
;
926 out
[5] -= kPrime
[5] & is_greater
;
927 out
[6] -= kPrime
[6] & is_greater
;
928 out
[7] -= kPrime
[7] & is_greater
;
929 out
[8] -= kPrime
[8] & is_greater
;
931 /* Eliminate negative coefficients */
932 sign
= -(out
[0] >> 63); out
[0] += (two58
& sign
); out
[1] -= (1 & sign
);
933 sign
= -(out
[1] >> 63); out
[1] += (two58
& sign
); out
[2] -= (1 & sign
);
934 sign
= -(out
[2] >> 63); out
[2] += (two58
& sign
); out
[3] -= (1 & sign
);
935 sign
= -(out
[3] >> 63); out
[3] += (two58
& sign
); out
[4] -= (1 & sign
);
936 sign
= -(out
[4] >> 63); out
[4] += (two58
& sign
); out
[5] -= (1 & sign
);
937 sign
= -(out
[0] >> 63); out
[5] += (two58
& sign
); out
[6] -= (1 & sign
);
938 sign
= -(out
[6] >> 63); out
[6] += (two58
& sign
); out
[7] -= (1 & sign
);
939 sign
= -(out
[7] >> 63); out
[7] += (two58
& sign
); out
[8] -= (1 & sign
);
940 sign
= -(out
[5] >> 63); out
[5] += (two58
& sign
); out
[6] -= (1 & sign
);
941 sign
= -(out
[6] >> 63); out
[6] += (two58
& sign
); out
[7] -= (1 & sign
);
942 sign
= -(out
[7] >> 63); out
[7] += (two58
& sign
); out
[8] -= (1 & sign
);
949 * Building on top of the field operations we have the operations on the
950 * elliptic curve group itself. Points on the curve are represented in Jacobian
954 * point_double calcuates 2*(x_in, y_in, z_in)
956 * The method is taken from:
957 * http://hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-3.html#doubling-dbl-2001-b
959 * Outputs can equal corresponding inputs, i.e., x_out == x_in is allowed.
960 * while x_out == y_in is not (maybe this works, but it's not tested). */
962 point_double(felem x_out
, felem y_out
, felem z_out
,
963 const felem x_in
, const felem y_in
, const felem z_in
)
965 largefelem tmp
, tmp2
;
966 felem delta
, gamma
, beta
, alpha
, ftmp
, ftmp2
;
968 felem_assign(ftmp
, x_in
);
969 felem_assign(ftmp2
, x_in
);
972 felem_square(tmp
, z_in
);
973 felem_reduce(delta
, tmp
); /* delta[i] < 2^59 + 2^14 */
976 felem_square(tmp
, y_in
);
977 felem_reduce(gamma
, tmp
); /* gamma[i] < 2^59 + 2^14 */
980 felem_mul(tmp
, x_in
, gamma
);
981 felem_reduce(beta
, tmp
); /* beta[i] < 2^59 + 2^14 */
983 /* alpha = 3*(x-delta)*(x+delta) */
984 felem_diff64(ftmp
, delta
);
986 felem_sum64(ftmp2
, delta
);
987 /* ftmp2[i] < 2^60 + 2^15 */
988 felem_scalar64(ftmp2
, 3);
989 /* ftmp2[i] < 3*2^60 + 3*2^15 */
990 felem_mul(tmp
, ftmp
, ftmp2
);
992 * tmp[i] < 17(3*2^121 + 3*2^76)
993 * = 61*2^121 + 61*2^76
994 * < 64*2^121 + 64*2^76
998 felem_reduce(alpha
, tmp
);
1000 /* x' = alpha^2 - 8*beta */
1001 felem_square(tmp
, alpha
);
1002 /* tmp[i] < 17*2^120
1004 felem_assign(ftmp
, beta
);
1005 felem_scalar64(ftmp
, 8);
1006 /* ftmp[i] < 2^62 + 2^17 */
1007 felem_diff_128_64(tmp
, ftmp
);
1008 /* tmp[i] < 2^125 + 2^63 + 2^62 + 2^17 */
1009 felem_reduce(x_out
, tmp
);
1011 /* z' = (y + z)^2 - gamma - delta */
1012 felem_sum64(delta
, gamma
);
1013 /* delta[i] < 2^60 + 2^15 */
1014 felem_assign(ftmp
, y_in
);
1015 felem_sum64(ftmp
, z_in
);
1016 /* ftmp[i] < 2^60 + 2^15 */
1017 felem_square(tmp
, ftmp
);
1018 /* tmp[i] < 17(2^122)
1020 felem_diff_128_64(tmp
, delta
);
1021 /* tmp[i] < 2^127 + 2^63 */
1022 felem_reduce(z_out
, tmp
);
1024 /* y' = alpha*(4*beta - x') - 8*gamma^2 */
1025 felem_scalar64(beta
, 4);
1026 /* beta[i] < 2^61 + 2^16 */
1027 felem_diff64(beta
, x_out
);
1028 /* beta[i] < 2^61 + 2^60 + 2^16 */
1029 felem_mul(tmp
, alpha
, beta
);
1031 * tmp[i] < 17*((2^59 + 2^14)(2^61 + 2^60 + 2^16))
1032 * = 17*(2^120 + 2^75 + 2^119 + 2^74 + 2^75 + 2^30)
1033 * = 17*(2^120 + 2^119 + 2^76 + 2^74 + 2^30)
1036 felem_square(tmp2
, gamma
);
1038 * tmp2[i] < 17*(2^59 + 2^14)^2
1039 * = 17*(2^118 + 2^74 + 2^28)
1041 felem_scalar128(tmp2
, 8);
1043 * tmp2[i] < 8*17*(2^118 + 2^74 + 2^28)
1044 * = 2^125 + 2^121 + 2^81 + 2^77 + 2^35 + 2^31
1047 felem_diff128(tmp
, tmp2
);
1049 * tmp[i] < 2^127 - 2^69 + 17(2^120 + 2^119 + 2^76 + 2^74 + 2^30)
1050 * = 2^127 + 2^124 + 2^122 + 2^120 + 2^118 + 2^80 + 2^78 + 2^76 +
1051 * 2^74 + 2^69 + 2^34 + 2^30
1054 felem_reduce(y_out
, tmp
);
1057 /* copy_conditional copies in to out iff mask is all ones. */
1059 copy_conditional(felem out
, const felem in
, limb mask
)
1062 for (i
= 0; i
< NLIMBS
; ++i
)
1064 const limb tmp
= mask
& (in
[i
] ^ out
[i
]);
1070 * point_add calculates (x1, y1, z1) + (x2, y2, z2)
1072 * The method is taken from
1073 * http://hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-3.html#addition-add-2007-bl,
1074 * adapted for mixed addition (z2 = 1, or z2 = 0 for the point at infinity).
1076 * This function includes a branch for checking whether the two input points
1077 * are equal (while not equal to the point at infinity). This case never
1078 * happens during single point multiplication, so there is no timing leak for
1079 * ECDH or ECDSA signing. */
1080 static void point_add(felem x3
, felem y3
, felem z3
,
1081 const felem x1
, const felem y1
, const felem z1
,
1082 const int mixed
, const felem x2
, const felem y2
, const felem z2
)
1084 felem ftmp
, ftmp2
, ftmp3
, ftmp4
, ftmp5
, ftmp6
, x_out
, y_out
, z_out
;
1085 largefelem tmp
, tmp2
;
1086 limb x_equal
, y_equal
, z1_is_zero
, z2_is_zero
;
1088 z1_is_zero
= felem_is_zero(z1
);
1089 z2_is_zero
= felem_is_zero(z2
);
1091 /* ftmp = z1z1 = z1**2 */
1092 felem_square(tmp
, z1
);
1093 felem_reduce(ftmp
, tmp
);
1097 /* ftmp2 = z2z2 = z2**2 */
1098 felem_square(tmp
, z2
);
1099 felem_reduce(ftmp2
, tmp
);
1101 /* u1 = ftmp3 = x1*z2z2 */
1102 felem_mul(tmp
, x1
, ftmp2
);
1103 felem_reduce(ftmp3
, tmp
);
1105 /* ftmp5 = z1 + z2 */
1106 felem_assign(ftmp5
, z1
);
1107 felem_sum64(ftmp5
, z2
);
1108 /* ftmp5[i] < 2^61 */
1110 /* ftmp5 = (z1 + z2)**2 - z1z1 - z2z2 = 2*z1z2 */
1111 felem_square(tmp
, ftmp5
);
1112 /* tmp[i] < 17*2^122 */
1113 felem_diff_128_64(tmp
, ftmp
);
1114 /* tmp[i] < 17*2^122 + 2^63 */
1115 felem_diff_128_64(tmp
, ftmp2
);
1116 /* tmp[i] < 17*2^122 + 2^64 */
1117 felem_reduce(ftmp5
, tmp
);
1119 /* ftmp2 = z2 * z2z2 */
1120 felem_mul(tmp
, ftmp2
, z2
);
1121 felem_reduce(ftmp2
, tmp
);
1123 /* s1 = ftmp6 = y1 * z2**3 */
1124 felem_mul(tmp
, y1
, ftmp2
);
1125 felem_reduce(ftmp6
, tmp
);
1129 /* We'll assume z2 = 1 (special case z2 = 0 is handled later) */
1131 /* u1 = ftmp3 = x1*z2z2 */
1132 felem_assign(ftmp3
, x1
);
1134 /* ftmp5 = 2*z1z2 */
1135 felem_scalar(ftmp5
, z1
, 2);
1137 /* s1 = ftmp6 = y1 * z2**3 */
1138 felem_assign(ftmp6
, y1
);
1142 felem_mul(tmp
, x2
, ftmp
);
1143 /* tmp[i] < 17*2^120 */
1145 /* h = ftmp4 = u2 - u1 */
1146 felem_diff_128_64(tmp
, ftmp3
);
1147 /* tmp[i] < 17*2^120 + 2^63 */
1148 felem_reduce(ftmp4
, tmp
);
1150 x_equal
= felem_is_zero(ftmp4
);
1152 /* z_out = ftmp5 * h */
1153 felem_mul(tmp
, ftmp5
, ftmp4
);
1154 felem_reduce(z_out
, tmp
);
1156 /* ftmp = z1 * z1z1 */
1157 felem_mul(tmp
, ftmp
, z1
);
1158 felem_reduce(ftmp
, tmp
);
1160 /* s2 = tmp = y2 * z1**3 */
1161 felem_mul(tmp
, y2
, ftmp
);
1162 /* tmp[i] < 17*2^120 */
1164 /* r = ftmp5 = (s2 - s1)*2 */
1165 felem_diff_128_64(tmp
, ftmp6
);
1166 /* tmp[i] < 17*2^120 + 2^63 */
1167 felem_reduce(ftmp5
, tmp
);
1168 y_equal
= felem_is_zero(ftmp5
);
1169 felem_scalar64(ftmp5
, 2);
1170 /* ftmp5[i] < 2^61 */
1172 if (x_equal
&& y_equal
&& !z1_is_zero
&& !z2_is_zero
)
1174 point_double(x3
, y3
, z3
, x1
, y1
, z1
);
1178 /* I = ftmp = (2h)**2 */
1179 felem_assign(ftmp
, ftmp4
);
1180 felem_scalar64(ftmp
, 2);
1181 /* ftmp[i] < 2^61 */
1182 felem_square(tmp
, ftmp
);
1183 /* tmp[i] < 17*2^122 */
1184 felem_reduce(ftmp
, tmp
);
1186 /* J = ftmp2 = h * I */
1187 felem_mul(tmp
, ftmp4
, ftmp
);
1188 felem_reduce(ftmp2
, tmp
);
1190 /* V = ftmp4 = U1 * I */
1191 felem_mul(tmp
, ftmp3
, ftmp
);
1192 felem_reduce(ftmp4
, tmp
);
1194 /* x_out = r**2 - J - 2V */
1195 felem_square(tmp
, ftmp5
);
1196 /* tmp[i] < 17*2^122 */
1197 felem_diff_128_64(tmp
, ftmp2
);
1198 /* tmp[i] < 17*2^122 + 2^63 */
1199 felem_assign(ftmp3
, ftmp4
);
1200 felem_scalar64(ftmp4
, 2);
1201 /* ftmp4[i] < 2^61 */
1202 felem_diff_128_64(tmp
, ftmp4
);
1203 /* tmp[i] < 17*2^122 + 2^64 */
1204 felem_reduce(x_out
, tmp
);
1206 /* y_out = r(V-x_out) - 2 * s1 * J */
1207 felem_diff64(ftmp3
, x_out
);
1208 /* ftmp3[i] < 2^60 + 2^60
1210 felem_mul(tmp
, ftmp5
, ftmp3
);
1211 /* tmp[i] < 17*2^122 */
1212 felem_mul(tmp2
, ftmp6
, ftmp2
);
1213 /* tmp2[i] < 17*2^120 */
1214 felem_scalar128(tmp2
, 2);
1215 /* tmp2[i] < 17*2^121 */
1216 felem_diff128(tmp
, tmp2
);
1217 /* tmp[i] < 2^127 - 2^69 + 17*2^122
1218 * = 2^126 - 2^122 - 2^6 - 2^2 - 1
1220 felem_reduce(y_out
, tmp
);
1222 copy_conditional(x_out
, x2
, z1_is_zero
);
1223 copy_conditional(x_out
, x1
, z2_is_zero
);
1224 copy_conditional(y_out
, y2
, z1_is_zero
);
1225 copy_conditional(y_out
, y1
, z2_is_zero
);
1226 copy_conditional(z_out
, z2
, z1_is_zero
);
1227 copy_conditional(z_out
, z1
, z2_is_zero
);
1228 felem_assign(x3
, x_out
);
1229 felem_assign(y3
, y_out
);
1230 felem_assign(z3
, z_out
);
1234 * Base point pre computation
1235 * --------------------------
1237 * Two different sorts of precomputed tables are used in the following code.
1238 * Each contain various points on the curve, where each point is three field
1239 * elements (x, y, z).
1241 * For the base point table, z is usually 1 (0 for the point at infinity).
1242 * This table has 16 elements:
1243 * index | bits | point
1244 * ------+---------+------------------------------
1245 *     0 | 0 0 0 0 | 0G (the point at infinity)
1246 *     1 | 0 0 0 1 | 1G
1247 * 2 | 0 0 1 0 | 2^130G
1248 * 3 | 0 0 1 1 | (2^130 + 1)G
1249 * 4 | 0 1 0 0 | 2^260G
1250 * 5 | 0 1 0 1 | (2^260 + 1)G
1251 * 6 | 0 1 1 0 | (2^260 + 2^130)G
1252 * 7 | 0 1 1 1 | (2^260 + 2^130 + 1)G
1253 * 8 | 1 0 0 0 | 2^390G
1254 * 9 | 1 0 0 1 | (2^390 + 1)G
1255 * 10 | 1 0 1 0 | (2^390 + 2^130)G
1256 * 11 | 1 0 1 1 | (2^390 + 2^130 + 1)G
1257 * 12 | 1 1 0 0 | (2^390 + 2^260)G
1258 * 13 | 1 1 0 1 | (2^390 + 2^260 + 1)G
1259 * 14 | 1 1 1 0 | (2^390 + 2^260 + 2^130)G
1260 * 15 | 1 1 1 1 | (2^390 + 2^260 + 2^130 + 1)G
1262 * The reason for this is so that we can clock bits into four different
1263 * locations when doing simple scalar multiplies against the base point.
1265 * Tables for other points have table[i] = iG for i in 0 .. 16. */
/* gmul is the table of precomputed base points: gmul[0] is the point at
 * infinity (all-zero Jacobian coordinates) and the remaining entries hold
 * the combinations of {2^390, 2^260, 2^130, 1}*G indexed by the bit pattern
 * described in the table comment above. Each point is (X, Y, Z) with Z = 1,
 * each coordinate nine 58-bit limbs, least significant limb first. */
static const felem gmul[16][3] =
{{{0, 0, 0, 0, 0, 0, 0, 0, 0},
  {0, 0, 0, 0, 0, 0, 0, 0, 0},
  {0, 0, 0, 0, 0, 0, 0, 0, 0}},
 {{0x017e7e31c2e5bd66, 0x022cf0615a90a6fe, 0x00127a2ffa8de334,
   0x01dfbf9d64a3f877, 0x006b4d3dbaa14b5e, 0x014fed487e0a2bd8,
   0x015b4429c6481390, 0x03a73678fb2d988e, 0x00c6858e06b70404},
  {0x00be94769fd16650, 0x031c21a89cb09022, 0x039013fad0761353,
   0x02657bd099031542, 0x03273e662c97ee72, 0x01e6d11a05ebef45,
   0x03d1bd998f544495, 0x03001172297ed0b1, 0x011839296a789a3b},
  {1, 0, 0, 0, 0, 0, 0, 0, 0}},
 {{0x0373faacbc875bae, 0x00f325023721c671, 0x00f666fd3dbde5ad,
   0x01a6932363f88ea7, 0x01fc6d9e13f9c47b, 0x03bcbffc2bbf734e,
   0x013ee3c3647f3a92, 0x029409fefe75d07d, 0x00ef9199963d85e5},
  {0x011173743ad5b178, 0x02499c7c21bf7d46, 0x035beaeabb8b1a58,
   0x00f989c4752ea0a3, 0x0101e1de48a9c1a3, 0x01a20076be28ba6c,
   0x02f8052e5eb2de95, 0x01bfe8f82dea117c, 0x0160074d3c36ddb7},
  {1, 0, 0, 0, 0, 0, 0, 0, 0}},
 {{0x012f3fc373393b3b, 0x03d3d6172f1419fa, 0x02adc943c0b86873,
   0x00d475584177952b, 0x012a4d1673750ee2, 0x00512517a0f13b0c,
   0x02b184671a7b1734, 0x0315b84236f1a50a, 0x00a4afc472edbdb9},
  {0x00152a7077f385c4, 0x03044007d8d1c2ee, 0x0065829d61d52b52,
   0x00494ff6b6631d0d, 0x00a11d94d5f06bcf, 0x02d2f89474d9282e,
   0x0241c5727c06eeb9, 0x0386928710fbdb9d, 0x01f883f727b0dfbe},
  {1, 0, 0, 0, 0, 0, 0, 0, 0}},
 {{0x019b0c3c9185544d, 0x006243a37c9d97db, 0x02ee3cbe030a2ad2,
   0x00cfdd946bb51e0d, 0x0271c00932606b91, 0x03f817d1ec68c561,
   0x03f37009806a369c, 0x03c1f30baf184fd5, 0x01091022d6d2f065},
  {0x0292c583514c45ed, 0x0316fca51f9a286c, 0x00300af507c1489a,
   0x0295f69008298cf1, 0x02c0ed8274943d7b, 0x016509b9b47a431e,
   0x02bc9de9634868ce, 0x005b34929bffcb09, 0x000c1a0121681524},
  {1, 0, 0, 0, 0, 0, 0, 0, 0}},
 {{0x0286abc0292fb9f2, 0x02665eee9805b3f7, 0x01ed7455f17f26d6,
   0x0346355b83175d13, 0x006284944cd0a097, 0x0191895bcdec5e51,
   0x02e288370afda7d9, 0x03b22312bfefa67a, 0x01d104d3fc0613fe},
  {0x0092421a12f7e47f, 0x0077a83fa373c501, 0x03bd25c5f696bd0d,
   0x035c41e4d5459761, 0x01ca0d1742b24f53, 0x00aaab27863a509c,
   0x018b6de47df73917, 0x025c0b771705cd01, 0x01fd51d566d760a7},
  {1, 0, 0, 0, 0, 0, 0, 0, 0}},
 {{0x01dd92ff6b0d1dbd, 0x039c5e2e8f8afa69, 0x0261ed13242c3b27,
   0x0382c6e67026e6a0, 0x01d60b10be2089f9, 0x03c15f3dce86723f,
   0x03c764a32d2a062d, 0x017307eac0fad056, 0x018207c0b96c5256},
  {0x0196a16d60e13154, 0x03e6ce74c0267030, 0x00ddbf2b4e52a5aa,
   0x012738241bbf31c8, 0x00ebe8dc04685a28, 0x024c2ad6d380d4a2,
   0x035ee062a6e62d0e, 0x0029ed74af7d3a0f, 0x00eef32aec142ebd},
  {1, 0, 0, 0, 0, 0, 0, 0, 0}},
 {{0x00c31ec398993b39, 0x03a9f45bcda68253, 0x00ac733c24c70890,
   0x00872b111401ff01, 0x01d178c23195eafb, 0x03bca2c816b87f74,
   0x0261a9af46fbad7a, 0x0324b2a8dd3d28f9, 0x00918121d8f24e23},
  {0x032bc8c1ca983cd7, 0x00d869dfb08fc8c6, 0x01693cb61fce1516,
   0x012a5ea68f4e88a8, 0x010869cab88d7ae3, 0x009081ad277ceee1,
   0x033a77166d064cdc, 0x03955235a1fb3a95, 0x01251a4a9b25b65e},
  {1, 0, 0, 0, 0, 0, 0, 0, 0}},
 {{0x00148a3a1b27f40b, 0x0123186df1b31fdc, 0x00026e7beaad34ce,
   0x01db446ac1d3dbba, 0x0299c1a33437eaec, 0x024540610183cbb7,
   0x0173bb0e9ce92e46, 0x02b937e43921214b, 0x01ab0436a9bf01b5},
  {0x0383381640d46948, 0x008dacbf0e7f330f, 0x03602122bcc3f318,
   0x01ee596b200620d6, 0x03bd0585fda430b3, 0x014aed77fd123a83,
   0x005ace749e52f742, 0x0390fe041da2b842, 0x0189a8ceb3299242},
  {1, 0, 0, 0, 0, 0, 0, 0, 0}},
 {{0x012a19d6b3282473, 0x00c0915918b423ce, 0x023a954eb94405ae,
   0x00529f692be26158, 0x0289fa1b6fa4b2aa, 0x0198ae4ceea346ef,
   0x0047d8cdfbdedd49, 0x00cc8c8953f0f6b8, 0x001424abbff49203},
  {0x0256732a1115a03a, 0x0351bc38665c6733, 0x03f7b950fb4a6447,
   0x000afffa94c22155, 0x025763d0a4dab540, 0x000511e92d4fc283,
   0x030a7e9eda0ee96c, 0x004c3cd93a28bf0a, 0x017edb3a8719217f},
  {1, 0, 0, 0, 0, 0, 0, 0, 0}},
 {{0x011de5675a88e673, 0x031d7d0f5e567fbe, 0x0016b2062c970ae5,
   0x03f4a2be49d90aa7, 0x03cef0bd13822866, 0x03f0923dcf774a6c,
   0x0284bebc4f322f72, 0x016ab2645302bb2c, 0x01793f95dace0e2a},
  {0x010646e13527a28f, 0x01ca1babd59dc5e7, 0x01afedfd9a5595df,
   0x01f15785212ea6b1, 0x0324e5d64f6ae3f4, 0x02d680f526d00645,
   0x0127920fadf627a7, 0x03b383f75df4f684, 0x0089e0057e783b0a},
  {1, 0, 0, 0, 0, 0, 0, 0, 0}},
 {{0x00f334b9eb3c26c6, 0x0298fdaa98568dce, 0x01c2d24843a82292,
   0x020bcb24fa1b0711, 0x02cbdb3d2b1875e6, 0x0014907598f89422,
   0x03abe3aa43b26664, 0x02cbf47f720bc168, 0x0133b5e73014b79b},
  {0x034aab5dab05779d, 0x00cdc5d71fee9abb, 0x0399f16bd4bd9d30,
   0x03582fa592d82647, 0x02be1cdfb775b0e9, 0x0034f7cea32e94cb,
   0x0335a7f08f56f286, 0x03b707e9565d1c8b, 0x0015c946ea5b614f},
  {1, 0, 0, 0, 0, 0, 0, 0, 0}},
 {{0x024676f6cff72255, 0x00d14625cac96378, 0x00532b6008bc3767,
   0x01fc16721b985322, 0x023355ea1b091668, 0x029de7afdc0317c3,
   0x02fc8a7ca2da037c, 0x02de1217d74a6f30, 0x013f7173175b73bf},
  {0x0344913f441490b5, 0x0200f9e272b61eca, 0x0258a246b1dd55d2,
   0x03753db9ea496f36, 0x025e02937a09c5ef, 0x030cbd3d14012692,
   0x01793a67e70dc72a, 0x03ec1d37048a662e, 0x006550f700c32a8d},
  {1, 0, 0, 0, 0, 0, 0, 0, 0}},
 {{0x00d3f48a347eba27, 0x008e636649b61bd8, 0x00d3b93716778fb3,
   0x004d1915757bd209, 0x019d5311a3da44e0, 0x016d1afcbbe6aade,
   0x0241bf5f73265616, 0x0384672e5d50d39b, 0x005009fee522b684},
  {0x029b4fab064435fe, 0x018868ee095bbb07, 0x01ea3d6936cc92b8,
   0x000608b00f78a2f3, 0x02db911073d1c20f, 0x018205938470100a,
   0x01f1e4964cbe6ff2, 0x021a19a29eed4663, 0x01414485f42afa81},
  {1, 0, 0, 0, 0, 0, 0, 0, 0}},
 {{0x01612b3a17f63e34, 0x03813992885428e6, 0x022b3c215b5a9608,
   0x029b4057e19f2fcb, 0x0384059a587af7e6, 0x02d6400ace6fe610,
   0x029354d896e8e331, 0x00c047ee6dfba65e, 0x0037720542e9d49d},
  {0x02ce9eed7c5e9278, 0x0374ed703e79643b, 0x01316c54c4072006,
   0x005aaa09054b2ee8, 0x002824000c840d57, 0x03d4eba24771ed86,
   0x0189c50aabc3bdae, 0x0338c01541e15510, 0x00466d56e38eed42},
  {1, 0, 0, 0, 0, 0, 0, 0, 0}},
 {{0x007efd8330ad8bd6, 0x02465ed48047710b, 0x0034c6606b215e0c,
   0x016ae30c53cbf839, 0x01fa17bd37161216, 0x018ead4e61ce8ab9,
   0x005482ed5f5dee46, 0x037543755bba1d7f, 0x005e5ac7e70a9d0f},
  {0x0117e1bb2fdcb2a2, 0x03deea36249f40c4, 0x028d09b4a6246cb7,
   0x03524b8855bcf756, 0x023d7d109d5ceb58, 0x0178e43e3223ef9c,
   0x0154536a0c6e966a, 0x037964d1286ee9fe, 0x0199bcd90e125055},
  {1, 0, 0, 0, 0, 0, 0, 0, 0}}};
1378 /* select_point selects the |idx|th point from a precomputation table and
1379 * copies it to out. */
1380 static void select_point(const limb idx
, unsigned int size
, const felem pre_comp
[/* size */][3],
1384 limb
*outlimbs
= &out
[0][0];
1385 memset(outlimbs
, 0, 3 * sizeof(felem
));
1387 for (i
= 0; i
< size
; i
++)
1389 const limb
*inlimbs
= &pre_comp
[i
][0][0];
1390 limb mask
= i
^ idx
;
1396 for (j
= 0; j
< NLIMBS
* 3; j
++)
1397 outlimbs
[j
] |= inlimbs
[j
] & mask
;
1401 /* get_bit returns the |i|th bit in |in| */
1402 static char get_bit(const felem_bytearray in
, int i
)
1406 return (in
[i
>> 3] >> (i
& 7)) & 1;
1409 /* Interleaved point multiplication using precomputed point multiples:
1410 * The small point multiples 0*P, 1*P, ..., 16*P are in pre_comp[],
1411 * the scalars in scalars[]. If g_scalar is non-NULL, we also add this multiple
1412 * of the generator, using certain (large) precomputed multiples in g_pre_comp.
1413 * Output point (X, Y, Z) is stored in x_out, y_out, z_out */
1414 static void batch_mul(felem x_out
, felem y_out
, felem z_out
,
1415 const felem_bytearray scalars
[], const unsigned num_points
, const u8
*g_scalar
,
1416 const int mixed
, const felem pre_comp
[][17][3], const felem g_pre_comp
[16][3])
1419 unsigned num
, gen_mul
= (g_scalar
!= NULL
);
1420 felem nq
[3], tmp
[4];
1424 /* set nq to the point at infinity */
1425 memset(nq
, 0, 3 * sizeof(felem
));
1427 /* Loop over all scalars msb-to-lsb, interleaving additions
1428 * of multiples of the generator (last quarter of rounds)
1429 * and additions of other points multiples (every 5th round).
1431 skip
= 1; /* save two point operations in the first round */
1432 for (i
= (num_points
? 520 : 130); i
>= 0; --i
)
1436 point_double(nq
[0], nq
[1], nq
[2], nq
[0], nq
[1], nq
[2]);
1438 /* add multiples of the generator */
1439 if (gen_mul
&& (i
<= 130))
1441 bits
= get_bit(g_scalar
, i
+ 390) << 3;
1444 bits
|= get_bit(g_scalar
, i
+ 260) << 2;
1445 bits
|= get_bit(g_scalar
, i
+ 130) << 1;
1446 bits
|= get_bit(g_scalar
, i
);
1448 /* select the point to add, in constant time */
1449 select_point(bits
, 16, g_pre_comp
, tmp
);
1452 point_add(nq
[0], nq
[1], nq
[2],
1453 nq
[0], nq
[1], nq
[2],
1454 1 /* mixed */, tmp
[0], tmp
[1], tmp
[2]);
1458 memcpy(nq
, tmp
, 3 * sizeof(felem
));
1463 /* do other additions every 5 doublings */
1464 if (num_points
&& (i
% 5 == 0))
1466 /* loop over all scalars */
1467 for (num
= 0; num
< num_points
; ++num
)
1469 bits
= get_bit(scalars
[num
], i
+ 4) << 5;
1470 bits
|= get_bit(scalars
[num
], i
+ 3) << 4;
1471 bits
|= get_bit(scalars
[num
], i
+ 2) << 3;
1472 bits
|= get_bit(scalars
[num
], i
+ 1) << 2;
1473 bits
|= get_bit(scalars
[num
], i
) << 1;
1474 bits
|= get_bit(scalars
[num
], i
- 1);
1475 ec_GFp_nistp_recode_scalar_bits(&sign
, &digit
, bits
);
1477 /* select the point to add or subtract, in constant time */
1478 select_point(digit
, 17, pre_comp
[num
], tmp
);
1479 felem_neg(tmp
[3], tmp
[1]); /* (X, -Y, Z) is the negative point */
1480 copy_conditional(tmp
[1], tmp
[3], (-(limb
) sign
));
1484 point_add(nq
[0], nq
[1], nq
[2],
1485 nq
[0], nq
[1], nq
[2],
1486 mixed
, tmp
[0], tmp
[1], tmp
[2]);
1490 memcpy(nq
, tmp
, 3 * sizeof(felem
));
1496 felem_assign(x_out
, nq
[0]);
1497 felem_assign(y_out
, nq
[1]);
1498 felem_assign(z_out
, nq
[2]);
1502 /* Precomputation for the group generator. */
1504 felem g_pre_comp
[16][3];
1506 } NISTP521_PRE_COMP
;
1508 const EC_METHOD
*EC_GFp_nistp521_method(void)
1510 static const EC_METHOD ret
= {
1511 EC_FLAGS_DEFAULT_OCT
,
1512 NID_X9_62_prime_field
,
1513 ec_GFp_nistp521_group_init
,
1514 ec_GFp_simple_group_finish
,
1515 ec_GFp_simple_group_clear_finish
,
1516 ec_GFp_nist_group_copy
,
1517 ec_GFp_nistp521_group_set_curve
,
1518 ec_GFp_simple_group_get_curve
,
1519 ec_GFp_simple_group_get_degree
,
1520 ec_GFp_simple_group_check_discriminant
,
1521 ec_GFp_simple_point_init
,
1522 ec_GFp_simple_point_finish
,
1523 ec_GFp_simple_point_clear_finish
,
1524 ec_GFp_simple_point_copy
,
1525 ec_GFp_simple_point_set_to_infinity
,
1526 ec_GFp_simple_set_Jprojective_coordinates_GFp
,
1527 ec_GFp_simple_get_Jprojective_coordinates_GFp
,
1528 ec_GFp_simple_point_set_affine_coordinates
,
1529 ec_GFp_nistp521_point_get_affine_coordinates
,
1530 0 /* point_set_compressed_coordinates */,
1535 ec_GFp_simple_invert
,
1536 ec_GFp_simple_is_at_infinity
,
1537 ec_GFp_simple_is_on_curve
,
1539 ec_GFp_simple_make_affine
,
1540 ec_GFp_simple_points_make_affine
,
1541 ec_GFp_nistp521_points_mul
,
1542 ec_GFp_nistp521_precompute_mult
,
1543 ec_GFp_nistp521_have_precompute_mult
,
1544 ec_GFp_nist_field_mul
,
1545 ec_GFp_nist_field_sqr
,
1547 0 /* field_encode */,
1548 0 /* field_decode */,
1549 0 /* field_set_to_one */ };
1555 /******************************************************************************/
1556 /* FUNCTIONS TO MANAGE PRECOMPUTATION
1559 static NISTP521_PRE_COMP
*nistp521_pre_comp_new()
1561 NISTP521_PRE_COMP
*ret
= NULL
;
1562 ret
= (NISTP521_PRE_COMP
*)OPENSSL_malloc(sizeof(NISTP521_PRE_COMP
));
1565 ECerr(EC_F_NISTP521_PRE_COMP_NEW
, ERR_R_MALLOC_FAILURE
);
1568 memset(ret
->g_pre_comp
, 0, sizeof(ret
->g_pre_comp
));
1569 ret
->references
= 1;
1573 static void *nistp521_pre_comp_dup(void *src_
)
1575 NISTP521_PRE_COMP
*src
= src_
;
1577 /* no need to actually copy, these objects never change! */
1578 CRYPTO_add(&src
->references
, 1, CRYPTO_LOCK_EC_PRE_COMP
);
1583 static void nistp521_pre_comp_free(void *pre_
)
1586 NISTP521_PRE_COMP
*pre
= pre_
;
1591 i
= CRYPTO_add(&pre
->references
, -1, CRYPTO_LOCK_EC_PRE_COMP
);
1598 static void nistp521_pre_comp_clear_free(void *pre_
)
1601 NISTP521_PRE_COMP
*pre
= pre_
;
1606 i
= CRYPTO_add(&pre
->references
, -1, CRYPTO_LOCK_EC_PRE_COMP
);
1610 OPENSSL_cleanse(pre
, sizeof(*pre
));
1614 /******************************************************************************/
1615 /* OPENSSL EC_METHOD FUNCTIONS
1618 int ec_GFp_nistp521_group_init(EC_GROUP
*group
)
1621 ret
= ec_GFp_simple_group_init(group
);
1622 group
->a_is_minus3
= 1;
1626 int ec_GFp_nistp521_group_set_curve(EC_GROUP
*group
, const BIGNUM
*p
,
1627 const BIGNUM
*a
, const BIGNUM
*b
, BN_CTX
*ctx
)
1630 BN_CTX
*new_ctx
= NULL
;
1631 BIGNUM
*curve_p
, *curve_a
, *curve_b
;
1634 if ((ctx
= new_ctx
= BN_CTX_new()) == NULL
) return 0;
1636 if (((curve_p
= BN_CTX_get(ctx
)) == NULL
) ||
1637 ((curve_a
= BN_CTX_get(ctx
)) == NULL
) ||
1638 ((curve_b
= BN_CTX_get(ctx
)) == NULL
)) goto err
;
1639 BN_bin2bn(nistp521_curve_params
[0], sizeof(felem_bytearray
), curve_p
);
1640 BN_bin2bn(nistp521_curve_params
[1], sizeof(felem_bytearray
), curve_a
);
1641 BN_bin2bn(nistp521_curve_params
[2], sizeof(felem_bytearray
), curve_b
);
1642 if ((BN_cmp(curve_p
, p
)) || (BN_cmp(curve_a
, a
)) ||
1643 (BN_cmp(curve_b
, b
)))
1645 ECerr(EC_F_EC_GFP_NISTP521_GROUP_SET_CURVE
,
1646 EC_R_WRONG_CURVE_PARAMETERS
);
1649 group
->field_mod_func
= BN_nist_mod_521
;
1650 ret
= ec_GFp_simple_group_set_curve(group
, p
, a
, b
, ctx
);
1653 if (new_ctx
!= NULL
)
1654 BN_CTX_free(new_ctx
);
1658 /* Takes the Jacobian coordinates (X, Y, Z) of a point and returns
1659 * (X', Y') = (X/Z^2, Y/Z^3) */
1660 int ec_GFp_nistp521_point_get_affine_coordinates(const EC_GROUP
*group
,
1661 const EC_POINT
*point
, BIGNUM
*x
, BIGNUM
*y
, BN_CTX
*ctx
)
1663 felem z1
, z2
, x_in
, y_in
, x_out
, y_out
;
1666 if (EC_POINT_is_at_infinity(group
, point
))
1668 ECerr(EC_F_EC_GFP_NISTP521_POINT_GET_AFFINE_COORDINATES
,
1669 EC_R_POINT_AT_INFINITY
);
1672 if ((!BN_to_felem(x_in
, &point
->X
)) || (!BN_to_felem(y_in
, &point
->Y
)) ||
1673 (!BN_to_felem(z1
, &point
->Z
))) return 0;
1675 felem_square(tmp
, z2
); felem_reduce(z1
, tmp
);
1676 felem_mul(tmp
, x_in
, z1
); felem_reduce(x_in
, tmp
);
1677 felem_contract(x_out
, x_in
);
1680 if (!felem_to_BN(x
, x_out
))
1682 ECerr(EC_F_EC_GFP_NISTP521_POINT_GET_AFFINE_COORDINATES
, ERR_R_BN_LIB
);
1686 felem_mul(tmp
, z1
, z2
); felem_reduce(z1
, tmp
);
1687 felem_mul(tmp
, y_in
, z1
); felem_reduce(y_in
, tmp
);
1688 felem_contract(y_out
, y_in
);
1691 if (!felem_to_BN(y
, y_out
))
1693 ECerr(EC_F_EC_GFP_NISTP521_POINT_GET_AFFINE_COORDINATES
, ERR_R_BN_LIB
);
1700 static void make_points_affine(size_t num
, felem points
[/* num */][3], felem tmp_felems
[/* num+1 */])
1702 /* Runs in constant time, unless an input is the point at infinity
1703 * (which normally shouldn't happen). */
1704 ec_GFp_nistp_points_make_affine_internal(
1709 (void (*)(void *)) felem_one
,
1710 (int (*)(const void *)) felem_is_zero_int
,
1711 (void (*)(void *, const void *)) felem_assign
,
1712 (void (*)(void *, const void *)) felem_square_reduce
,
1713 (void (*)(void *, const void *, const void *)) felem_mul_reduce
,
1714 (void (*)(void *, const void *)) felem_inv
,
1715 (void (*)(void *, const void *)) felem_contract
);
1718 /* Computes scalar*generator + \sum scalars[i]*points[i], ignoring NULL values
1719 * Result is stored in r (r can equal one of the inputs). */
1720 int ec_GFp_nistp521_points_mul(const EC_GROUP
*group
, EC_POINT
*r
,
1721 const BIGNUM
*scalar
, size_t num
, const EC_POINT
*points
[],
1722 const BIGNUM
*scalars
[], BN_CTX
*ctx
)
1727 BN_CTX
*new_ctx
= NULL
;
1728 BIGNUM
*x
, *y
, *z
, *tmp_scalar
;
1729 felem_bytearray g_secret
;
1730 felem_bytearray
*secrets
= NULL
;
1731 felem (*pre_comp
)[17][3] = NULL
;
1732 felem
*tmp_felems
= NULL
;
1733 felem_bytearray tmp
;
1734 unsigned i
, num_bytes
;
1735 int have_pre_comp
= 0;
1736 size_t num_points
= num
;
1737 felem x_in
, y_in
, z_in
, x_out
, y_out
, z_out
;
1738 NISTP521_PRE_COMP
*pre
= NULL
;
1739 felem (*g_pre_comp
)[3] = NULL
;
1740 EC_POINT
*generator
= NULL
;
1741 const EC_POINT
*p
= NULL
;
1742 const BIGNUM
*p_scalar
= NULL
;
1745 if ((ctx
= new_ctx
= BN_CTX_new()) == NULL
) return 0;
1747 if (((x
= BN_CTX_get(ctx
)) == NULL
) ||
1748 ((y
= BN_CTX_get(ctx
)) == NULL
) ||
1749 ((z
= BN_CTX_get(ctx
)) == NULL
) ||
1750 ((tmp_scalar
= BN_CTX_get(ctx
)) == NULL
))
1755 pre
= EC_EX_DATA_get_data(group
->extra_data
,
1756 nistp521_pre_comp_dup
, nistp521_pre_comp_free
,
1757 nistp521_pre_comp_clear_free
);
1759 /* we have precomputation, try to use it */
1760 g_pre_comp
= &pre
->g_pre_comp
[0];
1762 /* try to use the standard precomputation */
1763 g_pre_comp
= (felem (*)[3]) gmul
;
1764 generator
= EC_POINT_new(group
);
1765 if (generator
== NULL
)
1767 /* get the generator from precomputation */
1768 if (!felem_to_BN(x
, g_pre_comp
[1][0]) ||
1769 !felem_to_BN(y
, g_pre_comp
[1][1]) ||
1770 !felem_to_BN(z
, g_pre_comp
[1][2]))
1772 ECerr(EC_F_EC_GFP_NISTP521_POINTS_MUL
, ERR_R_BN_LIB
);
1775 if (!EC_POINT_set_Jprojective_coordinates_GFp(group
,
1776 generator
, x
, y
, z
, ctx
))
1778 if (0 == EC_POINT_cmp(group
, generator
, group
->generator
, ctx
))
1779 /* precomputation matches generator */
1782 /* we don't have valid precomputation:
1783 * treat the generator as a random point */
1789 if (num_points
>= 2)
1791 /* unless we precompute multiples for just one point,
1792 * converting those into affine form is time well spent */
1795 secrets
= OPENSSL_malloc(num_points
* sizeof(felem_bytearray
));
1796 pre_comp
= OPENSSL_malloc(num_points
* 17 * 3 * sizeof(felem
));
1798 tmp_felems
= OPENSSL_malloc((num_points
* 17 + 1) * sizeof(felem
));
1799 if ((secrets
== NULL
) || (pre_comp
== NULL
) || (mixed
&& (tmp_felems
== NULL
)))
1801 ECerr(EC_F_EC_GFP_NISTP521_POINTS_MUL
, ERR_R_MALLOC_FAILURE
);
1805 /* we treat NULL scalars as 0, and NULL points as points at infinity,
1806 * i.e., they contribute nothing to the linear combination */
1807 memset(secrets
, 0, num_points
* sizeof(felem_bytearray
));
1808 memset(pre_comp
, 0, num_points
* 17 * 3 * sizeof(felem
));
1809 for (i
= 0; i
< num_points
; ++i
)
1812 /* we didn't have a valid precomputation, so we pick
1815 p
= EC_GROUP_get0_generator(group
);
1819 /* the i^th point */
1822 p_scalar
= scalars
[i
];
1824 if ((p_scalar
!= NULL
) && (p
!= NULL
))
1826 /* reduce scalar to 0 <= scalar < 2^521 */
1827 if ((BN_num_bits(p_scalar
) > 521) || (BN_is_negative(p_scalar
)))
1829 /* this is an unusual input, and we don't guarantee
1830 * constant-timeness */
1831 if (!BN_nnmod(tmp_scalar
, p_scalar
, &group
->order
, ctx
))
1833 ECerr(EC_F_EC_GFP_NISTP521_POINTS_MUL
, ERR_R_BN_LIB
);
1836 num_bytes
= BN_bn2bin(tmp_scalar
, tmp
);
1839 num_bytes
= BN_bn2bin(p_scalar
, tmp
);
1840 flip_endian(secrets
[i
], tmp
, num_bytes
);
1841 /* precompute multiples */
1842 if ((!BN_to_felem(x_out
, &p
->X
)) ||
1843 (!BN_to_felem(y_out
, &p
->Y
)) ||
1844 (!BN_to_felem(z_out
, &p
->Z
))) goto err
;
1845 memcpy(pre_comp
[i
][1][0], x_out
, sizeof(felem
));
1846 memcpy(pre_comp
[i
][1][1], y_out
, sizeof(felem
));
1847 memcpy(pre_comp
[i
][1][2], z_out
, sizeof(felem
));
1848 for (j
= 2; j
<= 16; ++j
)
1853 pre_comp
[i
][j
][0], pre_comp
[i
][j
][1], pre_comp
[i
][j
][2],
1854 pre_comp
[i
][1][0], pre_comp
[i
][1][1], pre_comp
[i
][1][2],
1855 0, pre_comp
[i
][j
-1][0], pre_comp
[i
][j
-1][1], pre_comp
[i
][j
-1][2]);
1860 pre_comp
[i
][j
][0], pre_comp
[i
][j
][1], pre_comp
[i
][j
][2],
1861 pre_comp
[i
][j
/2][0], pre_comp
[i
][j
/2][1], pre_comp
[i
][j
/2][2]);
1867 make_points_affine(num_points
* 17, pre_comp
[0], tmp_felems
);
1870 /* the scalar for the generator */
1871 if ((scalar
!= NULL
) && (have_pre_comp
))
1873 memset(g_secret
, 0, sizeof(g_secret
));
1874 /* reduce scalar to 0 <= scalar < 2^521 */
1875 if ((BN_num_bits(scalar
) > 521) || (BN_is_negative(scalar
)))
1877 /* this is an unusual input, and we don't guarantee
1878 * constant-timeness */
1879 if (!BN_nnmod(tmp_scalar
, scalar
, &group
->order
, ctx
))
1881 ECerr(EC_F_EC_GFP_NISTP521_POINTS_MUL
, ERR_R_BN_LIB
);
1884 num_bytes
= BN_bn2bin(tmp_scalar
, tmp
);
1887 num_bytes
= BN_bn2bin(scalar
, tmp
);
1888 flip_endian(g_secret
, tmp
, num_bytes
);
1889 /* do the multiplication with generator precomputation*/
1890 batch_mul(x_out
, y_out
, z_out
,
1891 (const felem_bytearray (*)) secrets
, num_points
,
1893 mixed
, (const felem (*)[17][3]) pre_comp
,
1894 (const felem (*)[3]) g_pre_comp
);
1897 /* do the multiplication without generator precomputation */
1898 batch_mul(x_out
, y_out
, z_out
,
1899 (const felem_bytearray (*)) secrets
, num_points
,
1900 NULL
, mixed
, (const felem (*)[17][3]) pre_comp
, NULL
);
1901 /* reduce the output to its unique minimal representation */
1902 felem_contract(x_in
, x_out
);
1903 felem_contract(y_in
, y_out
);
1904 felem_contract(z_in
, z_out
);
1905 if ((!felem_to_BN(x
, x_in
)) || (!felem_to_BN(y
, y_in
)) ||
1906 (!felem_to_BN(z
, z_in
)))
1908 ECerr(EC_F_EC_GFP_NISTP521_POINTS_MUL
, ERR_R_BN_LIB
);
1911 ret
= EC_POINT_set_Jprojective_coordinates_GFp(group
, r
, x
, y
, z
, ctx
);
1915 if (generator
!= NULL
)
1916 EC_POINT_free(generator
);
1917 if (new_ctx
!= NULL
)
1918 BN_CTX_free(new_ctx
);
1919 if (secrets
!= NULL
)
1920 OPENSSL_free(secrets
);
1921 if (pre_comp
!= NULL
)
1922 OPENSSL_free(pre_comp
);
1923 if (tmp_felems
!= NULL
)
1924 OPENSSL_free(tmp_felems
);
1928 int ec_GFp_nistp521_precompute_mult(EC_GROUP
*group
, BN_CTX
*ctx
)
1931 NISTP521_PRE_COMP
*pre
= NULL
;
1933 BN_CTX
*new_ctx
= NULL
;
1935 EC_POINT
*generator
= NULL
;
1936 felem tmp_felems
[16];
1938 /* throw away old precomputation */
1939 EC_EX_DATA_free_data(&group
->extra_data
, nistp521_pre_comp_dup
,
1940 nistp521_pre_comp_free
, nistp521_pre_comp_clear_free
);
1942 if ((ctx
= new_ctx
= BN_CTX_new()) == NULL
) return 0;
1944 if (((x
= BN_CTX_get(ctx
)) == NULL
) ||
1945 ((y
= BN_CTX_get(ctx
)) == NULL
))
1947 /* get the generator */
1948 if (group
->generator
== NULL
) goto err
;
1949 generator
= EC_POINT_new(group
);
1950 if (generator
== NULL
)
1952 BN_bin2bn(nistp521_curve_params
[3], sizeof (felem_bytearray
), x
);
1953 BN_bin2bn(nistp521_curve_params
[4], sizeof (felem_bytearray
), y
);
1954 if (!EC_POINT_set_affine_coordinates_GFp(group
, generator
, x
, y
, ctx
))
1956 if ((pre
= nistp521_pre_comp_new()) == NULL
)
1958 /* if the generator is the standard one, use built-in precomputation */
1959 if (0 == EC_POINT_cmp(group
, generator
, group
->generator
, ctx
))
1961 memcpy(pre
->g_pre_comp
, gmul
, sizeof(pre
->g_pre_comp
));
1965 if ((!BN_to_felem(pre
->g_pre_comp
[1][0], &group
->generator
->X
)) ||
1966 (!BN_to_felem(pre
->g_pre_comp
[1][1], &group
->generator
->Y
)) ||
1967 (!BN_to_felem(pre
->g_pre_comp
[1][2], &group
->generator
->Z
)))
1969 /* compute 2^130*G, 2^260*G, 2^390*G */
1970 for (i
= 1; i
<= 4; i
<<= 1)
1972 point_double(pre
->g_pre_comp
[2*i
][0], pre
->g_pre_comp
[2*i
][1],
1973 pre
->g_pre_comp
[2*i
][2], pre
->g_pre_comp
[i
][0],
1974 pre
->g_pre_comp
[i
][1], pre
->g_pre_comp
[i
][2]);
1975 for (j
= 0; j
< 129; ++j
)
1977 point_double(pre
->g_pre_comp
[2*i
][0],
1978 pre
->g_pre_comp
[2*i
][1],
1979 pre
->g_pre_comp
[2*i
][2],
1980 pre
->g_pre_comp
[2*i
][0],
1981 pre
->g_pre_comp
[2*i
][1],
1982 pre
->g_pre_comp
[2*i
][2]);
1985 /* g_pre_comp[0] is the point at infinity */
1986 memset(pre
->g_pre_comp
[0], 0, sizeof(pre
->g_pre_comp
[0]));
1987 /* the remaining multiples */
1988 /* 2^130*G + 2^260*G */
1989 point_add(pre
->g_pre_comp
[6][0], pre
->g_pre_comp
[6][1],
1990 pre
->g_pre_comp
[6][2], pre
->g_pre_comp
[4][0],
1991 pre
->g_pre_comp
[4][1], pre
->g_pre_comp
[4][2],
1992 0, pre
->g_pre_comp
[2][0], pre
->g_pre_comp
[2][1],
1993 pre
->g_pre_comp
[2][2]);
1994 /* 2^130*G + 2^390*G */
1995 point_add(pre
->g_pre_comp
[10][0], pre
->g_pre_comp
[10][1],
1996 pre
->g_pre_comp
[10][2], pre
->g_pre_comp
[8][0],
1997 pre
->g_pre_comp
[8][1], pre
->g_pre_comp
[8][2],
1998 0, pre
->g_pre_comp
[2][0], pre
->g_pre_comp
[2][1],
1999 pre
->g_pre_comp
[2][2]);
2000 /* 2^260*G + 2^390*G */
2001 point_add(pre
->g_pre_comp
[12][0], pre
->g_pre_comp
[12][1],
2002 pre
->g_pre_comp
[12][2], pre
->g_pre_comp
[8][0],
2003 pre
->g_pre_comp
[8][1], pre
->g_pre_comp
[8][2],
2004 0, pre
->g_pre_comp
[4][0], pre
->g_pre_comp
[4][1],
2005 pre
->g_pre_comp
[4][2]);
2006 /* 2^130*G + 2^260*G + 2^390*G */
2007 point_add(pre
->g_pre_comp
[14][0], pre
->g_pre_comp
[14][1],
2008 pre
->g_pre_comp
[14][2], pre
->g_pre_comp
[12][0],
2009 pre
->g_pre_comp
[12][1], pre
->g_pre_comp
[12][2],
2010 0, pre
->g_pre_comp
[2][0], pre
->g_pre_comp
[2][1],
2011 pre
->g_pre_comp
[2][2]);
2012 for (i
= 1; i
< 8; ++i
)
2014 /* odd multiples: add G */
2015 point_add(pre
->g_pre_comp
[2*i
+1][0], pre
->g_pre_comp
[2*i
+1][1],
2016 pre
->g_pre_comp
[2*i
+1][2], pre
->g_pre_comp
[2*i
][0],
2017 pre
->g_pre_comp
[2*i
][1], pre
->g_pre_comp
[2*i
][2],
2018 0, pre
->g_pre_comp
[1][0], pre
->g_pre_comp
[1][1],
2019 pre
->g_pre_comp
[1][2]);
2021 make_points_affine(15, &(pre
->g_pre_comp
[1]), tmp_felems
);
2023 if (!EC_EX_DATA_set_data(&group
->extra_data
, pre
, nistp521_pre_comp_dup
,
2024 nistp521_pre_comp_free
, nistp521_pre_comp_clear_free
))
2030 if (generator
!= NULL
)
2031 EC_POINT_free(generator
);
2032 if (new_ctx
!= NULL
)
2033 BN_CTX_free(new_ctx
);
2035 nistp521_pre_comp_free(pre
);
2039 int ec_GFp_nistp521_have_precompute_mult(const EC_GROUP
*group
)
2041 if (EC_EX_DATA_get_data(group
->extra_data
, nistp521_pre_comp_dup
,
2042 nistp521_pre_comp_free
, nistp521_pre_comp_clear_free
)
/* Self-referential placeholder so this translation unit is never empty
 * when the NISTP code above is compiled out (some compilers reject empty
 * translation units). */
static void *dummy=&dummy;