2 * Copyright 2010-2018 The OpenSSL Project Authors. All Rights Reserved.
4 * Licensed under the Apache License 2.0 (the "License"). You may not use
5 * this file except in compliance with the License. You can obtain a copy
6 * in the file LICENSE in the source distribution or at
7 * https://www.openssl.org/source/license.html
10 /* Copyright 2011 Google Inc.
12 * Licensed under the Apache License, Version 2.0 (the "License");
14 * you may not use this file except in compliance with the License.
15 * You may obtain a copy of the License at
17 * http://www.apache.org/licenses/LICENSE-2.0
19 * Unless required by applicable law or agreed to in writing, software
20 * distributed under the License is distributed on an "AS IS" BASIS,
21 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
22 * See the License for the specific language governing permissions and
23 * limitations under the License.
27 * A 64-bit implementation of the NIST P-224 elliptic curve point multiplication
29 * Inspired by Daniel J. Bernstein's public domain nistp224 implementation
30 * and Adam Langley's public domain 64-bit C implementation of curve25519
33 #include <openssl/opensslconf.h>
34 #ifdef OPENSSL_NO_EC_NISTP_64_GCC_128
35 NON_EMPTY_TRANSLATION_UNIT
40 # include <openssl/err.h>
43 # if defined(__SIZEOF_INT128__) && __SIZEOF_INT128__==16
44 /* even with gcc, the typedef won't work for 32-bit platforms */
45 typedef __uint128_t uint128_t
; /* nonstandard; implemented by gcc on 64-bit
48 # error "Your compiler doesn't appear to support 128-bit integer types"
54 /******************************************************************************/
56 * INTERNAL REPRESENTATION OF FIELD ELEMENTS
58 * Field elements are represented as a_0 + 2^56*a_1 + 2^112*a_2 + 2^168*a_3
59 * using 64-bit coefficients called 'limbs',
60 * and sometimes (for multiplication results) as
61 * b_0 + 2^56*b_1 + 2^112*b_2 + 2^168*b_3 + 2^224*b_4 + 2^280*b_5 + 2^336*b_6
62 * using 128-bit coefficients called 'widelimbs'.
63 * A 4-limb representation is an 'felem';
64 * a 7-widelimb representation is a 'widefelem'.
65 * Even within felems, bits of adjacent limbs overlap, and we don't always
66 * reduce the representations: we ensure that inputs to each felem
67 * multiplication satisfy a_i < 2^60, so outputs satisfy b_i < 4*2^60*2^60,
68 * and fit into a 128-bit word without overflow. The coefficients are then
69 * again partially reduced to obtain an felem satisfying a_i < 2^57.
70 * We only reduce to the unique minimal representation at the end of the
74 typedef uint64_t limb
;
75 typedef uint128_t widelimb
;
77 typedef limb felem
[4];
78 typedef widelimb widefelem
[7];
81 * Field element represented as a byte array. 28*8 = 224 bits is also the
82 * group order size for the elliptic curve, and we also use this type for
83 * scalars for point multiplication.
85 typedef u8 felem_bytearray
[28];
87 static const felem_bytearray nistp224_curve_params
[5] = {
88 {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, /* p */
89 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00,
90 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01},
91 {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, /* a */
92 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFE, 0xFF, 0xFF, 0xFF, 0xFF,
93 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFE},
94 {0xB4, 0x05, 0x0A, 0x85, 0x0C, 0x04, 0xB3, 0xAB, 0xF5, 0x41, /* b */
95 0x32, 0x56, 0x50, 0x44, 0xB0, 0xB7, 0xD7, 0xBF, 0xD8, 0xBA,
96 0x27, 0x0B, 0x39, 0x43, 0x23, 0x55, 0xFF, 0xB4},
97 {0xB7, 0x0E, 0x0C, 0xBD, 0x6B, 0xB4, 0xBF, 0x7F, 0x32, 0x13, /* x */
98 0x90, 0xB9, 0x4A, 0x03, 0xC1, 0xD3, 0x56, 0xC2, 0x11, 0x22,
99 0x34, 0x32, 0x80, 0xD6, 0x11, 0x5C, 0x1D, 0x21},
100 {0xbd, 0x37, 0x63, 0x88, 0xb5, 0xf7, 0x23, 0xfb, 0x4c, 0x22, /* y */
101 0xdf, 0xe6, 0xcd, 0x43, 0x75, 0xa0, 0x5a, 0x07, 0x47, 0x64,
102 0x44, 0xd5, 0x81, 0x99, 0x85, 0x00, 0x7e, 0x34}
106 * Precomputed multiples of the standard generator
107 * Points are given in coordinates (X, Y, Z) where Z normally is 1
108 * (0 for the point at infinity).
109 * For each field element, slice a_0 is word 0, etc.
111 * The table has 2 * 16 elements, starting with the following:
112 * index | bits | point
113 * ------+---------+------------------------------
116 * 2 | 0 0 1 0 | 2^56G
117 * 3 | 0 0 1 1 | (2^56 + 1)G
118 * 4 | 0 1 0 0 | 2^112G
119 * 5 | 0 1 0 1 | (2^112 + 1)G
120 * 6 | 0 1 1 0 | (2^112 + 2^56)G
121 * 7 | 0 1 1 1 | (2^112 + 2^56 + 1)G
122 * 8 | 1 0 0 0 | 2^168G
123 * 9 | 1 0 0 1 | (2^168 + 1)G
124 * 10 | 1 0 1 0 | (2^168 + 2^56)G
125 * 11 | 1 0 1 1 | (2^168 + 2^56 + 1)G
126 * 12 | 1 1 0 0 | (2^168 + 2^112)G
127 * 13 | 1 1 0 1 | (2^168 + 2^112 + 1)G
128 * 14 | 1 1 1 0 | (2^168 + 2^112 + 2^56)G
129 * 15 | 1 1 1 1 | (2^168 + 2^112 + 2^56 + 1)G
130 * followed by a copy of this with each element multiplied by 2^28.
132 * The reason for this is so that we can clock bits into four different
133 * locations when doing simple scalar multiplies against the base point,
134 * and then another four locations using the second 16 elements.
136 static const felem gmul
[2][16][3] = {
140 {{0x3280d6115c1d21, 0xc1d356c2112234, 0x7f321390b94a03, 0xb70e0cbd6bb4bf},
141 {0xd5819985007e34, 0x75a05a07476444, 0xfb4c22dfe6cd43, 0xbd376388b5f723},
143 {{0xfd9675666ebbe9, 0xbca7664d40ce5e, 0x2242df8d8a2a43, 0x1f49bbb0f99bc5},
144 {0x29e0b892dc9c43, 0xece8608436e662, 0xdc858f185310d0, 0x9812dd4eb8d321},
146 {{0x6d3e678d5d8eb8, 0x559eed1cb362f1, 0x16e9a3bbce8a3f, 0xeedcccd8c2a748},
147 {0xf19f90ed50266d, 0xabf2b4bf65f9df, 0x313865468fafec, 0x5cb379ba910a17},
149 {{0x0641966cab26e3, 0x91fb2991fab0a0, 0xefec27a4e13a0b, 0x0499aa8a5f8ebe},
150 {0x7510407766af5d, 0x84d929610d5450, 0x81d77aae82f706, 0x6916f6d4338c5b},
152 {{0xea95ac3b1f15c6, 0x086000905e82d4, 0xdd323ae4d1c8b1, 0x932b56be7685a3},
153 {0x9ef93dea25dbbf, 0x41665960f390f0, 0xfdec76dbe2a8a7, 0x523e80f019062a},
155 {{0x822fdd26732c73, 0xa01c83531b5d0f, 0x363f37347c1ba4, 0xc391b45c84725c},
156 {0xbbd5e1b2d6ad24, 0xddfbcde19dfaec, 0xc393da7e222a7f, 0x1efb7890ede244},
158 {{0x4c9e90ca217da1, 0xd11beca79159bb, 0xff8d33c2c98b7c, 0x2610b39409f849},
159 {0x44d1352ac64da0, 0xcdbb7b2c46b4fb, 0x966c079b753c89, 0xfe67e4e820b112},
161 {{0xe28cae2df5312d, 0xc71b61d16f5c6e, 0x79b7619a3e7c4c, 0x05c73240899b47},
162 {0x9f7f6382c73e3a, 0x18615165c56bda, 0x641fab2116fd56, 0x72855882b08394},
164 {{0x0469182f161c09, 0x74a98ca8d00fb5, 0xb89da93489a3e0, 0x41c98768fb0c1d},
165 {0xe5ea05fb32da81, 0x3dce9ffbca6855, 0x1cfe2d3fbf59e6, 0x0e5e03408738a7},
167 {{0xdab22b2333e87f, 0x4430137a5dd2f6, 0xe03ab9f738beb8, 0xcb0c5d0dc34f24},
168 {0x764a7df0c8fda5, 0x185ba5c3fa2044, 0x9281d688bcbe50, 0xc40331df893881},
170 {{0xb89530796f0f60, 0xade92bd26909a3, 0x1a0c83fb4884da, 0x1765bf22a5a984},
171 {0x772a9ee75db09e, 0x23bc6c67cec16f, 0x4c1edba8b14e2f, 0xe2a215d9611369},
173 {{0x571e509fb5efb3, 0xade88696410552, 0xc8ae85fada74fe, 0x6c7e4be83bbde3},
174 {0xff9f51160f4652, 0xb47ce2495a6539, 0xa2946c53b582f4, 0x286d2db3ee9a60},
176 {{0x40bbd5081a44af, 0x0995183b13926c, 0xbcefba6f47f6d0, 0x215619e9cc0057},
177 {0x8bc94d3b0df45e, 0xf11c54a3694f6f, 0x8631b93cdfe8b5, 0xe7e3f4b0982db9},
179 {{0xb17048ab3e1c7b, 0xac38f36ff8a1d8, 0x1c29819435d2c6, 0xc813132f4c07e9},
180 {0x2891425503b11f, 0x08781030579fea, 0xf5426ba5cc9674, 0x1e28ebf18562bc},
182 {{0x9f31997cc864eb, 0x06cd91d28b5e4c, 0xff17036691a973, 0xf1aef351497c58},
183 {0xdd1f2d600564ff, 0xdead073b1402db, 0x74a684435bd693, 0xeea7471f962558},
188 {{0x9665266dddf554, 0x9613d78b60ef2d, 0xce27a34cdba417, 0xd35ab74d6afc31},
189 {0x85ccdd22deb15e, 0x2137e5783a6aab, 0xa141cffd8c93c6, 0x355a1830e90f2d},
191 {{0x1a494eadaade65, 0xd6da4da77fe53c, 0xe7992996abec86, 0x65c3553c6090e3},
192 {0xfa610b1fb09346, 0xf1c6540b8a4aaf, 0xc51a13ccd3cbab, 0x02995b1b18c28a},
194 {{0x7874568e7295ef, 0x86b419fbe38d04, 0xdc0690a7550d9a, 0xd3966a44beac33},
195 {0x2b7280ec29132f, 0xbeaa3b6a032df3, 0xdc7dd88ae41200, 0xd25e2513e3a100},
197 {{0x924857eb2efafd, 0xac2bce41223190, 0x8edaa1445553fc, 0x825800fd3562d5},
198 {0x8d79148ea96621, 0x23a01c3dd9ed8d, 0xaf8b219f9416b5, 0xd8db0cc277daea},
200 {{0x76a9c3b1a700f0, 0xe9acd29bc7e691, 0x69212d1a6b0327, 0x6322e97fe154be},
201 {0x469fc5465d62aa, 0x8d41ed18883b05, 0x1f8eae66c52b88, 0xe4fcbe9325be51},
203 {{0x825fdf583cac16, 0x020b857c7b023a, 0x683c17744b0165, 0x14ffd0a2daf2f1},
204 {0x323b36184218f9, 0x4944ec4e3b47d4, 0xc15b3080841acf, 0x0bced4b01a28bb},
206 {{0x92ac22230df5c4, 0x52f33b4063eda8, 0xcb3f19870c0c93, 0x40064f2ba65233},
207 {0xfe16f0924f8992, 0x012da25af5b517, 0x1a57bb24f723a6, 0x06f8bc76760def},
209 {{0x4a7084f7817cb9, 0xbcab0738ee9a78, 0x3ec11e11d9c326, 0xdc0fe90e0f1aae},
210 {0xcf639ea5f98390, 0x5c350aa22ffb74, 0x9afae98a4047b7, 0x956ec2d617fc45},
212 {{0x4306d648c1be6a, 0x9247cd8bc9a462, 0xf5595e377d2f2e, 0xbd1c3caff1a52e},
213 {0x045e14472409d0, 0x29f3e17078f773, 0x745a602b2d4f7d, 0x191837685cdfbb},
215 {{0x5b6ee254a8cb79, 0x4953433f5e7026, 0xe21faeb1d1def4, 0xc4c225785c09de},
216 {0x307ce7bba1e518, 0x31b125b1036db8, 0x47e91868839e8f, 0xc765866e33b9f3},
218 {{0x3bfece24f96906, 0x4794da641e5093, 0xde5df64f95db26, 0x297ecd89714b05},
219 {0x701bd3ebb2c3aa, 0x7073b4f53cb1d5, 0x13c5665658af16, 0x9895089d66fe58},
221 {{0x0fef05f78c4790, 0x2d773633b05d2e, 0x94229c3a951c94, 0xbbbd70df4911bb},
222 {0xb2c6963d2c1168, 0x105f47a72b0d73, 0x9fdf6111614080, 0x7b7e94b39e67b0},
224 {{0xad1a7d6efbe2b3, 0xf012482c0da69d, 0x6b3bdf12438345, 0x40d7558d7aa4d9},
225 {0x8a09fffb5c6d3d, 0x9a356e5d9ffd38, 0x5973f15f4f9b1c, 0xdcd5f59f63c3ea},
227 {{0xacf39f4c5ca7ab, 0x4c8071cc5fd737, 0xc64e3602cd1184, 0x0acd4644c9abba},
228 {0x6c011a36d8bf6e, 0xfecd87ba24e32a, 0x19f6f56574fad8, 0x050b204ced9405},
230 {{0xed4f1cae7d9a96, 0x5ceef7ad94c40a, 0x778e4a3bf3ef9b, 0x7405783dc3b55e},
231 {0x32477c61b6e8c6, 0xb46a97570f018b, 0x91176d0a7e95d1, 0x3df90fbc4c7d0e},
235 /* Precomputation for the group generator. */
236 struct nistp224_pre_comp_st
{
237 felem g_pre_comp
[2][16][3];
238 CRYPTO_REF_COUNT references
;
242 const EC_METHOD
*EC_GFp_nistp224_method(void)
244 static const EC_METHOD ret
= {
245 EC_FLAGS_DEFAULT_OCT
,
246 NID_X9_62_prime_field
,
247 ec_GFp_nistp224_group_init
,
248 ec_GFp_simple_group_finish
,
249 ec_GFp_simple_group_clear_finish
,
250 ec_GFp_nist_group_copy
,
251 ec_GFp_nistp224_group_set_curve
,
252 ec_GFp_simple_group_get_curve
,
253 ec_GFp_simple_group_get_degree
,
254 ec_group_simple_order_bits
,
255 ec_GFp_simple_group_check_discriminant
,
256 ec_GFp_simple_point_init
,
257 ec_GFp_simple_point_finish
,
258 ec_GFp_simple_point_clear_finish
,
259 ec_GFp_simple_point_copy
,
260 ec_GFp_simple_point_set_to_infinity
,
261 ec_GFp_simple_set_Jprojective_coordinates_GFp
,
262 ec_GFp_simple_get_Jprojective_coordinates_GFp
,
263 ec_GFp_simple_point_set_affine_coordinates
,
264 ec_GFp_nistp224_point_get_affine_coordinates
,
265 0 /* point_set_compressed_coordinates */ ,
270 ec_GFp_simple_invert
,
271 ec_GFp_simple_is_at_infinity
,
272 ec_GFp_simple_is_on_curve
,
274 ec_GFp_simple_make_affine
,
275 ec_GFp_simple_points_make_affine
,
276 ec_GFp_nistp224_points_mul
,
277 ec_GFp_nistp224_precompute_mult
,
278 ec_GFp_nistp224_have_precompute_mult
,
279 ec_GFp_nist_field_mul
,
280 ec_GFp_nist_field_sqr
,
282 ec_GFp_simple_field_inv
,
283 0 /* field_encode */ ,
284 0 /* field_decode */ ,
285 0, /* field_set_to_one */
286 ec_key_simple_priv2oct
,
287 ec_key_simple_oct2priv
,
289 ec_key_simple_generate_key
,
290 ec_key_simple_check_key
,
291 ec_key_simple_generate_public_key
,
294 ecdh_simple_compute_key
,
295 0, /* field_inverse_mod_ord */
296 0, /* blind_coordinates */
306 * Helper functions to convert field elements to/from internal representation
308 static void bin28_to_felem(felem out
, const u8 in
[28])
310 out
[0] = *((const uint64_t *)(in
)) & 0x00ffffffffffffff;
311 out
[1] = (*((const uint64_t *)(in
+ 7))) & 0x00ffffffffffffff;
312 out
[2] = (*((const uint64_t *)(in
+ 14))) & 0x00ffffffffffffff;
313 out
[3] = (*((const uint64_t *)(in
+20))) >> 8;
316 static void felem_to_bin28(u8 out
[28], const felem in
)
319 for (i
= 0; i
< 7; ++i
) {
320 out
[i
] = in
[0] >> (8 * i
);
321 out
[i
+ 7] = in
[1] >> (8 * i
);
322 out
[i
+ 14] = in
[2] >> (8 * i
);
323 out
[i
+ 21] = in
[3] >> (8 * i
);
327 /* To preserve endianness when using BN_bn2bin and BN_bin2bn */
328 static void flip_endian(u8
*out
, const u8
*in
, unsigned len
)
331 for (i
= 0; i
< len
; ++i
)
332 out
[i
] = in
[len
- 1 - i
];
335 /* From OpenSSL BIGNUM to internal representation */
336 static int BN_to_felem(felem out
, const BIGNUM
*bn
)
338 felem_bytearray b_in
;
339 felem_bytearray b_out
;
342 /* BN_bn2bin eats leading zeroes */
343 memset(b_out
, 0, sizeof(b_out
));
344 num_bytes
= BN_num_bytes(bn
);
345 if (num_bytes
> sizeof(b_out
)) {
346 ECerr(EC_F_BN_TO_FELEM
, EC_R_BIGNUM_OUT_OF_RANGE
);
349 if (BN_is_negative(bn
)) {
350 ECerr(EC_F_BN_TO_FELEM
, EC_R_BIGNUM_OUT_OF_RANGE
);
353 num_bytes
= BN_bn2bin(bn
, b_in
);
354 flip_endian(b_out
, b_in
, num_bytes
);
355 bin28_to_felem(out
, b_out
);
359 /* From internal representation to OpenSSL BIGNUM */
360 static BIGNUM
*felem_to_BN(BIGNUM
*out
, const felem in
)
362 felem_bytearray b_in
, b_out
;
363 felem_to_bin28(b_in
, in
);
364 flip_endian(b_out
, b_in
, sizeof(b_out
));
365 return BN_bin2bn(b_out
, sizeof(b_out
), out
);
368 /******************************************************************************/
372 * Field operations, using the internal representation of field elements.
373 * NB! These operations are specific to our point multiplication and cannot be
374 * expected to be correct in general - e.g., multiplication with a large scalar
375 * will cause an overflow.
379 static void felem_one(felem out
)
387 static void felem_assign(felem out
, const felem in
)
395 /* Sum two field elements: out += in */
396 static void felem_sum(felem out
, const felem in
)
404 /* Subtract field elements: out -= in */
405 /* Assumes in[i] < 2^57 */
406 static void felem_diff(felem out
, const felem in
)
408 static const limb two58p2
= (((limb
) 1) << 58) + (((limb
) 1) << 2);
409 static const limb two58m2
= (((limb
) 1) << 58) - (((limb
) 1) << 2);
410 static const limb two58m42m2
= (((limb
) 1) << 58) -
411 (((limb
) 1) << 42) - (((limb
) 1) << 2);
413 /* Add 0 mod 2^224-2^96+1 to ensure out > in */
415 out
[1] += two58m42m2
;
425 /* Subtract in unreduced 128-bit mode: out -= in */
426 /* Assumes in[i] < 2^119 */
427 static void widefelem_diff(widefelem out
, const widefelem in
)
429 static const widelimb two120
= ((widelimb
) 1) << 120;
430 static const widelimb two120m64
= (((widelimb
) 1) << 120) -
431 (((widelimb
) 1) << 64);
432 static const widelimb two120m104m64
= (((widelimb
) 1) << 120) -
433 (((widelimb
) 1) << 104) - (((widelimb
) 1) << 64);
435 /* Add 0 mod 2^224-2^96+1 to ensure out > in */
440 out
[4] += two120m104m64
;
453 /* Subtract in mixed mode: out128 -= in64 */
455 static void felem_diff_128_64(widefelem out
, const felem in
)
457 static const widelimb two64p8
= (((widelimb
) 1) << 64) +
458 (((widelimb
) 1) << 8);
459 static const widelimb two64m8
= (((widelimb
) 1) << 64) -
460 (((widelimb
) 1) << 8);
461 static const widelimb two64m48m8
= (((widelimb
) 1) << 64) -
462 (((widelimb
) 1) << 48) - (((widelimb
) 1) << 8);
464 /* Add 0 mod 2^224-2^96+1 to ensure out > in */
466 out
[1] += two64m48m8
;
477 * Multiply a field element by a scalar: out = out * scalar The scalars we
478 * actually use are small, so results fit without overflow
480 static void felem_scalar(felem out
, const limb scalar
)
489 * Multiply an unreduced field element by a scalar: out = out * scalar The
490 * scalars we actually use are small, so results fit without overflow
492 static void widefelem_scalar(widefelem out
, const widelimb scalar
)
503 /* Square a field element: out = in^2 */
504 static void felem_square(widefelem out
, const felem in
)
506 limb tmp0
, tmp1
, tmp2
;
510 out
[0] = ((widelimb
) in
[0]) * in
[0];
511 out
[1] = ((widelimb
) in
[0]) * tmp1
;
512 out
[2] = ((widelimb
) in
[0]) * tmp2
+ ((widelimb
) in
[1]) * in
[1];
513 out
[3] = ((widelimb
) in
[3]) * tmp0
+ ((widelimb
) in
[1]) * tmp2
;
514 out
[4] = ((widelimb
) in
[3]) * tmp1
+ ((widelimb
) in
[2]) * in
[2];
515 out
[5] = ((widelimb
) in
[3]) * tmp2
;
516 out
[6] = ((widelimb
) in
[3]) * in
[3];
519 /* Multiply two field elements: out = in1 * in2 */
520 static void felem_mul(widefelem out
, const felem in1
, const felem in2
)
522 out
[0] = ((widelimb
) in1
[0]) * in2
[0];
523 out
[1] = ((widelimb
) in1
[0]) * in2
[1] + ((widelimb
) in1
[1]) * in2
[0];
524 out
[2] = ((widelimb
) in1
[0]) * in2
[2] + ((widelimb
) in1
[1]) * in2
[1] +
525 ((widelimb
) in1
[2]) * in2
[0];
526 out
[3] = ((widelimb
) in1
[0]) * in2
[3] + ((widelimb
) in1
[1]) * in2
[2] +
527 ((widelimb
) in1
[2]) * in2
[1] + ((widelimb
) in1
[3]) * in2
[0];
528 out
[4] = ((widelimb
) in1
[1]) * in2
[3] + ((widelimb
) in1
[2]) * in2
[2] +
529 ((widelimb
) in1
[3]) * in2
[1];
530 out
[5] = ((widelimb
) in1
[2]) * in2
[3] + ((widelimb
) in1
[3]) * in2
[2];
531 out
[6] = ((widelimb
) in1
[3]) * in2
[3];
535 * Reduce seven 128-bit coefficients to four 64-bit coefficients.
536 * Requires in[i] < 2^126,
537 * ensures out[0] < 2^56, out[1] < 2^56, out[2] < 2^56, out[3] <= 2^56 + 2^16 */
538 static void felem_reduce(felem out
, const widefelem in
)
540 static const widelimb two127p15
= (((widelimb
) 1) << 127) +
541 (((widelimb
) 1) << 15);
542 static const widelimb two127m71
= (((widelimb
) 1) << 127) -
543 (((widelimb
) 1) << 71);
544 static const widelimb two127m71m55
= (((widelimb
) 1) << 127) -
545 (((widelimb
) 1) << 71) - (((widelimb
) 1) << 55);
548 /* Add 0 mod 2^224-2^96+1 to ensure all differences are positive */
549 output
[0] = in
[0] + two127p15
;
550 output
[1] = in
[1] + two127m71m55
;
551 output
[2] = in
[2] + two127m71
;
555 /* Eliminate in[4], in[5], in[6] */
556 output
[4] += in
[6] >> 16;
557 output
[3] += (in
[6] & 0xffff) << 40;
560 output
[3] += in
[5] >> 16;
561 output
[2] += (in
[5] & 0xffff) << 40;
564 output
[2] += output
[4] >> 16;
565 output
[1] += (output
[4] & 0xffff) << 40;
566 output
[0] -= output
[4];
568 /* Carry 2 -> 3 -> 4 */
569 output
[3] += output
[2] >> 56;
570 output
[2] &= 0x00ffffffffffffff;
572 output
[4] = output
[3] >> 56;
573 output
[3] &= 0x00ffffffffffffff;
575 /* Now output[2] < 2^56, output[3] < 2^56, output[4] < 2^72 */
577 /* Eliminate output[4] */
578 output
[2] += output
[4] >> 16;
579 /* output[2] < 2^56 + 2^56 = 2^57 */
580 output
[1] += (output
[4] & 0xffff) << 40;
581 output
[0] -= output
[4];
583 /* Carry 0 -> 1 -> 2 -> 3 */
584 output
[1] += output
[0] >> 56;
585 out
[0] = output
[0] & 0x00ffffffffffffff;
587 output
[2] += output
[1] >> 56;
588 /* output[2] < 2^57 + 2^72 */
589 out
[1] = output
[1] & 0x00ffffffffffffff;
590 output
[3] += output
[2] >> 56;
591 /* output[3] <= 2^56 + 2^16 */
592 out
[2] = output
[2] & 0x00ffffffffffffff;
595 * out[0] < 2^56, out[1] < 2^56, out[2] < 2^56,
596 * out[3] <= 2^56 + 2^16 (due to final carry),
602 static void felem_square_reduce(felem out
, const felem in
)
605 felem_square(tmp
, in
);
606 felem_reduce(out
, tmp
);
609 static void felem_mul_reduce(felem out
, const felem in1
, const felem in2
)
612 felem_mul(tmp
, in1
, in2
);
613 felem_reduce(out
, tmp
);
617 * Reduce to unique minimal representation. Requires 0 <= in < 2*p (always
618 * call felem_reduce first)
620 static void felem_contract(felem out
, const felem in
)
622 static const int64_t two56
= ((limb
) 1) << 56;
623 /* 0 <= in < 2*p, p = 2^224 - 2^96 + 1 */
624 /* if in > p , reduce in = in - 2^224 + 2^96 - 1 */
630 /* Case 1: a = 1 iff in >= 2^224 */
634 tmp
[3] &= 0x00ffffffffffffff;
636 * Case 2: a = 0 iff p <= in < 2^224, i.e., the high 128 bits are all 1
637 * and the lower part is non-zero
639 a
= ((in
[3] & in
[2] & (in
[1] | 0x000000ffffffffff)) + 1) |
640 (((int64_t) (in
[0] + (in
[1] & 0x000000ffffffffff)) - 1) >> 63);
641 a
&= 0x00ffffffffffffff;
642 /* turn a into an all-one mask (if a = 0) or an all-zero mask */
644 /* subtract 2^224 - 2^96 + 1 if a is all-one */
645 tmp
[3] &= a
^ 0xffffffffffffffff;
646 tmp
[2] &= a
^ 0xffffffffffffffff;
647 tmp
[1] &= (a
^ 0xffffffffffffffff) | 0x000000ffffffffff;
651 * eliminate negative coefficients: if tmp[0] is negative, tmp[1] must be
652 * non-zero, so we only need one step
658 /* carry 1 -> 2 -> 3 */
659 tmp
[2] += tmp
[1] >> 56;
660 tmp
[1] &= 0x00ffffffffffffff;
662 tmp
[3] += tmp
[2] >> 56;
663 tmp
[2] &= 0x00ffffffffffffff;
665 /* Now 0 <= out < p */
673 * Get negative value: out = -in
674 * Requires in[i] < 2^63,
675 * ensures out[0] < 2^56, out[1] < 2^56, out[2] < 2^56, out[3] <= 2^56 + 2^16
677 static void felem_neg(felem out
, const felem in
)
681 memset(tmp
, 0, sizeof(tmp
));
682 felem_diff_128_64(tmp
, in
);
683 felem_reduce(out
, tmp
);
687 * Zero-check: returns 1 if input is 0, and 0 otherwise. We know that field
688 * elements are reduced to in < 2^225, so we only need to check three cases:
689 * 0, 2^224 - 2^96 + 1, and 2^225 - 2^97 + 2
691 static limb
felem_is_zero(const felem in
)
693 limb zero
, two224m96p1
, two225m97p2
;
695 zero
= in
[0] | in
[1] | in
[2] | in
[3];
696 zero
= (((int64_t) (zero
) - 1) >> 63) & 1;
697 two224m96p1
= (in
[0] ^ 1) | (in
[1] ^ 0x00ffff0000000000)
698 | (in
[2] ^ 0x00ffffffffffffff) | (in
[3] ^ 0x00ffffffffffffff);
699 two224m96p1
= (((int64_t) (two224m96p1
) - 1) >> 63) & 1;
700 two225m97p2
= (in
[0] ^ 2) | (in
[1] ^ 0x00fffe0000000000)
701 | (in
[2] ^ 0x00ffffffffffffff) | (in
[3] ^ 0x01ffffffffffffff);
702 two225m97p2
= (((int64_t) (two225m97p2
) - 1) >> 63) & 1;
703 return (zero
| two224m96p1
| two225m97p2
);
706 static int felem_is_zero_int(const void *in
)
708 return (int)(felem_is_zero(in
) & ((limb
) 1));
711 /* Invert a field element */
712 /* Computation chain copied from djb's code */
713 static void felem_inv(felem out
, const felem in
)
715 felem ftmp
, ftmp2
, ftmp3
, ftmp4
;
719 felem_square(tmp
, in
);
720 felem_reduce(ftmp
, tmp
); /* 2 */
721 felem_mul(tmp
, in
, ftmp
);
722 felem_reduce(ftmp
, tmp
); /* 2^2 - 1 */
723 felem_square(tmp
, ftmp
);
724 felem_reduce(ftmp
, tmp
); /* 2^3 - 2 */
725 felem_mul(tmp
, in
, ftmp
);
726 felem_reduce(ftmp
, tmp
); /* 2^3 - 1 */
727 felem_square(tmp
, ftmp
);
728 felem_reduce(ftmp2
, tmp
); /* 2^4 - 2 */
729 felem_square(tmp
, ftmp2
);
730 felem_reduce(ftmp2
, tmp
); /* 2^5 - 4 */
731 felem_square(tmp
, ftmp2
);
732 felem_reduce(ftmp2
, tmp
); /* 2^6 - 8 */
733 felem_mul(tmp
, ftmp2
, ftmp
);
734 felem_reduce(ftmp
, tmp
); /* 2^6 - 1 */
735 felem_square(tmp
, ftmp
);
736 felem_reduce(ftmp2
, tmp
); /* 2^7 - 2 */
737 for (i
= 0; i
< 5; ++i
) { /* 2^12 - 2^6 */
738 felem_square(tmp
, ftmp2
);
739 felem_reduce(ftmp2
, tmp
);
741 felem_mul(tmp
, ftmp2
, ftmp
);
742 felem_reduce(ftmp2
, tmp
); /* 2^12 - 1 */
743 felem_square(tmp
, ftmp2
);
744 felem_reduce(ftmp3
, tmp
); /* 2^13 - 2 */
745 for (i
= 0; i
< 11; ++i
) { /* 2^24 - 2^12 */
746 felem_square(tmp
, ftmp3
);
747 felem_reduce(ftmp3
, tmp
);
749 felem_mul(tmp
, ftmp3
, ftmp2
);
750 felem_reduce(ftmp2
, tmp
); /* 2^24 - 1 */
751 felem_square(tmp
, ftmp2
);
752 felem_reduce(ftmp3
, tmp
); /* 2^25 - 2 */
753 for (i
= 0; i
< 23; ++i
) { /* 2^48 - 2^24 */
754 felem_square(tmp
, ftmp3
);
755 felem_reduce(ftmp3
, tmp
);
757 felem_mul(tmp
, ftmp3
, ftmp2
);
758 felem_reduce(ftmp3
, tmp
); /* 2^48 - 1 */
759 felem_square(tmp
, ftmp3
);
760 felem_reduce(ftmp4
, tmp
); /* 2^49 - 2 */
761 for (i
= 0; i
< 47; ++i
) { /* 2^96 - 2^48 */
762 felem_square(tmp
, ftmp4
);
763 felem_reduce(ftmp4
, tmp
);
765 felem_mul(tmp
, ftmp3
, ftmp4
);
766 felem_reduce(ftmp3
, tmp
); /* 2^96 - 1 */
767 felem_square(tmp
, ftmp3
);
768 felem_reduce(ftmp4
, tmp
); /* 2^97 - 2 */
769 for (i
= 0; i
< 23; ++i
) { /* 2^120 - 2^24 */
770 felem_square(tmp
, ftmp4
);
771 felem_reduce(ftmp4
, tmp
);
773 felem_mul(tmp
, ftmp2
, ftmp4
);
774 felem_reduce(ftmp2
, tmp
); /* 2^120 - 1 */
775 for (i
= 0; i
< 6; ++i
) { /* 2^126 - 2^6 */
776 felem_square(tmp
, ftmp2
);
777 felem_reduce(ftmp2
, tmp
);
779 felem_mul(tmp
, ftmp2
, ftmp
);
780 felem_reduce(ftmp
, tmp
); /* 2^126 - 1 */
781 felem_square(tmp
, ftmp
);
782 felem_reduce(ftmp
, tmp
); /* 2^127 - 2 */
783 felem_mul(tmp
, ftmp
, in
);
784 felem_reduce(ftmp
, tmp
); /* 2^127 - 1 */
785 for (i
= 0; i
< 97; ++i
) { /* 2^224 - 2^97 */
786 felem_square(tmp
, ftmp
);
787 felem_reduce(ftmp
, tmp
);
789 felem_mul(tmp
, ftmp
, ftmp3
);
790 felem_reduce(out
, tmp
); /* 2^224 - 2^96 - 1 */
794 * Copy in constant time: if icopy == 1, copy in to out, if icopy == 0, copy
797 static void copy_conditional(felem out
, const felem in
, limb icopy
)
801 * icopy is a (64-bit) 0 or 1, so copy is either all-zero or all-one
803 const limb copy
= -icopy
;
804 for (i
= 0; i
< 4; ++i
) {
805 const limb tmp
= copy
& (in
[i
] ^ out
[i
]);
810 /******************************************************************************/
812 * ELLIPTIC CURVE POINT OPERATIONS
814 * Points are represented in Jacobian projective coordinates:
815 * (X, Y, Z) corresponds to the affine point (X/Z^2, Y/Z^3),
816 * or to the point at infinity if Z == 0.
821 * Double an elliptic curve point:
822 * (X', Y', Z') = 2 * (X, Y, Z), where
823 * X' = (3 * (X - Z^2) * (X + Z^2))^2 - 8 * X * Y^2
824 * Y' = 3 * (X - Z^2) * (X + Z^2) * (4 * X * Y^2 - X') - 8 * Y^4
825 * Z' = (Y + Z)^2 - Y^2 - Z^2 = 2 * Y * Z
826 * Outputs can equal corresponding inputs, i.e., x_out == x_in is allowed,
827 * while x_out == y_in is not (maybe this works, but it's not tested).
830 point_double(felem x_out
, felem y_out
, felem z_out
,
831 const felem x_in
, const felem y_in
, const felem z_in
)
834 felem delta
, gamma
, beta
, alpha
, ftmp
, ftmp2
;
836 felem_assign(ftmp
, x_in
);
837 felem_assign(ftmp2
, x_in
);
840 felem_square(tmp
, z_in
);
841 felem_reduce(delta
, tmp
);
844 felem_square(tmp
, y_in
);
845 felem_reduce(gamma
, tmp
);
848 felem_mul(tmp
, x_in
, gamma
);
849 felem_reduce(beta
, tmp
);
851 /* alpha = 3*(x-delta)*(x+delta) */
852 felem_diff(ftmp
, delta
);
853 /* ftmp[i] < 2^57 + 2^58 + 2 < 2^59 */
854 felem_sum(ftmp2
, delta
);
855 /* ftmp2[i] < 2^57 + 2^57 = 2^58 */
856 felem_scalar(ftmp2
, 3);
857 /* ftmp2[i] < 3 * 2^58 < 2^60 */
858 felem_mul(tmp
, ftmp
, ftmp2
);
859 /* tmp[i] < 2^60 * 2^59 * 4 = 2^121 */
860 felem_reduce(alpha
, tmp
);
862 /* x' = alpha^2 - 8*beta */
863 felem_square(tmp
, alpha
);
864 /* tmp[i] < 4 * 2^57 * 2^57 = 2^116 */
865 felem_assign(ftmp
, beta
);
866 felem_scalar(ftmp
, 8);
867 /* ftmp[i] < 8 * 2^57 = 2^60 */
868 felem_diff_128_64(tmp
, ftmp
);
869 /* tmp[i] < 2^116 + 2^64 + 8 < 2^117 */
870 felem_reduce(x_out
, tmp
);
872 /* z' = (y + z)^2 - gamma - delta */
873 felem_sum(delta
, gamma
);
874 /* delta[i] < 2^57 + 2^57 = 2^58 */
875 felem_assign(ftmp
, y_in
);
876 felem_sum(ftmp
, z_in
);
877 /* ftmp[i] < 2^57 + 2^57 = 2^58 */
878 felem_square(tmp
, ftmp
);
879 /* tmp[i] < 4 * 2^58 * 2^58 = 2^118 */
880 felem_diff_128_64(tmp
, delta
);
881 /* tmp[i] < 2^118 + 2^64 + 8 < 2^119 */
882 felem_reduce(z_out
, tmp
);
884 /* y' = alpha*(4*beta - x') - 8*gamma^2 */
885 felem_scalar(beta
, 4);
886 /* beta[i] < 4 * 2^57 = 2^59 */
887 felem_diff(beta
, x_out
);
888 /* beta[i] < 2^59 + 2^58 + 2 < 2^60 */
889 felem_mul(tmp
, alpha
, beta
);
890 /* tmp[i] < 4 * 2^57 * 2^60 = 2^119 */
891 felem_square(tmp2
, gamma
);
892 /* tmp2[i] < 4 * 2^57 * 2^57 = 2^116 */
893 widefelem_scalar(tmp2
, 8);
894 /* tmp2[i] < 8 * 2^116 = 2^119 */
895 widefelem_diff(tmp
, tmp2
);
896 /* tmp[i] < 2^119 + 2^120 < 2^121 */
897 felem_reduce(y_out
, tmp
);
901 * Add two elliptic curve points:
902 * (X_1, Y_1, Z_1) + (X_2, Y_2, Z_2) = (X_3, Y_3, Z_3), where
903 * X_3 = (Z_1^3 * Y_2 - Z_2^3 * Y_1)^2 - (Z_1^2 * X_2 - Z_2^2 * X_1)^3 -
904 * 2 * Z_2^2 * X_1 * (Z_1^2 * X_2 - Z_2^2 * X_1)^2
905 * Y_3 = (Z_1^3 * Y_2 - Z_2^3 * Y_1) * (Z_2^2 * X_1 * (Z_1^2 * X_2 - Z_2^2 * X_1)^2 - X_3) -
906 * Z_2^3 * Y_1 * (Z_1^2 * X_2 - Z_2^2 * X_1)^3
907 * Z_3 = (Z_1^2 * X_2 - Z_2^2 * X_1) * (Z_1 * Z_2)
909 * This runs faster if 'mixed' is set, which requires Z_2 = 1 or Z_2 = 0.
913 * This function is not entirely constant-time: it includes a branch for
914 * checking whether the two input points are equal, (while not equal to the
915 * point at infinity). This case never happens during single point
916 * multiplication, so there is no timing leak for ECDH or ECDSA signing.
918 static void point_add(felem x3
, felem y3
, felem z3
,
919 const felem x1
, const felem y1
, const felem z1
,
920 const int mixed
, const felem x2
, const felem y2
,
923 felem ftmp
, ftmp2
, ftmp3
, ftmp4
, ftmp5
, x_out
, y_out
, z_out
;
925 limb z1_is_zero
, z2_is_zero
, x_equal
, y_equal
;
929 felem_square(tmp
, z2
);
930 felem_reduce(ftmp2
, tmp
);
933 felem_mul(tmp
, ftmp2
, z2
);
934 felem_reduce(ftmp4
, tmp
);
936 /* ftmp4 = z2^3*y1 */
937 felem_mul(tmp2
, ftmp4
, y1
);
938 felem_reduce(ftmp4
, tmp2
);
940 /* ftmp2 = z2^2*x1 */
941 felem_mul(tmp2
, ftmp2
, x1
);
942 felem_reduce(ftmp2
, tmp2
);
945 * We'll assume z2 = 1 (special case z2 = 0 is handled later)
948 /* ftmp4 = z2^3*y1 */
949 felem_assign(ftmp4
, y1
);
951 /* ftmp2 = z2^2*x1 */
952 felem_assign(ftmp2
, x1
);
956 felem_square(tmp
, z1
);
957 felem_reduce(ftmp
, tmp
);
960 felem_mul(tmp
, ftmp
, z1
);
961 felem_reduce(ftmp3
, tmp
);
964 felem_mul(tmp
, ftmp3
, y2
);
965 /* tmp[i] < 4 * 2^57 * 2^57 = 2^116 */
967 /* ftmp3 = z1^3*y2 - z2^3*y1 */
968 felem_diff_128_64(tmp
, ftmp4
);
969 /* tmp[i] < 2^116 + 2^64 + 8 < 2^117 */
970 felem_reduce(ftmp3
, tmp
);
973 felem_mul(tmp
, ftmp
, x2
);
974 /* tmp[i] < 4 * 2^57 * 2^57 = 2^116 */
976 /* ftmp = z1^2*x2 - z2^2*x1 */
977 felem_diff_128_64(tmp
, ftmp2
);
978 /* tmp[i] < 2^116 + 2^64 + 8 < 2^117 */
979 felem_reduce(ftmp
, tmp
);
982 * the formulae are incorrect if the points are equal so we check for
983 * this and do doubling if this happens
985 x_equal
= felem_is_zero(ftmp
);
986 y_equal
= felem_is_zero(ftmp3
);
987 z1_is_zero
= felem_is_zero(z1
);
988 z2_is_zero
= felem_is_zero(z2
);
989 /* In affine coordinates, (X_1, Y_1) == (X_2, Y_2) */
990 if (x_equal
&& y_equal
&& !z1_is_zero
&& !z2_is_zero
) {
991 point_double(x3
, y3
, z3
, x1
, y1
, z1
);
997 felem_mul(tmp
, z1
, z2
);
998 felem_reduce(ftmp5
, tmp
);
1000 /* special case z2 = 0 is handled later */
1001 felem_assign(ftmp5
, z1
);
1004 /* z_out = (z1^2*x2 - z2^2*x1)*(z1*z2) */
1005 felem_mul(tmp
, ftmp
, ftmp5
);
1006 felem_reduce(z_out
, tmp
);
1008 /* ftmp = (z1^2*x2 - z2^2*x1)^2 */
1009 felem_assign(ftmp5
, ftmp
);
1010 felem_square(tmp
, ftmp
);
1011 felem_reduce(ftmp
, tmp
);
1013 /* ftmp5 = (z1^2*x2 - z2^2*x1)^3 */
1014 felem_mul(tmp
, ftmp
, ftmp5
);
1015 felem_reduce(ftmp5
, tmp
);
1017 /* ftmp2 = z2^2*x1*(z1^2*x2 - z2^2*x1)^2 */
1018 felem_mul(tmp
, ftmp2
, ftmp
);
1019 felem_reduce(ftmp2
, tmp
);
1021 /* tmp = z2^3*y1*(z1^2*x2 - z2^2*x1)^3 */
1022 felem_mul(tmp
, ftmp4
, ftmp5
);
1023 /* tmp[i] < 4 * 2^57 * 2^57 = 2^116 */
1025 /* tmp2 = (z1^3*y2 - z2^3*y1)^2 */
1026 felem_square(tmp2
, ftmp3
);
1027 /* tmp2[i] < 4 * 2^57 * 2^57 < 2^116 */
1029 /* tmp2 = (z1^3*y2 - z2^3*y1)^2 - (z1^2*x2 - z2^2*x1)^3 */
1030 felem_diff_128_64(tmp2
, ftmp5
);
1031 /* tmp2[i] < 2^116 + 2^64 + 8 < 2^117 */
1033 /* ftmp5 = 2*z2^2*x1*(z1^2*x2 - z2^2*x1)^2 */
1034 felem_assign(ftmp5
, ftmp2
);
1035 felem_scalar(ftmp5
, 2);
1036 /* ftmp5[i] < 2 * 2^57 = 2^58 */
1039 * x_out = (z1^3*y2 - z2^3*y1)^2 - (z1^2*x2 - z2^2*x1)^3 -
1040 * 2*z2^2*x1*(z1^2*x2 - z2^2*x1)^2
1042 felem_diff_128_64(tmp2
, ftmp5
);
1043 /* tmp2[i] < 2^117 + 2^64 + 8 < 2^118 */
1044 felem_reduce(x_out
, tmp2
);
1046 /* ftmp2 = z2^2*x1*(z1^2*x2 - z2^2*x1)^2 - x_out */
1047 felem_diff(ftmp2
, x_out
);
1048 /* ftmp2[i] < 2^57 + 2^58 + 2 < 2^59 */
1051 * tmp2 = (z1^3*y2 - z2^3*y1)*(z2^2*x1*(z1^2*x2 - z2^2*x1)^2 - x_out)
1053 felem_mul(tmp2
, ftmp3
, ftmp2
);
1054 /* tmp2[i] < 4 * 2^57 * 2^59 = 2^118 */
1057 * y_out = (z1^3*y2 - z2^3*y1)*(z2^2*x1*(z1^2*x2 - z2^2*x1)^2 - x_out) -
1058 * z2^3*y1*(z1^2*x2 - z2^2*x1)^3
1060 widefelem_diff(tmp2
, tmp
);
1061 /* tmp2[i] < 2^118 + 2^120 < 2^121 */
1062 felem_reduce(y_out
, tmp2
);
1065 * the result (x_out, y_out, z_out) is incorrect if one of the inputs is
1066 * the point at infinity, so we need to check for this separately
1070 * if point 1 is at infinity, copy point 2 to output, and vice versa
1072 copy_conditional(x_out
, x2
, z1_is_zero
);
1073 copy_conditional(x_out
, x1
, z2_is_zero
);
1074 copy_conditional(y_out
, y2
, z1_is_zero
);
1075 copy_conditional(y_out
, y1
, z2_is_zero
);
1076 copy_conditional(z_out
, z2
, z1_is_zero
);
1077 copy_conditional(z_out
, z1
, z2_is_zero
);
1078 felem_assign(x3
, x_out
);
1079 felem_assign(y3
, y_out
);
1080 felem_assign(z3
, z_out
);
1084 * select_point selects the |idx|th point from a precomputation table and
1086 * The pre_comp array argument should be size of |size| argument
1088 static void select_point(const u64 idx
, unsigned int size
,
1089 const felem pre_comp
[][3], felem out
[3])
1092 limb
*outlimbs
= &out
[0][0];
1094 memset(out
, 0, sizeof(*out
) * 3);
1095 for (i
= 0; i
< size
; i
++) {
1096 const limb
*inlimbs
= &pre_comp
[i
][0][0];
1103 for (j
= 0; j
< 4 * 3; j
++)
1104 outlimbs
[j
] |= inlimbs
[j
] & mask
;
1108 /* get_bit returns the |i|th bit in |in| */
1109 static char get_bit(const felem_bytearray in
, unsigned i
)
1113 return (in
[i
>> 3] >> (i
& 7)) & 1;
1117 * Interleaved point multiplication using precomputed point multiples: The
1118 * small point multiples 0*P, 1*P, ..., 16*P are in pre_comp[], the scalars
1119 * in scalars[]. If g_scalar is non-NULL, we also add this multiple of the
1120 * generator, using certain (large) precomputed multiples in g_pre_comp.
1121 * Output point (X, Y, Z) is stored in x_out, y_out, z_out
1123 static void batch_mul(felem x_out
, felem y_out
, felem z_out
,
1124 const felem_bytearray scalars
[],
1125 const unsigned num_points
, const u8
*g_scalar
,
1126 const int mixed
, const felem pre_comp
[][17][3],
1127 const felem g_pre_comp
[2][16][3])
1131 unsigned gen_mul
= (g_scalar
!= NULL
);
1132 felem nq
[3], tmp
[4];
1136 /* set nq to the point at infinity */
1137 memset(nq
, 0, sizeof(nq
));
1140 * Loop over all scalars msb-to-lsb, interleaving additions of multiples
1141 * of the generator (two in each of the last 28 rounds) and additions of
1142 * other points multiples (every 5th round).
1144 skip
= 1; /* save two point operations in the first
1146 for (i
= (num_points
? 220 : 27); i
>= 0; --i
) {
1149 point_double(nq
[0], nq
[1], nq
[2], nq
[0], nq
[1], nq
[2]);
1151 /* add multiples of the generator */
1152 if (gen_mul
&& (i
<= 27)) {
1153 /* first, look 28 bits upwards */
1154 bits
= get_bit(g_scalar
, i
+ 196) << 3;
1155 bits
|= get_bit(g_scalar
, i
+ 140) << 2;
1156 bits
|= get_bit(g_scalar
, i
+ 84) << 1;
1157 bits
|= get_bit(g_scalar
, i
+ 28);
1158 /* select the point to add, in constant time */
1159 select_point(bits
, 16, g_pre_comp
[1], tmp
);
1162 /* value 1 below is argument for "mixed" */
1163 point_add(nq
[0], nq
[1], nq
[2],
1164 nq
[0], nq
[1], nq
[2], 1, tmp
[0], tmp
[1], tmp
[2]);
1166 memcpy(nq
, tmp
, 3 * sizeof(felem
));
1170 /* second, look at the current position */
1171 bits
= get_bit(g_scalar
, i
+ 168) << 3;
1172 bits
|= get_bit(g_scalar
, i
+ 112) << 2;
1173 bits
|= get_bit(g_scalar
, i
+ 56) << 1;
1174 bits
|= get_bit(g_scalar
, i
);
1175 /* select the point to add, in constant time */
1176 select_point(bits
, 16, g_pre_comp
[0], tmp
);
1177 point_add(nq
[0], nq
[1], nq
[2],
1178 nq
[0], nq
[1], nq
[2],
1179 1 /* mixed */ , tmp
[0], tmp
[1], tmp
[2]);
1182 /* do other additions every 5 doublings */
1183 if (num_points
&& (i
% 5 == 0)) {
1184 /* loop over all scalars */
1185 for (num
= 0; num
< num_points
; ++num
) {
1186 bits
= get_bit(scalars
[num
], i
+ 4) << 5;
1187 bits
|= get_bit(scalars
[num
], i
+ 3) << 4;
1188 bits
|= get_bit(scalars
[num
], i
+ 2) << 3;
1189 bits
|= get_bit(scalars
[num
], i
+ 1) << 2;
1190 bits
|= get_bit(scalars
[num
], i
) << 1;
1191 bits
|= get_bit(scalars
[num
], i
- 1);
1192 ec_GFp_nistp_recode_scalar_bits(&sign
, &digit
, bits
);
1194 /* select the point to add or subtract */
1195 select_point(digit
, 17, pre_comp
[num
], tmp
);
1196 felem_neg(tmp
[3], tmp
[1]); /* (X, -Y, Z) is the negative
1198 copy_conditional(tmp
[1], tmp
[3], sign
);
1201 point_add(nq
[0], nq
[1], nq
[2],
1202 nq
[0], nq
[1], nq
[2],
1203 mixed
, tmp
[0], tmp
[1], tmp
[2]);
1205 memcpy(nq
, tmp
, 3 * sizeof(felem
));
1211 felem_assign(x_out
, nq
[0]);
1212 felem_assign(y_out
, nq
[1]);
1213 felem_assign(z_out
, nq
[2]);
1216 /******************************************************************************/
1218 * FUNCTIONS TO MANAGE PRECOMPUTATION
1221 static NISTP224_PRE_COMP
*nistp224_pre_comp_new(void)
1223 NISTP224_PRE_COMP
*ret
= OPENSSL_zalloc(sizeof(*ret
));
1226 ECerr(EC_F_NISTP224_PRE_COMP_NEW
, ERR_R_MALLOC_FAILURE
);
1230 ret
->references
= 1;
1232 ret
->lock
= CRYPTO_THREAD_lock_new();
1233 if (ret
->lock
== NULL
) {
1234 ECerr(EC_F_NISTP224_PRE_COMP_NEW
, ERR_R_MALLOC_FAILURE
);
1241 NISTP224_PRE_COMP
*EC_nistp224_pre_comp_dup(NISTP224_PRE_COMP
*p
)
1245 CRYPTO_UP_REF(&p
->references
, &i
, p
->lock
);
1249 void EC_nistp224_pre_comp_free(NISTP224_PRE_COMP
*p
)
1256 CRYPTO_DOWN_REF(&p
->references
, &i
, p
->lock
);
1257 REF_PRINT_COUNT("EC_nistp224", x
);
1260 REF_ASSERT_ISNT(i
< 0);
1262 CRYPTO_THREAD_lock_free(p
->lock
);
1266 /******************************************************************************/
1268 * OPENSSL EC_METHOD FUNCTIONS
1271 int ec_GFp_nistp224_group_init(EC_GROUP
*group
)
1274 ret
= ec_GFp_simple_group_init(group
);
1275 group
->a_is_minus3
= 1;
1279 int ec_GFp_nistp224_group_set_curve(EC_GROUP
*group
, const BIGNUM
*p
,
1280 const BIGNUM
*a
, const BIGNUM
*b
,
1284 BIGNUM
*curve_p
, *curve_a
, *curve_b
;
1286 BN_CTX
*new_ctx
= NULL
;
1289 ctx
= new_ctx
= BN_CTX_new();
1295 curve_p
= BN_CTX_get(ctx
);
1296 curve_a
= BN_CTX_get(ctx
);
1297 curve_b
= BN_CTX_get(ctx
);
1298 if (curve_b
== NULL
)
1300 BN_bin2bn(nistp224_curve_params
[0], sizeof(felem_bytearray
), curve_p
);
1301 BN_bin2bn(nistp224_curve_params
[1], sizeof(felem_bytearray
), curve_a
);
1302 BN_bin2bn(nistp224_curve_params
[2], sizeof(felem_bytearray
), curve_b
);
1303 if ((BN_cmp(curve_p
, p
)) || (BN_cmp(curve_a
, a
)) || (BN_cmp(curve_b
, b
))) {
1304 ECerr(EC_F_EC_GFP_NISTP224_GROUP_SET_CURVE
,
1305 EC_R_WRONG_CURVE_PARAMETERS
);
1308 group
->field_mod_func
= BN_nist_mod_224
;
1309 ret
= ec_GFp_simple_group_set_curve(group
, p
, a
, b
, ctx
);
1313 BN_CTX_free(new_ctx
);
1319 * Takes the Jacobian coordinates (X, Y, Z) of a point and returns (X', Y') =
1322 int ec_GFp_nistp224_point_get_affine_coordinates(const EC_GROUP
*group
,
1323 const EC_POINT
*point
,
1324 BIGNUM
*x
, BIGNUM
*y
,
1327 felem z1
, z2
, x_in
, y_in
, x_out
, y_out
;
1330 if (EC_POINT_is_at_infinity(group
, point
)) {
1331 ECerr(EC_F_EC_GFP_NISTP224_POINT_GET_AFFINE_COORDINATES
,
1332 EC_R_POINT_AT_INFINITY
);
1335 if ((!BN_to_felem(x_in
, point
->X
)) || (!BN_to_felem(y_in
, point
->Y
)) ||
1336 (!BN_to_felem(z1
, point
->Z
)))
1339 felem_square(tmp
, z2
);
1340 felem_reduce(z1
, tmp
);
1341 felem_mul(tmp
, x_in
, z1
);
1342 felem_reduce(x_in
, tmp
);
1343 felem_contract(x_out
, x_in
);
1345 if (!felem_to_BN(x
, x_out
)) {
1346 ECerr(EC_F_EC_GFP_NISTP224_POINT_GET_AFFINE_COORDINATES
,
1351 felem_mul(tmp
, z1
, z2
);
1352 felem_reduce(z1
, tmp
);
1353 felem_mul(tmp
, y_in
, z1
);
1354 felem_reduce(y_in
, tmp
);
1355 felem_contract(y_out
, y_in
);
1357 if (!felem_to_BN(y
, y_out
)) {
1358 ECerr(EC_F_EC_GFP_NISTP224_POINT_GET_AFFINE_COORDINATES
,
1366 static void make_points_affine(size_t num
, felem points
[ /* num */ ][3],
1367 felem tmp_felems
[ /* num+1 */ ])
1370 * Runs in constant time, unless an input is the point at infinity (which
1371 * normally shouldn't happen).
1373 ec_GFp_nistp_points_make_affine_internal(num
,
1377 (void (*)(void *))felem_one
,
1379 (void (*)(void *, const void *))
1381 (void (*)(void *, const void *))
1382 felem_square_reduce
, (void (*)
1389 (void (*)(void *, const void *))
1391 (void (*)(void *, const void *))
1396 * Computes scalar*generator + \sum scalars[i]*points[i], ignoring NULL
1397 * values Result is stored in r (r can equal one of the inputs).
1399 int ec_GFp_nistp224_points_mul(const EC_GROUP
*group
, EC_POINT
*r
,
1400 const BIGNUM
*scalar
, size_t num
,
1401 const EC_POINT
*points
[],
1402 const BIGNUM
*scalars
[], BN_CTX
*ctx
)
1408 BIGNUM
*x
, *y
, *z
, *tmp_scalar
;
1409 felem_bytearray g_secret
;
1410 felem_bytearray
*secrets
= NULL
;
1411 felem (*pre_comp
)[17][3] = NULL
;
1412 felem
*tmp_felems
= NULL
;
1413 felem_bytearray tmp
;
1415 int have_pre_comp
= 0;
1416 size_t num_points
= num
;
1417 felem x_in
, y_in
, z_in
, x_out
, y_out
, z_out
;
1418 NISTP224_PRE_COMP
*pre
= NULL
;
1419 const felem(*g_pre_comp
)[16][3] = NULL
;
1420 EC_POINT
*generator
= NULL
;
1421 const EC_POINT
*p
= NULL
;
1422 const BIGNUM
*p_scalar
= NULL
;
1425 x
= BN_CTX_get(ctx
);
1426 y
= BN_CTX_get(ctx
);
1427 z
= BN_CTX_get(ctx
);
1428 tmp_scalar
= BN_CTX_get(ctx
);
1429 if (tmp_scalar
== NULL
)
1432 if (scalar
!= NULL
) {
1433 pre
= group
->pre_comp
.nistp224
;
1435 /* we have precomputation, try to use it */
1436 g_pre_comp
= (const felem(*)[16][3])pre
->g_pre_comp
;
1438 /* try to use the standard precomputation */
1439 g_pre_comp
= &gmul
[0];
1440 generator
= EC_POINT_new(group
);
1441 if (generator
== NULL
)
1443 /* get the generator from precomputation */
1444 if (!felem_to_BN(x
, g_pre_comp
[0][1][0]) ||
1445 !felem_to_BN(y
, g_pre_comp
[0][1][1]) ||
1446 !felem_to_BN(z
, g_pre_comp
[0][1][2])) {
1447 ECerr(EC_F_EC_GFP_NISTP224_POINTS_MUL
, ERR_R_BN_LIB
);
1450 if (!EC_POINT_set_Jprojective_coordinates_GFp(group
,
1454 if (0 == EC_POINT_cmp(group
, generator
, group
->generator
, ctx
))
1455 /* precomputation matches generator */
1459 * we don't have valid precomputation: treat the generator as a
1462 num_points
= num_points
+ 1;
1465 if (num_points
> 0) {
1466 if (num_points
>= 3) {
1468 * unless we precompute multiples for just one or two points,
1469 * converting those into affine form is time well spent
1473 secrets
= OPENSSL_zalloc(sizeof(*secrets
) * num_points
);
1474 pre_comp
= OPENSSL_zalloc(sizeof(*pre_comp
) * num_points
);
1477 OPENSSL_malloc(sizeof(felem
) * (num_points
* 17 + 1));
1478 if ((secrets
== NULL
) || (pre_comp
== NULL
)
1479 || (mixed
&& (tmp_felems
== NULL
))) {
1480 ECerr(EC_F_EC_GFP_NISTP224_POINTS_MUL
, ERR_R_MALLOC_FAILURE
);
1485 * we treat NULL scalars as 0, and NULL points as points at infinity,
1486 * i.e., they contribute nothing to the linear combination
1488 for (i
= 0; i
< num_points
; ++i
) {
1492 p
= EC_GROUP_get0_generator(group
);
1495 /* the i^th point */
1498 p_scalar
= scalars
[i
];
1500 if ((p_scalar
!= NULL
) && (p
!= NULL
)) {
1501 /* reduce scalar to 0 <= scalar < 2^224 */
1502 if ((BN_num_bits(p_scalar
) > 224)
1503 || (BN_is_negative(p_scalar
))) {
1505 * this is an unusual input, and we don't guarantee
1508 if (!BN_nnmod(tmp_scalar
, p_scalar
, group
->order
, ctx
)) {
1509 ECerr(EC_F_EC_GFP_NISTP224_POINTS_MUL
, ERR_R_BN_LIB
);
1512 num_bytes
= BN_bn2bin(tmp_scalar
, tmp
);
1514 num_bytes
= BN_bn2bin(p_scalar
, tmp
);
1515 flip_endian(secrets
[i
], tmp
, num_bytes
);
1516 /* precompute multiples */
1517 if ((!BN_to_felem(x_out
, p
->X
)) ||
1518 (!BN_to_felem(y_out
, p
->Y
)) ||
1519 (!BN_to_felem(z_out
, p
->Z
)))
1521 felem_assign(pre_comp
[i
][1][0], x_out
);
1522 felem_assign(pre_comp
[i
][1][1], y_out
);
1523 felem_assign(pre_comp
[i
][1][2], z_out
);
1524 for (j
= 2; j
<= 16; ++j
) {
1526 point_add(pre_comp
[i
][j
][0], pre_comp
[i
][j
][1],
1527 pre_comp
[i
][j
][2], pre_comp
[i
][1][0],
1528 pre_comp
[i
][1][1], pre_comp
[i
][1][2], 0,
1529 pre_comp
[i
][j
- 1][0],
1530 pre_comp
[i
][j
- 1][1],
1531 pre_comp
[i
][j
- 1][2]);
1533 point_double(pre_comp
[i
][j
][0], pre_comp
[i
][j
][1],
1534 pre_comp
[i
][j
][2], pre_comp
[i
][j
/ 2][0],
1535 pre_comp
[i
][j
/ 2][1],
1536 pre_comp
[i
][j
/ 2][2]);
1542 make_points_affine(num_points
* 17, pre_comp
[0], tmp_felems
);
1545 /* the scalar for the generator */
1546 if ((scalar
!= NULL
) && (have_pre_comp
)) {
1547 memset(g_secret
, 0, sizeof(g_secret
));
1548 /* reduce scalar to 0 <= scalar < 2^224 */
1549 if ((BN_num_bits(scalar
) > 224) || (BN_is_negative(scalar
))) {
1551 * this is an unusual input, and we don't guarantee
1554 if (!BN_nnmod(tmp_scalar
, scalar
, group
->order
, ctx
)) {
1555 ECerr(EC_F_EC_GFP_NISTP224_POINTS_MUL
, ERR_R_BN_LIB
);
1558 num_bytes
= BN_bn2bin(tmp_scalar
, tmp
);
1560 num_bytes
= BN_bn2bin(scalar
, tmp
);
1561 flip_endian(g_secret
, tmp
, num_bytes
);
1562 /* do the multiplication with generator precomputation */
1563 batch_mul(x_out
, y_out
, z_out
,
1564 (const felem_bytearray(*))secrets
, num_points
,
1566 mixed
, (const felem(*)[17][3])pre_comp
, g_pre_comp
);
1568 /* do the multiplication without generator precomputation */
1569 batch_mul(x_out
, y_out
, z_out
,
1570 (const felem_bytearray(*))secrets
, num_points
,
1571 NULL
, mixed
, (const felem(*)[17][3])pre_comp
, NULL
);
1572 /* reduce the output to its unique minimal representation */
1573 felem_contract(x_in
, x_out
);
1574 felem_contract(y_in
, y_out
);
1575 felem_contract(z_in
, z_out
);
1576 if ((!felem_to_BN(x
, x_in
)) || (!felem_to_BN(y
, y_in
)) ||
1577 (!felem_to_BN(z
, z_in
))) {
1578 ECerr(EC_F_EC_GFP_NISTP224_POINTS_MUL
, ERR_R_BN_LIB
);
1581 ret
= EC_POINT_set_Jprojective_coordinates_GFp(group
, r
, x
, y
, z
, ctx
);
1585 EC_POINT_free(generator
);
1586 OPENSSL_free(secrets
);
1587 OPENSSL_free(pre_comp
);
1588 OPENSSL_free(tmp_felems
);
1592 int ec_GFp_nistp224_precompute_mult(EC_GROUP
*group
, BN_CTX
*ctx
)
1595 NISTP224_PRE_COMP
*pre
= NULL
;
1598 EC_POINT
*generator
= NULL
;
1599 felem tmp_felems
[32];
1601 BN_CTX
*new_ctx
= NULL
;
1604 /* throw away old precomputation */
1605 EC_pre_comp_free(group
);
1609 ctx
= new_ctx
= BN_CTX_new();
1615 x
= BN_CTX_get(ctx
);
1616 y
= BN_CTX_get(ctx
);
1619 /* get the generator */
1620 if (group
->generator
== NULL
)
1622 generator
= EC_POINT_new(group
);
1623 if (generator
== NULL
)
1625 BN_bin2bn(nistp224_curve_params
[3], sizeof(felem_bytearray
), x
);
1626 BN_bin2bn(nistp224_curve_params
[4], sizeof(felem_bytearray
), y
);
1627 if (!EC_POINT_set_affine_coordinates(group
, generator
, x
, y
, ctx
))
1629 if ((pre
= nistp224_pre_comp_new()) == NULL
)
1632 * if the generator is the standard one, use built-in precomputation
1634 if (0 == EC_POINT_cmp(group
, generator
, group
->generator
, ctx
)) {
1635 memcpy(pre
->g_pre_comp
, gmul
, sizeof(pre
->g_pre_comp
));
1638 if ((!BN_to_felem(pre
->g_pre_comp
[0][1][0], group
->generator
->X
)) ||
1639 (!BN_to_felem(pre
->g_pre_comp
[0][1][1], group
->generator
->Y
)) ||
1640 (!BN_to_felem(pre
->g_pre_comp
[0][1][2], group
->generator
->Z
)))
1643 * compute 2^56*G, 2^112*G, 2^168*G for the first table, 2^28*G, 2^84*G,
1644 * 2^140*G, 2^196*G for the second one
1646 for (i
= 1; i
<= 8; i
<<= 1) {
1647 point_double(pre
->g_pre_comp
[1][i
][0], pre
->g_pre_comp
[1][i
][1],
1648 pre
->g_pre_comp
[1][i
][2], pre
->g_pre_comp
[0][i
][0],
1649 pre
->g_pre_comp
[0][i
][1], pre
->g_pre_comp
[0][i
][2]);
1650 for (j
= 0; j
< 27; ++j
) {
1651 point_double(pre
->g_pre_comp
[1][i
][0], pre
->g_pre_comp
[1][i
][1],
1652 pre
->g_pre_comp
[1][i
][2], pre
->g_pre_comp
[1][i
][0],
1653 pre
->g_pre_comp
[1][i
][1], pre
->g_pre_comp
[1][i
][2]);
1657 point_double(pre
->g_pre_comp
[0][2 * i
][0],
1658 pre
->g_pre_comp
[0][2 * i
][1],
1659 pre
->g_pre_comp
[0][2 * i
][2], pre
->g_pre_comp
[1][i
][0],
1660 pre
->g_pre_comp
[1][i
][1], pre
->g_pre_comp
[1][i
][2]);
1661 for (j
= 0; j
< 27; ++j
) {
1662 point_double(pre
->g_pre_comp
[0][2 * i
][0],
1663 pre
->g_pre_comp
[0][2 * i
][1],
1664 pre
->g_pre_comp
[0][2 * i
][2],
1665 pre
->g_pre_comp
[0][2 * i
][0],
1666 pre
->g_pre_comp
[0][2 * i
][1],
1667 pre
->g_pre_comp
[0][2 * i
][2]);
1670 for (i
= 0; i
< 2; i
++) {
1671 /* g_pre_comp[i][0] is the point at infinity */
1672 memset(pre
->g_pre_comp
[i
][0], 0, sizeof(pre
->g_pre_comp
[i
][0]));
1673 /* the remaining multiples */
1674 /* 2^56*G + 2^112*G resp. 2^84*G + 2^140*G */
1675 point_add(pre
->g_pre_comp
[i
][6][0], pre
->g_pre_comp
[i
][6][1],
1676 pre
->g_pre_comp
[i
][6][2], pre
->g_pre_comp
[i
][4][0],
1677 pre
->g_pre_comp
[i
][4][1], pre
->g_pre_comp
[i
][4][2],
1678 0, pre
->g_pre_comp
[i
][2][0], pre
->g_pre_comp
[i
][2][1],
1679 pre
->g_pre_comp
[i
][2][2]);
1680 /* 2^56*G + 2^168*G resp. 2^84*G + 2^196*G */
1681 point_add(pre
->g_pre_comp
[i
][10][0], pre
->g_pre_comp
[i
][10][1],
1682 pre
->g_pre_comp
[i
][10][2], pre
->g_pre_comp
[i
][8][0],
1683 pre
->g_pre_comp
[i
][8][1], pre
->g_pre_comp
[i
][8][2],
1684 0, pre
->g_pre_comp
[i
][2][0], pre
->g_pre_comp
[i
][2][1],
1685 pre
->g_pre_comp
[i
][2][2]);
1686 /* 2^112*G + 2^168*G resp. 2^140*G + 2^196*G */
1687 point_add(pre
->g_pre_comp
[i
][12][0], pre
->g_pre_comp
[i
][12][1],
1688 pre
->g_pre_comp
[i
][12][2], pre
->g_pre_comp
[i
][8][0],
1689 pre
->g_pre_comp
[i
][8][1], pre
->g_pre_comp
[i
][8][2],
1690 0, pre
->g_pre_comp
[i
][4][0], pre
->g_pre_comp
[i
][4][1],
1691 pre
->g_pre_comp
[i
][4][2]);
1693 * 2^56*G + 2^112*G + 2^168*G resp. 2^84*G + 2^140*G + 2^196*G
1695 point_add(pre
->g_pre_comp
[i
][14][0], pre
->g_pre_comp
[i
][14][1],
1696 pre
->g_pre_comp
[i
][14][2], pre
->g_pre_comp
[i
][12][0],
1697 pre
->g_pre_comp
[i
][12][1], pre
->g_pre_comp
[i
][12][2],
1698 0, pre
->g_pre_comp
[i
][2][0], pre
->g_pre_comp
[i
][2][1],
1699 pre
->g_pre_comp
[i
][2][2]);
1700 for (j
= 1; j
< 8; ++j
) {
1701 /* odd multiples: add G resp. 2^28*G */
1702 point_add(pre
->g_pre_comp
[i
][2 * j
+ 1][0],
1703 pre
->g_pre_comp
[i
][2 * j
+ 1][1],
1704 pre
->g_pre_comp
[i
][2 * j
+ 1][2],
1705 pre
->g_pre_comp
[i
][2 * j
][0],
1706 pre
->g_pre_comp
[i
][2 * j
][1],
1707 pre
->g_pre_comp
[i
][2 * j
][2], 0,
1708 pre
->g_pre_comp
[i
][1][0], pre
->g_pre_comp
[i
][1][1],
1709 pre
->g_pre_comp
[i
][1][2]);
1712 make_points_affine(31, &(pre
->g_pre_comp
[0][1]), tmp_felems
);
1715 SETPRECOMP(group
, nistp224
, pre
);
1720 EC_POINT_free(generator
);
1722 BN_CTX_free(new_ctx
);
1724 EC_nistp224_pre_comp_free(pre
);
1728 int ec_GFp_nistp224_have_precompute_mult(const EC_GROUP
*group
)
1730 return HAVEPRECOMP(group
, nistp224
);