1 /*
2 * Copyright 2010-2021 The OpenSSL Project Authors. All Rights Reserved.
3 *
4 * Licensed under the Apache License 2.0 (the "License"). You may not use
5 * this file except in compliance with the License. You can obtain a copy
6 * in the file LICENSE in the source distribution or at
7 * https://www.openssl.org/source/license.html
8 */
9
10 /* Copyright 2011 Google Inc.
11 *
12 * Licensed under the Apache License, Version 2.0 (the "License");
13 *
14 * you may not use this file except in compliance with the License.
15 * You may obtain a copy of the License at
16 *
17 * http://www.apache.org/licenses/LICENSE-2.0
18 *
19 * Unless required by applicable law or agreed to in writing, software
20 * distributed under the License is distributed on an "AS IS" BASIS,
21 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
22 * See the License for the specific language governing permissions and
23 * limitations under the License.
24 */
25
26 /*
27 * ECDSA low level APIs are deprecated for public use, but still ok for
28 * internal use.
29 */
30 #include "internal/deprecated.h"
31
32 /*
33 * A 64-bit implementation of the NIST P-224 elliptic curve point multiplication
34 *
35 * Inspired by Daniel J. Bernstein's public domain nistp224 implementation
36 * and Adam Langley's public domain 64-bit C implementation of curve25519
37 */
38
39 #include <openssl/opensslconf.h>
40
41 #include <stdint.h>
42 #include <string.h>
43 #include <openssl/err.h>
44 #include "ec_local.h"
45
46 #if defined(__SIZEOF_INT128__) && __SIZEOF_INT128__==16
47 /* even with gcc, the typedef won't work for 32-bit platforms */
48 typedef __uint128_t uint128_t; /* nonstandard; implemented by gcc on 64-bit
49 * platforms */
50 #else
51 # error "Your compiler doesn't appear to support 128-bit integer types"
52 #endif
53
54 typedef uint8_t u8;
55 typedef uint64_t u64;
56
57 /******************************************************************************/
58 /*-
59 * INTERNAL REPRESENTATION OF FIELD ELEMENTS
60 *
61 * Field elements are represented as a_0 + 2^56*a_1 + 2^112*a_2 + 2^168*a_3
62 * using 64-bit coefficients called 'limbs',
63 * and sometimes (for multiplication results) as
64 * b_0 + 2^56*b_1 + 2^112*b_2 + 2^168*b_3 + 2^224*b_4 + 2^280*b_5 + 2^336*b_6
65 * using 128-bit coefficients called 'widelimbs'.
66 * A 4-limb representation is an 'felem';
67 * a 7-widelimb representation is a 'widefelem'.
68 * Even within felems, bits of adjacent limbs overlap, and we don't always
69 * reduce the representations: we ensure that inputs to each felem
70 * multiplication satisfy a_i < 2^60, so outputs satisfy b_i < 4*2^60*2^60,
71 * and fit into a 128-bit word without overflow. The coefficients are then
72 * again partially reduced to obtain an felem satisfying a_i < 2^57.
73 * We only reduce to the unique minimal representation at the end of the
74 * computation.
75 */
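
/*-
 * For illustration: a fully reduced felem a stores the integer
 *     a[0] + 2^56*a[1] + 2^112*a[2] + 2^168*a[3],   with each a[i] < 2^56,
 * so the value 1 is {1, 0, 0, 0} and 2^56 is {0, 1, 0, 0}.  The widelimb
 * bound quoted above holds because each product coefficient is a sum of at
 * most four terms a_i*b_j with a_i, b_j < 2^60, hence < 4*2^60*2^60 = 2^122,
 * which leaves six bits of headroom below the 2^128 widelimb limit.
 */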
76
77 typedef uint64_t limb;
78 typedef uint64_t limb_aX __attribute((__aligned__(1)));
79 typedef uint128_t widelimb;
80
81 typedef limb felem[4];
82 typedef widelimb widefelem[7];
83
84 /*
85 * A field element represented as a byte array. 28*8 = 224 bits is also the
86 * size of the group order for this curve, so the same type is used for
87 * scalars in point multiplication.
88 */
89 typedef u8 felem_bytearray[28];
90
91 static const felem_bytearray nistp224_curve_params[5] = {
92 {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, /* p */
93 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00,
94 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01},
95 {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, /* a */
96 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFE, 0xFF, 0xFF, 0xFF, 0xFF,
97 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFE},
98 {0xB4, 0x05, 0x0A, 0x85, 0x0C, 0x04, 0xB3, 0xAB, 0xF5, 0x41, /* b */
99 0x32, 0x56, 0x50, 0x44, 0xB0, 0xB7, 0xD7, 0xBF, 0xD8, 0xBA,
100 0x27, 0x0B, 0x39, 0x43, 0x23, 0x55, 0xFF, 0xB4},
101 {0xB7, 0x0E, 0x0C, 0xBD, 0x6B, 0xB4, 0xBF, 0x7F, 0x32, 0x13, /* x */
102 0x90, 0xB9, 0x4A, 0x03, 0xC1, 0xD3, 0x56, 0xC2, 0x11, 0x22,
103 0x34, 0x32, 0x80, 0xD6, 0x11, 0x5C, 0x1D, 0x21},
104 {0xbd, 0x37, 0x63, 0x88, 0xb5, 0xf7, 0x23, 0xfb, 0x4c, 0x22, /* y */
105 0xdf, 0xe6, 0xcd, 0x43, 0x75, 0xa0, 0x5a, 0x07, 0x47, 0x64,
106 0x44, 0xd5, 0x81, 0x99, 0x85, 0x00, 0x7e, 0x34}
107 };
108
109 /*-
110 * Precomputed multiples of the standard generator
111 * Points are given in coordinates (X, Y, Z) where Z normally is 1
112 * (0 for the point at infinity).
113 * For each field element, slice a_0 is word 0, etc.
114 *
115 * The table has 2 * 16 elements, starting with the following:
116 * index | bits | point
117 * ------+---------+------------------------------
118 * 0 | 0 0 0 0 | 0G
119 * 1 | 0 0 0 1 | 1G
120 * 2 | 0 0 1 0 | 2^56G
121 * 3 | 0 0 1 1 | (2^56 + 1)G
122 * 4 | 0 1 0 0 | 2^112G
123 * 5 | 0 1 0 1 | (2^112 + 1)G
124 * 6 | 0 1 1 0 | (2^112 + 2^56)G
125 * 7 | 0 1 1 1 | (2^112 + 2^56 + 1)G
126 * 8 | 1 0 0 0 | 2^168G
127 * 9 | 1 0 0 1 | (2^168 + 1)G
128 * 10 | 1 0 1 0 | (2^168 + 2^56)G
129 * 11 | 1 0 1 1 | (2^168 + 2^56 + 1)G
130 * 12 | 1 1 0 0 | (2^168 + 2^112)G
131 * 13 | 1 1 0 1 | (2^168 + 2^112 + 1)G
132 * 14 | 1 1 1 0 | (2^168 + 2^112 + 2^56)G
133 * 15 | 1 1 1 1 | (2^168 + 2^112 + 2^56 + 1)G
134 * followed by a copy of this with each element multiplied by 2^28.
135 *
136 * The reason for this is so that we can clock bits into four different
137 * locations when doing simple scalar multiplies against the base point,
138 * and then another four locations using the second 16 elements.
139 */
140 static const felem gmul[2][16][3] = {
141 {{{0, 0, 0, 0},
142 {0, 0, 0, 0},
143 {0, 0, 0, 0}},
144 {{0x3280d6115c1d21, 0xc1d356c2112234, 0x7f321390b94a03, 0xb70e0cbd6bb4bf},
145 {0xd5819985007e34, 0x75a05a07476444, 0xfb4c22dfe6cd43, 0xbd376388b5f723},
146 {1, 0, 0, 0}},
147 {{0xfd9675666ebbe9, 0xbca7664d40ce5e, 0x2242df8d8a2a43, 0x1f49bbb0f99bc5},
148 {0x29e0b892dc9c43, 0xece8608436e662, 0xdc858f185310d0, 0x9812dd4eb8d321},
149 {1, 0, 0, 0}},
150 {{0x6d3e678d5d8eb8, 0x559eed1cb362f1, 0x16e9a3bbce8a3f, 0xeedcccd8c2a748},
151 {0xf19f90ed50266d, 0xabf2b4bf65f9df, 0x313865468fafec, 0x5cb379ba910a17},
152 {1, 0, 0, 0}},
153 {{0x0641966cab26e3, 0x91fb2991fab0a0, 0xefec27a4e13a0b, 0x0499aa8a5f8ebe},
154 {0x7510407766af5d, 0x84d929610d5450, 0x81d77aae82f706, 0x6916f6d4338c5b},
155 {1, 0, 0, 0}},
156 {{0xea95ac3b1f15c6, 0x086000905e82d4, 0xdd323ae4d1c8b1, 0x932b56be7685a3},
157 {0x9ef93dea25dbbf, 0x41665960f390f0, 0xfdec76dbe2a8a7, 0x523e80f019062a},
158 {1, 0, 0, 0}},
159 {{0x822fdd26732c73, 0xa01c83531b5d0f, 0x363f37347c1ba4, 0xc391b45c84725c},
160 {0xbbd5e1b2d6ad24, 0xddfbcde19dfaec, 0xc393da7e222a7f, 0x1efb7890ede244},
161 {1, 0, 0, 0}},
162 {{0x4c9e90ca217da1, 0xd11beca79159bb, 0xff8d33c2c98b7c, 0x2610b39409f849},
163 {0x44d1352ac64da0, 0xcdbb7b2c46b4fb, 0x966c079b753c89, 0xfe67e4e820b112},
164 {1, 0, 0, 0}},
165 {{0xe28cae2df5312d, 0xc71b61d16f5c6e, 0x79b7619a3e7c4c, 0x05c73240899b47},
166 {0x9f7f6382c73e3a, 0x18615165c56bda, 0x641fab2116fd56, 0x72855882b08394},
167 {1, 0, 0, 0}},
168 {{0x0469182f161c09, 0x74a98ca8d00fb5, 0xb89da93489a3e0, 0x41c98768fb0c1d},
169 {0xe5ea05fb32da81, 0x3dce9ffbca6855, 0x1cfe2d3fbf59e6, 0x0e5e03408738a7},
170 {1, 0, 0, 0}},
171 {{0xdab22b2333e87f, 0x4430137a5dd2f6, 0xe03ab9f738beb8, 0xcb0c5d0dc34f24},
172 {0x764a7df0c8fda5, 0x185ba5c3fa2044, 0x9281d688bcbe50, 0xc40331df893881},
173 {1, 0, 0, 0}},
174 {{0xb89530796f0f60, 0xade92bd26909a3, 0x1a0c83fb4884da, 0x1765bf22a5a984},
175 {0x772a9ee75db09e, 0x23bc6c67cec16f, 0x4c1edba8b14e2f, 0xe2a215d9611369},
176 {1, 0, 0, 0}},
177 {{0x571e509fb5efb3, 0xade88696410552, 0xc8ae85fada74fe, 0x6c7e4be83bbde3},
178 {0xff9f51160f4652, 0xb47ce2495a6539, 0xa2946c53b582f4, 0x286d2db3ee9a60},
179 {1, 0, 0, 0}},
180 {{0x40bbd5081a44af, 0x0995183b13926c, 0xbcefba6f47f6d0, 0x215619e9cc0057},
181 {0x8bc94d3b0df45e, 0xf11c54a3694f6f, 0x8631b93cdfe8b5, 0xe7e3f4b0982db9},
182 {1, 0, 0, 0}},
183 {{0xb17048ab3e1c7b, 0xac38f36ff8a1d8, 0x1c29819435d2c6, 0xc813132f4c07e9},
184 {0x2891425503b11f, 0x08781030579fea, 0xf5426ba5cc9674, 0x1e28ebf18562bc},
185 {1, 0, 0, 0}},
186 {{0x9f31997cc864eb, 0x06cd91d28b5e4c, 0xff17036691a973, 0xf1aef351497c58},
187 {0xdd1f2d600564ff, 0xdead073b1402db, 0x74a684435bd693, 0xeea7471f962558},
188 {1, 0, 0, 0}}},
189 {{{0, 0, 0, 0},
190 {0, 0, 0, 0},
191 {0, 0, 0, 0}},
192 {{0x9665266dddf554, 0x9613d78b60ef2d, 0xce27a34cdba417, 0xd35ab74d6afc31},
193 {0x85ccdd22deb15e, 0x2137e5783a6aab, 0xa141cffd8c93c6, 0x355a1830e90f2d},
194 {1, 0, 0, 0}},
195 {{0x1a494eadaade65, 0xd6da4da77fe53c, 0xe7992996abec86, 0x65c3553c6090e3},
196 {0xfa610b1fb09346, 0xf1c6540b8a4aaf, 0xc51a13ccd3cbab, 0x02995b1b18c28a},
197 {1, 0, 0, 0}},
198 {{0x7874568e7295ef, 0x86b419fbe38d04, 0xdc0690a7550d9a, 0xd3966a44beac33},
199 {0x2b7280ec29132f, 0xbeaa3b6a032df3, 0xdc7dd88ae41200, 0xd25e2513e3a100},
200 {1, 0, 0, 0}},
201 {{0x924857eb2efafd, 0xac2bce41223190, 0x8edaa1445553fc, 0x825800fd3562d5},
202 {0x8d79148ea96621, 0x23a01c3dd9ed8d, 0xaf8b219f9416b5, 0xd8db0cc277daea},
203 {1, 0, 0, 0}},
204 {{0x76a9c3b1a700f0, 0xe9acd29bc7e691, 0x69212d1a6b0327, 0x6322e97fe154be},
205 {0x469fc5465d62aa, 0x8d41ed18883b05, 0x1f8eae66c52b88, 0xe4fcbe9325be51},
206 {1, 0, 0, 0}},
207 {{0x825fdf583cac16, 0x020b857c7b023a, 0x683c17744b0165, 0x14ffd0a2daf2f1},
208 {0x323b36184218f9, 0x4944ec4e3b47d4, 0xc15b3080841acf, 0x0bced4b01a28bb},
209 {1, 0, 0, 0}},
210 {{0x92ac22230df5c4, 0x52f33b4063eda8, 0xcb3f19870c0c93, 0x40064f2ba65233},
211 {0xfe16f0924f8992, 0x012da25af5b517, 0x1a57bb24f723a6, 0x06f8bc76760def},
212 {1, 0, 0, 0}},
213 {{0x4a7084f7817cb9, 0xbcab0738ee9a78, 0x3ec11e11d9c326, 0xdc0fe90e0f1aae},
214 {0xcf639ea5f98390, 0x5c350aa22ffb74, 0x9afae98a4047b7, 0x956ec2d617fc45},
215 {1, 0, 0, 0}},
216 {{0x4306d648c1be6a, 0x9247cd8bc9a462, 0xf5595e377d2f2e, 0xbd1c3caff1a52e},
217 {0x045e14472409d0, 0x29f3e17078f773, 0x745a602b2d4f7d, 0x191837685cdfbb},
218 {1, 0, 0, 0}},
219 {{0x5b6ee254a8cb79, 0x4953433f5e7026, 0xe21faeb1d1def4, 0xc4c225785c09de},
220 {0x307ce7bba1e518, 0x31b125b1036db8, 0x47e91868839e8f, 0xc765866e33b9f3},
221 {1, 0, 0, 0}},
222 {{0x3bfece24f96906, 0x4794da641e5093, 0xde5df64f95db26, 0x297ecd89714b05},
223 {0x701bd3ebb2c3aa, 0x7073b4f53cb1d5, 0x13c5665658af16, 0x9895089d66fe58},
224 {1, 0, 0, 0}},
225 {{0x0fef05f78c4790, 0x2d773633b05d2e, 0x94229c3a951c94, 0xbbbd70df4911bb},
226 {0xb2c6963d2c1168, 0x105f47a72b0d73, 0x9fdf6111614080, 0x7b7e94b39e67b0},
227 {1, 0, 0, 0}},
228 {{0xad1a7d6efbe2b3, 0xf012482c0da69d, 0x6b3bdf12438345, 0x40d7558d7aa4d9},
229 {0x8a09fffb5c6d3d, 0x9a356e5d9ffd38, 0x5973f15f4f9b1c, 0xdcd5f59f63c3ea},
230 {1, 0, 0, 0}},
231 {{0xacf39f4c5ca7ab, 0x4c8071cc5fd737, 0xc64e3602cd1184, 0x0acd4644c9abba},
232 {0x6c011a36d8bf6e, 0xfecd87ba24e32a, 0x19f6f56574fad8, 0x050b204ced9405},
233 {1, 0, 0, 0}},
234 {{0xed4f1cae7d9a96, 0x5ceef7ad94c40a, 0x778e4a3bf3ef9b, 0x7405783dc3b55e},
235 {0x32477c61b6e8c6, 0xb46a97570f018b, 0x91176d0a7e95d1, 0x3df90fbc4c7d0e},
236 {1, 0, 0, 0}}}
237 };
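
/*-
 * Reading the table: gmul[0][1] is the generator G itself in (X, Y, 1) form;
 * its limbs are the little-endian 56-bit slices of the big-endian byte
 * arrays nistp224_curve_params[3] and [4] above (e.g. 0x3280d6115c1d21 is
 * the low 56 bits of Gx).  Likewise gmul[0][5] holds (2^112 + 1)*G and
 * gmul[1][5] holds 2^28*(2^112 + 1)*G, following the index/bits mapping
 * described above.  Every finite entry has Z = 1, which is what allows the
 * 'mixed' addition path in batch_mul() below.
 */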
238
239 /* Precomputation for the group generator. */
240 struct nistp224_pre_comp_st {
241 felem g_pre_comp[2][16][3];
242 CRYPTO_REF_COUNT references;
243 CRYPTO_RWLOCK *lock;
244 };
245
246 const EC_METHOD *EC_GFp_nistp224_method(void)
247 {
248 static const EC_METHOD ret = {
249 EC_FLAGS_DEFAULT_OCT,
250 NID_X9_62_prime_field,
251 ossl_ec_GFp_nistp224_group_init,
252 ossl_ec_GFp_simple_group_finish,
253 ossl_ec_GFp_simple_group_clear_finish,
254 ossl_ec_GFp_nist_group_copy,
255 ossl_ec_GFp_nistp224_group_set_curve,
256 ossl_ec_GFp_simple_group_get_curve,
257 ossl_ec_GFp_simple_group_get_degree,
258 ossl_ec_group_simple_order_bits,
259 ossl_ec_GFp_simple_group_check_discriminant,
260 ossl_ec_GFp_simple_point_init,
261 ossl_ec_GFp_simple_point_finish,
262 ossl_ec_GFp_simple_point_clear_finish,
263 ossl_ec_GFp_simple_point_copy,
264 ossl_ec_GFp_simple_point_set_to_infinity,
265 ossl_ec_GFp_simple_point_set_affine_coordinates,
266 ossl_ec_GFp_nistp224_point_get_affine_coordinates,
267 0 /* point_set_compressed_coordinates */ ,
268 0 /* point2oct */ ,
269 0 /* oct2point */ ,
270 ossl_ec_GFp_simple_add,
271 ossl_ec_GFp_simple_dbl,
272 ossl_ec_GFp_simple_invert,
273 ossl_ec_GFp_simple_is_at_infinity,
274 ossl_ec_GFp_simple_is_on_curve,
275 ossl_ec_GFp_simple_cmp,
276 ossl_ec_GFp_simple_make_affine,
277 ossl_ec_GFp_simple_points_make_affine,
278 ossl_ec_GFp_nistp224_points_mul,
279 ossl_ec_GFp_nistp224_precompute_mult,
280 ossl_ec_GFp_nistp224_have_precompute_mult,
281 ossl_ec_GFp_nist_field_mul,
282 ossl_ec_GFp_nist_field_sqr,
283 0 /* field_div */ ,
284 ossl_ec_GFp_simple_field_inv,
285 0 /* field_encode */ ,
286 0 /* field_decode */ ,
287 0, /* field_set_to_one */
288 ossl_ec_key_simple_priv2oct,
289 ossl_ec_key_simple_oct2priv,
290 0, /* set private */
291 ossl_ec_key_simple_generate_key,
292 ossl_ec_key_simple_check_key,
293 ossl_ec_key_simple_generate_public_key,
294 0, /* keycopy */
295 0, /* keyfinish */
296 ossl_ecdh_simple_compute_key,
297 ossl_ecdsa_simple_sign_setup,
298 ossl_ecdsa_simple_sign_sig,
299 ossl_ecdsa_simple_verify_sig,
300 0, /* field_inverse_mod_ord */
301 0, /* blind_coordinates */
302 0, /* ladder_pre */
303 0, /* ladder_step */
304 0 /* ladder_post */
305 };
306
307 return &ret;
308 }
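
/*-
 * Usage sketch (illustrative): in builds configured with
 * enable-ec_nistp_64_gcc_128, this method is normally selected automatically
 * for NID_secp224r1, so application code typically just does e.g.
 *
 *     EC_GROUP *grp = EC_GROUP_new_by_curve_name(NID_secp224r1);
 *     ...
 *     EC_GROUP_free(grp);
 *
 * Creating a group directly from EC_GFp_nistp224_method() with
 * EC_GROUP_new() also works, but that is deprecated API usage in OpenSSL 3.0.
 */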
309
310 /*
311 * Helper functions to convert field elements to/from internal representation
312 */
313 static void bin28_to_felem(felem out, const u8 in[28])
314 {
315 out[0] = *((const limb *)(in)) & 0x00ffffffffffffff;
316 out[1] = (*((const limb_aX *)(in + 7))) & 0x00ffffffffffffff;
317 out[2] = (*((const limb_aX *)(in + 14))) & 0x00ffffffffffffff;
318 out[3] = (*((const limb_aX *)(in + 20))) >> 8;
319 }
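
/*-
 * Byte layout used above (input is little-endian):
 *   out[0] = bytes  0..6   (8-byte read at offset 0,  mask drops byte 7)
 *   out[1] = bytes  7..13  (8-byte read at offset 7,  mask drops byte 14)
 *   out[2] = bytes 14..20  (8-byte read at offset 14, mask drops byte 21)
 *   out[3] = bytes 21..27  (8-byte read at offset 20, >> 8 drops byte 20)
 * The limb_aX typedef declares alignment 1 so that the unaligned 8-byte
 * reads at offsets 7, 14 and 20 are well-defined for the compiler.
 */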
320
321 static void felem_to_bin28(u8 out[28], const felem in)
322 {
323 unsigned i;
324 for (i = 0; i < 7; ++i) {
325 out[i] = in[0] >> (8 * i);
326 out[i + 7] = in[1] >> (8 * i);
327 out[i + 14] = in[2] >> (8 * i);
328 out[i + 21] = in[3] >> (8 * i);
329 }
330 }
331
332 /* From OpenSSL BIGNUM to internal representation */
333 static int BN_to_felem(felem out, const BIGNUM *bn)
334 {
335 felem_bytearray b_out;
336 int num_bytes;
337
338 if (BN_is_negative(bn)) {
339 ERR_raise(ERR_LIB_EC, EC_R_BIGNUM_OUT_OF_RANGE);
340 return 0;
341 }
342 num_bytes = BN_bn2lebinpad(bn, b_out, sizeof(b_out));
343 if (num_bytes < 0) {
344 ERR_raise(ERR_LIB_EC, EC_R_BIGNUM_OUT_OF_RANGE);
345 return 0;
346 }
347 bin28_to_felem(out, b_out);
348 return 1;
349 }
350
351 /* From internal representation to OpenSSL BIGNUM */
352 static BIGNUM *felem_to_BN(BIGNUM *out, const felem in)
353 {
354 felem_bytearray b_out;
355 felem_to_bin28(b_out, in);
356 return BN_lebin2bn(b_out, sizeof(b_out), out);
357 }
358
359 /******************************************************************************/
360 /*-
361 * FIELD OPERATIONS
362 *
363 * Field operations, using the internal representation of field elements.
364 * NB! These operations are specific to our point multiplication and cannot be
365 * expected to be correct in general - e.g., multiplication with a large scalar
366 * will cause an overflow.
367 *
368 */
369
370 static void felem_one(felem out)
371 {
372 out[0] = 1;
373 out[1] = 0;
374 out[2] = 0;
375 out[3] = 0;
376 }
377
378 static void felem_assign(felem out, const felem in)
379 {
380 out[0] = in[0];
381 out[1] = in[1];
382 out[2] = in[2];
383 out[3] = in[3];
384 }
385
386 /* Sum two field elements: out += in */
387 static void felem_sum(felem out, const felem in)
388 {
389 out[0] += in[0];
390 out[1] += in[1];
391 out[2] += in[2];
392 out[3] += in[3];
393 }
394
395 /* Subtract field elements: out -= in */
396 /* Assumes in[i] < 2^57 */
397 static void felem_diff(felem out, const felem in)
398 {
399 static const limb two58p2 = (((limb) 1) << 58) + (((limb) 1) << 2);
400 static const limb two58m2 = (((limb) 1) << 58) - (((limb) 1) << 2);
401 static const limb two58m42m2 = (((limb) 1) << 58) -
402 (((limb) 1) << 42) - (((limb) 1) << 2);
403
404 /* Add 0 mod 2^224-2^96+1 to ensure out > in */
405 out[0] += two58p2;
406 out[1] += two58m42m2;
407 out[2] += two58m2;
408 out[3] += two58m2;
409
410 out[0] -= in[0];
411 out[1] -= in[1];
412 out[2] -= in[2];
413 out[3] -= in[3];
414 }
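
/*-
 * Sanity check on the constant added above: interpreted as a field element,
 *   (2^58+2^2) + (2^58-2^42-2^2)*2^56 + (2^58-2^2)*2^112 + (2^58-2^2)*2^168
 *     = 2^226 - 2^98 + 2^2 = 4*(2^224 - 2^96 + 1) = 4*p,
 * so adding it really is adding 0 mod p, while making every limb of out
 * large enough that the subtraction cannot go negative.
 */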
415
416 /* Subtract in unreduced 128-bit mode: out -= in */
417 /* Assumes in[i] < 2^119 */
418 static void widefelem_diff(widefelem out, const widefelem in)
419 {
420 static const widelimb two120 = ((widelimb) 1) << 120;
421 static const widelimb two120m64 = (((widelimb) 1) << 120) -
422 (((widelimb) 1) << 64);
423 static const widelimb two120m104m64 = (((widelimb) 1) << 120) -
424 (((widelimb) 1) << 104) - (((widelimb) 1) << 64);
425
426 /* Add 0 mod 2^224-2^96+1 to ensure out > in */
427 out[0] += two120;
428 out[1] += two120m64;
429 out[2] += two120m64;
430 out[3] += two120;
431 out[4] += two120m104m64;
432 out[5] += two120m64;
433 out[6] += two120m64;
434
435 out[0] -= in[0];
436 out[1] -= in[1];
437 out[2] -= in[2];
438 out[3] -= in[3];
439 out[4] -= in[4];
440 out[5] -= in[5];
441 out[6] -= in[6];
442 }
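
/*-
 * As above, the added constant telescopes to 2^456 - 2^328 + 2^232
 * = 2^232 * (2^224 - 2^96 + 1) = 2^232 * p, i.e. 0 mod p.
 */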
443
444 /* Subtract in mixed mode: out128 -= in64 */
445 /* in[i] < 2^63 */
446 static void felem_diff_128_64(widefelem out, const felem in)
447 {
448 static const widelimb two64p8 = (((widelimb) 1) << 64) +
449 (((widelimb) 1) << 8);
450 static const widelimb two64m8 = (((widelimb) 1) << 64) -
451 (((widelimb) 1) << 8);
452 static const widelimb two64m48m8 = (((widelimb) 1) << 64) -
453 (((widelimb) 1) << 48) - (((widelimb) 1) << 8);
454
455 /* Add 0 mod 2^224-2^96+1 to ensure out > in */
456 out[0] += two64p8;
457 out[1] += two64m48m8;
458 out[2] += two64m8;
459 out[3] += two64m8;
460
461 out[0] -= in[0];
462 out[1] -= in[1];
463 out[2] -= in[2];
464 out[3] -= in[3];
465 }
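
/*-
 * Here the added constant is (2^64+2^8) + (2^64-2^48-2^8)*2^56 +
 * (2^64-2^8)*2^112 + (2^64-2^8)*2^168 = 2^232 - 2^104 + 2^8 = 2^8 * p,
 * again a multiple of p.
 */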
466
467 /*
468 * Multiply a field element by a scalar: out = out * scalar. The scalars we
469 * actually use are small, so the results fit without overflow.
470 */
471 static void felem_scalar(felem out, const limb scalar)
472 {
473 out[0] *= scalar;
474 out[1] *= scalar;
475 out[2] *= scalar;
476 out[3] *= scalar;
477 }
478
479 /*
480 * Multiply an unreduced field element by a scalar: out = out * scalar.
481 * The scalars we actually use are small, so the results fit without overflow.
482 */
483 static void widefelem_scalar(widefelem out, const widelimb scalar)
484 {
485 out[0] *= scalar;
486 out[1] *= scalar;
487 out[2] *= scalar;
488 out[3] *= scalar;
489 out[4] *= scalar;
490 out[5] *= scalar;
491 out[6] *= scalar;
492 }
493
494 /* Square a field element: out = in^2 */
495 static void felem_square(widefelem out, const felem in)
496 {
497 limb tmp0, tmp1, tmp2;
498 tmp0 = 2 * in[0];
499 tmp1 = 2 * in[1];
500 tmp2 = 2 * in[2];
501 out[0] = ((widelimb) in[0]) * in[0];
502 out[1] = ((widelimb) in[0]) * tmp1;
503 out[2] = ((widelimb) in[0]) * tmp2 + ((widelimb) in[1]) * in[1];
504 out[3] = ((widelimb) in[3]) * tmp0 + ((widelimb) in[1]) * tmp2;
505 out[4] = ((widelimb) in[3]) * tmp1 + ((widelimb) in[2]) * in[2];
506 out[5] = ((widelimb) in[3]) * tmp2;
507 out[6] = ((widelimb) in[3]) * in[3];
508 }
509
510 /* Multiply two field elements: out = in1 * in2 */
511 static void felem_mul(widefelem out, const felem in1, const felem in2)
512 {
513 out[0] = ((widelimb) in1[0]) * in2[0];
514 out[1] = ((widelimb) in1[0]) * in2[1] + ((widelimb) in1[1]) * in2[0];
515 out[2] = ((widelimb) in1[0]) * in2[2] + ((widelimb) in1[1]) * in2[1] +
516 ((widelimb) in1[2]) * in2[0];
517 out[3] = ((widelimb) in1[0]) * in2[3] + ((widelimb) in1[1]) * in2[2] +
518 ((widelimb) in1[2]) * in2[1] + ((widelimb) in1[3]) * in2[0];
519 out[4] = ((widelimb) in1[1]) * in2[3] + ((widelimb) in1[2]) * in2[2] +
520 ((widelimb) in1[3]) * in2[1];
521 out[5] = ((widelimb) in1[2]) * in2[3] + ((widelimb) in1[3]) * in2[2];
522 out[6] = ((widelimb) in1[3]) * in2[3];
523 }
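
/*-
 * Both felem_square and felem_mul are plain schoolbook products of the
 * polynomials a(t) and b(t) with t = 2^56:
 *     out[k] = \sum_{i+j=k} in1[i] * in2[j],   0 <= k <= 6.
 * Evaluated at t = 2^56 this equals the exact integer product; carries
 * between limbs are deferred entirely to felem_reduce().
 */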
524
525 /*-
526 * Reduce seven 128-bit coefficients to four 64-bit coefficients.
527 * Requires in[i] < 2^126,
528 * ensures out[0] < 2^56, out[1] < 2^56, out[2] < 2^56, out[3] <= 2^56 + 2^16 */
529 static void felem_reduce(felem out, const widefelem in)
530 {
531 static const widelimb two127p15 = (((widelimb) 1) << 127) +
532 (((widelimb) 1) << 15);
533 static const widelimb two127m71 = (((widelimb) 1) << 127) -
534 (((widelimb) 1) << 71);
535 static const widelimb two127m71m55 = (((widelimb) 1) << 127) -
536 (((widelimb) 1) << 71) - (((widelimb) 1) << 55);
537 widelimb output[5];
538
539 /* Add 0 mod 2^224-2^96+1 to ensure all differences are positive */
540 output[0] = in[0] + two127p15;
541 output[1] = in[1] + two127m71m55;
542 output[2] = in[2] + two127m71;
543 output[3] = in[3];
544 output[4] = in[4];
545
546 /* Eliminate in[4], in[5], in[6] */
547 output[4] += in[6] >> 16;
548 output[3] += (in[6] & 0xffff) << 40;
549 output[2] -= in[6];
550
551 output[3] += in[5] >> 16;
552 output[2] += (in[5] & 0xffff) << 40;
553 output[1] -= in[5];
554
555 output[2] += output[4] >> 16;
556 output[1] += (output[4] & 0xffff) << 40;
557 output[0] -= output[4];
558
559 /* Carry 2 -> 3 -> 4 */
560 output[3] += output[2] >> 56;
561 output[2] &= 0x00ffffffffffffff;
562
563 output[4] = output[3] >> 56;
564 output[3] &= 0x00ffffffffffffff;
565
566 /* Now output[2] < 2^56, output[3] < 2^56, output[4] < 2^72 */
567
568 /* Eliminate output[4] */
569 output[2] += output[4] >> 16;
570 /* output[2] < 2^56 + 2^56 = 2^57 */
571 output[1] += (output[4] & 0xffff) << 40;
572 output[0] -= output[4];
573
574 /* Carry 0 -> 1 -> 2 -> 3 */
575 output[1] += output[0] >> 56;
576 out[0] = output[0] & 0x00ffffffffffffff;
577
578 output[2] += output[1] >> 56;
579 /* output[2] < 2^57 + 2^72 */
580 out[1] = output[1] & 0x00ffffffffffffff;
581 output[3] += output[2] >> 56;
582 /* output[3] <= 2^56 + 2^16 */
583 out[2] = output[2] & 0x00ffffffffffffff;
584
585 /*-
586 * out[0] < 2^56, out[1] < 2^56, out[2] < 2^56,
587 * out[3] <= 2^56 + 2^16 (due to final carry),
588 * so out < 2*p
589 */
590 out[3] = output[3];
591 }
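
/*-
 * The folding above uses 2^224 == 2^96 - 1 (mod p).  For example the in[6]
 * term sits at weight 2^336 = 2^112 * 2^224, so
 *     in[6]*2^336 == in[6]*2^208 - in[6]*2^112 (mod p),
 * and since 2^208 = 2^168 * 2^40 the positive part is split as
 * (in[6] >> 16) into limb 4 plus ((in[6] & 0xffff) << 40) into limb 3,
 * while the negative part is subtracted from limb 2.
 */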
592
593 static void felem_square_reduce(felem out, const felem in)
594 {
595 widefelem tmp;
596 felem_square(tmp, in);
597 felem_reduce(out, tmp);
598 }
599
600 static void felem_mul_reduce(felem out, const felem in1, const felem in2)
601 {
602 widefelem tmp;
603 felem_mul(tmp, in1, in2);
604 felem_reduce(out, tmp);
605 }
606
607 /*
608 * Reduce to unique minimal representation. Requires 0 <= in < 2*p (always
609 * call felem_reduce first)
610 */
611 static void felem_contract(felem out, const felem in)
612 {
613 static const int64_t two56 = ((limb) 1) << 56;
614 /* 0 <= in < 2*p, p = 2^224 - 2^96 + 1 */
615 /* if in > p , reduce in = in - 2^224 + 2^96 - 1 */
616 int64_t tmp[4], a;
617 tmp[0] = in[0];
618 tmp[1] = in[1];
619 tmp[2] = in[2];
620 tmp[3] = in[3];
621 /* Case 1: a = 1 iff in >= 2^224 */
622 a = (in[3] >> 56);
623 tmp[0] -= a;
624 tmp[1] += a << 40;
625 tmp[3] &= 0x00ffffffffffffff;
626 /*
627 * Case 2: a = 0 iff p <= in < 2^224, i.e., the high 128 bits are all 1
628 * and the lower part is non-zero
629 */
630 a = ((in[3] & in[2] & (in[1] | 0x000000ffffffffff)) + 1) |
631 (((int64_t) (in[0] + (in[1] & 0x000000ffffffffff)) - 1) >> 63);
632 a &= 0x00ffffffffffffff;
633 /* turn a into an all-one mask (if a = 0) or an all-zero mask */
634 a = (a - 1) >> 63;
635 /* subtract 2^224 - 2^96 + 1 if a is all-one */
636 tmp[3] &= a ^ 0xffffffffffffffff;
637 tmp[2] &= a ^ 0xffffffffffffffff;
638 tmp[1] &= (a ^ 0xffffffffffffffff) | 0x000000ffffffffff;
639 tmp[0] -= 1 & a;
640
641 /*
642 * eliminate negative coefficients: if tmp[0] is negative, tmp[1] must be
643 * non-zero, so we only need one step
644 */
645 a = tmp[0] >> 63;
646 tmp[0] += two56 & a;
647 tmp[1] -= 1 & a;
648
649 /* carry 1 -> 2 -> 3 */
650 tmp[2] += tmp[1] >> 56;
651 tmp[1] &= 0x00ffffffffffffff;
652
653 tmp[3] += tmp[2] >> 56;
654 tmp[2] &= 0x00ffffffffffffff;
655
656 /* Now 0 <= out < p */
657 out[0] = tmp[0];
658 out[1] = tmp[1];
659 out[2] = tmp[2];
660 out[3] = tmp[3];
661 }
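
/*-
 * For reference, p itself in this limb representation is
 *     {1, 0x00ffff0000000000, 0x00ffffffffffffff, 0x00ffffffffffffff},
 * which is what the Case 2 test above is matching: limbs 3 and 2 all ones,
 * the top 16 bits of limb 1 all ones, and the remaining low 96 bits not all
 * zero.
 */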
662
663 /*
664 * Get negative value: out = -in
665 * Requires in[i] < 2^63,
666 * ensures out[0] < 2^56, out[1] < 2^56, out[2] < 2^56, out[3] <= 2^56 + 2^16
667 */
668 static void felem_neg(felem out, const felem in)
669 {
670 widefelem tmp;
671
672 memset(tmp, 0, sizeof(tmp));
673 felem_diff_128_64(tmp, in);
674 felem_reduce(out, tmp);
675 }
676
677 /*
678 * Zero-check: returns 1 if input is 0, and 0 otherwise. We know that field
679 * elements are reduced to in < 2^225, so we only need to check three cases:
680 * 0, 2^224 - 2^96 + 1, and 2^225 - 2^97 + 2
681 */
682 static limb felem_is_zero(const felem in)
683 {
684 limb zero, two224m96p1, two225m97p2;
685
686 zero = in[0] | in[1] | in[2] | in[3];
687 zero = (((int64_t) (zero) - 1) >> 63) & 1;
688 two224m96p1 = (in[0] ^ 1) | (in[1] ^ 0x00ffff0000000000)
689 | (in[2] ^ 0x00ffffffffffffff) | (in[3] ^ 0x00ffffffffffffff);
690 two224m96p1 = (((int64_t) (two224m96p1) - 1) >> 63) & 1;
691 two225m97p2 = (in[0] ^ 2) | (in[1] ^ 0x00fffe0000000000)
692 | (in[2] ^ 0x00ffffffffffffff) | (in[3] ^ 0x01ffffffffffffff);
693 two225m97p2 = (((int64_t) (two225m97p2) - 1) >> 63) & 1;
694 return (zero | two224m96p1 | two225m97p2);
695 }
696
697 static int felem_is_zero_int(const void *in)
698 {
699 return (int)(felem_is_zero(in) & ((limb) 1));
700 }
701
702 /* Invert a field element */
703 /* Computation chain copied from djb's code */
704 static void felem_inv(felem out, const felem in)
705 {
706 felem ftmp, ftmp2, ftmp3, ftmp4;
707 widefelem tmp;
708 unsigned i;
709
710 felem_square(tmp, in);
711 felem_reduce(ftmp, tmp); /* 2 */
712 felem_mul(tmp, in, ftmp);
713 felem_reduce(ftmp, tmp); /* 2^2 - 1 */
714 felem_square(tmp, ftmp);
715 felem_reduce(ftmp, tmp); /* 2^3 - 2 */
716 felem_mul(tmp, in, ftmp);
717 felem_reduce(ftmp, tmp); /* 2^3 - 1 */
718 felem_square(tmp, ftmp);
719 felem_reduce(ftmp2, tmp); /* 2^4 - 2 */
720 felem_square(tmp, ftmp2);
721 felem_reduce(ftmp2, tmp); /* 2^5 - 4 */
722 felem_square(tmp, ftmp2);
723 felem_reduce(ftmp2, tmp); /* 2^6 - 8 */
724 felem_mul(tmp, ftmp2, ftmp);
725 felem_reduce(ftmp, tmp); /* 2^6 - 1 */
726 felem_square(tmp, ftmp);
727 felem_reduce(ftmp2, tmp); /* 2^7 - 2 */
728 for (i = 0; i < 5; ++i) { /* 2^12 - 2^6 */
729 felem_square(tmp, ftmp2);
730 felem_reduce(ftmp2, tmp);
731 }
732 felem_mul(tmp, ftmp2, ftmp);
733 felem_reduce(ftmp2, tmp); /* 2^12 - 1 */
734 felem_square(tmp, ftmp2);
735 felem_reduce(ftmp3, tmp); /* 2^13 - 2 */
736 for (i = 0; i < 11; ++i) { /* 2^24 - 2^12 */
737 felem_square(tmp, ftmp3);
738 felem_reduce(ftmp3, tmp);
739 }
740 felem_mul(tmp, ftmp3, ftmp2);
741 felem_reduce(ftmp2, tmp); /* 2^24 - 1 */
742 felem_square(tmp, ftmp2);
743 felem_reduce(ftmp3, tmp); /* 2^25 - 2 */
744 for (i = 0; i < 23; ++i) { /* 2^48 - 2^24 */
745 felem_square(tmp, ftmp3);
746 felem_reduce(ftmp3, tmp);
747 }
748 felem_mul(tmp, ftmp3, ftmp2);
749 felem_reduce(ftmp3, tmp); /* 2^48 - 1 */
750 felem_square(tmp, ftmp3);
751 felem_reduce(ftmp4, tmp); /* 2^49 - 2 */
752 for (i = 0; i < 47; ++i) { /* 2^96 - 2^48 */
753 felem_square(tmp, ftmp4);
754 felem_reduce(ftmp4, tmp);
755 }
756 felem_mul(tmp, ftmp3, ftmp4);
757 felem_reduce(ftmp3, tmp); /* 2^96 - 1 */
758 felem_square(tmp, ftmp3);
759 felem_reduce(ftmp4, tmp); /* 2^97 - 2 */
760 for (i = 0; i < 23; ++i) { /* 2^120 - 2^24 */
761 felem_square(tmp, ftmp4);
762 felem_reduce(ftmp4, tmp);
763 }
764 felem_mul(tmp, ftmp2, ftmp4);
765 felem_reduce(ftmp2, tmp); /* 2^120 - 1 */
766 for (i = 0; i < 6; ++i) { /* 2^126 - 2^6 */
767 felem_square(tmp, ftmp2);
768 felem_reduce(ftmp2, tmp);
769 }
770 felem_mul(tmp, ftmp2, ftmp);
771 felem_reduce(ftmp, tmp); /* 2^126 - 1 */
772 felem_square(tmp, ftmp);
773 felem_reduce(ftmp, tmp); /* 2^127 - 2 */
774 felem_mul(tmp, ftmp, in);
775 felem_reduce(ftmp, tmp); /* 2^127 - 1 */
776 for (i = 0; i < 97; ++i) { /* 2^224 - 2^97 */
777 felem_square(tmp, ftmp);
778 felem_reduce(ftmp, tmp);
779 }
780 felem_mul(tmp, ftmp, ftmp3);
781 felem_reduce(out, tmp); /* 2^224 - 2^96 - 1 */
782 }
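
/*-
 * The chain above computes in^(2^224 - 2^96 - 1) = in^(p-2), i.e. the
 * inverse by Fermat's little theorem, using about 223 squarings and a
 * dozen multiplications.
 */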
783
784 /*
785 * Copy in constant time: if icopy == 1, copy in to out; if icopy == 0,
786 * leave out unchanged.
787 */
788 static void copy_conditional(felem out, const felem in, limb icopy)
789 {
790 unsigned i;
791 /*
792 * icopy is a (64-bit) 0 or 1, so copy is either all-zero or all-one
793 */
794 const limb copy = -icopy;
795 for (i = 0; i < 4; ++i) {
796 const limb tmp = copy & (in[i] ^ out[i]);
797 out[i] ^= tmp;
798 }
799 }
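
/*-
 * Worked through: icopy == 1 gives copy == ~0, so out[i] ^= (in[i] ^ out[i])
 * leaves out[i] == in[i]; icopy == 0 gives copy == 0 and out is untouched.
 * Either way the memory access pattern is identical, hence constant time.
 */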
800
801 /******************************************************************************/
802 /*-
803 * ELLIPTIC CURVE POINT OPERATIONS
804 *
805 * Points are represented in Jacobian projective coordinates:
806 * (X, Y, Z) corresponds to the affine point (X/Z^2, Y/Z^3),
807 * or to the point at infinity if Z == 0.
808 *
809 */
810
811 /*-
812 * Double an elliptic curve point:
813 * (X', Y', Z') = 2 * (X, Y, Z), where
814 * X' = (3 * (X - Z^2) * (X + Z^2))^2 - 8 * X * Y^2
815 * Y' = 3 * (X - Z^2) * (X + Z^2) * (4 * X * Y^2 - X') - 8 * Y^4
816 * Z' = (Y + Z)^2 - Y^2 - Z^2 = 2 * Y * Z
817 * Outputs can equal corresponding inputs, i.e., x_out == x_in is allowed,
818 * while x_out == y_in is not (maybe this works, but it's not tested).
819 */
820 static void
821 point_double(felem x_out, felem y_out, felem z_out,
822 const felem x_in, const felem y_in, const felem z_in)
823 {
824 widefelem tmp, tmp2;
825 felem delta, gamma, beta, alpha, ftmp, ftmp2;
826
827 felem_assign(ftmp, x_in);
828 felem_assign(ftmp2, x_in);
829
830 /* delta = z^2 */
831 felem_square(tmp, z_in);
832 felem_reduce(delta, tmp);
833
834 /* gamma = y^2 */
835 felem_square(tmp, y_in);
836 felem_reduce(gamma, tmp);
837
838 /* beta = x*gamma */
839 felem_mul(tmp, x_in, gamma);
840 felem_reduce(beta, tmp);
841
842 /* alpha = 3*(x-delta)*(x+delta) */
843 felem_diff(ftmp, delta);
844 /* ftmp[i] < 2^57 + 2^58 + 2 < 2^59 */
845 felem_sum(ftmp2, delta);
846 /* ftmp2[i] < 2^57 + 2^57 = 2^58 */
847 felem_scalar(ftmp2, 3);
848 /* ftmp2[i] < 3 * 2^58 < 2^60 */
849 felem_mul(tmp, ftmp, ftmp2);
850 /* tmp[i] < 2^60 * 2^59 * 4 = 2^121 */
851 felem_reduce(alpha, tmp);
852
853 /* x' = alpha^2 - 8*beta */
854 felem_square(tmp, alpha);
855 /* tmp[i] < 4 * 2^57 * 2^57 = 2^116 */
856 felem_assign(ftmp, beta);
857 felem_scalar(ftmp, 8);
858 /* ftmp[i] < 8 * 2^57 = 2^60 */
859 felem_diff_128_64(tmp, ftmp);
860 /* tmp[i] < 2^116 + 2^64 + 8 < 2^117 */
861 felem_reduce(x_out, tmp);
862
863 /* z' = (y + z)^2 - gamma - delta */
864 felem_sum(delta, gamma);
865 /* delta[i] < 2^57 + 2^57 = 2^58 */
866 felem_assign(ftmp, y_in);
867 felem_sum(ftmp, z_in);
868 /* ftmp[i] < 2^57 + 2^57 = 2^58 */
869 felem_square(tmp, ftmp);
870 /* tmp[i] < 4 * 2^58 * 2^58 = 2^118 */
871 felem_diff_128_64(tmp, delta);
872 /* tmp[i] < 2^118 + 2^64 + 8 < 2^119 */
873 felem_reduce(z_out, tmp);
874
875 /* y' = alpha*(4*beta - x') - 8*gamma^2 */
876 felem_scalar(beta, 4);
877 /* beta[i] < 4 * 2^57 = 2^59 */
878 felem_diff(beta, x_out);
879 /* beta[i] < 2^59 + 2^58 + 2 < 2^60 */
880 felem_mul(tmp, alpha, beta);
881 /* tmp[i] < 4 * 2^57 * 2^60 = 2^119 */
882 felem_square(tmp2, gamma);
883 /* tmp2[i] < 4 * 2^57 * 2^57 = 2^116 */
884 widefelem_scalar(tmp2, 8);
885 /* tmp2[i] < 8 * 2^116 = 2^119 */
886 widefelem_diff(tmp, tmp2);
887 /* tmp[i] < 2^119 + 2^120 < 2^121 */
888 felem_reduce(y_out, tmp);
889 }
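
/*-
 * Note on the formula: alpha = 3*(X - Z^2)*(X + Z^2) = 3*X^2 - 3*Z^4, which
 * equals the generic 3*X^2 + a*Z^4 only because a = -3 for P-224; this is
 * why ossl_ec_GFp_nistp224_group_init() sets a_is_minus3 and why
 * ossl_ec_GFp_nistp224_group_set_curve() rejects any other curve parameters.
 */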
890
891 /*-
892 * Add two elliptic curve points:
893 * (X_1, Y_1, Z_1) + (X_2, Y_2, Z_2) = (X_3, Y_3, Z_3), where
894 * X_3 = (Z_1^3 * Y_2 - Z_2^3 * Y_1)^2 - (Z_1^2 * X_2 - Z_2^2 * X_1)^3 -
895 * 2 * Z_2^2 * X_1 * (Z_1^2 * X_2 - Z_2^2 * X_1)^2
896 * Y_3 = (Z_1^3 * Y_2 - Z_2^3 * Y_1) * (Z_2^2 * X_1 * (Z_1^2 * X_2 - Z_2^2 * X_1)^2 - X_3) -
897 * Z_2^3 * Y_1 * (Z_1^2 * X_2 - Z_2^2 * X_1)^3
898 * Z_3 = (Z_1^2 * X_2 - Z_2^2 * X_1) * (Z_1 * Z_2)
899 *
900 * This runs faster if 'mixed' is set, which requires Z_2 = 1 or Z_2 = 0.
901 */
902
903 /*
904 * This function is not entirely constant-time: it includes a branch for
905 * checking whether the two input points are equal (and neither is the
906 * point at infinity). This case never happens during single point
907 * multiplication, so there is no timing leak for ECDH or ECDSA signing.
908 */
909 static void point_add(felem x3, felem y3, felem z3,
910 const felem x1, const felem y1, const felem z1,
911 const int mixed, const felem x2, const felem y2,
912 const felem z2)
913 {
914 felem ftmp, ftmp2, ftmp3, ftmp4, ftmp5, x_out, y_out, z_out;
915 widefelem tmp, tmp2;
916 limb z1_is_zero, z2_is_zero, x_equal, y_equal;
917 limb points_equal;
918
919 if (!mixed) {
920 /* ftmp2 = z2^2 */
921 felem_square(tmp, z2);
922 felem_reduce(ftmp2, tmp);
923
924 /* ftmp4 = z2^3 */
925 felem_mul(tmp, ftmp2, z2);
926 felem_reduce(ftmp4, tmp);
927
928 /* ftmp4 = z2^3*y1 */
929 felem_mul(tmp2, ftmp4, y1);
930 felem_reduce(ftmp4, tmp2);
931
932 /* ftmp2 = z2^2*x1 */
933 felem_mul(tmp2, ftmp2, x1);
934 felem_reduce(ftmp2, tmp2);
935 } else {
936 /*
937 * We'll assume z2 = 1 (special case z2 = 0 is handled later)
938 */
939
940 /* ftmp4 = z2^3*y1 */
941 felem_assign(ftmp4, y1);
942
943 /* ftmp2 = z2^2*x1 */
944 felem_assign(ftmp2, x1);
945 }
946
947 /* ftmp = z1^2 */
948 felem_square(tmp, z1);
949 felem_reduce(ftmp, tmp);
950
951 /* ftmp3 = z1^3 */
952 felem_mul(tmp, ftmp, z1);
953 felem_reduce(ftmp3, tmp);
954
955 /* tmp = z1^3*y2 */
956 felem_mul(tmp, ftmp3, y2);
957 /* tmp[i] < 4 * 2^57 * 2^57 = 2^116 */
958
959 /* ftmp3 = z1^3*y2 - z2^3*y1 */
960 felem_diff_128_64(tmp, ftmp4);
961 /* tmp[i] < 2^116 + 2^64 + 8 < 2^117 */
962 felem_reduce(ftmp3, tmp);
963
964 /* tmp = z1^2*x2 */
965 felem_mul(tmp, ftmp, x2);
966 /* tmp[i] < 4 * 2^57 * 2^57 = 2^116 */
967
968 /* ftmp = z1^2*x2 - z2^2*x1 */
969 felem_diff_128_64(tmp, ftmp2);
970 /* tmp[i] < 2^116 + 2^64 + 8 < 2^117 */
971 felem_reduce(ftmp, tmp);
972
973 /*
974 * The formulae are incorrect if the points are equal in affine coordinates,
975 * i.e. (X_1, Y_1) == (X_2, Y_2), so we check for this and fall back to
976 * doubling when it happens.
977 *
978 * We use bitwise operations to avoid potential side-channels introduced by
979 * the short-circuiting behaviour of boolean operators.
980 */
981 x_equal = felem_is_zero(ftmp);
982 y_equal = felem_is_zero(ftmp3);
983 /*
984 * The special case of either point being the point at infinity (z1 and/or
985 * z2 are zero), is handled separately later on in this function, so we
986 * avoid jumping to point_double here in those special cases.
987 */
988 z1_is_zero = felem_is_zero(z1);
989 z2_is_zero = felem_is_zero(z2);
990
991 /*
992 * Compared to `ecp_nistp256.c` and `ecp_nistp521.c`, in this
993 * specific implementation `felem_is_zero()` returns truth as `0x1`
994 * (rather than `0xff..ff`).
995 *
996 * This implies that `~true` in this implementation becomes
997 * `0xff..fe` (rather than `0x0`): for this reason, to be used in
998 * the if expression, we mask out only the last bit in the next
999 * line.
1000 */
1001 points_equal = (x_equal & y_equal & (~z1_is_zero) & (~z2_is_zero)) & 1;
1002
1003 if (points_equal) {
1004 /*
1005 * This is obviously not constant-time but, as mentioned before, this
1006 * case never happens during single point multiplication, so there is no
1007 * timing leak for ECDH or ECDSA signing.
1008 */
1009 point_double(x3, y3, z3, x1, y1, z1);
1010 return;
1011 }
1012
1013 /* ftmp5 = z1*z2 */
1014 if (!mixed) {
1015 felem_mul(tmp, z1, z2);
1016 felem_reduce(ftmp5, tmp);
1017 } else {
1018 /* special case z2 = 0 is handled later */
1019 felem_assign(ftmp5, z1);
1020 }
1021
1022 /* z_out = (z1^2*x2 - z2^2*x1)*(z1*z2) */
1023 felem_mul(tmp, ftmp, ftmp5);
1024 felem_reduce(z_out, tmp);
1025
1026 /* ftmp = (z1^2*x2 - z2^2*x1)^2 */
1027 felem_assign(ftmp5, ftmp);
1028 felem_square(tmp, ftmp);
1029 felem_reduce(ftmp, tmp);
1030
1031 /* ftmp5 = (z1^2*x2 - z2^2*x1)^3 */
1032 felem_mul(tmp, ftmp, ftmp5);
1033 felem_reduce(ftmp5, tmp);
1034
1035 /* ftmp2 = z2^2*x1*(z1^2*x2 - z2^2*x1)^2 */
1036 felem_mul(tmp, ftmp2, ftmp);
1037 felem_reduce(ftmp2, tmp);
1038
1039 /* tmp = z2^3*y1*(z1^2*x2 - z2^2*x1)^3 */
1040 felem_mul(tmp, ftmp4, ftmp5);
1041 /* tmp[i] < 4 * 2^57 * 2^57 = 2^116 */
1042
1043 /* tmp2 = (z1^3*y2 - z2^3*y1)^2 */
1044 felem_square(tmp2, ftmp3);
1045 /* tmp2[i] < 4 * 2^57 * 2^57 < 2^116 */
1046
1047 /* tmp2 = (z1^3*y2 - z2^3*y1)^2 - (z1^2*x2 - z2^2*x1)^3 */
1048 felem_diff_128_64(tmp2, ftmp5);
1049 /* tmp2[i] < 2^116 + 2^64 + 8 < 2^117 */
1050
1051 /* ftmp5 = 2*z2^2*x1*(z1^2*x2 - z2^2*x1)^2 */
1052 felem_assign(ftmp5, ftmp2);
1053 felem_scalar(ftmp5, 2);
1054 /* ftmp5[i] < 2 * 2^57 = 2^58 */
1055
1056 /*-
1057 * x_out = (z1^3*y2 - z2^3*y1)^2 - (z1^2*x2 - z2^2*x1)^3 -
1058 * 2*z2^2*x1*(z1^2*x2 - z2^2*x1)^2
1059 */
1060 felem_diff_128_64(tmp2, ftmp5);
1061 /* tmp2[i] < 2^117 + 2^64 + 8 < 2^118 */
1062 felem_reduce(x_out, tmp2);
1063
1064 /* ftmp2 = z2^2*x1*(z1^2*x2 - z2^2*x1)^2 - x_out */
1065 felem_diff(ftmp2, x_out);
1066 /* ftmp2[i] < 2^57 + 2^58 + 2 < 2^59 */
1067
1068 /*
1069 * tmp2 = (z1^3*y2 - z2^3*y1)*(z2^2*x1*(z1^2*x2 - z2^2*x1)^2 - x_out)
1070 */
1071 felem_mul(tmp2, ftmp3, ftmp2);
1072 /* tmp2[i] < 4 * 2^57 * 2^59 = 2^118 */
1073
1074 /*-
1075 * y_out = (z1^3*y2 - z2^3*y1)*(z2^2*x1*(z1^2*x2 - z2^2*x1)^2 - x_out) -
1076 * z2^3*y1*(z1^2*x2 - z2^2*x1)^3
1077 */
1078 widefelem_diff(tmp2, tmp);
1079 /* tmp2[i] < 2^118 + 2^120 < 2^121 */
1080 felem_reduce(y_out, tmp2);
1081
1082 /*
1083 * the result (x_out, y_out, z_out) is incorrect if one of the inputs is
1084 * the point at infinity, so we need to check for this separately
1085 */
1086
1087 /*
1088 * if point 1 is at infinity, copy point 2 to output, and vice versa
1089 */
1090 copy_conditional(x_out, x2, z1_is_zero);
1091 copy_conditional(x_out, x1, z2_is_zero);
1092 copy_conditional(y_out, y2, z1_is_zero);
1093 copy_conditional(y_out, y1, z2_is_zero);
1094 copy_conditional(z_out, z2, z1_is_zero);
1095 copy_conditional(z_out, z1, z2_is_zero);
1096 felem_assign(x3, x_out);
1097 felem_assign(y3, y_out);
1098 felem_assign(z3, z_out);
1099 }
1100
1101 /*
1102 * select_point selects the |idx|th point from a precomputation table and
1103 * copies it to out.
1104 * The pre_comp array argument must contain |size| point entries.
1105 */
1106 static void select_point(const u64 idx, unsigned int size,
1107 const felem pre_comp[][3], felem out[3])
1108 {
1109 unsigned i, j;
1110 limb *outlimbs = &out[0][0];
1111
1112 memset(out, 0, sizeof(*out) * 3);
1113 for (i = 0; i < size; i++) {
1114 const limb *inlimbs = &pre_comp[i][0][0];
1115 u64 mask = i ^ idx;
1116 mask |= mask >> 4;
1117 mask |= mask >> 2;
1118 mask |= mask >> 1;
1119 mask &= 1;
1120 mask--;
1121 for (j = 0; j < 4 * 3; j++)
1122 outlimbs[j] |= inlimbs[j] & mask;
1123 }
1124 }
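
/*-
 * How the mask works: idx and i both fit in 5 bits here (size <= 17), so
 * OR-folding i ^ idx down by 4, 2 and 1 collects any difference into bit 0.
 * For i == idx that bit is 0 and 'mask--' wraps to all ones, so exactly one
 * entry's limbs are OR-ed into out; for every other i the mask becomes 0.
 */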
1125
1126 /* get_bit returns the |i|th bit in |in| */
1127 static char get_bit(const felem_bytearray in, unsigned i)
1128 {
1129 if (i >= 224)
1130 return 0;
1131 return (in[i >> 3] >> (i & 7)) & 1;
1132 }
1133
1134 /*
1135 * Interleaved point multiplication using precomputed point multiples: The
1136 * small point multiples 0*P, 1*P, ..., 16*P are in pre_comp[], the scalars
1137 * in scalars[]. If g_scalar is non-NULL, we also add this multiple of the
1138 * generator, using certain (large) precomputed multiples in g_pre_comp.
1139 * Output point (X, Y, Z) is stored in x_out, y_out, z_out
1140 */
1141 static void batch_mul(felem x_out, felem y_out, felem z_out,
1142 const felem_bytearray scalars[],
1143 const unsigned num_points, const u8 *g_scalar,
1144 const int mixed, const felem pre_comp[][17][3],
1145 const felem g_pre_comp[2][16][3])
1146 {
1147 int i, skip;
1148 unsigned num;
1149 unsigned gen_mul = (g_scalar != NULL);
1150 felem nq[3], tmp[4];
1151 u64 bits;
1152 u8 sign, digit;
1153
1154 /* set nq to the point at infinity */
1155 memset(nq, 0, sizeof(nq));
1156
1157 /*
1158 * Loop over all scalars msb-to-lsb, interleaving additions of multiples
1159 * of the generator (two in each of the last 28 rounds) and additions of
1160 * the other points' multiples (every 5th round).
1161 */
1162 skip = 1; /* save two point operations in the first
1163 * round */
1164 for (i = (num_points ? 220 : 27); i >= 0; --i) {
1165 /* double */
1166 if (!skip)
1167 point_double(nq[0], nq[1], nq[2], nq[0], nq[1], nq[2]);
1168
1169 /* add multiples of the generator */
1170 if (gen_mul && (i <= 27)) {
1171 /* first, look 28 bits upwards */
1172 bits = get_bit(g_scalar, i + 196) << 3;
1173 bits |= get_bit(g_scalar, i + 140) << 2;
1174 bits |= get_bit(g_scalar, i + 84) << 1;
1175 bits |= get_bit(g_scalar, i + 28);
1176 /* select the point to add, in constant time */
1177 select_point(bits, 16, g_pre_comp[1], tmp);
1178
1179 if (!skip) {
1180 /* value 1 below is argument for "mixed" */
1181 point_add(nq[0], nq[1], nq[2],
1182 nq[0], nq[1], nq[2], 1, tmp[0], tmp[1], tmp[2]);
1183 } else {
1184 memcpy(nq, tmp, 3 * sizeof(felem));
1185 skip = 0;
1186 }
1187
1188 /* second, look at the current position */
1189 bits = get_bit(g_scalar, i + 168) << 3;
1190 bits |= get_bit(g_scalar, i + 112) << 2;
1191 bits |= get_bit(g_scalar, i + 56) << 1;
1192 bits |= get_bit(g_scalar, i);
1193 /* select the point to add, in constant time */
1194 select_point(bits, 16, g_pre_comp[0], tmp);
1195 point_add(nq[0], nq[1], nq[2],
1196 nq[0], nq[1], nq[2],
1197 1 /* mixed */ , tmp[0], tmp[1], tmp[2]);
1198 }
1199
1200 /* do other additions every 5 doublings */
1201 if (num_points && (i % 5 == 0)) {
1202 /* loop over all scalars */
1203 for (num = 0; num < num_points; ++num) {
1204 bits = get_bit(scalars[num], i + 4) << 5;
1205 bits |= get_bit(scalars[num], i + 3) << 4;
1206 bits |= get_bit(scalars[num], i + 2) << 3;
1207 bits |= get_bit(scalars[num], i + 1) << 2;
1208 bits |= get_bit(scalars[num], i) << 1;
1209 bits |= get_bit(scalars[num], i - 1);
1210 ossl_ec_GFp_nistp_recode_scalar_bits(&sign, &digit, bits);
1211
1212 /* select the point to add or subtract */
1213 select_point(digit, 17, pre_comp[num], tmp);
1214 felem_neg(tmp[3], tmp[1]); /* (X, -Y, Z) is the negative
1215 * point */
1216 copy_conditional(tmp[1], tmp[3], sign);
1217
1218 if (!skip) {
1219 point_add(nq[0], nq[1], nq[2],
1220 nq[0], nq[1], nq[2],
1221 mixed, tmp[0], tmp[1], tmp[2]);
1222 } else {
1223 memcpy(nq, tmp, 3 * sizeof(felem));
1224 skip = 0;
1225 }
1226 }
1227 }
1228 }
1229 felem_assign(x_out, nq[0]);
1230 felem_assign(y_out, nq[1]);
1231 felem_assign(z_out, nq[2]);
1232 }
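
/*-
 * A quick sanity check of the generator schedule: in round i (27 >= i >= 0)
 * bit i+56 of g_scalar selects table entries containing 2^56*G (bit 1 of the
 * index), and the remaining i doubling rounds scale that contribution by
 * 2^i, giving 2^(i+56)*G as required; the other seven bit positions work the
 * same way with the weights listed in the g_pre_comp table comment.
 */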
1233
1234 /******************************************************************************/
1235 /*
1236 * FUNCTIONS TO MANAGE PRECOMPUTATION
1237 */
1238
1239 static NISTP224_PRE_COMP *nistp224_pre_comp_new(void)
1240 {
1241 NISTP224_PRE_COMP *ret = OPENSSL_zalloc(sizeof(*ret));
1242
1243 if (!ret) {
1244 ERR_raise(ERR_LIB_EC, ERR_R_MALLOC_FAILURE);
1245 return ret;
1246 }
1247
1248 ret->references = 1;
1249
1250 ret->lock = CRYPTO_THREAD_lock_new();
1251 if (ret->lock == NULL) {
1252 ERR_raise(ERR_LIB_EC, ERR_R_MALLOC_FAILURE);
1253 OPENSSL_free(ret);
1254 return NULL;
1255 }
1256 return ret;
1257 }
1258
1259 NISTP224_PRE_COMP *EC_nistp224_pre_comp_dup(NISTP224_PRE_COMP *p)
1260 {
1261 int i;
1262 if (p != NULL)
1263 CRYPTO_UP_REF(&p->references, &i, p->lock);
1264 return p;
1265 }
1266
1267 void EC_nistp224_pre_comp_free(NISTP224_PRE_COMP *p)
1268 {
1269 int i;
1270
1271 if (p == NULL)
1272 return;
1273
1274 CRYPTO_DOWN_REF(&p->references, &i, p->lock);
1275 REF_PRINT_COUNT("EC_nistp224", p);
1276 if (i > 0)
1277 return;
1278 REF_ASSERT_ISNT(i < 0);
1279
1280 CRYPTO_THREAD_lock_free(p->lock);
1281 OPENSSL_free(p);
1282 }
1283
1284 /******************************************************************************/
1285 /*
1286 * OPENSSL EC_METHOD FUNCTIONS
1287 */
1288
1289 int ossl_ec_GFp_nistp224_group_init(EC_GROUP *group)
1290 {
1291 int ret;
1292 ret = ossl_ec_GFp_simple_group_init(group);
1293 group->a_is_minus3 = 1;
1294 return ret;
1295 }
1296
1297 int ossl_ec_GFp_nistp224_group_set_curve(EC_GROUP *group, const BIGNUM *p,
1298 const BIGNUM *a, const BIGNUM *b,
1299 BN_CTX *ctx)
1300 {
1301 int ret = 0;
1302 BIGNUM *curve_p, *curve_a, *curve_b;
1303 #ifndef FIPS_MODULE
1304 BN_CTX *new_ctx = NULL;
1305
1306 if (ctx == NULL)
1307 ctx = new_ctx = BN_CTX_new();
1308 #endif
1309 if (ctx == NULL)
1310 return 0;
1311
1312 BN_CTX_start(ctx);
1313 curve_p = BN_CTX_get(ctx);
1314 curve_a = BN_CTX_get(ctx);
1315 curve_b = BN_CTX_get(ctx);
1316 if (curve_b == NULL)
1317 goto err;
1318 BN_bin2bn(nistp224_curve_params[0], sizeof(felem_bytearray), curve_p);
1319 BN_bin2bn(nistp224_curve_params[1], sizeof(felem_bytearray), curve_a);
1320 BN_bin2bn(nistp224_curve_params[2], sizeof(felem_bytearray), curve_b);
1321 if ((BN_cmp(curve_p, p)) || (BN_cmp(curve_a, a)) || (BN_cmp(curve_b, b))) {
1322 ERR_raise(ERR_LIB_EC, EC_R_WRONG_CURVE_PARAMETERS);
1323 goto err;
1324 }
1325 group->field_mod_func = BN_nist_mod_224;
1326 ret = ossl_ec_GFp_simple_group_set_curve(group, p, a, b, ctx);
1327 err:
1328 BN_CTX_end(ctx);
1329 #ifndef FIPS_MODULE
1330 BN_CTX_free(new_ctx);
1331 #endif
1332 return ret;
1333 }
1334
1335 /*
1336 * Takes the Jacobian coordinates (X, Y, Z) of a point and returns (X', Y') =
1337 * (X/Z^2, Y/Z^3)
1338 */
1339 int ossl_ec_GFp_nistp224_point_get_affine_coordinates(const EC_GROUP *group,
1340 const EC_POINT *point,
1341 BIGNUM *x, BIGNUM *y,
1342 BN_CTX *ctx)
1343 {
1344 felem z1, z2, x_in, y_in, x_out, y_out;
1345 widefelem tmp;
1346
1347 if (EC_POINT_is_at_infinity(group, point)) {
1348 ERR_raise(ERR_LIB_EC, EC_R_POINT_AT_INFINITY);
1349 return 0;
1350 }
1351 if ((!BN_to_felem(x_in, point->X)) || (!BN_to_felem(y_in, point->Y)) ||
1352 (!BN_to_felem(z1, point->Z)))
1353 return 0;
1354 felem_inv(z2, z1);
1355 felem_square(tmp, z2);
1356 felem_reduce(z1, tmp);
1357 felem_mul(tmp, x_in, z1);
1358 felem_reduce(x_in, tmp);
1359 felem_contract(x_out, x_in);
1360 if (x != NULL) {
1361 if (!felem_to_BN(x, x_out)) {
1362 ERR_raise(ERR_LIB_EC, ERR_R_BN_LIB);
1363 return 0;
1364 }
1365 }
1366 felem_mul(tmp, z1, z2);
1367 felem_reduce(z1, tmp);
1368 felem_mul(tmp, y_in, z1);
1369 felem_reduce(y_in, tmp);
1370 felem_contract(y_out, y_in);
1371 if (y != NULL) {
1372 if (!felem_to_BN(y, y_out)) {
1373 ERR_raise(ERR_LIB_EC, ERR_R_BN_LIB);
1374 return 0;
1375 }
1376 }
1377 return 1;
1378 }
1379
1380 static void make_points_affine(size_t num, felem points[ /* num */ ][3],
1381 felem tmp_felems[ /* num+1 */ ])
1382 {
1383 /*
1384 * Runs in constant time, unless an input is the point at infinity (which
1385 * normally shouldn't happen).
1386 */
1387 ossl_ec_GFp_nistp_points_make_affine_internal(num,
1388 points,
1389 sizeof(felem),
1390 tmp_felems,
1391 (void (*)(void *))felem_one,
1392 felem_is_zero_int,
1393 (void (*)(void *, const void *))
1394 felem_assign,
1395 (void (*)(void *, const void *))
1396 felem_square_reduce, (void (*)
1397 (void *,
1398 const void
1399 *,
1400 const void
1401 *))
1402 felem_mul_reduce,
1403 (void (*)(void *, const void *))
1404 felem_inv,
1405 (void (*)(void *, const void *))
1406 felem_contract);
1407 }
1408
1409 /*
1410 * Computes scalar*generator + \sum scalars[i]*points[i], ignoring NULL
1411 * values. The result is stored in r (r can equal one of the inputs).
1412 */
1413 int ossl_ec_GFp_nistp224_points_mul(const EC_GROUP *group, EC_POINT *r,
1414 const BIGNUM *scalar, size_t num,
1415 const EC_POINT *points[],
1416 const BIGNUM *scalars[], BN_CTX *ctx)
1417 {
1418 int ret = 0;
1419 int j;
1420 unsigned i;
1421 int mixed = 0;
1422 BIGNUM *x, *y, *z, *tmp_scalar;
1423 felem_bytearray g_secret;
1424 felem_bytearray *secrets = NULL;
1425 felem (*pre_comp)[17][3] = NULL;
1426 felem *tmp_felems = NULL;
1427 int num_bytes;
1428 int have_pre_comp = 0;
1429 size_t num_points = num;
1430 felem x_in, y_in, z_in, x_out, y_out, z_out;
1431 NISTP224_PRE_COMP *pre = NULL;
1432 const felem(*g_pre_comp)[16][3] = NULL;
1433 EC_POINT *generator = NULL;
1434 const EC_POINT *p = NULL;
1435 const BIGNUM *p_scalar = NULL;
1436
1437 BN_CTX_start(ctx);
1438 x = BN_CTX_get(ctx);
1439 y = BN_CTX_get(ctx);
1440 z = BN_CTX_get(ctx);
1441 tmp_scalar = BN_CTX_get(ctx);
1442 if (tmp_scalar == NULL)
1443 goto err;
1444
1445 if (scalar != NULL) {
1446 pre = group->pre_comp.nistp224;
1447 if (pre)
1448 /* we have precomputation, try to use it */
1449 g_pre_comp = (const felem(*)[16][3])pre->g_pre_comp;
1450 else
1451 /* try to use the standard precomputation */
1452 g_pre_comp = &gmul[0];
1453 generator = EC_POINT_new(group);
1454 if (generator == NULL)
1455 goto err;
1456 /* get the generator from precomputation */
1457 if (!felem_to_BN(x, g_pre_comp[0][1][0]) ||
1458 !felem_to_BN(y, g_pre_comp[0][1][1]) ||
1459 !felem_to_BN(z, g_pre_comp[0][1][2])) {
1460 ERR_raise(ERR_LIB_EC, ERR_R_BN_LIB);
1461 goto err;
1462 }
1463 if (!ossl_ec_GFp_simple_set_Jprojective_coordinates_GFp(group,
1464 generator,
1465 x, y, z, ctx))
1466 goto err;
1467 if (0 == EC_POINT_cmp(group, generator, group->generator, ctx))
1468 /* precomputation matches generator */
1469 have_pre_comp = 1;
1470 else
1471 /*
1472 * we don't have valid precomputation: treat the generator as a
1473 * random point
1474 */
1475 num_points = num_points + 1;
1476 }
1477
1478 if (num_points > 0) {
1479 if (num_points >= 3) {
1480 /*
1481 * unless we precompute multiples for just one or two points,
1482 * converting those into affine form is time well spent
1483 */
1484 mixed = 1;
1485 }
1486 secrets = OPENSSL_zalloc(sizeof(*secrets) * num_points);
1487 pre_comp = OPENSSL_zalloc(sizeof(*pre_comp) * num_points);
1488 if (mixed)
1489 tmp_felems =
1490 OPENSSL_malloc(sizeof(felem) * (num_points * 17 + 1));
1491 if ((secrets == NULL) || (pre_comp == NULL)
1492 || (mixed && (tmp_felems == NULL))) {
1493 ERR_raise(ERR_LIB_EC, ERR_R_MALLOC_FAILURE);
1494 goto err;
1495 }
1496
1497 /*
1498 * we treat NULL scalars as 0, and NULL points as points at infinity,
1499 * i.e., they contribute nothing to the linear combination
1500 */
1501 for (i = 0; i < num_points; ++i) {
1502 if (i == num) {
1503 /* the generator */
1504 p = EC_GROUP_get0_generator(group);
1505 p_scalar = scalar;
1506 } else {
1507 /* the i^th point */
1508 p = points[i];
1509 p_scalar = scalars[i];
1510 }
1511 if ((p_scalar != NULL) && (p != NULL)) {
1512 /* reduce scalar to 0 <= scalar < 2^224 */
1513 if ((BN_num_bits(p_scalar) > 224)
1514 || (BN_is_negative(p_scalar))) {
1515 /*
1516 * this is an unusual input, and we don't guarantee
1517 * constant-timeness
1518 */
1519 if (!BN_nnmod(tmp_scalar, p_scalar, group->order, ctx)) {
1520 ERR_raise(ERR_LIB_EC, ERR_R_BN_LIB);
1521 goto err;
1522 }
1523 num_bytes = BN_bn2lebinpad(tmp_scalar,
1524 secrets[i], sizeof(secrets[i]));
1525 } else {
1526 num_bytes = BN_bn2lebinpad(p_scalar,
1527 secrets[i], sizeof(secrets[i]));
1528 }
1529 if (num_bytes < 0) {
1530 ERR_raise(ERR_LIB_EC, ERR_R_BN_LIB);
1531 goto err;
1532 }
1533 /* precompute multiples */
1534 if ((!BN_to_felem(x_out, p->X)) ||
1535 (!BN_to_felem(y_out, p->Y)) ||
1536 (!BN_to_felem(z_out, p->Z)))
1537 goto err;
1538 felem_assign(pre_comp[i][1][0], x_out);
1539 felem_assign(pre_comp[i][1][1], y_out);
1540 felem_assign(pre_comp[i][1][2], z_out);
1541 for (j = 2; j <= 16; ++j) {
1542 if (j & 1) {
1543 point_add(pre_comp[i][j][0], pre_comp[i][j][1],
1544 pre_comp[i][j][2], pre_comp[i][1][0],
1545 pre_comp[i][1][1], pre_comp[i][1][2], 0,
1546 pre_comp[i][j - 1][0],
1547 pre_comp[i][j - 1][1],
1548 pre_comp[i][j - 1][2]);
1549 } else {
1550 point_double(pre_comp[i][j][0], pre_comp[i][j][1],
1551 pre_comp[i][j][2], pre_comp[i][j / 2][0],
1552 pre_comp[i][j / 2][1],
1553 pre_comp[i][j / 2][2]);
1554 }
1555 }
1556 }
1557 }
1558 if (mixed)
1559 make_points_affine(num_points * 17, pre_comp[0], tmp_felems);
1560 }
1561
1562 /* the scalar for the generator */
1563 if ((scalar != NULL) && (have_pre_comp)) {
1564 memset(g_secret, 0, sizeof(g_secret));
1565 /* reduce scalar to 0 <= scalar < 2^224 */
1566 if ((BN_num_bits(scalar) > 224) || (BN_is_negative(scalar))) {
1567 /*
1568 * this is an unusual input, and we don't guarantee
1569 * constant-timeness
1570 */
1571 if (!BN_nnmod(tmp_scalar, scalar, group->order, ctx)) {
1572 ERR_raise(ERR_LIB_EC, ERR_R_BN_LIB);
1573 goto err;
1574 }
1575 num_bytes = BN_bn2lebinpad(tmp_scalar, g_secret, sizeof(g_secret));
1576 } else {
1577 num_bytes = BN_bn2lebinpad(scalar, g_secret, sizeof(g_secret));
1578 }
1579 /* do the multiplication with generator precomputation */
1580 batch_mul(x_out, y_out, z_out,
1581 (const felem_bytearray(*))secrets, num_points,
1582 g_secret,
1583 mixed, (const felem(*)[17][3])pre_comp, g_pre_comp);
1584 } else {
1585 /* do the multiplication without generator precomputation */
1586 batch_mul(x_out, y_out, z_out,
1587 (const felem_bytearray(*))secrets, num_points,
1588 NULL, mixed, (const felem(*)[17][3])pre_comp, NULL);
1589 }
1590 /* reduce the output to its unique minimal representation */
1591 felem_contract(x_in, x_out);
1592 felem_contract(y_in, y_out);
1593 felem_contract(z_in, z_out);
1594 if ((!felem_to_BN(x, x_in)) || (!felem_to_BN(y, y_in)) ||
1595 (!felem_to_BN(z, z_in))) {
1596 ERR_raise(ERR_LIB_EC, ERR_R_BN_LIB);
1597 goto err;
1598 }
1599 ret = ossl_ec_GFp_simple_set_Jprojective_coordinates_GFp(group, r, x, y, z,
1600 ctx);
1601
1602 err:
1603 BN_CTX_end(ctx);
1604 EC_POINT_free(generator);
1605 OPENSSL_free(secrets);
1606 OPENSSL_free(pre_comp);
1607 OPENSSL_free(tmp_felems);
1608 return ret;
1609 }
1610
1611 int ossl_ec_GFp_nistp224_precompute_mult(EC_GROUP *group, BN_CTX *ctx)
1612 {
1613 int ret = 0;
1614 NISTP224_PRE_COMP *pre = NULL;
1615 int i, j;
1616 BIGNUM *x, *y;
1617 EC_POINT *generator = NULL;
1618 felem tmp_felems[32];
1619 #ifndef FIPS_MODULE
1620 BN_CTX *new_ctx = NULL;
1621 #endif
1622
1623 /* throw away old precomputation */
1624 EC_pre_comp_free(group);
1625
1626 #ifndef FIPS_MODULE
1627 if (ctx == NULL)
1628 ctx = new_ctx = BN_CTX_new();
1629 #endif
1630 if (ctx == NULL)
1631 return 0;
1632
1633 BN_CTX_start(ctx);
1634 x = BN_CTX_get(ctx);
1635 y = BN_CTX_get(ctx);
1636 if (y == NULL)
1637 goto err;
1638 /* get the generator */
1639 if (group->generator == NULL)
1640 goto err;
1641 generator = EC_POINT_new(group);
1642 if (generator == NULL)
1643 goto err;
1644 BN_bin2bn(nistp224_curve_params[3], sizeof(felem_bytearray), x);
1645 BN_bin2bn(nistp224_curve_params[4], sizeof(felem_bytearray), y);
1646 if (!EC_POINT_set_affine_coordinates(group, generator, x, y, ctx))
1647 goto err;
1648 if ((pre = nistp224_pre_comp_new()) == NULL)
1649 goto err;
1650 /*
1651 * if the generator is the standard one, use built-in precomputation
1652 */
1653 if (0 == EC_POINT_cmp(group, generator, group->generator, ctx)) {
1654 memcpy(pre->g_pre_comp, gmul, sizeof(pre->g_pre_comp));
1655 goto done;
1656 }
1657 if ((!BN_to_felem(pre->g_pre_comp[0][1][0], group->generator->X)) ||
1658 (!BN_to_felem(pre->g_pre_comp[0][1][1], group->generator->Y)) ||
1659 (!BN_to_felem(pre->g_pre_comp[0][1][2], group->generator->Z)))
1660 goto err;
1661 /*
1662 * compute 2^56*G, 2^112*G, 2^168*G for the first table, 2^28*G, 2^84*G,
1663 * 2^140*G, 2^196*G for the second one
1664 */
1665 for (i = 1; i <= 8; i <<= 1) {
1666 point_double(pre->g_pre_comp[1][i][0], pre->g_pre_comp[1][i][1],
1667 pre->g_pre_comp[1][i][2], pre->g_pre_comp[0][i][0],
1668 pre->g_pre_comp[0][i][1], pre->g_pre_comp[0][i][2]);
1669 for (j = 0; j < 27; ++j) {
1670 point_double(pre->g_pre_comp[1][i][0], pre->g_pre_comp[1][i][1],
1671 pre->g_pre_comp[1][i][2], pre->g_pre_comp[1][i][0],
1672 pre->g_pre_comp[1][i][1], pre->g_pre_comp[1][i][2]);
1673 }
1674 if (i == 8)
1675 break;
1676 point_double(pre->g_pre_comp[0][2 * i][0],
1677 pre->g_pre_comp[0][2 * i][1],
1678 pre->g_pre_comp[0][2 * i][2], pre->g_pre_comp[1][i][0],
1679 pre->g_pre_comp[1][i][1], pre->g_pre_comp[1][i][2]);
1680 for (j = 0; j < 27; ++j) {
1681 point_double(pre->g_pre_comp[0][2 * i][0],
1682 pre->g_pre_comp[0][2 * i][1],
1683 pre->g_pre_comp[0][2 * i][2],
1684 pre->g_pre_comp[0][2 * i][0],
1685 pre->g_pre_comp[0][2 * i][1],
1686 pre->g_pre_comp[0][2 * i][2]);
1687 }
1688 }
1689 for (i = 0; i < 2; i++) {
1690 /* g_pre_comp[i][0] is the point at infinity */
1691 memset(pre->g_pre_comp[i][0], 0, sizeof(pre->g_pre_comp[i][0]));
1692 /* the remaining multiples */
1693 /* 2^56*G + 2^112*G resp. 2^84*G + 2^140*G */
1694 point_add(pre->g_pre_comp[i][6][0], pre->g_pre_comp[i][6][1],
1695 pre->g_pre_comp[i][6][2], pre->g_pre_comp[i][4][0],
1696 pre->g_pre_comp[i][4][1], pre->g_pre_comp[i][4][2],
1697 0, pre->g_pre_comp[i][2][0], pre->g_pre_comp[i][2][1],
1698 pre->g_pre_comp[i][2][2]);
1699 /* 2^56*G + 2^168*G resp. 2^84*G + 2^196*G */
1700 point_add(pre->g_pre_comp[i][10][0], pre->g_pre_comp[i][10][1],
1701 pre->g_pre_comp[i][10][2], pre->g_pre_comp[i][8][0],
1702 pre->g_pre_comp[i][8][1], pre->g_pre_comp[i][8][2],
1703 0, pre->g_pre_comp[i][2][0], pre->g_pre_comp[i][2][1],
1704 pre->g_pre_comp[i][2][2]);
1705 /* 2^112*G + 2^168*G resp. 2^140*G + 2^196*G */
1706 point_add(pre->g_pre_comp[i][12][0], pre->g_pre_comp[i][12][1],
1707 pre->g_pre_comp[i][12][2], pre->g_pre_comp[i][8][0],
1708 pre->g_pre_comp[i][8][1], pre->g_pre_comp[i][8][2],
1709 0, pre->g_pre_comp[i][4][0], pre->g_pre_comp[i][4][1],
1710 pre->g_pre_comp[i][4][2]);
1711 /*
1712 * 2^56*G + 2^112*G + 2^168*G resp. 2^84*G + 2^140*G + 2^196*G
1713 */
1714 point_add(pre->g_pre_comp[i][14][0], pre->g_pre_comp[i][14][1],
1715 pre->g_pre_comp[i][14][2], pre->g_pre_comp[i][12][0],
1716 pre->g_pre_comp[i][12][1], pre->g_pre_comp[i][12][2],
1717 0, pre->g_pre_comp[i][2][0], pre->g_pre_comp[i][2][1],
1718 pre->g_pre_comp[i][2][2]);
1719 for (j = 1; j < 8; ++j) {
1720 /* odd multiples: add G resp. 2^28*G */
1721 point_add(pre->g_pre_comp[i][2 * j + 1][0],
1722 pre->g_pre_comp[i][2 * j + 1][1],
1723 pre->g_pre_comp[i][2 * j + 1][2],
1724 pre->g_pre_comp[i][2 * j][0],
1725 pre->g_pre_comp[i][2 * j][1],
1726 pre->g_pre_comp[i][2 * j][2], 0,
1727 pre->g_pre_comp[i][1][0], pre->g_pre_comp[i][1][1],
1728 pre->g_pre_comp[i][1][2]);
1729 }
1730 }
1731 make_points_affine(31, &(pre->g_pre_comp[0][1]), tmp_felems);
1732
1733 done:
1734 SETPRECOMP(group, nistp224, pre);
1735 pre = NULL;
1736 ret = 1;
1737 err:
1738 BN_CTX_end(ctx);
1739 EC_POINT_free(generator);
1740 #ifndef FIPS_MODULE
1741 BN_CTX_free(new_ctx);
1742 #endif
1743 EC_nistp224_pre_comp_free(pre);
1744 return ret;
1745 }
1746
1747 int ossl_ec_GFp_nistp224_have_precompute_mult(const EC_GROUP *group)
1748 {
1749 return HAVEPRECOMP(group, nistp224);
1750 }