]> git.ipfire.org Git - thirdparty/openssl.git/blob - crypto/ec/ecp_nistp224.c
121f587b58b644d4e770dcb3927268d014fde5df
[thirdparty/openssl.git] / crypto / ec / ecp_nistp224.c
1 /* crypto/ec/ecp_nistp224.c */
2 /*
3 * Written by Emilia Kasper (Google) for the OpenSSL project.
4 */
5 /* Copyright 2011 Google Inc.
6 *
7 * Licensed under the Apache License, Version 2.0 (the "License");
8 *
9 * you may not use this file except in compliance with the License.
10 * You may obtain a copy of the License at
11 *
12 * http://www.apache.org/licenses/LICENSE-2.0
13 *
14 * Unless required by applicable law or agreed to in writing, software
15 * distributed under the License is distributed on an "AS IS" BASIS,
16 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 * See the License for the specific language governing permissions and
18 * limitations under the License.
19 */
20
21 /*
22 * A 64-bit implementation of the NIST P-224 elliptic curve point multiplication
23 *
24 * Inspired by Daniel J. Bernstein's public domain nistp224 implementation
25 * and Adam Langley's public domain 64-bit C implementation of curve25519
26 */
27
28 #include <openssl/opensslconf.h>
29 #ifndef OPENSSL_NO_EC_NISTP_64_GCC_128
30
31 # ifndef OPENSSL_SYS_VMS
32 # include <stdint.h>
33 # else
34 # include <inttypes.h>
35 # endif
36
37 # include <string.h>
38 # include <openssl/err.h>
39 # include "ec_lcl.h"
40
# if defined(__GNUC__) && (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 1))
/* even with gcc, the typedef won't work for 32-bit platforms */
typedef __uint128_t uint128_t;  /* nonstandard; implemented by gcc on 64-bit
                                 * platforms */
# else
#  error "Need GCC 3.1 or later to define type uint128_t"
# endif

typedef uint8_t u8;
typedef uint64_t u64;

/******************************************************************************/
/*-
 * INTERNAL REPRESENTATION OF FIELD ELEMENTS
 *
 * Field elements are represented as a_0 + 2^56*a_1 + 2^112*a_2 + 2^168*a_3
 * using 64-bit coefficients called 'limbs',
 * and sometimes (for multiplication results) as
 * b_0 + 2^56*b_1 + 2^112*b_2 + 2^168*b_3 + 2^224*b_4 + 2^280*b_5 + 2^336*b_6
 * using 128-bit coefficients called 'widelimbs'.
 * A 4-limb representation is an 'felem';
 * a 7-widelimb representation is a 'widefelem'.
 * Even within felems, bits of adjacent limbs overlap, and we don't always
 * reduce the representations: we ensure that inputs to each felem
 * multiplication satisfy a_i < 2^60, so outputs satisfy b_i < 4*2^60*2^60,
 * and fit into a 128-bit word without overflow. The coefficients are then
 * again partially reduced to obtain an felem satisfying a_i < 2^57.
 * We only reduce to the unique minimal representation at the end of the
 * computation.
 */

typedef uint64_t limb;
typedef uint128_t widelimb;

typedef limb felem[4];
typedef widelimb widefelem[7];

/*
 * Field element represented as a byte array. 28*8 = 224 bits is also the
 * group order size for the elliptic curve, and we also use this type for
 * scalars for point multiplication.
 */
typedef u8 felem_bytearray[28];
84
/*
 * NIST P-224 domain parameters (p, a, b, generator x, generator y), each as
 * a 28-byte big-endian value; note p = 2^224 - 2^96 + 1 and a = p - 3.
 */
static const felem_bytearray nistp224_curve_params[5] = {
    {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, /* p */
     0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00,
     0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01},
    {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, /* a */
     0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFE, 0xFF, 0xFF, 0xFF, 0xFF,
     0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFE},
    {0xB4, 0x05, 0x0A, 0x85, 0x0C, 0x04, 0xB3, 0xAB, 0xF5, 0x41, /* b */
     0x32, 0x56, 0x50, 0x44, 0xB0, 0xB7, 0xD7, 0xBF, 0xD8, 0xBA,
     0x27, 0x0B, 0x39, 0x43, 0x23, 0x55, 0xFF, 0xB4},
    {0xB7, 0x0E, 0x0C, 0xBD, 0x6B, 0xB4, 0xBF, 0x7F, 0x32, 0x13, /* x */
     0x90, 0xB9, 0x4A, 0x03, 0xC1, 0xD3, 0x56, 0xC2, 0x11, 0x22,
     0x34, 0x32, 0x80, 0xD6, 0x11, 0x5C, 0x1D, 0x21},
    {0xbd, 0x37, 0x63, 0x88, 0xb5, 0xf7, 0x23, 0xfb, 0x4c, 0x22, /* y */
     0xdf, 0xe6, 0xcd, 0x43, 0x75, 0xa0, 0x5a, 0x07, 0x47, 0x64,
     0x44, 0xd5, 0x81, 0x99, 0x85, 0x00, 0x7e, 0x34}
};
102
103 /*-
104 * Precomputed multiples of the standard generator
105 * Points are given in coordinates (X, Y, Z) where Z normally is 1
106 * (0 for the point at infinity).
107 * For each field element, slice a_0 is word 0, etc.
108 *
109 * The table has 2 * 16 elements, starting with the following:
110 * index | bits | point
111 * ------+---------+------------------------------
112 * 0 | 0 0 0 0 | 0G
113 * 1 | 0 0 0 1 | 1G
114 * 2 | 0 0 1 0 | 2^56G
115 * 3 | 0 0 1 1 | (2^56 + 1)G
116 * 4 | 0 1 0 0 | 2^112G
117 * 5 | 0 1 0 1 | (2^112 + 1)G
118 * 6 | 0 1 1 0 | (2^112 + 2^56)G
119 * 7 | 0 1 1 1 | (2^112 + 2^56 + 1)G
120 * 8 | 1 0 0 0 | 2^168G
121 * 9 | 1 0 0 1 | (2^168 + 1)G
122 * 10 | 1 0 1 0 | (2^168 + 2^56)G
123 * 11 | 1 0 1 1 | (2^168 + 2^56 + 1)G
124 * 12 | 1 1 0 0 | (2^168 + 2^112)G
125 * 13 | 1 1 0 1 | (2^168 + 2^112 + 1)G
126 * 14 | 1 1 1 0 | (2^168 + 2^112 + 2^56)G
127 * 15 | 1 1 1 1 | (2^168 + 2^112 + 2^56 + 1)G
128 * followed by a copy of this with each element multiplied by 2^28.
129 *
130 * The reason for this is so that we can clock bits into four different
131 * locations when doing simple scalar multiplies against the base point,
132 * and then another four locations using the second 16 elements.
133 */
/*
 * Precomputed generator multiples; see the table layout description above.
 * gmul[0] holds the 16 sums of {0,1}*{1, 2^56, 2^112, 2^168}*G in (X, Y, Z)
 * form (Z = 1, or all-zero for the point at infinity); gmul[1] holds the
 * same combinations multiplied by 2^28.  Limbs are little-endian: slice
 * a_0 is word 0.
 */
static const felem gmul[2][16][3] = {
    {{{0, 0, 0, 0},
      {0, 0, 0, 0},
      {0, 0, 0, 0}},
     {{0x3280d6115c1d21, 0xc1d356c2112234, 0x7f321390b94a03, 0xb70e0cbd6bb4bf},
      {0xd5819985007e34, 0x75a05a07476444, 0xfb4c22dfe6cd43, 0xbd376388b5f723},
      {1, 0, 0, 0}},
     {{0xfd9675666ebbe9, 0xbca7664d40ce5e, 0x2242df8d8a2a43, 0x1f49bbb0f99bc5},
      {0x29e0b892dc9c43, 0xece8608436e662, 0xdc858f185310d0, 0x9812dd4eb8d321},
      {1, 0, 0, 0}},
     {{0x6d3e678d5d8eb8, 0x559eed1cb362f1, 0x16e9a3bbce8a3f, 0xeedcccd8c2a748},
      {0xf19f90ed50266d, 0xabf2b4bf65f9df, 0x313865468fafec, 0x5cb379ba910a17},
      {1, 0, 0, 0}},
     {{0x0641966cab26e3, 0x91fb2991fab0a0, 0xefec27a4e13a0b, 0x0499aa8a5f8ebe},
      {0x7510407766af5d, 0x84d929610d5450, 0x81d77aae82f706, 0x6916f6d4338c5b},
      {1, 0, 0, 0}},
     {{0xea95ac3b1f15c6, 0x086000905e82d4, 0xdd323ae4d1c8b1, 0x932b56be7685a3},
      {0x9ef93dea25dbbf, 0x41665960f390f0, 0xfdec76dbe2a8a7, 0x523e80f019062a},
      {1, 0, 0, 0}},
     {{0x822fdd26732c73, 0xa01c83531b5d0f, 0x363f37347c1ba4, 0xc391b45c84725c},
      {0xbbd5e1b2d6ad24, 0xddfbcde19dfaec, 0xc393da7e222a7f, 0x1efb7890ede244},
      {1, 0, 0, 0}},
     {{0x4c9e90ca217da1, 0xd11beca79159bb, 0xff8d33c2c98b7c, 0x2610b39409f849},
      {0x44d1352ac64da0, 0xcdbb7b2c46b4fb, 0x966c079b753c89, 0xfe67e4e820b112},
      {1, 0, 0, 0}},
     {{0xe28cae2df5312d, 0xc71b61d16f5c6e, 0x79b7619a3e7c4c, 0x05c73240899b47},
      {0x9f7f6382c73e3a, 0x18615165c56bda, 0x641fab2116fd56, 0x72855882b08394},
      {1, 0, 0, 0}},
     {{0x0469182f161c09, 0x74a98ca8d00fb5, 0xb89da93489a3e0, 0x41c98768fb0c1d},
      {0xe5ea05fb32da81, 0x3dce9ffbca6855, 0x1cfe2d3fbf59e6, 0x0e5e03408738a7},
      {1, 0, 0, 0}},
     {{0xdab22b2333e87f, 0x4430137a5dd2f6, 0xe03ab9f738beb8, 0xcb0c5d0dc34f24},
      {0x764a7df0c8fda5, 0x185ba5c3fa2044, 0x9281d688bcbe50, 0xc40331df893881},
      {1, 0, 0, 0}},
     {{0xb89530796f0f60, 0xade92bd26909a3, 0x1a0c83fb4884da, 0x1765bf22a5a984},
      {0x772a9ee75db09e, 0x23bc6c67cec16f, 0x4c1edba8b14e2f, 0xe2a215d9611369},
      {1, 0, 0, 0}},
     {{0x571e509fb5efb3, 0xade88696410552, 0xc8ae85fada74fe, 0x6c7e4be83bbde3},
      {0xff9f51160f4652, 0xb47ce2495a6539, 0xa2946c53b582f4, 0x286d2db3ee9a60},
      {1, 0, 0, 0}},
     {{0x40bbd5081a44af, 0x0995183b13926c, 0xbcefba6f47f6d0, 0x215619e9cc0057},
      {0x8bc94d3b0df45e, 0xf11c54a3694f6f, 0x8631b93cdfe8b5, 0xe7e3f4b0982db9},
      {1, 0, 0, 0}},
     {{0xb17048ab3e1c7b, 0xac38f36ff8a1d8, 0x1c29819435d2c6, 0xc813132f4c07e9},
      {0x2891425503b11f, 0x08781030579fea, 0xf5426ba5cc9674, 0x1e28ebf18562bc},
      {1, 0, 0, 0}},
     {{0x9f31997cc864eb, 0x06cd91d28b5e4c, 0xff17036691a973, 0xf1aef351497c58},
      {0xdd1f2d600564ff, 0xdead073b1402db, 0x74a684435bd693, 0xeea7471f962558},
      {1, 0, 0, 0}}},
    /* Second half: the same 16 combinations, each multiplied by 2^28 */
    {{{0, 0, 0, 0},
      {0, 0, 0, 0},
      {0, 0, 0, 0}},
     {{0x9665266dddf554, 0x9613d78b60ef2d, 0xce27a34cdba417, 0xd35ab74d6afc31},
      {0x85ccdd22deb15e, 0x2137e5783a6aab, 0xa141cffd8c93c6, 0x355a1830e90f2d},
      {1, 0, 0, 0}},
     {{0x1a494eadaade65, 0xd6da4da77fe53c, 0xe7992996abec86, 0x65c3553c6090e3},
      {0xfa610b1fb09346, 0xf1c6540b8a4aaf, 0xc51a13ccd3cbab, 0x02995b1b18c28a},
      {1, 0, 0, 0}},
     {{0x7874568e7295ef, 0x86b419fbe38d04, 0xdc0690a7550d9a, 0xd3966a44beac33},
      {0x2b7280ec29132f, 0xbeaa3b6a032df3, 0xdc7dd88ae41200, 0xd25e2513e3a100},
      {1, 0, 0, 0}},
     {{0x924857eb2efafd, 0xac2bce41223190, 0x8edaa1445553fc, 0x825800fd3562d5},
      {0x8d79148ea96621, 0x23a01c3dd9ed8d, 0xaf8b219f9416b5, 0xd8db0cc277daea},
      {1, 0, 0, 0}},
     {{0x76a9c3b1a700f0, 0xe9acd29bc7e691, 0x69212d1a6b0327, 0x6322e97fe154be},
      {0x469fc5465d62aa, 0x8d41ed18883b05, 0x1f8eae66c52b88, 0xe4fcbe9325be51},
      {1, 0, 0, 0}},
     {{0x825fdf583cac16, 0x020b857c7b023a, 0x683c17744b0165, 0x14ffd0a2daf2f1},
      {0x323b36184218f9, 0x4944ec4e3b47d4, 0xc15b3080841acf, 0x0bced4b01a28bb},
      {1, 0, 0, 0}},
     {{0x92ac22230df5c4, 0x52f33b4063eda8, 0xcb3f19870c0c93, 0x40064f2ba65233},
      {0xfe16f0924f8992, 0x012da25af5b517, 0x1a57bb24f723a6, 0x06f8bc76760def},
      {1, 0, 0, 0}},
     {{0x4a7084f7817cb9, 0xbcab0738ee9a78, 0x3ec11e11d9c326, 0xdc0fe90e0f1aae},
      {0xcf639ea5f98390, 0x5c350aa22ffb74, 0x9afae98a4047b7, 0x956ec2d617fc45},
      {1, 0, 0, 0}},
     {{0x4306d648c1be6a, 0x9247cd8bc9a462, 0xf5595e377d2f2e, 0xbd1c3caff1a52e},
      {0x045e14472409d0, 0x29f3e17078f773, 0x745a602b2d4f7d, 0x191837685cdfbb},
      {1, 0, 0, 0}},
     {{0x5b6ee254a8cb79, 0x4953433f5e7026, 0xe21faeb1d1def4, 0xc4c225785c09de},
      {0x307ce7bba1e518, 0x31b125b1036db8, 0x47e91868839e8f, 0xc765866e33b9f3},
      {1, 0, 0, 0}},
     {{0x3bfece24f96906, 0x4794da641e5093, 0xde5df64f95db26, 0x297ecd89714b05},
      {0x701bd3ebb2c3aa, 0x7073b4f53cb1d5, 0x13c5665658af16, 0x9895089d66fe58},
      {1, 0, 0, 0}},
     {{0x0fef05f78c4790, 0x2d773633b05d2e, 0x94229c3a951c94, 0xbbbd70df4911bb},
      {0xb2c6963d2c1168, 0x105f47a72b0d73, 0x9fdf6111614080, 0x7b7e94b39e67b0},
      {1, 0, 0, 0}},
     {{0xad1a7d6efbe2b3, 0xf012482c0da69d, 0x6b3bdf12438345, 0x40d7558d7aa4d9},
      {0x8a09fffb5c6d3d, 0x9a356e5d9ffd38, 0x5973f15f4f9b1c, 0xdcd5f59f63c3ea},
      {1, 0, 0, 0}},
     {{0xacf39f4c5ca7ab, 0x4c8071cc5fd737, 0xc64e3602cd1184, 0x0acd4644c9abba},
      {0x6c011a36d8bf6e, 0xfecd87ba24e32a, 0x19f6f56574fad8, 0x050b204ced9405},
      {1, 0, 0, 0}},
     {{0xed4f1cae7d9a96, 0x5ceef7ad94c40a, 0x778e4a3bf3ef9b, 0x7405783dc3b55e},
      {0x32477c61b6e8c6, 0xb46a97570f018b, 0x91176d0a7e95d1, 0x3df90fbc4c7d0e},
      {1, 0, 0, 0}}}
};
261
/* Precomputation for the group generator. */
typedef struct {
    felem g_pre_comp[2][16][3]; /* same layout as the static 'gmul' table */
    int references;             /* reference count */
} NISTP224_PRE_COMP;
267
/*
 * Return the EC_METHOD for this constant-time P-224 implementation.  Most
 * slots reuse the generic GFp "simple" routines; the nistp224-specific
 * entries are group init/set_curve, affine-coordinate extraction, point
 * multiplication and generator precomputation.
 */
const EC_METHOD *EC_GFp_nistp224_method(void)
{
    static const EC_METHOD ret = {
        EC_FLAGS_DEFAULT_OCT,
        NID_X9_62_prime_field,
        ec_GFp_nistp224_group_init,
        ec_GFp_simple_group_finish,
        ec_GFp_simple_group_clear_finish,
        ec_GFp_nist_group_copy,
        ec_GFp_nistp224_group_set_curve,
        ec_GFp_simple_group_get_curve,
        ec_GFp_simple_group_get_degree,
        ec_GFp_simple_group_check_discriminant,
        ec_GFp_simple_point_init,
        ec_GFp_simple_point_finish,
        ec_GFp_simple_point_clear_finish,
        ec_GFp_simple_point_copy,
        ec_GFp_simple_point_set_to_infinity,
        ec_GFp_simple_set_Jprojective_coordinates_GFp,
        ec_GFp_simple_get_Jprojective_coordinates_GFp,
        ec_GFp_simple_point_set_affine_coordinates,
        ec_GFp_nistp224_point_get_affine_coordinates,
        0 /* point_set_compressed_coordinates */ ,
        0 /* point2oct */ ,
        0 /* oct2point */ ,
        ec_GFp_simple_add,
        ec_GFp_simple_dbl,
        ec_GFp_simple_invert,
        ec_GFp_simple_is_at_infinity,
        ec_GFp_simple_is_on_curve,
        ec_GFp_simple_cmp,
        ec_GFp_simple_make_affine,
        ec_GFp_simple_points_make_affine,
        ec_GFp_nistp224_points_mul,
        ec_GFp_nistp224_precompute_mult,
        ec_GFp_nistp224_have_precompute_mult,
        ec_GFp_nist_field_mul,
        ec_GFp_nist_field_sqr,
        0 /* field_div */ ,
        0 /* field_encode */ ,
        0 /* field_decode */ ,
        0 /* field_set_to_one */
    };

    return &ret;
}
314
315 /*
316 * Helper functions to convert field elements to/from internal representation
317 */
318 static void bin28_to_felem(felem out, const u8 in[28])
319 {
320 out[0] = *((const uint64_t *)(in)) & 0x00ffffffffffffff;
321 out[1] = (*((const uint64_t *)(in + 7))) & 0x00ffffffffffffff;
322 out[2] = (*((const uint64_t *)(in + 14))) & 0x00ffffffffffffff;
323 out[3] = (*((const uint64_t *)(in+20))) >> 8;
324 }
325
326 static void felem_to_bin28(u8 out[28], const felem in)
327 {
328 unsigned i;
329 for (i = 0; i < 7; ++i) {
330 out[i] = in[0] >> (8 * i);
331 out[i + 7] = in[1] >> (8 * i);
332 out[i + 14] = in[2] >> (8 * i);
333 out[i + 21] = in[3] >> (8 * i);
334 }
335 }
336
337 /* To preserve endianness when using BN_bn2bin and BN_bin2bn */
338 static void flip_endian(u8 *out, const u8 *in, unsigned len)
339 {
340 unsigned i;
341 for (i = 0; i < len; ++i)
342 out[i] = in[len - 1 - i];
343 }
344
345 /* From OpenSSL BIGNUM to internal representation */
346 static int BN_to_felem(felem out, const BIGNUM *bn)
347 {
348 felem_bytearray b_in;
349 felem_bytearray b_out;
350 unsigned num_bytes;
351
352 /* BN_bn2bin eats leading zeroes */
353 memset(b_out, 0, sizeof(b_out));
354 num_bytes = BN_num_bytes(bn);
355 if (num_bytes > sizeof(b_out)) {
356 ECerr(EC_F_BN_TO_FELEM, EC_R_BIGNUM_OUT_OF_RANGE);
357 return 0;
358 }
359 if (BN_is_negative(bn)) {
360 ECerr(EC_F_BN_TO_FELEM, EC_R_BIGNUM_OUT_OF_RANGE);
361 return 0;
362 }
363 num_bytes = BN_bn2bin(bn, b_in);
364 flip_endian(b_out, b_in, num_bytes);
365 bin28_to_felem(out, b_out);
366 return 1;
367 }
368
369 /* From internal representation to OpenSSL BIGNUM */
370 static BIGNUM *felem_to_BN(BIGNUM *out, const felem in)
371 {
372 felem_bytearray b_in, b_out;
373 felem_to_bin28(b_in, in);
374 flip_endian(b_out, b_in, sizeof(b_out));
375 return BN_bin2bn(b_out, sizeof(b_out), out);
376 }
377
378 /******************************************************************************/
379 /*-
380 * FIELD OPERATIONS
381 *
382 * Field operations, using the internal representation of field elements.
383 * NB! These operations are specific to our point multiplication and cannot be
384 * expected to be correct in general - e.g., multiplication with a large scalar
385 * will cause an overflow.
386 *
387 */
388
389 static void felem_one(felem out)
390 {
391 out[0] = 1;
392 out[1] = 0;
393 out[2] = 0;
394 out[3] = 0;
395 }
396
397 static void felem_assign(felem out, const felem in)
398 {
399 out[0] = in[0];
400 out[1] = in[1];
401 out[2] = in[2];
402 out[3] = in[3];
403 }
404
405 /* Sum two field elements: out += in */
406 static void felem_sum(felem out, const felem in)
407 {
408 out[0] += in[0];
409 out[1] += in[1];
410 out[2] += in[2];
411 out[3] += in[3];
412 }
413
414 /* Get negative value: out = -in */
415 /* Assumes in[i] < 2^57 */
416 static void felem_neg(felem out, const felem in)
417 {
418 static const limb two58p2 = (((limb) 1) << 58) + (((limb) 1) << 2);
419 static const limb two58m2 = (((limb) 1) << 58) - (((limb) 1) << 2);
420 static const limb two58m42m2 = (((limb) 1) << 58) -
421 (((limb) 1) << 42) - (((limb) 1) << 2);
422
423 /* Set to 0 mod 2^224-2^96+1 to ensure out > in */
424 out[0] = two58p2 - in[0];
425 out[1] = two58m42m2 - in[1];
426 out[2] = two58m2 - in[2];
427 out[3] = two58m2 - in[3];
428 }
429
430 /* Subtract field elements: out -= in */
431 /* Assumes in[i] < 2^57 */
432 static void felem_diff(felem out, const felem in)
433 {
434 static const limb two58p2 = (((limb) 1) << 58) + (((limb) 1) << 2);
435 static const limb two58m2 = (((limb) 1) << 58) - (((limb) 1) << 2);
436 static const limb two58m42m2 = (((limb) 1) << 58) -
437 (((limb) 1) << 42) - (((limb) 1) << 2);
438
439 /* Add 0 mod 2^224-2^96+1 to ensure out > in */
440 out[0] += two58p2;
441 out[1] += two58m42m2;
442 out[2] += two58m2;
443 out[3] += two58m2;
444
445 out[0] -= in[0];
446 out[1] -= in[1];
447 out[2] -= in[2];
448 out[3] -= in[3];
449 }
450
451 /* Subtract in unreduced 128-bit mode: out -= in */
452 /* Assumes in[i] < 2^119 */
453 static void widefelem_diff(widefelem out, const widefelem in)
454 {
455 static const widelimb two120 = ((widelimb) 1) << 120;
456 static const widelimb two120m64 = (((widelimb) 1) << 120) -
457 (((widelimb) 1) << 64);
458 static const widelimb two120m104m64 = (((widelimb) 1) << 120) -
459 (((widelimb) 1) << 104) - (((widelimb) 1) << 64);
460
461 /* Add 0 mod 2^224-2^96+1 to ensure out > in */
462 out[0] += two120;
463 out[1] += two120m64;
464 out[2] += two120m64;
465 out[3] += two120;
466 out[4] += two120m104m64;
467 out[5] += two120m64;
468 out[6] += two120m64;
469
470 out[0] -= in[0];
471 out[1] -= in[1];
472 out[2] -= in[2];
473 out[3] -= in[3];
474 out[4] -= in[4];
475 out[5] -= in[5];
476 out[6] -= in[6];
477 }
478
479 /* Subtract in mixed mode: out128 -= in64 */
480 /* in[i] < 2^63 */
481 static void felem_diff_128_64(widefelem out, const felem in)
482 {
483 static const widelimb two64p8 = (((widelimb) 1) << 64) +
484 (((widelimb) 1) << 8);
485 static const widelimb two64m8 = (((widelimb) 1) << 64) -
486 (((widelimb) 1) << 8);
487 static const widelimb two64m48m8 = (((widelimb) 1) << 64) -
488 (((widelimb) 1) << 48) - (((widelimb) 1) << 8);
489
490 /* Add 0 mod 2^224-2^96+1 to ensure out > in */
491 out[0] += two64p8;
492 out[1] += two64m48m8;
493 out[2] += two64m8;
494 out[3] += two64m8;
495
496 out[0] -= in[0];
497 out[1] -= in[1];
498 out[2] -= in[2];
499 out[3] -= in[3];
500 }
501
502 /*
503 * Multiply a field element by a scalar: out = out * scalar The scalars we
504 * actually use are small, so results fit without overflow
505 */
506 static void felem_scalar(felem out, const limb scalar)
507 {
508 out[0] *= scalar;
509 out[1] *= scalar;
510 out[2] *= scalar;
511 out[3] *= scalar;
512 }
513
514 /*
515 * Multiply an unreduced field element by a scalar: out = out * scalar The
516 * scalars we actually use are small, so results fit without overflow
517 */
518 static void widefelem_scalar(widefelem out, const widelimb scalar)
519 {
520 out[0] *= scalar;
521 out[1] *= scalar;
522 out[2] *= scalar;
523 out[3] *= scalar;
524 out[4] *= scalar;
525 out[5] *= scalar;
526 out[6] *= scalar;
527 }
528
529 /* Square a field element: out = in^2 */
530 static void felem_square(widefelem out, const felem in)
531 {
532 limb tmp0, tmp1, tmp2;
533 tmp0 = 2 * in[0];
534 tmp1 = 2 * in[1];
535 tmp2 = 2 * in[2];
536 out[0] = ((widelimb) in[0]) * in[0];
537 out[1] = ((widelimb) in[0]) * tmp1;
538 out[2] = ((widelimb) in[0]) * tmp2 + ((widelimb) in[1]) * in[1];
539 out[3] = ((widelimb) in[3]) * tmp0 + ((widelimb) in[1]) * tmp2;
540 out[4] = ((widelimb) in[3]) * tmp1 + ((widelimb) in[2]) * in[2];
541 out[5] = ((widelimb) in[3]) * tmp2;
542 out[6] = ((widelimb) in[3]) * in[3];
543 }
544
545 /* Multiply two field elements: out = in1 * in2 */
546 static void felem_mul(widefelem out, const felem in1, const felem in2)
547 {
548 out[0] = ((widelimb) in1[0]) * in2[0];
549 out[1] = ((widelimb) in1[0]) * in2[1] + ((widelimb) in1[1]) * in2[0];
550 out[2] = ((widelimb) in1[0]) * in2[2] + ((widelimb) in1[1]) * in2[1] +
551 ((widelimb) in1[2]) * in2[0];
552 out[3] = ((widelimb) in1[0]) * in2[3] + ((widelimb) in1[1]) * in2[2] +
553 ((widelimb) in1[2]) * in2[1] + ((widelimb) in1[3]) * in2[0];
554 out[4] = ((widelimb) in1[1]) * in2[3] + ((widelimb) in1[2]) * in2[2] +
555 ((widelimb) in1[3]) * in2[1];
556 out[5] = ((widelimb) in1[2]) * in2[3] + ((widelimb) in1[3]) * in2[2];
557 out[6] = ((widelimb) in1[3]) * in2[3];
558 }
559
/*-
 * Reduce seven 128-bit coefficients to four 64-bit coefficients.
 * Requires in[i] < 2^126,
 * ensures out[0] < 2^56, out[1] < 2^56, out[2] < 2^56, out[3] <= 2^56 + 2^16 */
static void felem_reduce(felem out, const widefelem in)
{
    static const widelimb two127p15 = (((widelimb) 1) << 127) +
        (((widelimb) 1) << 15);
    static const widelimb two127m71 = (((widelimb) 1) << 127) -
        (((widelimb) 1) << 71);
    static const widelimb two127m71m55 = (((widelimb) 1) << 127) -
        (((widelimb) 1) << 71) - (((widelimb) 1) << 55);
    widelimb output[5];

    /* Add 0 mod 2^224-2^96+1 to ensure all differences are positive */
    output[0] = in[0] + two127p15;
    output[1] = in[1] + two127m71m55;
    output[2] = in[2] + two127m71;
    output[3] = in[3];
    output[4] = in[4];

    /*
     * Eliminate in[4], in[5], in[6]: each high coefficient b_i*2^(56*i)
     * with i >= 4 folds down via 2^224 = 2^96 - 1 (mod p), i.e. it is
     * added at position 56*i - 128 (split across two limbs, since
     * 2^96 = 2^16 * 2^(56+40)) and subtracted at position 56*i - 224.
     */
    output[4] += in[6] >> 16;
    output[3] += (in[6] & 0xffff) << 40;
    output[2] -= in[6];

    output[3] += in[5] >> 16;
    output[2] += (in[5] & 0xffff) << 40;
    output[1] -= in[5];

    output[2] += output[4] >> 16;
    output[1] += (output[4] & 0xffff) << 40;
    output[0] -= output[4];

    /* Carry 2 -> 3 -> 4 */
    output[3] += output[2] >> 56;
    output[2] &= 0x00ffffffffffffff;

    output[4] = output[3] >> 56;
    output[3] &= 0x00ffffffffffffff;

    /* Now output[2] < 2^56, output[3] < 2^56, output[4] < 2^72 */

    /* Eliminate output[4] again, using the same 2^224 = 2^96 - 1 identity */
    output[2] += output[4] >> 16;
    /* output[2] < 2^56 + 2^56 = 2^57 */
    output[1] += (output[4] & 0xffff) << 40;
    output[0] -= output[4];

    /* Carry 0 -> 1 -> 2 -> 3 */
    output[1] += output[0] >> 56;
    out[0] = output[0] & 0x00ffffffffffffff;

    output[2] += output[1] >> 56;
    /* output[2] < 2^57 + 2^72 */
    out[1] = output[1] & 0x00ffffffffffffff;
    output[3] += output[2] >> 56;
    /* output[3] <= 2^56 + 2^16 */
    out[2] = output[2] & 0x00ffffffffffffff;

    /*-
     * out[0] < 2^56, out[1] < 2^56, out[2] < 2^56,
     * out[3] <= 2^56 + 2^16 (due to final carry),
     * so out < 2*p
     */
    out[3] = output[3];
}
627
628 static void felem_square_reduce(felem out, const felem in)
629 {
630 widefelem tmp;
631 felem_square(tmp, in);
632 felem_reduce(out, tmp);
633 }
634
635 static void felem_mul_reduce(felem out, const felem in1, const felem in2)
636 {
637 widefelem tmp;
638 felem_mul(tmp, in1, in2);
639 felem_reduce(out, tmp);
640 }
641
/*
 * Reduce to unique minimal representation. Requires 0 <= in < 2*p (always
 * call felem_reduce first).  Runs in constant time: the conditional
 * subtraction of p is done with masks, not branches.
 */
static void felem_contract(felem out, const felem in)
{
    static const int64_t two56 = ((limb) 1) << 56;
    /* 0 <= in < 2*p, p = 2^224 - 2^96 + 1 */
    /* if in > p , reduce in = in - 2^224 + 2^96 - 1 */
    int64_t tmp[4], a;
    tmp[0] = in[0];
    tmp[1] = in[1];
    tmp[2] = in[2];
    tmp[3] = in[3];
    /* Case 1: a = 1 iff in >= 2^224 */
    a = (in[3] >> 56);
    /* subtract 2^224 - 2^96 + 1: -1 at limb 0, +2^40 at limb 1 (= 2^96) */
    tmp[0] -= a;
    tmp[1] += a << 40;
    tmp[3] &= 0x00ffffffffffffff;
    /*
     * Case 2: a = 0 iff p <= in < 2^224, i.e., the high 128 bits are all 1
     * and the lower part is non-zero
     */
    a = ((in[3] & in[2] & (in[1] | 0x000000ffffffffff)) + 1) |
        (((int64_t) (in[0] + (in[1] & 0x000000ffffffffff)) - 1) >> 63);
    a &= 0x00ffffffffffffff;
    /* turn a into an all-one mask (if a = 0) or an all-zero mask */
    a = (a - 1) >> 63;
    /* subtract 2^224 - 2^96 + 1 if a is all-one */
    tmp[3] &= a ^ 0xffffffffffffffff;
    tmp[2] &= a ^ 0xffffffffffffffff;
    tmp[1] &= (a ^ 0xffffffffffffffff) | 0x000000ffffffffff;
    tmp[0] -= 1 & a;

    /*
     * eliminate negative coefficients: if tmp[0] is negative, tmp[1] must be
     * non-zero, so we only need one step
     */
    a = tmp[0] >> 63;           /* all-one iff tmp[0] < 0 */
    tmp[0] += two56 & a;
    tmp[1] -= 1 & a;

    /* carry 1 -> 2 -> 3 */
    tmp[2] += tmp[1] >> 56;
    tmp[1] &= 0x00ffffffffffffff;

    tmp[3] += tmp[2] >> 56;
    tmp[2] &= 0x00ffffffffffffff;

    /* Now 0 <= out < p */
    out[0] = tmp[0];
    out[1] = tmp[1];
    out[2] = tmp[2];
    out[3] = tmp[3];
}
697
/*
 * Zero-check: returns 1 if input is 0, and 0 otherwise. We know that field
 * elements are reduced to in < 2^225, so we only need to check three cases:
 * 0, 2^224 - 2^96 + 1, and 2^225 - 2^97 + 2.  Constant time: each case is
 * tested by OR-ing the XOR with the candidate's limbs and converting
 * "all bits clear" into 1 via the sign of (x - 1).
 */
static limb felem_is_zero(const felem in)
{
    limb zero, two224m96p1, two225m97p2;

    zero = in[0] | in[1] | in[2] | in[3];
    zero = (((int64_t) (zero) - 1) >> 63) & 1;
    two224m96p1 = (in[0] ^ 1) | (in[1] ^ 0x00ffff0000000000)
        | (in[2] ^ 0x00ffffffffffffff) | (in[3] ^ 0x00ffffffffffffff);
    two224m96p1 = (((int64_t) (two224m96p1) - 1) >> 63) & 1;
    two225m97p2 = (in[0] ^ 2) | (in[1] ^ 0x00fffe0000000000)
        | (in[2] ^ 0x00ffffffffffffff) | (in[3] ^ 0x01ffffffffffffff);
    two225m97p2 = (((int64_t) (two225m97p2) - 1) >> 63) & 1;
    return (zero | two224m96p1 | two225m97p2);
}
717
718 static int felem_is_zero_int(const void *in)
719 {
720 return (int)(felem_is_zero(in) & ((limb) 1));
721 }
722
/* Invert a field element */
/* Computation chain copied from djb's code */
/*
 * Fermat inversion: out = in^(p-2) mod p, built from an addition chain for
 * the exponent p - 2 = 2^224 - 2^96 - 1.  Each step's comment gives the
 * exponent accumulated so far (e.g. "2^6 - 1" means the value is
 * in^(2^6-1)).  Squaring shifts the exponent left; multiplying by a saved
 * intermediate adds its exponent.
 */
static void felem_inv(felem out, const felem in)
{
    felem ftmp, ftmp2, ftmp3, ftmp4;
    widefelem tmp;
    unsigned i;

    felem_square(tmp, in);
    felem_reduce(ftmp, tmp);    /* 2 */
    felem_mul(tmp, in, ftmp);
    felem_reduce(ftmp, tmp);    /* 2^2 - 1 */
    felem_square(tmp, ftmp);
    felem_reduce(ftmp, tmp);    /* 2^3 - 2 */
    felem_mul(tmp, in, ftmp);
    felem_reduce(ftmp, tmp);    /* 2^3 - 1 */
    felem_square(tmp, ftmp);
    felem_reduce(ftmp2, tmp);   /* 2^4 - 2 */
    felem_square(tmp, ftmp2);
    felem_reduce(ftmp2, tmp);   /* 2^5 - 4 */
    felem_square(tmp, ftmp2);
    felem_reduce(ftmp2, tmp);   /* 2^6 - 8 */
    felem_mul(tmp, ftmp2, ftmp);
    felem_reduce(ftmp, tmp);    /* 2^6 - 1 */
    felem_square(tmp, ftmp);
    felem_reduce(ftmp2, tmp);   /* 2^7 - 2 */
    for (i = 0; i < 5; ++i) {   /* 2^12 - 2^6 */
        felem_square(tmp, ftmp2);
        felem_reduce(ftmp2, tmp);
    }
    felem_mul(tmp, ftmp2, ftmp);
    felem_reduce(ftmp2, tmp);   /* 2^12 - 1 */
    felem_square(tmp, ftmp2);
    felem_reduce(ftmp3, tmp);   /* 2^13 - 2 */
    for (i = 0; i < 11; ++i) {  /* 2^24 - 2^12 */
        felem_square(tmp, ftmp3);
        felem_reduce(ftmp3, tmp);
    }
    felem_mul(tmp, ftmp3, ftmp2);
    felem_reduce(ftmp2, tmp);   /* 2^24 - 1 */
    felem_square(tmp, ftmp2);
    felem_reduce(ftmp3, tmp);   /* 2^25 - 2 */
    for (i = 0; i < 23; ++i) {  /* 2^48 - 2^24 */
        felem_square(tmp, ftmp3);
        felem_reduce(ftmp3, tmp);
    }
    felem_mul(tmp, ftmp3, ftmp2);
    felem_reduce(ftmp3, tmp);   /* 2^48 - 1 */
    felem_square(tmp, ftmp3);
    felem_reduce(ftmp4, tmp);   /* 2^49 - 2 */
    for (i = 0; i < 47; ++i) {  /* 2^96 - 2^48 */
        felem_square(tmp, ftmp4);
        felem_reduce(ftmp4, tmp);
    }
    felem_mul(tmp, ftmp3, ftmp4);
    felem_reduce(ftmp3, tmp);   /* 2^96 - 1 */
    felem_square(tmp, ftmp3);
    felem_reduce(ftmp4, tmp);   /* 2^97 - 2 */
    for (i = 0; i < 23; ++i) {  /* 2^120 - 2^24 */
        felem_square(tmp, ftmp4);
        felem_reduce(ftmp4, tmp);
    }
    felem_mul(tmp, ftmp2, ftmp4);
    felem_reduce(ftmp2, tmp);   /* 2^120 - 1 */
    for (i = 0; i < 6; ++i) {   /* 2^126 - 2^6 */
        felem_square(tmp, ftmp2);
        felem_reduce(ftmp2, tmp);
    }
    felem_mul(tmp, ftmp2, ftmp);
    felem_reduce(ftmp, tmp);    /* 2^126 - 1 */
    felem_square(tmp, ftmp);
    felem_reduce(ftmp, tmp);    /* 2^127 - 2 */
    felem_mul(tmp, ftmp, in);
    felem_reduce(ftmp, tmp);    /* 2^127 - 1 */
    for (i = 0; i < 97; ++i) {  /* 2^224 - 2^97 */
        felem_square(tmp, ftmp);
        felem_reduce(ftmp, tmp);
    }
    felem_mul(tmp, ftmp, ftmp3);
    felem_reduce(out, tmp);     /* 2^224 - 2^96 - 1 */
}
804
805 /*
806 * Copy in constant time: if icopy == 1, copy in to out, if icopy == 0, copy
807 * out to itself.
808 */
809 static void copy_conditional(felem out, const felem in, limb icopy)
810 {
811 unsigned i;
812 /*
813 * icopy is a (64-bit) 0 or 1, so copy is either all-zero or all-one
814 */
815 const limb copy = -icopy;
816 for (i = 0; i < 4; ++i) {
817 const limb tmp = copy & (in[i] ^ out[i]);
818 out[i] ^= tmp;
819 }
820 }
821
822 /******************************************************************************/
823 /*-
824 * ELLIPTIC CURVE POINT OPERATIONS
825 *
826 * Points are represented in Jacobian projective coordinates:
827 * (X, Y, Z) corresponds to the affine point (X/Z^2, Y/Z^3),
828 * or to the point at infinity if Z == 0.
829 *
830 */
831
/*-
 * Double an elliptic curve point:
 * (X', Y', Z') = 2 * (X, Y, Z), where
 * X' = (3 * (X - Z^2) * (X + Z^2))^2 - 8 * X * Y^2
 * Y' = 3 * (X - Z^2) * (X + Z^2) * (4 * X * Y^2 - X') - 8 * Y^2
 * Z' = (Y + Z)^2 - Y^2 - Z^2 = 2 * Y * Z
 * Outputs can equal corresponding inputs, i.e., x_out == x_in is allowed,
 * while x_out == y_in is not (maybe this works, but it's not tested).
 *
 * The interleaved comments track per-limb magnitude bounds to show that no
 * intermediate overflows its 64- or 128-bit container before reduction.
 */
static void
point_double(felem x_out, felem y_out, felem z_out,
             const felem x_in, const felem y_in, const felem z_in)
{
    widefelem tmp, tmp2;
    felem delta, gamma, beta, alpha, ftmp, ftmp2;

    /* Working copies of x so x_in may alias x_out */
    felem_assign(ftmp, x_in);
    felem_assign(ftmp2, x_in);

    /* delta = z^2 */
    felem_square(tmp, z_in);
    felem_reduce(delta, tmp);

    /* gamma = y^2 */
    felem_square(tmp, y_in);
    felem_reduce(gamma, tmp);

    /* beta = x*gamma */
    felem_mul(tmp, x_in, gamma);
    felem_reduce(beta, tmp);

    /* alpha = 3*(x-delta)*(x+delta) */
    felem_diff(ftmp, delta);
    /* ftmp[i] < 2^57 + 2^58 + 2 < 2^59 */
    felem_sum(ftmp2, delta);
    /* ftmp2[i] < 2^57 + 2^57 = 2^58 */
    felem_scalar(ftmp2, 3);
    /* ftmp2[i] < 3 * 2^58 < 2^60 */
    felem_mul(tmp, ftmp, ftmp2);
    /* tmp[i] < 2^60 * 2^59 * 4 = 2^121 */
    felem_reduce(alpha, tmp);

    /* x' = alpha^2 - 8*beta */
    felem_square(tmp, alpha);
    /* tmp[i] < 4 * 2^57 * 2^57 = 2^116 */
    felem_assign(ftmp, beta);
    felem_scalar(ftmp, 8);
    /* ftmp[i] < 8 * 2^57 = 2^60 */
    felem_diff_128_64(tmp, ftmp);
    /* tmp[i] < 2^116 + 2^64 + 8 < 2^117 */
    felem_reduce(x_out, tmp);

    /* z' = (y + z)^2 - gamma - delta */
    felem_sum(delta, gamma);
    /* delta[i] < 2^57 + 2^57 = 2^58 */
    felem_assign(ftmp, y_in);
    felem_sum(ftmp, z_in);
    /* ftmp[i] < 2^57 + 2^57 = 2^58 */
    felem_square(tmp, ftmp);
    /* tmp[i] < 4 * 2^58 * 2^58 = 2^118 */
    felem_diff_128_64(tmp, delta);
    /* tmp[i] < 2^118 + 2^64 + 8 < 2^119 */
    felem_reduce(z_out, tmp);

    /* y' = alpha*(4*beta - x') - 8*gamma^2 */
    felem_scalar(beta, 4);
    /* beta[i] < 4 * 2^57 = 2^59 */
    felem_diff(beta, x_out);
    /* beta[i] < 2^59 + 2^58 + 2 < 2^60 */
    felem_mul(tmp, alpha, beta);
    /* tmp[i] < 4 * 2^57 * 2^60 = 2^119 */
    felem_square(tmp2, gamma);
    /* tmp2[i] < 4 * 2^57 * 2^57 = 2^116 */
    widefelem_scalar(tmp2, 8);
    /* tmp2[i] < 8 * 2^116 = 2^119 */
    widefelem_diff(tmp, tmp2);
    /* tmp[i] < 2^119 + 2^120 < 2^121 */
    felem_reduce(y_out, tmp);
}
911
912 /*-
913 * Add two elliptic curve points:
914 * (X_1, Y_1, Z_1) + (X_2, Y_2, Z_2) = (X_3, Y_3, Z_3), where
915 * X_3 = (Z_1^3 * Y_2 - Z_2^3 * Y_1)^2 - (Z_1^2 * X_2 - Z_2^2 * X_1)^3 -
916 * 2 * Z_2^2 * X_1 * (Z_1^2 * X_2 - Z_2^2 * X_1)^2
917 * Y_3 = (Z_1^3 * Y_2 - Z_2^3 * Y_1) * (Z_2^2 * X_1 * (Z_1^2 * X_2 - Z_2^2 * X_1)^2 - X_3) -
918 * Z_2^3 * Y_1 * (Z_1^2 * X_2 - Z_2^2 * X_1)^3
919 * Z_3 = (Z_1^2 * X_2 - Z_2^2 * X_1) * (Z_1 * Z_2)
920 *
921 * This runs faster if 'mixed' is set, which requires Z_2 = 1 or Z_2 = 0.
922 */
923
924 /*
925 * This function is not entirely constant-time: it includes a branch for
926 * checking whether the two input points are equal, (while not equal to the
927 * point at infinity). This case never happens during single point
928 * multiplication, so there is no timing leak for ECDH or ECDSA signing.
929 */
/*
 * point_add computes (x3, y3, z3) = (x1, y1, z1) + (x2, y2, z2) in
 * Jacobian projective coordinates, using the formulae documented above.
 * If |mixed| is nonzero the caller guarantees z2 = 1 or z2 = 0, which
 * saves several field operations. Output may alias either input.
 */
static void point_add(felem x3, felem y3, felem z3,
                      const felem x1, const felem y1, const felem z1,
                      const int mixed, const felem x2, const felem y2,
                      const felem z2)
{
    felem ftmp, ftmp2, ftmp3, ftmp4, ftmp5, x_out, y_out, z_out;
    widefelem tmp, tmp2;
    limb z1_is_zero, z2_is_zero, x_equal, y_equal;

    if (!mixed) {
        /* ftmp2 = z2^2 */
        felem_square(tmp, z2);
        felem_reduce(ftmp2, tmp);

        /* ftmp4 = z2^3 */
        felem_mul(tmp, ftmp2, z2);
        felem_reduce(ftmp4, tmp);

        /* ftmp4 = z2^3*y1 */
        felem_mul(tmp2, ftmp4, y1);
        felem_reduce(ftmp4, tmp2);

        /* ftmp2 = z2^2*x1 */
        felem_mul(tmp2, ftmp2, x1);
        felem_reduce(ftmp2, tmp2);
    } else {
        /*
         * We'll assume z2 = 1 (special case z2 = 0 is handled later)
         */

        /* ftmp4 = z2^3*y1 */
        felem_assign(ftmp4, y1);

        /* ftmp2 = z2^2*x1 */
        felem_assign(ftmp2, x1);
    }

    /* ftmp = z1^2 */
    felem_square(tmp, z1);
    felem_reduce(ftmp, tmp);

    /* ftmp3 = z1^3 */
    felem_mul(tmp, ftmp, z1);
    felem_reduce(ftmp3, tmp);

    /* tmp = z1^3*y2 */
    felem_mul(tmp, ftmp3, y2);
    /* tmp[i] < 4 * 2^57 * 2^57 = 2^116 */

    /* ftmp3 = z1^3*y2 - z2^3*y1 */
    felem_diff_128_64(tmp, ftmp4);
    /* tmp[i] < 2^116 + 2^64 + 8 < 2^117 */
    felem_reduce(ftmp3, tmp);

    /* tmp = z1^2*x2 */
    felem_mul(tmp, ftmp, x2);
    /* tmp[i] < 4 * 2^57 * 2^57 = 2^116 */

    /* ftmp = z1^2*x2 - z2^2*x1 */
    felem_diff_128_64(tmp, ftmp2);
    /* tmp[i] < 2^116 + 2^64 + 8 < 2^117 */
    felem_reduce(ftmp, tmp);

    /*
     * the formulae are incorrect if the points are equal so we check for
     * this and do doubling if this happens.
     * NOTE(review): this is the documented non-constant-time branch; it is
     * only reachable for equal, non-infinity inputs.
     */
    x_equal = felem_is_zero(ftmp);
    y_equal = felem_is_zero(ftmp3);
    z1_is_zero = felem_is_zero(z1);
    z2_is_zero = felem_is_zero(z2);
    /* In affine coordinates, (X_1, Y_1) == (X_2, Y_2) */
    if (x_equal && y_equal && !z1_is_zero && !z2_is_zero) {
        point_double(x3, y3, z3, x1, y1, z1);
        return;
    }

    /* ftmp5 = z1*z2 */
    if (!mixed) {
        felem_mul(tmp, z1, z2);
        felem_reduce(ftmp5, tmp);
    } else {
        /* special case z2 = 0 is handled later */
        felem_assign(ftmp5, z1);
    }

    /* z_out = (z1^2*x2 - z2^2*x1)*(z1*z2) */
    felem_mul(tmp, ftmp, ftmp5);
    felem_reduce(z_out, tmp);

    /* ftmp = (z1^2*x2 - z2^2*x1)^2 */
    felem_assign(ftmp5, ftmp);
    felem_square(tmp, ftmp);
    felem_reduce(ftmp, tmp);

    /* ftmp5 = (z1^2*x2 - z2^2*x1)^3 */
    felem_mul(tmp, ftmp, ftmp5);
    felem_reduce(ftmp5, tmp);

    /* ftmp2 = z2^2*x1*(z1^2*x2 - z2^2*x1)^2 */
    felem_mul(tmp, ftmp2, ftmp);
    felem_reduce(ftmp2, tmp);

    /* tmp = z2^3*y1*(z1^2*x2 - z2^2*x1)^3 */
    felem_mul(tmp, ftmp4, ftmp5);
    /* tmp[i] < 4 * 2^57 * 2^57 = 2^116 */

    /* tmp2 = (z1^3*y2 - z2^3*y1)^2 */
    felem_square(tmp2, ftmp3);
    /* tmp2[i] < 4 * 2^57 * 2^57 < 2^116 */

    /* tmp2 = (z1^3*y2 - z2^3*y1)^2 - (z1^2*x2 - z2^2*x1)^3 */
    felem_diff_128_64(tmp2, ftmp5);
    /* tmp2[i] < 2^116 + 2^64 + 8 < 2^117 */

    /* ftmp5 = 2*z2^2*x1*(z1^2*x2 - z2^2*x1)^2 */
    felem_assign(ftmp5, ftmp2);
    felem_scalar(ftmp5, 2);
    /* ftmp5[i] < 2 * 2^57 = 2^58 */

    /*-
     * x_out = (z1^3*y2 - z2^3*y1)^2 - (z1^2*x2 - z2^2*x1)^3 -
     *         2*z2^2*x1*(z1^2*x2 - z2^2*x1)^2
     */
    felem_diff_128_64(tmp2, ftmp5);
    /* tmp2[i] < 2^117 + 2^64 + 8 < 2^118 */
    felem_reduce(x_out, tmp2);

    /* ftmp2 = z2^2*x1*(z1^2*x2 - z2^2*x1)^2 - x_out */
    felem_diff(ftmp2, x_out);
    /* ftmp2[i] < 2^57 + 2^58 + 2 < 2^59 */

    /*
     * tmp2 = (z1^3*y2 - z2^3*y1)*(z2^2*x1*(z1^2*x2 - z2^2*x1)^2 - x_out)
     */
    felem_mul(tmp2, ftmp3, ftmp2);
    /* tmp2[i] < 4 * 2^57 * 2^59 = 2^118 */

    /*-
     * y_out = (z1^3*y2 - z2^3*y1)*(z2^2*x1*(z1^2*x2 - z2^2*x1)^2 - x_out) -
     *         z2^3*y1*(z1^2*x2 - z2^2*x1)^3
     */
    widefelem_diff(tmp2, tmp);
    /* tmp2[i] < 2^118 + 2^120 < 2^121 */
    felem_reduce(y_out, tmp2);

    /*
     * the result (x_out, y_out, z_out) is incorrect if one of the inputs is
     * the point at infinity, so we need to check for this separately
     */

    /*
     * if point 1 is at infinity, copy point 2 to output, and vice versa
     * (done branch-free with copy_conditional)
     */
    copy_conditional(x_out, x2, z1_is_zero);
    copy_conditional(x_out, x1, z2_is_zero);
    copy_conditional(y_out, y2, z1_is_zero);
    copy_conditional(y_out, y1, z2_is_zero);
    copy_conditional(z_out, z2, z1_is_zero);
    copy_conditional(z_out, z1, z2_is_zero);
    felem_assign(x3, x_out);
    felem_assign(y3, y_out);
    felem_assign(z3, z_out);
}
1094
/*
 * select_point selects the |idx|th point from a precomputation table and
 * copies it to out.
 * The pre_comp array argument should have |size| elements.
 */
1100 static void select_point(const u64 idx, unsigned int size,
1101 const felem pre_comp[][3], felem out[3])
1102 {
1103 unsigned i, j;
1104 limb *outlimbs = &out[0][0];
1105 memset(outlimbs, 0, 3 * sizeof(felem));
1106
1107 for (i = 0; i < size; i++) {
1108 const limb *inlimbs = &pre_comp[i][0][0];
1109 u64 mask = i ^ idx;
1110 mask |= mask >> 4;
1111 mask |= mask >> 2;
1112 mask |= mask >> 1;
1113 mask &= 1;
1114 mask--;
1115 for (j = 0; j < 4 * 3; j++)
1116 outlimbs[j] |= inlimbs[j] & mask;
1117 }
1118 }
1119
1120 /* get_bit returns the |i|th bit in |in| */
1121 static char get_bit(const felem_bytearray in, unsigned i)
1122 {
1123 if (i >= 224)
1124 return 0;
1125 return (in[i >> 3] >> (i & 7)) & 1;
1126 }
1127
1128 /*
1129 * Interleaved point multiplication using precomputed point multiples: The
1130 * small point multiples 0*P, 1*P, ..., 16*P are in pre_comp[], the scalars
1131 * in scalars[]. If g_scalar is non-NULL, we also add this multiple of the
1132 * generator, using certain (large) precomputed multiples in g_pre_comp.
1133 * Output point (X, Y, Z) is stored in x_out, y_out, z_out
1134 */
static void batch_mul(felem x_out, felem y_out, felem z_out,
                      const felem_bytearray scalars[],
                      const unsigned num_points, const u8 *g_scalar,
                      const int mixed, const felem pre_comp[][17][3],
                      const felem g_pre_comp[2][16][3])
{
    int i, skip;
    unsigned num;
    unsigned gen_mul = (g_scalar != NULL);
    felem nq[3], tmp[4];        /* nq = running result; tmp = selected point */
    u64 bits;
    u8 sign, digit;

    /* set nq to the point at infinity */
    memset(nq, 0, 3 * sizeof(felem));

    /*
     * Loop over all scalars msb-to-lsb, interleaving additions of multiples
     * of the generator (two in each of the last 28 rounds) and additions of
     * other points multiples (every 5th round).
     */
    skip = 1;                   /* save two point operations in the first
                                 * round */
    for (i = (num_points ? 220 : 27); i >= 0; --i) {
        /* double */
        if (!skip)
            point_double(nq[0], nq[1], nq[2], nq[0], nq[1], nq[2]);

        /* add multiples of the generator */
        if (gen_mul && (i <= 27)) {
            /* first, look 28 bits upwards: comb over bits i+28+56k */
            bits = get_bit(g_scalar, i + 196) << 3;
            bits |= get_bit(g_scalar, i + 140) << 2;
            bits |= get_bit(g_scalar, i + 84) << 1;
            bits |= get_bit(g_scalar, i + 28);
            /* select the point to add, in constant time */
            select_point(bits, 16, g_pre_comp[1], tmp);

            if (!skip) {
                /* value 1 below is argument for "mixed" */
                point_add(nq[0], nq[1], nq[2],
                          nq[0], nq[1], nq[2], 1, tmp[0], tmp[1], tmp[2]);
            } else {
                /* first addition: just copy, saving a point operation */
                memcpy(nq, tmp, 3 * sizeof(felem));
                skip = 0;
            }

            /* second, look at the current position: bits i+56k */
            bits = get_bit(g_scalar, i + 168) << 3;
            bits |= get_bit(g_scalar, i + 112) << 2;
            bits |= get_bit(g_scalar, i + 56) << 1;
            bits |= get_bit(g_scalar, i);
            /* select the point to add, in constant time */
            select_point(bits, 16, g_pre_comp[0], tmp);
            point_add(nq[0], nq[1], nq[2],
                      nq[0], nq[1], nq[2],
                      1 /* mixed */ , tmp[0], tmp[1], tmp[2]);
        }

        /* do other additions every 5 doublings */
        if (num_points && (i % 5 == 0)) {
            /* loop over all scalars */
            for (num = 0; num < num_points; ++num) {
                /*
                 * Gather a signed 5-bit window; at i == 0, i - 1 wraps to a
                 * huge unsigned value, for which get_bit returns 0.
                 */
                bits = get_bit(scalars[num], i + 4) << 5;
                bits |= get_bit(scalars[num], i + 3) << 4;
                bits |= get_bit(scalars[num], i + 2) << 3;
                bits |= get_bit(scalars[num], i + 1) << 2;
                bits |= get_bit(scalars[num], i) << 1;
                bits |= get_bit(scalars[num], i - 1);
                ec_GFp_nistp_recode_scalar_bits(&sign, &digit, bits);

                /* select the point to add or subtract */
                select_point(digit, 17, pre_comp[num], tmp);
                felem_neg(tmp[3], tmp[1]); /* (X, -Y, Z) is the negative
                                            * point */
                copy_conditional(tmp[1], tmp[3], sign);

                if (!skip) {
                    point_add(nq[0], nq[1], nq[2],
                              nq[0], nq[1], nq[2],
                              mixed, tmp[0], tmp[1], tmp[2]);
                } else {
                    /* first addition: just copy */
                    memcpy(nq, tmp, 3 * sizeof(felem));
                    skip = 0;
                }
            }
        }
    }
    felem_assign(x_out, nq[0]);
    felem_assign(y_out, nq[1]);
    felem_assign(z_out, nq[2]);
}
1227
1228 /******************************************************************************/
1229 /*
1230 * FUNCTIONS TO MANAGE PRECOMPUTATION
1231 */
1232
1233 static NISTP224_PRE_COMP *nistp224_pre_comp_new()
1234 {
1235 NISTP224_PRE_COMP *ret = NULL;
1236 ret = (NISTP224_PRE_COMP *) OPENSSL_malloc(sizeof(*ret));
1237 if (!ret) {
1238 ECerr(EC_F_NISTP224_PRE_COMP_NEW, ERR_R_MALLOC_FAILURE);
1239 return ret;
1240 }
1241 memset(ret->g_pre_comp, 0, sizeof(ret->g_pre_comp));
1242 ret->references = 1;
1243 return ret;
1244 }
1245
1246 static void *nistp224_pre_comp_dup(void *src_)
1247 {
1248 NISTP224_PRE_COMP *src = src_;
1249
1250 /* no need to actually copy, these objects never change! */
1251 CRYPTO_add(&src->references, 1, CRYPTO_LOCK_EC_PRE_COMP);
1252
1253 return src_;
1254 }
1255
1256 static void nistp224_pre_comp_free(void *pre_)
1257 {
1258 int i;
1259 NISTP224_PRE_COMP *pre = pre_;
1260
1261 if (!pre)
1262 return;
1263
1264 i = CRYPTO_add(&pre->references, -1, CRYPTO_LOCK_EC_PRE_COMP);
1265 if (i > 0)
1266 return;
1267
1268 OPENSSL_free(pre);
1269 }
1270
1271 static void nistp224_pre_comp_clear_free(void *pre_)
1272 {
1273 int i;
1274 NISTP224_PRE_COMP *pre = pre_;
1275
1276 if (!pre)
1277 return;
1278
1279 i = CRYPTO_add(&pre->references, -1, CRYPTO_LOCK_EC_PRE_COMP);
1280 if (i > 0)
1281 return;
1282
1283 OPENSSL_cleanse(pre, sizeof(*pre));
1284 OPENSSL_free(pre);
1285 }
1286
1287 /******************************************************************************/
1288 /*
1289 * OPENSSL EC_METHOD FUNCTIONS
1290 */
1291
1292 int ec_GFp_nistp224_group_init(EC_GROUP *group)
1293 {
1294 int ret;
1295 ret = ec_GFp_simple_group_init(group);
1296 group->a_is_minus3 = 1;
1297 return ret;
1298 }
1299
/*
 * Set the curve parameters on |group|, but only if they are exactly the
 * NIST P-224 parameters this implementation is hard-wired for; otherwise
 * fail with EC_R_WRONG_CURVE_PARAMETERS. Returns 1 on success, 0 on error.
 * A NULL |ctx| causes a temporary BN_CTX to be created and freed here.
 */
int ec_GFp_nistp224_group_set_curve(EC_GROUP *group, const BIGNUM *p,
                                    const BIGNUM *a, const BIGNUM *b,
                                    BN_CTX *ctx)
{
    int ret = 0;
    BN_CTX *new_ctx = NULL;
    BIGNUM *curve_p, *curve_a, *curve_b;

    if (ctx == NULL)
        if ((ctx = new_ctx = BN_CTX_new()) == NULL)
            return 0;
    BN_CTX_start(ctx);
    if (((curve_p = BN_CTX_get(ctx)) == NULL) ||
        ((curve_a = BN_CTX_get(ctx)) == NULL) ||
        ((curve_b = BN_CTX_get(ctx)) == NULL))
        goto err;
    /* load the canonical P-224 p, a, b for comparison */
    BN_bin2bn(nistp224_curve_params[0], sizeof(felem_bytearray), curve_p);
    BN_bin2bn(nistp224_curve_params[1], sizeof(felem_bytearray), curve_a);
    BN_bin2bn(nistp224_curve_params[2], sizeof(felem_bytearray), curve_b);
    if ((BN_cmp(curve_p, p)) || (BN_cmp(curve_a, a)) || (BN_cmp(curve_b, b))) {
        ECerr(EC_F_EC_GFP_NISTP224_GROUP_SET_CURVE,
              EC_R_WRONG_CURVE_PARAMETERS);
        goto err;
    }
    /* use the fast NIST reduction for this modulus */
    group->field_mod_func = BN_nist_mod_224;
    ret = ec_GFp_simple_group_set_curve(group, p, a, b, ctx);
 err:
    BN_CTX_end(ctx);
    if (new_ctx != NULL)
        BN_CTX_free(new_ctx);
    return ret;
}
1332
1333 /*
1334 * Takes the Jacobian coordinates (X, Y, Z) of a point and returns (X', Y') =
1335 * (X/Z^2, Y/Z^3)
1336 */
/*
 * Takes the Jacobian coordinates (X, Y, Z) of |point| and writes the
 * affine coordinates (X', Y') = (X/Z^2, Y/Z^3) into |x| and |y| (either
 * may be NULL to skip it). Returns 1 on success, 0 on error; the point at
 * infinity has no affine representation and is rejected.
 */
int ec_GFp_nistp224_point_get_affine_coordinates(const EC_GROUP *group,
                                                 const EC_POINT *point,
                                                 BIGNUM *x, BIGNUM *y,
                                                 BN_CTX *ctx)
{
    felem z1, z2, x_in, y_in, x_out, y_out;
    widefelem tmp;

    if (EC_POINT_is_at_infinity(group, point)) {
        ECerr(EC_F_EC_GFP_NISTP224_POINT_GET_AFFINE_COORDINATES,
              EC_R_POINT_AT_INFINITY);
        return 0;
    }
    if ((!BN_to_felem(x_in, &point->X)) || (!BN_to_felem(y_in, &point->Y)) ||
        (!BN_to_felem(z1, &point->Z)))
        return 0;
    felem_inv(z2, z1);          /* z2 = Z^-1 */
    felem_square(tmp, z2);
    felem_reduce(z1, tmp);      /* z1 = Z^-2 */
    felem_mul(tmp, x_in, z1);
    felem_reduce(x_in, tmp);    /* x_in = X * Z^-2 */
    felem_contract(x_out, x_in);
    if (x != NULL) {
        if (!felem_to_BN(x, x_out)) {
            ECerr(EC_F_EC_GFP_NISTP224_POINT_GET_AFFINE_COORDINATES,
                  ERR_R_BN_LIB);
            return 0;
        }
    }
    felem_mul(tmp, z1, z2);
    felem_reduce(z1, tmp);      /* z1 = Z^-3 */
    felem_mul(tmp, y_in, z1);
    felem_reduce(y_in, tmp);    /* y_in = Y * Z^-3 */
    felem_contract(y_out, y_in);
    if (y != NULL) {
        if (!felem_to_BN(y, y_out)) {
            ECerr(EC_F_EC_GFP_NISTP224_POINT_GET_AFFINE_COORDINATES,
                  ERR_R_BN_LIB);
            return 0;
        }
    }
    return 1;
}
1380
1381 static void make_points_affine(size_t num, felem points[ /* num */ ][3],
1382 felem tmp_felems[ /* num+1 */ ])
1383 {
1384 /*
1385 * Runs in constant time, unless an input is the point at infinity (which
1386 * normally shouldn't happen).
1387 */
1388 ec_GFp_nistp_points_make_affine_internal(num,
1389 points,
1390 sizeof(felem),
1391 tmp_felems,
1392 (void (*)(void *))felem_one,
1393 felem_is_zero_int,
1394 (void (*)(void *, const void *))
1395 felem_assign,
1396 (void (*)(void *, const void *))
1397 felem_square_reduce, (void (*)
1398 (void *,
1399 const void
1400 *,
1401 const void
1402 *))
1403 felem_mul_reduce,
1404 (void (*)(void *, const void *))
1405 felem_inv,
1406 (void (*)(void *, const void *))
1407 felem_contract);
1408 }
1409
1410 /*
1411 * Computes scalar*generator + \sum scalars[i]*points[i], ignoring NULL
1412 * values Result is stored in r (r can equal one of the inputs).
1413 */
int ec_GFp_nistp224_points_mul(const EC_GROUP *group, EC_POINT *r,
                               const BIGNUM *scalar, size_t num,
                               const EC_POINT *points[],
                               const BIGNUM *scalars[], BN_CTX *ctx)
{
    int ret = 0;
    int j;
    unsigned i;
    int mixed = 0;              /* 1 => precomputed multiples made affine */
    BN_CTX *new_ctx = NULL;
    BIGNUM *x, *y, *z, *tmp_scalar;
    felem_bytearray g_secret;   /* little-endian generator scalar */
    felem_bytearray *secrets = NULL;    /* little-endian point scalars */
    felem(*pre_comp)[17][3] = NULL;     /* 0..16 multiples of each point */
    felem *tmp_felems = NULL;   /* scratch for make_points_affine */
    felem_bytearray tmp;
    unsigned num_bytes;
    int have_pre_comp = 0;      /* 1 => g_pre_comp matches group->generator */
    size_t num_points = num;
    felem x_in, y_in, z_in, x_out, y_out, z_out;
    NISTP224_PRE_COMP *pre = NULL;
    const felem(*g_pre_comp)[16][3] = NULL;
    EC_POINT *generator = NULL;
    const EC_POINT *p = NULL;
    const BIGNUM *p_scalar = NULL;

    if (ctx == NULL)
        if ((ctx = new_ctx = BN_CTX_new()) == NULL)
            return 0;
    BN_CTX_start(ctx);
    if (((x = BN_CTX_get(ctx)) == NULL) ||
        ((y = BN_CTX_get(ctx)) == NULL) ||
        ((z = BN_CTX_get(ctx)) == NULL) ||
        ((tmp_scalar = BN_CTX_get(ctx)) == NULL))
        goto err;

    if (scalar != NULL) {
        pre = EC_EX_DATA_get_data(group->extra_data,
                                  nistp224_pre_comp_dup,
                                  nistp224_pre_comp_free,
                                  nistp224_pre_comp_clear_free);
        if (pre)
            /* we have precomputation, try to use it */
            g_pre_comp = (const felem(*)[16][3])pre->g_pre_comp;
        else
            /* try to use the standard precomputation */
            g_pre_comp = &gmul[0];
        generator = EC_POINT_new(group);
        if (generator == NULL)
            goto err;
        /* get the generator from precomputation (entry [0][1] is 1*G) */
        if (!felem_to_BN(x, g_pre_comp[0][1][0]) ||
            !felem_to_BN(y, g_pre_comp[0][1][1]) ||
            !felem_to_BN(z, g_pre_comp[0][1][2])) {
            ECerr(EC_F_EC_GFP_NISTP224_POINTS_MUL, ERR_R_BN_LIB);
            goto err;
        }
        if (!EC_POINT_set_Jprojective_coordinates_GFp(group,
                                                      generator, x, y, z,
                                                      ctx))
            goto err;
        if (0 == EC_POINT_cmp(group, generator, group->generator, ctx))
            /* precomputation matches generator */
            have_pre_comp = 1;
        else
            /*
             * we don't have valid precomputation: treat the generator as a
             * random point
             */
            num_points = num_points + 1;
    }

    if (num_points > 0) {
        if (num_points >= 3) {
            /*
             * unless we precompute multiples for just one or two points,
             * converting those into affine form is time well spent
             */
            mixed = 1;
        }
        secrets = OPENSSL_malloc(num_points * sizeof(felem_bytearray));
        pre_comp = OPENSSL_malloc(num_points * 17 * 3 * sizeof(felem));
        if (mixed)
            tmp_felems =
                OPENSSL_malloc((num_points * 17 + 1) * sizeof(felem));
        if ((secrets == NULL) || (pre_comp == NULL)
            || (mixed && (tmp_felems == NULL))) {
            ECerr(EC_F_EC_GFP_NISTP224_POINTS_MUL, ERR_R_MALLOC_FAILURE);
            goto err;
        }

        /*
         * we treat NULL scalars as 0, and NULL points as points at infinity,
         * i.e., they contribute nothing to the linear combination
         */
        memset(secrets, 0, num_points * sizeof(felem_bytearray));
        memset(pre_comp, 0, num_points * 17 * 3 * sizeof(felem));
        for (i = 0; i < num_points; ++i) {
            if (i == num)
                /* the generator */
            {
                p = EC_GROUP_get0_generator(group);
                p_scalar = scalar;
            } else
                /* the i^th point */
            {
                p = points[i];
                p_scalar = scalars[i];
            }
            if ((p_scalar != NULL) && (p != NULL)) {
                /* reduce scalar to 0 <= scalar < 2^224 */
                if ((BN_num_bits(p_scalar) > 224)
                    || (BN_is_negative(p_scalar))) {
                    /*
                     * this is an unusual input, and we don't guarantee
                     * constant-timeness
                     */
                    if (!BN_nnmod(tmp_scalar, p_scalar, &group->order, ctx)) {
                        ECerr(EC_F_EC_GFP_NISTP224_POINTS_MUL, ERR_R_BN_LIB);
                        goto err;
                    }
                    num_bytes = BN_bn2bin(tmp_scalar, tmp);
                } else
                    num_bytes = BN_bn2bin(p_scalar, tmp);
                /* convert big-endian BN output to little-endian bytes */
                flip_endian(secrets[i], tmp, num_bytes);
                /* precompute multiples 1*P .. 16*P */
                if ((!BN_to_felem(x_out, &p->X)) ||
                    (!BN_to_felem(y_out, &p->Y)) ||
                    (!BN_to_felem(z_out, &p->Z)))
                    goto err;
                felem_assign(pre_comp[i][1][0], x_out);
                felem_assign(pre_comp[i][1][1], y_out);
                felem_assign(pre_comp[i][1][2], z_out);
                for (j = 2; j <= 16; ++j) {
                    /* odd j: (j-1)*P + P; even j: double (j/2)*P */
                    if (j & 1) {
                        point_add(pre_comp[i][j][0], pre_comp[i][j][1],
                                  pre_comp[i][j][2], pre_comp[i][1][0],
                                  pre_comp[i][1][1], pre_comp[i][1][2], 0,
                                  pre_comp[i][j - 1][0],
                                  pre_comp[i][j - 1][1],
                                  pre_comp[i][j - 1][2]);
                    } else {
                        point_double(pre_comp[i][j][0], pre_comp[i][j][1],
                                     pre_comp[i][j][2], pre_comp[i][j / 2][0],
                                     pre_comp[i][j / 2][1],
                                     pre_comp[i][j / 2][2]);
                    }
                }
            }
        }
        if (mixed)
            make_points_affine(num_points * 17, pre_comp[0], tmp_felems);
    }

    /* the scalar for the generator */
    if ((scalar != NULL) && (have_pre_comp)) {
        memset(g_secret, 0, sizeof(g_secret));
        /* reduce scalar to 0 <= scalar < 2^224 */
        if ((BN_num_bits(scalar) > 224) || (BN_is_negative(scalar))) {
            /*
             * this is an unusual input, and we don't guarantee
             * constant-timeness
             */
            if (!BN_nnmod(tmp_scalar, scalar, &group->order, ctx)) {
                ECerr(EC_F_EC_GFP_NISTP224_POINTS_MUL, ERR_R_BN_LIB);
                goto err;
            }
            num_bytes = BN_bn2bin(tmp_scalar, tmp);
        } else
            num_bytes = BN_bn2bin(scalar, tmp);
        flip_endian(g_secret, tmp, num_bytes);
        /* do the multiplication with generator precomputation */
        batch_mul(x_out, y_out, z_out,
                  (const felem_bytearray(*))secrets, num_points,
                  g_secret,
                  mixed, (const felem(*)[17][3])pre_comp, g_pre_comp);
    } else
        /* do the multiplication without generator precomputation */
        batch_mul(x_out, y_out, z_out,
                  (const felem_bytearray(*))secrets, num_points,
                  NULL, mixed, (const felem(*)[17][3])pre_comp, NULL);
    /* reduce the output to its unique minimal representation */
    felem_contract(x_in, x_out);
    felem_contract(y_in, y_out);
    felem_contract(z_in, z_out);
    if ((!felem_to_BN(x, x_in)) || (!felem_to_BN(y, y_in)) ||
        (!felem_to_BN(z, z_in))) {
        ECerr(EC_F_EC_GFP_NISTP224_POINTS_MUL, ERR_R_BN_LIB);
        goto err;
    }
    ret = EC_POINT_set_Jprojective_coordinates_GFp(group, r, x, y, z, ctx);

 err:
    BN_CTX_end(ctx);
    if (generator != NULL)
        EC_POINT_free(generator);
    if (new_ctx != NULL)
        BN_CTX_free(new_ctx);
    if (secrets != NULL)
        OPENSSL_free(secrets);
    if (pre_comp != NULL)
        OPENSSL_free(pre_comp);
    if (tmp_felems != NULL)
        OPENSSL_free(tmp_felems);
    return ret;
}
1620
/*
 * Build (or install the built-in) comb precomputation tables for the
 * group's generator and attach them to |group|. Returns 1 on success,
 * 0 on error. Any previous precomputation is discarded first.
 */
int ec_GFp_nistp224_precompute_mult(EC_GROUP *group, BN_CTX *ctx)
{
    int ret = 0;
    NISTP224_PRE_COMP *pre = NULL;
    int i, j;
    BN_CTX *new_ctx = NULL;
    BIGNUM *x, *y;
    EC_POINT *generator = NULL;
    felem tmp_felems[32];       /* scratch for make_points_affine(31, ...) */

    /* throw away old precomputation */
    EC_EX_DATA_free_data(&group->extra_data, nistp224_pre_comp_dup,
                         nistp224_pre_comp_free,
                         nistp224_pre_comp_clear_free);
    if (ctx == NULL)
        if ((ctx = new_ctx = BN_CTX_new()) == NULL)
            return 0;
    BN_CTX_start(ctx);
    if (((x = BN_CTX_get(ctx)) == NULL) || ((y = BN_CTX_get(ctx)) == NULL))
        goto err;
    /* get the generator */
    if (group->generator == NULL)
        goto err;
    generator = EC_POINT_new(group);
    if (generator == NULL)
        goto err;
    /* build the standard P-224 base point for comparison */
    BN_bin2bn(nistp224_curve_params[3], sizeof(felem_bytearray), x);
    BN_bin2bn(nistp224_curve_params[4], sizeof(felem_bytearray), y);
    if (!EC_POINT_set_affine_coordinates_GFp(group, generator, x, y, ctx))
        goto err;
    if ((pre = nistp224_pre_comp_new()) == NULL)
        goto err;
    /*
     * if the generator is the standard one, use built-in precomputation
     */
    if (0 == EC_POINT_cmp(group, generator, group->generator, ctx)) {
        memcpy(pre->g_pre_comp, gmul, sizeof(pre->g_pre_comp));
        goto done;
    }
    /* otherwise start from 1*G and derive everything below */
    if ((!BN_to_felem(pre->g_pre_comp[0][1][0], &group->generator->X)) ||
        (!BN_to_felem(pre->g_pre_comp[0][1][1], &group->generator->Y)) ||
        (!BN_to_felem(pre->g_pre_comp[0][1][2], &group->generator->Z)))
        goto err;
    /*
     * compute 2^56*G, 2^112*G, 2^168*G for the first table, 2^28*G, 2^84*G,
     * 2^140*G, 2^196*G for the second one
     */
    for (i = 1; i <= 8; i <<= 1) {
        /* 28 doublings take table-0 entry i to table-1 entry i */
        point_double(pre->g_pre_comp[1][i][0], pre->g_pre_comp[1][i][1],
                     pre->g_pre_comp[1][i][2], pre->g_pre_comp[0][i][0],
                     pre->g_pre_comp[0][i][1], pre->g_pre_comp[0][i][2]);
        for (j = 0; j < 27; ++j) {
            point_double(pre->g_pre_comp[1][i][0], pre->g_pre_comp[1][i][1],
                         pre->g_pre_comp[1][i][2], pre->g_pre_comp[1][i][0],
                         pre->g_pre_comp[1][i][1], pre->g_pre_comp[1][i][2]);
        }
        if (i == 8)
            break;
        /* 28 more doublings take table-1 entry i to table-0 entry 2i */
        point_double(pre->g_pre_comp[0][2 * i][0],
                     pre->g_pre_comp[0][2 * i][1],
                     pre->g_pre_comp[0][2 * i][2], pre->g_pre_comp[1][i][0],
                     pre->g_pre_comp[1][i][1], pre->g_pre_comp[1][i][2]);
        for (j = 0; j < 27; ++j) {
            point_double(pre->g_pre_comp[0][2 * i][0],
                         pre->g_pre_comp[0][2 * i][1],
                         pre->g_pre_comp[0][2 * i][2],
                         pre->g_pre_comp[0][2 * i][0],
                         pre->g_pre_comp[0][2 * i][1],
                         pre->g_pre_comp[0][2 * i][2]);
        }
    }
    for (i = 0; i < 2; i++) {
        /* g_pre_comp[i][0] is the point at infinity */
        memset(pre->g_pre_comp[i][0], 0, sizeof(pre->g_pre_comp[i][0]));
        /* the remaining multiples */
        /* 2^56*G + 2^112*G resp. 2^84*G + 2^140*G */
        point_add(pre->g_pre_comp[i][6][0], pre->g_pre_comp[i][6][1],
                  pre->g_pre_comp[i][6][2], pre->g_pre_comp[i][4][0],
                  pre->g_pre_comp[i][4][1], pre->g_pre_comp[i][4][2],
                  0, pre->g_pre_comp[i][2][0], pre->g_pre_comp[i][2][1],
                  pre->g_pre_comp[i][2][2]);
        /* 2^56*G + 2^168*G resp. 2^84*G + 2^196*G */
        point_add(pre->g_pre_comp[i][10][0], pre->g_pre_comp[i][10][1],
                  pre->g_pre_comp[i][10][2], pre->g_pre_comp[i][8][0],
                  pre->g_pre_comp[i][8][1], pre->g_pre_comp[i][8][2],
                  0, pre->g_pre_comp[i][2][0], pre->g_pre_comp[i][2][1],
                  pre->g_pre_comp[i][2][2]);
        /* 2^112*G + 2^168*G resp. 2^140*G + 2^196*G */
        point_add(pre->g_pre_comp[i][12][0], pre->g_pre_comp[i][12][1],
                  pre->g_pre_comp[i][12][2], pre->g_pre_comp[i][8][0],
                  pre->g_pre_comp[i][8][1], pre->g_pre_comp[i][8][2],
                  0, pre->g_pre_comp[i][4][0], pre->g_pre_comp[i][4][1],
                  pre->g_pre_comp[i][4][2]);
        /*
         * 2^56*G + 2^112*G + 2^168*G resp. 2^84*G + 2^140*G + 2^196*G
         */
        point_add(pre->g_pre_comp[i][14][0], pre->g_pre_comp[i][14][1],
                  pre->g_pre_comp[i][14][2], pre->g_pre_comp[i][12][0],
                  pre->g_pre_comp[i][12][1], pre->g_pre_comp[i][12][2],
                  0, pre->g_pre_comp[i][2][0], pre->g_pre_comp[i][2][1],
                  pre->g_pre_comp[i][2][2]);
        for (j = 1; j < 8; ++j) {
            /* odd multiples: add G resp. 2^28*G */
            point_add(pre->g_pre_comp[i][2 * j + 1][0],
                      pre->g_pre_comp[i][2 * j + 1][1],
                      pre->g_pre_comp[i][2 * j + 1][2],
                      pre->g_pre_comp[i][2 * j][0],
                      pre->g_pre_comp[i][2 * j][1],
                      pre->g_pre_comp[i][2 * j][2], 0,
                      pre->g_pre_comp[i][1][0], pre->g_pre_comp[i][1][1],
                      pre->g_pre_comp[i][1][2]);
        }
    }
    /* all 31 non-infinity entries, starting at [0][1] */
    make_points_affine(31, &(pre->g_pre_comp[0][1]), tmp_felems);

 done:
    if (!EC_EX_DATA_set_data(&group->extra_data, pre, nistp224_pre_comp_dup,
                             nistp224_pre_comp_free,
                             nistp224_pre_comp_clear_free))
        goto err;
    ret = 1;
    pre = NULL;                 /* ownership transferred to group */
 err:
    BN_CTX_end(ctx);
    if (generator != NULL)
        EC_POINT_free(generator);
    if (new_ctx != NULL)
        BN_CTX_free(new_ctx);
    if (pre)
        nistp224_pre_comp_free(pre);
    return ret;
}
1753
1754 int ec_GFp_nistp224_have_precompute_mult(const EC_GROUP *group)
1755 {
1756 if (EC_EX_DATA_get_data(group->extra_data, nistp224_pre_comp_dup,
1757 nistp224_pre_comp_free,
1758 nistp224_pre_comp_clear_free)
1759 != NULL)
1760 return 1;
1761 else
1762 return 0;
1763 }
1764
1765 #else
1766 static void *dummy = &dummy;
1767 #endif