1 /* crypto/ec/ecp_nistp224.c */
2 /*
3 * Written by Emilia Kasper (Google) for the OpenSSL project.
4 */
5 /* Copyright 2011 Google Inc.
6 *
7 * Licensed under the Apache License, Version 2.0 (the "License");
8 *
9 * you may not use this file except in compliance with the License.
10 * You may obtain a copy of the License at
11 *
12 * http://www.apache.org/licenses/LICENSE-2.0
13 *
14 * Unless required by applicable law or agreed to in writing, software
15 * distributed under the License is distributed on an "AS IS" BASIS,
16 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 * See the License for the specific language governing permissions and
18 * limitations under the License.
19 */
20
21 /*
22 * A 64-bit implementation of the NIST P-224 elliptic curve point multiplication
23 *
24 * Inspired by Daniel J. Bernstein's public domain nistp224 implementation
25 * and Adam Langley's public domain 64-bit C implementation of curve25519
26 */
27
#include <openssl/opensslconf.h>
#ifndef OPENSSL_NO_EC_NISTP_64_GCC_128

# include <stdint.h>
# include <string.h>
# include <openssl/err.h>
# include "ec_lcl.h"

# if defined(__GNUC__) && (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 1))
  /* even with gcc, the typedef won't work for 32-bit platforms */
typedef __uint128_t uint128_t;  /* nonstandard; implemented by gcc on 64-bit
                                 * platforms */
# else
#  error "Need GCC 3.1 or later to define type uint128_t"
# endif

/* Shorthand fixed-width integer aliases used throughout this file. */
typedef uint8_t u8;
typedef uint64_t u64;
typedef int64_t s64;

/******************************************************************************/
/*-
 * INTERNAL REPRESENTATION OF FIELD ELEMENTS
 *
 * Field elements are represented as a_0 + 2^56*a_1 + 2^112*a_2 + 2^168*a_3
 * using 64-bit coefficients called 'limbs',
 * and sometimes (for multiplication results) as
 * b_0 + 2^56*b_1 + 2^112*b_2 + 2^168*b_3 + 2^224*b_4 + 2^280*b_5 + 2^336*b_6
 * using 128-bit coefficients called 'widelimbs'.
 * A 4-limb representation is an 'felem';
 * a 7-widelimb representation is a 'widefelem'.
 * Even within felems, bits of adjacent limbs overlap, and we don't always
 * reduce the representations: we ensure that inputs to each felem
 * multiplication satisfy a_i < 2^60, so outputs satisfy b_i < 4*2^60*2^60,
 * and fit into a 128-bit word without overflow. The coefficients are then
 * again partially reduced to obtain an felem satisfying a_i < 2^57.
 * We only reduce to the unique minimal representation at the end of the
 * computation.
 */

typedef uint64_t limb;
typedef uint128_t widelimb;

typedef limb felem[4];
typedef widelimb widefelem[7];

/*
 * Field element represented as a byte array. 28*8 = 224 bits is also the
 * group order size for the elliptic curve, and we also use this type for
 * scalars for point multiplication.
 */
typedef u8 felem_bytearray[28];
80
/*
 * NIST P-224 domain parameters p, a, b and the generator (x, y), stored in
 * big-endian byte order (most-significant byte first, the order BN_bn2bin
 * produces): note p = 2^224 - 2^96 + 1 ends in ...00 01.
 */
static const felem_bytearray nistp224_curve_params[5] = {
    {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, /* p */
     0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00,
     0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01},
    {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, /* a */
     0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFE, 0xFF, 0xFF, 0xFF, 0xFF,
     0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFE},
    {0xB4, 0x05, 0x0A, 0x85, 0x0C, 0x04, 0xB3, 0xAB, 0xF5, 0x41, /* b */
     0x32, 0x56, 0x50, 0x44, 0xB0, 0xB7, 0xD7, 0xBF, 0xD8, 0xBA,
     0x27, 0x0B, 0x39, 0x43, 0x23, 0x55, 0xFF, 0xB4},
    {0xB7, 0x0E, 0x0C, 0xBD, 0x6B, 0xB4, 0xBF, 0x7F, 0x32, 0x13, /* x */
     0x90, 0xB9, 0x4A, 0x03, 0xC1, 0xD3, 0x56, 0xC2, 0x11, 0x22,
     0x34, 0x32, 0x80, 0xD6, 0x11, 0x5C, 0x1D, 0x21},
    {0xbd, 0x37, 0x63, 0x88, 0xb5, 0xf7, 0x23, 0xfb, 0x4c, 0x22, /* y */
     0xdf, 0xe6, 0xcd, 0x43, 0x75, 0xa0, 0x5a, 0x07, 0x47, 0x64,
     0x44, 0xd5, 0x81, 0x99, 0x85, 0x00, 0x7e, 0x34}
};
98
99 /*-
100 * Precomputed multiples of the standard generator
101 * Points are given in coordinates (X, Y, Z) where Z normally is 1
102 * (0 for the point at infinity).
103 * For each field element, slice a_0 is word 0, etc.
104 *
105 * The table has 2 * 16 elements, starting with the following:
106 * index | bits | point
107 * ------+---------+------------------------------
108 * 0 | 0 0 0 0 | 0G
109 * 1 | 0 0 0 1 | 1G
110 * 2 | 0 0 1 0 | 2^56G
111 * 3 | 0 0 1 1 | (2^56 + 1)G
112 * 4 | 0 1 0 0 | 2^112G
113 * 5 | 0 1 0 1 | (2^112 + 1)G
114 * 6 | 0 1 1 0 | (2^112 + 2^56)G
115 * 7 | 0 1 1 1 | (2^112 + 2^56 + 1)G
116 * 8 | 1 0 0 0 | 2^168G
117 * 9 | 1 0 0 1 | (2^168 + 1)G
118 * 10 | 1 0 1 0 | (2^168 + 2^56)G
119 * 11 | 1 0 1 1 | (2^168 + 2^56 + 1)G
120 * 12 | 1 1 0 0 | (2^168 + 2^112)G
121 * 13 | 1 1 0 1 | (2^168 + 2^112 + 1)G
122 * 14 | 1 1 1 0 | (2^168 + 2^112 + 2^56)G
123 * 15 | 1 1 1 1 | (2^168 + 2^112 + 2^56 + 1)G
124 * followed by a copy of this with each element multiplied by 2^28.
125 *
126 * The reason for this is so that we can clock bits into four different
127 * locations when doing simple scalar multiplies against the base point,
128 * and then another four locations using the second 16 elements.
129 */
/*
 * gmul[0][i] holds the combination multiples of G indexed by the bit
 * pattern i (see the table above); gmul[1][i] is the same set of points
 * multiplied by 2^28.  Each point is (X, Y, Z) felems with Z = 1 for every
 * entry except index 0, which is the point at infinity (Z = 0).
 */
static const felem gmul[2][16][3] = { {{{0, 0, 0, 0},
                                        {0, 0, 0, 0},
                                        {0, 0, 0, 0}},
                                       {{0x3280d6115c1d21, 0xc1d356c2112234,
                                         0x7f321390b94a03, 0xb70e0cbd6bb4bf},
                                        {0xd5819985007e34, 0x75a05a07476444,
                                         0xfb4c22dfe6cd43, 0xbd376388b5f723},
                                        {1, 0, 0, 0}},
                                       {{0xfd9675666ebbe9, 0xbca7664d40ce5e,
                                         0x2242df8d8a2a43, 0x1f49bbb0f99bc5},
                                        {0x29e0b892dc9c43, 0xece8608436e662,
                                         0xdc858f185310d0, 0x9812dd4eb8d321},
                                        {1, 0, 0, 0}},
                                       {{0x6d3e678d5d8eb8, 0x559eed1cb362f1,
                                         0x16e9a3bbce8a3f, 0xeedcccd8c2a748},
                                        {0xf19f90ed50266d, 0xabf2b4bf65f9df,
                                         0x313865468fafec, 0x5cb379ba910a17},
                                        {1, 0, 0, 0}},
                                       {{0x0641966cab26e3, 0x91fb2991fab0a0,
                                         0xefec27a4e13a0b, 0x0499aa8a5f8ebe},
                                        {0x7510407766af5d, 0x84d929610d5450,
                                         0x81d77aae82f706, 0x6916f6d4338c5b},
                                        {1, 0, 0, 0}},
                                       {{0xea95ac3b1f15c6, 0x086000905e82d4,
                                         0xdd323ae4d1c8b1, 0x932b56be7685a3},
                                        {0x9ef93dea25dbbf, 0x41665960f390f0,
                                         0xfdec76dbe2a8a7, 0x523e80f019062a},
                                        {1, 0, 0, 0}},
                                       {{0x822fdd26732c73, 0xa01c83531b5d0f,
                                         0x363f37347c1ba4, 0xc391b45c84725c},
                                        {0xbbd5e1b2d6ad24, 0xddfbcde19dfaec,
                                         0xc393da7e222a7f, 0x1efb7890ede244},
                                        {1, 0, 0, 0}},
                                       {{0x4c9e90ca217da1, 0xd11beca79159bb,
                                         0xff8d33c2c98b7c, 0x2610b39409f849},
                                        {0x44d1352ac64da0, 0xcdbb7b2c46b4fb,
                                         0x966c079b753c89, 0xfe67e4e820b112},
                                        {1, 0, 0, 0}},
                                       {{0xe28cae2df5312d, 0xc71b61d16f5c6e,
                                         0x79b7619a3e7c4c, 0x05c73240899b47},
                                        {0x9f7f6382c73e3a, 0x18615165c56bda,
                                         0x641fab2116fd56, 0x72855882b08394},
                                        {1, 0, 0, 0}},
                                       {{0x0469182f161c09, 0x74a98ca8d00fb5,
                                         0xb89da93489a3e0, 0x41c98768fb0c1d},
                                        {0xe5ea05fb32da81, 0x3dce9ffbca6855,
                                         0x1cfe2d3fbf59e6, 0x0e5e03408738a7},
                                        {1, 0, 0, 0}},
                                       {{0xdab22b2333e87f, 0x4430137a5dd2f6,
                                         0xe03ab9f738beb8, 0xcb0c5d0dc34f24},
                                        {0x764a7df0c8fda5, 0x185ba5c3fa2044,
                                         0x9281d688bcbe50, 0xc40331df893881},
                                        {1, 0, 0, 0}},
                                       {{0xb89530796f0f60, 0xade92bd26909a3,
                                         0x1a0c83fb4884da, 0x1765bf22a5a984},
                                        {0x772a9ee75db09e, 0x23bc6c67cec16f,
                                         0x4c1edba8b14e2f, 0xe2a215d9611369},
                                        {1, 0, 0, 0}},
                                       {{0x571e509fb5efb3, 0xade88696410552,
                                         0xc8ae85fada74fe, 0x6c7e4be83bbde3},
                                        {0xff9f51160f4652, 0xb47ce2495a6539,
                                         0xa2946c53b582f4, 0x286d2db3ee9a60},
                                        {1, 0, 0, 0}},
                                       {{0x40bbd5081a44af, 0x0995183b13926c,
                                         0xbcefba6f47f6d0, 0x215619e9cc0057},
                                        {0x8bc94d3b0df45e, 0xf11c54a3694f6f,
                                         0x8631b93cdfe8b5, 0xe7e3f4b0982db9},
                                        {1, 0, 0, 0}},
                                       {{0xb17048ab3e1c7b, 0xac38f36ff8a1d8,
                                         0x1c29819435d2c6, 0xc813132f4c07e9},
                                        {0x2891425503b11f, 0x08781030579fea,
                                         0xf5426ba5cc9674, 0x1e28ebf18562bc},
                                        {1, 0, 0, 0}},
                                       {{0x9f31997cc864eb, 0x06cd91d28b5e4c,
                                         0xff17036691a973, 0xf1aef351497c58},
                                        {0xdd1f2d600564ff, 0xdead073b1402db,
                                         0x74a684435bd693, 0xeea7471f962558},
                                        {1, 0, 0, 0}}},
    {{{0, 0, 0, 0},
      {0, 0, 0, 0},
      {0, 0, 0, 0}},
     {{0x9665266dddf554, 0x9613d78b60ef2d, 0xce27a34cdba417, 0xd35ab74d6afc31},
      {0x85ccdd22deb15e, 0x2137e5783a6aab, 0xa141cffd8c93c6, 0x355a1830e90f2d},
      {1, 0, 0, 0}},
     {{0x1a494eadaade65, 0xd6da4da77fe53c, 0xe7992996abec86, 0x65c3553c6090e3},
      {0xfa610b1fb09346, 0xf1c6540b8a4aaf, 0xc51a13ccd3cbab, 0x02995b1b18c28a},
      {1, 0, 0, 0}},
     {{0x7874568e7295ef, 0x86b419fbe38d04, 0xdc0690a7550d9a, 0xd3966a44beac33},
      {0x2b7280ec29132f, 0xbeaa3b6a032df3, 0xdc7dd88ae41200, 0xd25e2513e3a100},
      {1, 0, 0, 0}},
     {{0x924857eb2efafd, 0xac2bce41223190, 0x8edaa1445553fc, 0x825800fd3562d5},
      {0x8d79148ea96621, 0x23a01c3dd9ed8d, 0xaf8b219f9416b5, 0xd8db0cc277daea},
      {1, 0, 0, 0}},
     {{0x76a9c3b1a700f0, 0xe9acd29bc7e691, 0x69212d1a6b0327, 0x6322e97fe154be},
      {0x469fc5465d62aa, 0x8d41ed18883b05, 0x1f8eae66c52b88, 0xe4fcbe9325be51},
      {1, 0, 0, 0}},
     {{0x825fdf583cac16, 0x020b857c7b023a, 0x683c17744b0165, 0x14ffd0a2daf2f1},
      {0x323b36184218f9, 0x4944ec4e3b47d4, 0xc15b3080841acf, 0x0bced4b01a28bb},
      {1, 0, 0, 0}},
     {{0x92ac22230df5c4, 0x52f33b4063eda8, 0xcb3f19870c0c93, 0x40064f2ba65233},
      {0xfe16f0924f8992, 0x012da25af5b517, 0x1a57bb24f723a6, 0x06f8bc76760def},
      {1, 0, 0, 0}},
     {{0x4a7084f7817cb9, 0xbcab0738ee9a78, 0x3ec11e11d9c326, 0xdc0fe90e0f1aae},
      {0xcf639ea5f98390, 0x5c350aa22ffb74, 0x9afae98a4047b7, 0x956ec2d617fc45},
      {1, 0, 0, 0}},
     {{0x4306d648c1be6a, 0x9247cd8bc9a462, 0xf5595e377d2f2e, 0xbd1c3caff1a52e},
      {0x045e14472409d0, 0x29f3e17078f773, 0x745a602b2d4f7d, 0x191837685cdfbb},
      {1, 0, 0, 0}},
     {{0x5b6ee254a8cb79, 0x4953433f5e7026, 0xe21faeb1d1def4, 0xc4c225785c09de},
      {0x307ce7bba1e518, 0x31b125b1036db8, 0x47e91868839e8f, 0xc765866e33b9f3},
      {1, 0, 0, 0}},
     {{0x3bfece24f96906, 0x4794da641e5093, 0xde5df64f95db26, 0x297ecd89714b05},
      {0x701bd3ebb2c3aa, 0x7073b4f53cb1d5, 0x13c5665658af16, 0x9895089d66fe58},
      {1, 0, 0, 0}},
     {{0x0fef05f78c4790, 0x2d773633b05d2e, 0x94229c3a951c94, 0xbbbd70df4911bb},
      {0xb2c6963d2c1168, 0x105f47a72b0d73, 0x9fdf6111614080, 0x7b7e94b39e67b0},
      {1, 0, 0, 0}},
     {{0xad1a7d6efbe2b3, 0xf012482c0da69d, 0x6b3bdf12438345, 0x40d7558d7aa4d9},
      {0x8a09fffb5c6d3d, 0x9a356e5d9ffd38, 0x5973f15f4f9b1c, 0xdcd5f59f63c3ea},
      {1, 0, 0, 0}},
     {{0xacf39f4c5ca7ab, 0x4c8071cc5fd737, 0xc64e3602cd1184, 0x0acd4644c9abba},
      {0x6c011a36d8bf6e, 0xfecd87ba24e32a, 0x19f6f56574fad8, 0x050b204ced9405},
      {1, 0, 0, 0}},
     {{0xed4f1cae7d9a96, 0x5ceef7ad94c40a, 0x778e4a3bf3ef9b, 0x7405783dc3b55e},
      {0x32477c61b6e8c6, 0xb46a97570f018b, 0x91176d0a7e95d1, 0x3df90fbc4c7d0e},
      {1, 0, 0, 0}}}
};
257
/* Precomputation for the group generator. */
typedef struct {
    /* Precomputed generator multiples, same layout as the gmul table */
    felem g_pre_comp[2][16][3];
    int references;             /* reference count */
} NISTP224_PRE_COMP;
263
const EC_METHOD *EC_GFp_nistp224_method(void)
{
    /*
     * Method table mixing nistp224-specific routines (set_curve,
     * get_affine_coordinates, points_mul and the precomputation hooks)
     * with generic ec_GFp_simple/ec_GFp_nist fallbacks.  NB: the
     * initializers are positional, so their order must match the
     * EC_METHOD structure declaration.
     */
    static const EC_METHOD ret = {
        EC_FLAGS_DEFAULT_OCT,
        NID_X9_62_prime_field,
        ec_GFp_nistp224_group_init,
        ec_GFp_simple_group_finish,
        ec_GFp_simple_group_clear_finish,
        ec_GFp_nist_group_copy,
        ec_GFp_nistp224_group_set_curve,
        ec_GFp_simple_group_get_curve,
        ec_GFp_simple_group_get_degree,
        ec_GFp_simple_group_check_discriminant,
        ec_GFp_simple_point_init,
        ec_GFp_simple_point_finish,
        ec_GFp_simple_point_clear_finish,
        ec_GFp_simple_point_copy,
        ec_GFp_simple_point_set_to_infinity,
        ec_GFp_simple_set_Jprojective_coordinates_GFp,
        ec_GFp_simple_get_Jprojective_coordinates_GFp,
        ec_GFp_simple_point_set_affine_coordinates,
        ec_GFp_nistp224_point_get_affine_coordinates,
        0 /* point_set_compressed_coordinates */ ,
        0 /* point2oct */ ,
        0 /* oct2point */ ,
        ec_GFp_simple_add,
        ec_GFp_simple_dbl,
        ec_GFp_simple_invert,
        ec_GFp_simple_is_at_infinity,
        ec_GFp_simple_is_on_curve,
        ec_GFp_simple_cmp,
        ec_GFp_simple_make_affine,
        ec_GFp_simple_points_make_affine,
        ec_GFp_nistp224_points_mul,
        ec_GFp_nistp224_precompute_mult,
        ec_GFp_nistp224_have_precompute_mult,
        ec_GFp_nist_field_mul,
        ec_GFp_nist_field_sqr,
        0 /* field_div */ ,
        0 /* field_encode */ ,
        0 /* field_decode */ ,
        0 /* field_set_to_one */
    };

    return &ret;
}
310
311 /*
312 * Helper functions to convert field elements to/from internal representation
313 */
/* Load an unaligned 64-bit little-endian word without undefined behavior. */
static uint64_t load_u64(const u8 *in)
{
    uint64_t v;

    memcpy(&v, in, sizeof(v));  /* avoids unaligned/strict-aliasing UB */
    return v;
}

/*
 * Unpack 28 little-endian bytes into four 56-bit limbs.
 *
 * The previous code read *(const uint64_t *)(in + 21), which touches byte
 * in[28] — one byte past the end of the array — and performed unaligned,
 * type-punned loads (undefined behavior under C strict aliasing).  The
 * last limb is now loaded from in + 20 and shifted right by 8, which
 * yields the same seven bytes while staying inside the buffer.
 * NOTE(review): like the original, this assumes a little-endian host —
 * confirm if this code is ever enabled on big-endian builds.
 */
static void bin28_to_felem(felem out, const u8 in[28])
{
    out[0] = load_u64(in) & 0x00ffffffffffffff;
    out[1] = load_u64(in + 7) & 0x00ffffffffffffff;
    out[2] = load_u64(in + 14) & 0x00ffffffffffffff;
    out[3] = load_u64(in + 20) >> 8;
}
321
/* Serialize four 56-bit limbs as 28 little-endian bytes. */
static void felem_to_bin28(u8 out[28], const felem in)
{
    unsigned limb_idx, byte_idx;

    for (limb_idx = 0; limb_idx < 4; ++limb_idx)
        for (byte_idx = 0; byte_idx < 7; ++byte_idx)
            out[7 * limb_idx + byte_idx] =
                (u8)(in[limb_idx] >> (8 * byte_idx));
}
332
/* To preserve endianness when using BN_bn2bin and BN_bin2bn */
static void flip_endian(u8 *out, const u8 *in, unsigned len)
{
    const u8 *src = in + len;
    unsigned remaining;

    /* Walk the source backwards while filling the destination forwards. */
    for (remaining = 0; remaining < len; ++remaining)
        out[remaining] = *--src;
}
340
341 /* From OpenSSL BIGNUM to internal representation */
342 static int BN_to_felem(felem out, const BIGNUM *bn)
343 {
344 felem_bytearray b_in;
345 felem_bytearray b_out;
346 unsigned num_bytes;
347
348 /* BN_bn2bin eats leading zeroes */
349 memset(b_out, 0, sizeof b_out);
350 num_bytes = BN_num_bytes(bn);
351 if (num_bytes > sizeof b_out) {
352 ECerr(EC_F_BN_TO_FELEM, EC_R_BIGNUM_OUT_OF_RANGE);
353 return 0;
354 }
355 if (BN_is_negative(bn)) {
356 ECerr(EC_F_BN_TO_FELEM, EC_R_BIGNUM_OUT_OF_RANGE);
357 return 0;
358 }
359 num_bytes = BN_bn2bin(bn, b_in);
360 flip_endian(b_out, b_in, num_bytes);
361 bin28_to_felem(out, b_out);
362 return 1;
363 }
364
365 /* From internal representation to OpenSSL BIGNUM */
366 static BIGNUM *felem_to_BN(BIGNUM *out, const felem in)
367 {
368 felem_bytearray b_in, b_out;
369 felem_to_bin28(b_in, in);
370 flip_endian(b_out, b_in, sizeof b_out);
371 return BN_bin2bn(b_out, sizeof b_out, out);
372 }
373
374 /******************************************************************************/
375 /*-
376 * FIELD OPERATIONS
377 *
378 * Field operations, using the internal representation of field elements.
379 * NB! These operations are specific to our point multiplication and cannot be
380 * expected to be correct in general - e.g., multiplication with a large scalar
381 * will cause an overflow.
382 *
383 */
384
/* Set a field element to the multiplicative identity. */
static void felem_one(felem out)
{
    out[0] = 1;
    out[1] = out[2] = out[3] = 0;
}
392
/* Limb-wise copy: out = in. */
static void felem_assign(felem out, const felem in)
{
    unsigned i;

    for (i = 0; i < 4; ++i)
        out[i] = in[i];
}
400
/* Sum two field elements: out += in */
static void felem_sum(felem out, const felem in)
{
    unsigned i;

    for (i = 0; i < 4; ++i)
        out[i] += in[i];
}
409
/* Get negative value: out = -in */
/* Assumes in[i] < 2^57 */
static void felem_neg(felem out, const felem in)
{
    static const limb two58p2 = (((limb) 1) << 58) + (((limb) 1) << 2);
    static const limb two58m2 = (((limb) 1) << 58) - (((limb) 1) << 2);
    static const limb two58m42m2 = (((limb) 1) << 58) -
        (((limb) 1) << 42) - (((limb) 1) << 2);
    /* A per-limb representation of 0 mod 2^224-2^96+1, large enough that
     * every difference below stays non-negative. */
    const limb zero_mod_p[4] = { two58p2, two58m42m2, two58m2, two58m2 };
    unsigned i;

    for (i = 0; i < 4; ++i)
        out[i] = zero_mod_p[i] - in[i];
}
425
/* Subtract field elements: out -= in */
/* Assumes in[i] < 2^57 */
static void felem_diff(felem out, const felem in)
{
    static const limb two58p2 = (((limb) 1) << 58) + (((limb) 1) << 2);
    static const limb two58m2 = (((limb) 1) << 58) - (((limb) 1) << 2);
    static const limb two58m42m2 = (((limb) 1) << 58) -
        (((limb) 1) << 42) - (((limb) 1) << 2);
    /* A per-limb representation of 0 mod 2^224-2^96+1; adding it keeps
     * every limb of the difference non-negative. */
    const limb zero_mod_p[4] = { two58p2, two58m42m2, two58m2, two58m2 };
    unsigned i;

    for (i = 0; i < 4; ++i)
        out[i] += zero_mod_p[i] - in[i];
}
446
/* Subtract in unreduced 128-bit mode: out -= in */
/* Assumes in[i] < 2^119 */
static void widefelem_diff(widefelem out, const widefelem in)
{
    static const widelimb two120 = ((widelimb) 1) << 120;
    static const widelimb two120m64 = (((widelimb) 1) << 120) -
        (((widelimb) 1) << 64);
    static const widelimb two120m104m64 = (((widelimb) 1) << 120) -
        (((widelimb) 1) << 104) - (((widelimb) 1) << 64);
    /* A per-widelimb representation of 0 mod 2^224-2^96+1; adding it keeps
     * every limb of the difference non-negative. */
    const widelimb zero_mod_p[7] = {
        two120, two120m64, two120m64, two120,
        two120m104m64, two120m64, two120m64
    };
    unsigned i;

    for (i = 0; i < 7; ++i)
        out[i] += zero_mod_p[i] - in[i];
}
474
/* Subtract in mixed mode: out128 -= in64 */
/* in[i] < 2^63 */
static void felem_diff_128_64(widefelem out, const felem in)
{
    static const widelimb two64p8 = (((widelimb) 1) << 64) +
        (((widelimb) 1) << 8);
    static const widelimb two64m8 = (((widelimb) 1) << 64) -
        (((widelimb) 1) << 8);
    static const widelimb two64m48m8 = (((widelimb) 1) << 64) -
        (((widelimb) 1) << 48) - (((widelimb) 1) << 8);
    /* A per-limb representation of 0 mod 2^224-2^96+1; adding it keeps
     * every limb of the difference non-negative. */
    const widelimb zero_mod_p[4] = { two64p8, two64m48m8, two64m8, two64m8 };
    unsigned i;

    for (i = 0; i < 4; ++i)
        out[i] += zero_mod_p[i] - in[i];
}
497
/*
 * Multiply a field element by a small scalar: out *= scalar.  The scalars
 * actually used by callers are tiny, so the products cannot overflow.
 */
static void felem_scalar(felem out, const limb scalar)
{
    unsigned i;

    for (i = 0; i < 4; ++i)
        out[i] *= scalar;
}
509
/*
 * Multiply an unreduced field element by a small scalar: out *= scalar.
 * The scalars actually used by callers are tiny, so the products cannot
 * overflow.
 */
static void widefelem_scalar(widefelem out, const widelimb scalar)
{
    unsigned i;

    for (i = 0; i < 7; ++i)
        out[i] *= scalar;
}
524
/* Square a field element: out = in^2 */
static void felem_square(widefelem out, const felem in)
{
    const widelimb a = in[0], b = in[1], c = in[2], d = in[3];

    /* Schoolbook squaring with the symmetric cross terms doubled. */
    out[0] = a * a;
    out[1] = 2 * a * b;
    out[2] = 2 * a * c + b * b;
    out[3] = 2 * (a * d + b * c);
    out[4] = 2 * b * d + c * c;
    out[5] = 2 * c * d;
    out[6] = d * d;
}
540
/* Multiply two field elements: out = in1 * in2 */
static void felem_mul(widefelem out, const felem in1, const felem in2)
{
    const widelimb a0 = in1[0], a1 = in1[1], a2 = in1[2], a3 = in1[3];
    const widelimb b0 = in2[0], b1 = in2[1], b2 = in2[2], b3 = in2[3];

    /* Schoolbook multiplication, products gathered by output weight. */
    out[0] = a0 * b0;
    out[1] = a0 * b1 + a1 * b0;
    out[2] = a0 * b2 + a1 * b1 + a2 * b0;
    out[3] = a0 * b3 + a1 * b2 + a2 * b1 + a3 * b0;
    out[4] = a1 * b3 + a2 * b2 + a3 * b1;
    out[5] = a2 * b3 + a3 * b2;
    out[6] = a3 * b3;
}
555
/*-
 * Reduce seven 128-bit coefficients to four 64-bit coefficients.
 * Requires in[i] < 2^126,
 * ensures out[0] < 2^56, out[1] < 2^56, out[2] < 2^56, out[3] <= 2^56 + 2^16 */
static void felem_reduce(felem out, const widefelem in)
{
    static const widelimb two127p15 = (((widelimb) 1) << 127) +
        (((widelimb) 1) << 15);
    static const widelimb two127m71 = (((widelimb) 1) << 127) -
        (((widelimb) 1) << 71);
    static const widelimb two127m71m55 = (((widelimb) 1) << 127) -
        (((widelimb) 1) << 71) - (((widelimb) 1) << 55);
    widelimb output[5];

    /* Add 0 mod 2^224-2^96+1 to ensure all differences are positive */
    output[0] = in[0] + two127p15;
    output[1] = in[1] + two127m71m55;
    output[2] = in[2] + two127m71;
    output[3] = in[3];
    output[4] = in[4];

    /*
     * Eliminate in[4], in[5], in[6]: a coefficient of weight 2^224 * 2^m
     * is folded down using 2^224 = 2^96 - 1 (mod p), turning it into
     * +2^(96+m) and -2^m; the >> 16 / << 40 splits route those two parts
     * to the right 56-bit limbs.
     */
    output[4] += in[6] >> 16;
    output[3] += (in[6] & 0xffff) << 40;
    output[2] -= in[6];

    output[3] += in[5] >> 16;
    output[2] += (in[5] & 0xffff) << 40;
    output[1] -= in[5];

    output[2] += output[4] >> 16;
    output[1] += (output[4] & 0xffff) << 40;
    output[0] -= output[4];

    /* Carry 2 -> 3 -> 4 */
    output[3] += output[2] >> 56;
    output[2] &= 0x00ffffffffffffff;

    output[4] = output[3] >> 56;
    output[3] &= 0x00ffffffffffffff;

    /* Now output[2] < 2^56, output[3] < 2^56, output[4] < 2^72 */

    /* Eliminate output[4] (same folding as above) */
    output[2] += output[4] >> 16;
    /* output[2] < 2^56 + 2^56 = 2^57 */
    output[1] += (output[4] & 0xffff) << 40;
    output[0] -= output[4];

    /* Carry 0 -> 1 -> 2 -> 3 */
    output[1] += output[0] >> 56;
    out[0] = output[0] & 0x00ffffffffffffff;

    output[2] += output[1] >> 56;
    /* output[2] < 2^57 + 2^72 */
    out[1] = output[1] & 0x00ffffffffffffff;
    output[3] += output[2] >> 56;
    /* output[3] <= 2^56 + 2^16 */
    out[2] = output[2] & 0x00ffffffffffffff;

    /*-
     * out[0] < 2^56, out[1] < 2^56, out[2] < 2^56,
     * out[3] <= 2^56 + 2^16 (due to final carry),
     * so out < 2*p
     */
    out[3] = output[3];
}
623
/* Square and then partially reduce the result below 2*p. */
static void felem_square_reduce(felem out, const felem in)
{
    widefelem wide;

    felem_square(wide, in);
    felem_reduce(out, wide);
}
630
/* Multiply and then partially reduce the result below 2*p. */
static void felem_mul_reduce(felem out, const felem in1, const felem in2)
{
    widefelem wide;

    felem_mul(wide, in1, in2);
    felem_reduce(out, wide);
}
637
/*
 * Reduce to unique minimal representation. Requires 0 <= in < 2*p (always
 * call felem_reduce first)
 */
static void felem_contract(felem out, const felem in)
{
    static const int64_t two56 = ((limb) 1) << 56;
    /* 0 <= in < 2*p, p = 2^224 - 2^96 + 1 */
    /* if in > p , reduce in = in - 2^224 + 2^96 - 1 */
    int64_t tmp[4], a;
    tmp[0] = in[0];
    tmp[1] = in[1];
    tmp[2] = in[2];
    tmp[3] = in[3];
    /* Case 1: a = 1 iff in >= 2^224 */
    a = (in[3] >> 56);
    /* subtract 2^224 - 2^96 + 1: -1 at limb 0, +2^96 = +2^40 at limb 1 */
    tmp[0] -= a;
    tmp[1] += a << 40;
    tmp[3] &= 0x00ffffffffffffff;
    /*
     * Case 2: a = 0 iff p <= in < 2^224, i.e., the high 128 bits are all 1
     * and the lower part is non-zero
     */
    /*
     * The or-mask 0x000000ffffffffff covers the low 40 bits of limb 1,
     * which hold bits of in below 2^96 and hence do not take part in the
     * all-ones test against p's high part; the second term becomes
     * all-ones exactly when the lower part (in[0] plus those 40 bits)
     * is zero, forcing a != 0 in that case.
     */
    a = ((in[3] & in[2] & (in[1] | 0x000000ffffffffff)) + 1) |
        (((int64_t) (in[0] + (in[1] & 0x000000ffffffffff)) - 1) >> 63);
    a &= 0x00ffffffffffffff;
    /* turn a into an all-one mask (if a = 0) or an all-zero mask */
    a = (a - 1) >> 63;
    /* subtract 2^224 - 2^96 + 1 if a is all-one */
    tmp[3] &= a ^ 0xffffffffffffffff;
    tmp[2] &= a ^ 0xffffffffffffffff;
    tmp[1] &= (a ^ 0xffffffffffffffff) | 0x000000ffffffffff;
    tmp[0] -= 1 & a;

    /*
     * eliminate negative coefficients: if tmp[0] is negative, tmp[1] must be
     * non-zero, so we only need one step
     */
    a = tmp[0] >> 63;
    tmp[0] += two56 & a;
    tmp[1] -= 1 & a;

    /* carry 1 -> 2 -> 3 */
    tmp[2] += tmp[1] >> 56;
    tmp[1] &= 0x00ffffffffffffff;

    tmp[3] += tmp[2] >> 56;
    tmp[2] &= 0x00ffffffffffffff;

    /* Now 0 <= out < p */
    out[0] = tmp[0];
    out[1] = tmp[1];
    out[2] = tmp[2];
    out[3] = tmp[3];
}
693
/*
 * Zero-check: returns 1 if input is 0, and 0 otherwise. We know that field
 * elements are reduced to in < 2^225, so we only need to check three cases:
 * 0, 2^224 - 2^96 + 1, and 2^225 - 2^97 + 2
 */
static limb felem_is_zero(const felem in)
{
    limb zero, two224m96p1, two225m97p2;

    /* zero becomes 1 iff every limb is 0 (borrow trick: x-1 < 0 iff x==0) */
    zero = in[0] | in[1] | in[2] | in[3];
    zero = (((int64_t) (zero) - 1) >> 63) & 1;
    /* in == p = 2^224 - 2^96 + 1, expressed in 56-bit limbs */
    two224m96p1 = (in[0] ^ 1) | (in[1] ^ 0x00ffff0000000000)
        | (in[2] ^ 0x00ffffffffffffff) | (in[3] ^ 0x00ffffffffffffff);
    two224m96p1 = (((int64_t) (two224m96p1) - 1) >> 63) & 1;
    /* in == 2*p = 2^225 - 2^97 + 2, the remaining representation < 2^225 */
    two225m97p2 = (in[0] ^ 2) | (in[1] ^ 0x00fffe0000000000)
        | (in[2] ^ 0x00ffffffffffffff) | (in[3] ^ 0x01ffffffffffffff);
    two225m97p2 = (((int64_t) (two225m97p2) - 1) >> 63) & 1;
    return (zero | two224m96p1 | two225m97p2);
}
713
/* Narrow the all-or-nothing zero check down to a plain 0/1 value. */
static limb felem_is_zero_int(const felem in)
{
    limb mask = felem_is_zero(in);

    return (int)(mask & ((limb) 1));
}
718
/* Invert a field element */
/* Computation chain copied from djb's code */
static void felem_inv(felem out, const felem in)
{
    /*
     * Computes in^(p-2) = in^(2^224 - 2^96 - 1) mod p via Fermat's little
     * theorem, using a fixed addition chain; the trailing comments track
     * the exponent accumulated so far.
     */
    felem ftmp, ftmp2, ftmp3, ftmp4;
    widefelem tmp;
    unsigned i;

    felem_square(tmp, in);
    felem_reduce(ftmp, tmp);    /* 2 */
    felem_mul(tmp, in, ftmp);
    felem_reduce(ftmp, tmp);    /* 2^2 - 1 */
    felem_square(tmp, ftmp);
    felem_reduce(ftmp, tmp);    /* 2^3 - 2 */
    felem_mul(tmp, in, ftmp);
    felem_reduce(ftmp, tmp);    /* 2^3 - 1 */
    felem_square(tmp, ftmp);
    felem_reduce(ftmp2, tmp);   /* 2^4 - 2 */
    felem_square(tmp, ftmp2);
    felem_reduce(ftmp2, tmp);   /* 2^5 - 4 */
    felem_square(tmp, ftmp2);
    felem_reduce(ftmp2, tmp);   /* 2^6 - 8 */
    felem_mul(tmp, ftmp2, ftmp);
    felem_reduce(ftmp, tmp);    /* 2^6 - 1 */
    felem_square(tmp, ftmp);
    felem_reduce(ftmp2, tmp);   /* 2^7 - 2 */
    for (i = 0; i < 5; ++i) {   /* 2^12 - 2^6 */
        felem_square(tmp, ftmp2);
        felem_reduce(ftmp2, tmp);
    }
    felem_mul(tmp, ftmp2, ftmp);
    felem_reduce(ftmp2, tmp);   /* 2^12 - 1 */
    felem_square(tmp, ftmp2);
    felem_reduce(ftmp3, tmp);   /* 2^13 - 2 */
    for (i = 0; i < 11; ++i) {  /* 2^24 - 2^12 */
        felem_square(tmp, ftmp3);
        felem_reduce(ftmp3, tmp);
    }
    felem_mul(tmp, ftmp3, ftmp2);
    felem_reduce(ftmp2, tmp);   /* 2^24 - 1 */
    felem_square(tmp, ftmp2);
    felem_reduce(ftmp3, tmp);   /* 2^25 - 2 */
    for (i = 0; i < 23; ++i) {  /* 2^48 - 2^24 */
        felem_square(tmp, ftmp3);
        felem_reduce(ftmp3, tmp);
    }
    felem_mul(tmp, ftmp3, ftmp2);
    felem_reduce(ftmp3, tmp);   /* 2^48 - 1 */
    felem_square(tmp, ftmp3);
    felem_reduce(ftmp4, tmp);   /* 2^49 - 2 */
    for (i = 0; i < 47; ++i) {  /* 2^96 - 2^48 */
        felem_square(tmp, ftmp4);
        felem_reduce(ftmp4, tmp);
    }
    felem_mul(tmp, ftmp3, ftmp4);
    felem_reduce(ftmp3, tmp);   /* 2^96 - 1 */
    felem_square(tmp, ftmp3);
    felem_reduce(ftmp4, tmp);   /* 2^97 - 2 */
    for (i = 0; i < 23; ++i) {  /* 2^120 - 2^24 */
        felem_square(tmp, ftmp4);
        felem_reduce(ftmp4, tmp);
    }
    felem_mul(tmp, ftmp2, ftmp4);
    felem_reduce(ftmp2, tmp);   /* 2^120 - 1 */
    for (i = 0; i < 6; ++i) {   /* 2^126 - 2^6 */
        felem_square(tmp, ftmp2);
        felem_reduce(ftmp2, tmp);
    }
    felem_mul(tmp, ftmp2, ftmp);
    felem_reduce(ftmp, tmp);    /* 2^126 - 1 */
    felem_square(tmp, ftmp);
    felem_reduce(ftmp, tmp);    /* 2^127 - 2 */
    felem_mul(tmp, ftmp, in);
    felem_reduce(ftmp, tmp);    /* 2^127 - 1 */
    for (i = 0; i < 97; ++i) {  /* 2^224 - 2^97 */
        felem_square(tmp, ftmp);
        felem_reduce(ftmp, tmp);
    }
    felem_mul(tmp, ftmp, ftmp3);
    felem_reduce(out, tmp);     /* 2^224 - 2^96 - 1 */
}
800
/*
 * Copy in constant time: if icopy == 1, copy in to out, if icopy == 0, copy
 * out to itself.
 */
static void copy_conditional(felem out, const felem in, limb icopy)
{
    /* icopy is a (64-bit) 0 or 1; mask is all-ones or all-zero */
    const limb mask = -icopy;
    unsigned i;

    /* Branch-free per-limb select between the two sources. */
    for (i = 0; i < 4; ++i)
        out[i] = (out[i] & ~mask) | (in[i] & mask);
}
817
818 /******************************************************************************/
819 /*-
820 * ELLIPTIC CURVE POINT OPERATIONS
821 *
822 * Points are represented in Jacobian projective coordinates:
823 * (X, Y, Z) corresponds to the affine point (X/Z^2, Y/Z^3),
824 * or to the point at infinity if Z == 0.
825 *
826 */
827
828 /*-
829 * Double an elliptic curve point:
830 * (X', Y', Z') = 2 * (X, Y, Z), where
831 * X' = (3 * (X - Z^2) * (X + Z^2))^2 - 8 * X * Y^2
832 * Y' = 3 * (X - Z^2) * (X + Z^2) * (4 * X * Y^2 - X') - 8 * Y^2
833 * Z' = (Y + Z)^2 - Y^2 - Z^2 = 2 * Y * Z
834 * Outputs can equal corresponding inputs, i.e., x_out == x_in is allowed,
835 * while x_out == y_in is not (maybe this works, but it's not tested).
836 */
static void
point_double(felem x_out, felem y_out, felem z_out,
             const felem x_in, const felem y_in, const felem z_in)
{
    widefelem tmp, tmp2;
    felem delta, gamma, beta, alpha, ftmp, ftmp2;

    /* scratch copies of x_in, since x_out is allowed to alias x_in */
    felem_assign(ftmp, x_in);
    felem_assign(ftmp2, x_in);

    /* delta = z^2 */
    felem_square(tmp, z_in);
    felem_reduce(delta, tmp);

    /* gamma = y^2 */
    felem_square(tmp, y_in);
    felem_reduce(gamma, tmp);

    /* beta = x*gamma */
    felem_mul(tmp, x_in, gamma);
    felem_reduce(beta, tmp);

    /* alpha = 3*(x-delta)*(x+delta) */
    felem_diff(ftmp, delta);
    /* ftmp[i] < 2^57 + 2^58 + 2 < 2^59 */
    felem_sum(ftmp2, delta);
    /* ftmp2[i] < 2^57 + 2^57 = 2^58 */
    felem_scalar(ftmp2, 3);
    /* ftmp2[i] < 3 * 2^58 < 2^60 */
    felem_mul(tmp, ftmp, ftmp2);
    /* tmp[i] < 2^60 * 2^59 * 4 = 2^121 */
    felem_reduce(alpha, tmp);

    /* x' = alpha^2 - 8*beta */
    felem_square(tmp, alpha);
    /* tmp[i] < 4 * 2^57 * 2^57 = 2^116 */
    felem_assign(ftmp, beta);
    felem_scalar(ftmp, 8);
    /* ftmp[i] < 8 * 2^57 = 2^60 */
    felem_diff_128_64(tmp, ftmp);
    /* tmp[i] < 2^116 + 2^64 + 8 < 2^117 */
    felem_reduce(x_out, tmp);

    /* z' = (y + z)^2 - gamma - delta */
    felem_sum(delta, gamma);
    /* delta[i] < 2^57 + 2^57 = 2^58 */
    felem_assign(ftmp, y_in);
    felem_sum(ftmp, z_in);
    /* ftmp[i] < 2^57 + 2^57 = 2^58 */
    felem_square(tmp, ftmp);
    /* tmp[i] < 4 * 2^58 * 2^58 = 2^118 */
    felem_diff_128_64(tmp, delta);
    /* tmp[i] < 2^118 + 2^64 + 8 < 2^119 */
    felem_reduce(z_out, tmp);

    /* y' = alpha*(4*beta - x') - 8*gamma^2 */
    felem_scalar(beta, 4);
    /* beta[i] < 4 * 2^57 = 2^59 */
    felem_diff(beta, x_out);
    /* beta[i] < 2^59 + 2^58 + 2 < 2^60 */
    felem_mul(tmp, alpha, beta);
    /* tmp[i] < 4 * 2^57 * 2^60 = 2^119 */
    felem_square(tmp2, gamma);
    /* tmp2[i] < 4 * 2^57 * 2^57 = 2^116 */
    widefelem_scalar(tmp2, 8);
    /* tmp2[i] < 8 * 2^116 = 2^119 */
    widefelem_diff(tmp, tmp2);
    /* tmp[i] < 2^119 + 2^120 < 2^121 */
    felem_reduce(y_out, tmp);
}
907
908 /*-
909 * Add two elliptic curve points:
910 * (X_1, Y_1, Z_1) + (X_2, Y_2, Z_2) = (X_3, Y_3, Z_3), where
911 * X_3 = (Z_1^3 * Y_2 - Z_2^3 * Y_1)^2 - (Z_1^2 * X_2 - Z_2^2 * X_1)^3 -
912 * 2 * Z_2^2 * X_1 * (Z_1^2 * X_2 - Z_2^2 * X_1)^2
913 * Y_3 = (Z_1^3 * Y_2 - Z_2^3 * Y_1) * (Z_2^2 * X_1 * (Z_1^2 * X_2 - Z_2^2 * X_1)^2 - X_3) -
914 * Z_2^3 * Y_1 * (Z_1^2 * X_2 - Z_2^2 * X_1)^3
915 * Z_3 = (Z_1^2 * X_2 - Z_2^2 * X_1) * (Z_1 * Z_2)
916 *
917 * This runs faster if 'mixed' is set, which requires Z_2 = 1 or Z_2 = 0.
918 */
919
920 /*
921 * This function is not entirely constant-time: it includes a branch for
922 * checking whether the two input points are equal, (while not equal to the
923 * point at infinity). This case never happens during single point
924 * multiplication, so there is no timing leak for ECDH or ECDSA signing.
925 */
/*-
 * point_add computes (x3, y3, z3) = (x1, y1, z1) + (x2, y2, z2) in Jacobian
 * coordinates, per the formulae in the comment above.  If 'mixed' is nonzero
 * the caller guarantees z2 = 1 or z2 = 0, saving several field operations.
 * Outputs may alias inputs.  The limb-bound comments track the magnitude of
 * intermediate values to show felem_reduce's input precondition is met.
 */
static void point_add(felem x3, felem y3, felem z3,
                      const felem x1, const felem y1, const felem z1,
                      const int mixed, const felem x2, const felem y2,
                      const felem z2)
{
    felem ftmp, ftmp2, ftmp3, ftmp4, ftmp5, x_out, y_out, z_out;
    widefelem tmp, tmp2;
    limb z1_is_zero, z2_is_zero, x_equal, y_equal;

    if (!mixed) {
        /* ftmp2 = z2^2 */
        felem_square(tmp, z2);
        felem_reduce(ftmp2, tmp);

        /* ftmp4 = z2^3 */
        felem_mul(tmp, ftmp2, z2);
        felem_reduce(ftmp4, tmp);

        /* ftmp4 = z2^3*y1 */
        felem_mul(tmp2, ftmp4, y1);
        felem_reduce(ftmp4, tmp2);

        /* ftmp2 = z2^2*x1 */
        felem_mul(tmp2, ftmp2, x1);
        felem_reduce(ftmp2, tmp2);
    } else {
        /*
         * We'll assume z2 = 1 (special case z2 = 0 is handled later)
         */

        /* ftmp4 = z2^3*y1 */
        felem_assign(ftmp4, y1);

        /* ftmp2 = z2^2*x1 */
        felem_assign(ftmp2, x1);
    }

    /* ftmp = z1^2 */
    felem_square(tmp, z1);
    felem_reduce(ftmp, tmp);

    /* ftmp3 = z1^3 */
    felem_mul(tmp, ftmp, z1);
    felem_reduce(ftmp3, tmp);

    /* tmp = z1^3*y2 */
    felem_mul(tmp, ftmp3, y2);
    /* tmp[i] < 4 * 2^57 * 2^57 = 2^116 */

    /* ftmp3 = z1^3*y2 - z2^3*y1 */
    felem_diff_128_64(tmp, ftmp4);
    /* tmp[i] < 2^116 + 2^64 + 8 < 2^117 */
    felem_reduce(ftmp3, tmp);

    /* tmp = z1^2*x2 */
    felem_mul(tmp, ftmp, x2);
    /* tmp[i] < 4 * 2^57 * 2^57 = 2^116 */

    /* ftmp = z1^2*x2 - z2^2*x1 */
    felem_diff_128_64(tmp, ftmp2);
    /* tmp[i] < 2^116 + 2^64 + 8 < 2^117 */
    felem_reduce(ftmp, tmp);

    /*
     * the formulae are incorrect if the points are equal so we check for
     * this and do doubling if this happens
     */
    x_equal = felem_is_zero(ftmp);
    y_equal = felem_is_zero(ftmp3);
    z1_is_zero = felem_is_zero(z1);
    z2_is_zero = felem_is_zero(z2);
    /* In affine coordinates, (X_1, Y_1) == (X_2, Y_2) */
    /* NOTE: this branch is the non-constant-time case documented above */
    if (x_equal && y_equal && !z1_is_zero && !z2_is_zero) {
        point_double(x3, y3, z3, x1, y1, z1);
        return;
    }

    /* ftmp5 = z1*z2 */
    if (!mixed) {
        felem_mul(tmp, z1, z2);
        felem_reduce(ftmp5, tmp);
    } else {
        /* special case z2 = 0 is handled later */
        felem_assign(ftmp5, z1);
    }

    /* z_out = (z1^2*x2 - z2^2*x1)*(z1*z2) */
    felem_mul(tmp, ftmp, ftmp5);
    felem_reduce(z_out, tmp);

    /* ftmp = (z1^2*x2 - z2^2*x1)^2 */
    felem_assign(ftmp5, ftmp);
    felem_square(tmp, ftmp);
    felem_reduce(ftmp, tmp);

    /* ftmp5 = (z1^2*x2 - z2^2*x1)^3 */
    felem_mul(tmp, ftmp, ftmp5);
    felem_reduce(ftmp5, tmp);

    /* ftmp2 = z2^2*x1*(z1^2*x2 - z2^2*x1)^2 */
    felem_mul(tmp, ftmp2, ftmp);
    felem_reduce(ftmp2, tmp);

    /* tmp = z2^3*y1*(z1^2*x2 - z2^2*x1)^3 */
    felem_mul(tmp, ftmp4, ftmp5);
    /* tmp[i] < 4 * 2^57 * 2^57 = 2^116 */

    /* tmp2 = (z1^3*y2 - z2^3*y1)^2 */
    felem_square(tmp2, ftmp3);
    /* tmp2[i] < 4 * 2^57 * 2^57 < 2^116 */

    /* tmp2 = (z1^3*y2 - z2^3*y1)^2 - (z1^2*x2 - z2^2*x1)^3 */
    felem_diff_128_64(tmp2, ftmp5);
    /* tmp2[i] < 2^116 + 2^64 + 8 < 2^117 */

    /* ftmp5 = 2*z2^2*x1*(z1^2*x2 - z2^2*x1)^2 */
    felem_assign(ftmp5, ftmp2);
    felem_scalar(ftmp5, 2);
    /* ftmp5[i] < 2 * 2^57 = 2^58 */

    /*-
     * x_out = (z1^3*y2 - z2^3*y1)^2 - (z1^2*x2 - z2^2*x1)^3 -
     *  2*z2^2*x1*(z1^2*x2 - z2^2*x1)^2
     */
    felem_diff_128_64(tmp2, ftmp5);
    /* tmp2[i] < 2^117 + 2^64 + 8 < 2^118 */
    felem_reduce(x_out, tmp2);

    /* ftmp2 = z2^2*x1*(z1^2*x2 - z2^2*x1)^2 - x_out */
    felem_diff(ftmp2, x_out);
    /* ftmp2[i] < 2^57 + 2^58 + 2 < 2^59 */

    /*
     * tmp2 = (z1^3*y2 - z2^3*y1)*(z2^2*x1*(z1^2*x2 - z2^2*x1)^2 - x_out)
     */
    felem_mul(tmp2, ftmp3, ftmp2);
    /* tmp2[i] < 4 * 2^57 * 2^59 = 2^118 */

    /*-
     * y_out = (z1^3*y2 - z2^3*y1)*(z2^2*x1*(z1^2*x2 - z2^2*x1)^2 - x_out) -
     *  z2^3*y1*(z1^2*x2 - z2^2*x1)^3
     */
    widefelem_diff(tmp2, tmp);
    /* tmp2[i] < 2^118 + 2^120 < 2^121 */
    felem_reduce(y_out, tmp2);

    /*
     * the result (x_out, y_out, z_out) is incorrect if one of the inputs is
     * the point at infinity, so we need to check for this separately
     */

    /*
     * if point 1 is at infinity, copy point 2 to output, and vice versa
     */
    /* constant-time conditional copies: no branch on the zero flags */
    copy_conditional(x_out, x2, z1_is_zero);
    copy_conditional(x_out, x1, z2_is_zero);
    copy_conditional(y_out, y2, z1_is_zero);
    copy_conditional(y_out, y1, z2_is_zero);
    copy_conditional(z_out, z2, z1_is_zero);
    copy_conditional(z_out, z1, z2_is_zero);
    felem_assign(x3, x_out);
    felem_assign(y3, y_out);
    felem_assign(z3, z_out);
}
1090
1091 /*
1092 * select_point selects the |idx|th point from a precomputation table and
1093 * copies it to out.
 * The pre_comp array argument should have |size| elements.
1095 */
static void select_point(const u64 idx, unsigned int size,
                         const felem pre_comp[][3], felem out[3])
{
    unsigned i, j;
    limb *outlimbs = &out[0][0];
    /* start from all-zero limbs (the point at infinity representation) */
    memset(outlimbs, 0, 3 * sizeof(felem));

    /*
     * Constant-time table lookup: every entry is touched, and the wanted
     * one is selected with a bitmask rather than a data-dependent branch.
     */
    for (i = 0; i < size; i++) {
        const limb *inlimbs = &pre_comp[i][0][0];
        u64 mask = i ^ idx;     /* zero iff this is the requested entry */
        /* fold all bits of (i ^ idx) into bit 0 */
        mask |= mask >> 4;
        mask |= mask >> 2;
        mask |= mask >> 1;
        mask &= 1;
        mask--;                 /* all-ones when i == idx, zero otherwise */
        for (j = 0; j < 4 * 3; j++)
            outlimbs[j] |= inlimbs[j] & mask;
    }
}
1115
1116 /* get_bit returns the |i|th bit in |in| */
1117 static char get_bit(const felem_bytearray in, unsigned i)
1118 {
1119 if (i >= 224)
1120 return 0;
1121 return (in[i >> 3] >> (i & 7)) & 1;
1122 }
1123
1124 /*
1125 * Interleaved point multiplication using precomputed point multiples: The
1126 * small point multiples 0*P, 1*P, ..., 16*P are in pre_comp[], the scalars
1127 * in scalars[]. If g_scalar is non-NULL, we also add this multiple of the
1128 * generator, using certain (large) precomputed multiples in g_pre_comp.
1129 * Output point (X, Y, Z) is stored in x_out, y_out, z_out
1130 */
/*
 * batch_mul computes sum(scalars[i] * P_i) (+ g_scalar * G when g_scalar is
 * non-NULL) using the precomputed tables, storing the Jacobian result in
 * (x_out, y_out, z_out).  'mixed' is forwarded to point_add for the
 * non-generator additions and requires affine precomputed multiples.
 */
static void batch_mul(felem x_out, felem y_out, felem z_out,
                      const felem_bytearray scalars[],
                      const unsigned num_points, const u8 *g_scalar,
                      const int mixed, const felem pre_comp[][17][3],
                      const felem g_pre_comp[2][16][3])
{
    int i, skip;
    unsigned num;
    unsigned gen_mul = (g_scalar != NULL);
    felem nq[3], tmp[4];
    u64 bits;
    u8 sign, digit;

    /* set nq to the point at infinity */
    memset(nq, 0, 3 * sizeof(felem));

    /*
     * Loop over all scalars msb-to-lsb, interleaving additions of multiples
     * of the generator (two in each of the last 28 rounds) and additions of
     * other points multiples (every 5th round).
     */
    skip = 1; /* save two point operations in the first
               * round */
    /* 220 covers 224-bit scalars; 27 suffices for the generator-only case */
    for (i = (num_points ? 220 : 27); i >= 0; --i) {
        /* double */
        if (!skip)
            point_double(nq[0], nq[1], nq[2], nq[0], nq[1], nq[2]);

        /* add multiples of the generator */
        if (gen_mul && (i <= 27)) {
            /* first, look 28 bits upwards */
            bits = get_bit(g_scalar, i + 196) << 3;
            bits |= get_bit(g_scalar, i + 140) << 2;
            bits |= get_bit(g_scalar, i + 84) << 1;
            bits |= get_bit(g_scalar, i + 28);
            /* select the point to add, in constant time */
            select_point(bits, 16, g_pre_comp[1], tmp);

            if (!skip) {
                /* value 1 below is argument for "mixed" */
                point_add(nq[0], nq[1], nq[2],
                          nq[0], nq[1], nq[2], 1, tmp[0], tmp[1], tmp[2]);
            } else {
                memcpy(nq, tmp, 3 * sizeof(felem));
                skip = 0;
            }

            /* second, look at the current position */
            bits = get_bit(g_scalar, i + 168) << 3;
            bits |= get_bit(g_scalar, i + 112) << 2;
            bits |= get_bit(g_scalar, i + 56) << 1;
            bits |= get_bit(g_scalar, i);
            /* select the point to add, in constant time */
            select_point(bits, 16, g_pre_comp[0], tmp);
            point_add(nq[0], nq[1], nq[2],
                      nq[0], nq[1], nq[2],
                      1 /* mixed */ , tmp[0], tmp[1], tmp[2]);
        }

        /* do other additions every 5 doublings */
        if (num_points && (i % 5 == 0)) {
            /* loop over all scalars */
            for (num = 0; num < num_points; ++num) {
                /*
                 * 6-bit sliding window; at i == 0 the (i - 1) argument
                 * wraps to a huge unsigned value and get_bit returns 0
                 */
                bits = get_bit(scalars[num], i + 4) << 5;
                bits |= get_bit(scalars[num], i + 3) << 4;
                bits |= get_bit(scalars[num], i + 2) << 3;
                bits |= get_bit(scalars[num], i + 1) << 2;
                bits |= get_bit(scalars[num], i) << 1;
                bits |= get_bit(scalars[num], i - 1);
                ec_GFp_nistp_recode_scalar_bits(&sign, &digit, bits);

                /* select the point to add or subtract */
                select_point(digit, 17, pre_comp[num], tmp);
                felem_neg(tmp[3], tmp[1]); /* (X, -Y, Z) is the negative
                                            * point */
                /* conditionally negate in constant time */
                copy_conditional(tmp[1], tmp[3], sign);

                if (!skip) {
                    point_add(nq[0], nq[1], nq[2],
                              nq[0], nq[1], nq[2],
                              mixed, tmp[0], tmp[1], tmp[2]);
                } else {
                    memcpy(nq, tmp, 3 * sizeof(felem));
                    skip = 0;
                }
            }
        }
    }
    felem_assign(x_out, nq[0]);
    felem_assign(y_out, nq[1]);
    felem_assign(z_out, nq[2]);
}
1223
1224 /******************************************************************************/
1225 /*
1226 * FUNCTIONS TO MANAGE PRECOMPUTATION
1227 */
1228
1229 static NISTP224_PRE_COMP *nistp224_pre_comp_new()
1230 {
1231 NISTP224_PRE_COMP *ret = NULL;
1232 ret = (NISTP224_PRE_COMP *) OPENSSL_malloc(sizeof *ret);
1233 if (!ret) {
1234 ECerr(EC_F_NISTP224_PRE_COMP_NEW, ERR_R_MALLOC_FAILURE);
1235 return ret;
1236 }
1237 memset(ret->g_pre_comp, 0, sizeof(ret->g_pre_comp));
1238 ret->references = 1;
1239 return ret;
1240 }
1241
1242 static void *nistp224_pre_comp_dup(void *src_)
1243 {
1244 NISTP224_PRE_COMP *src = src_;
1245
1246 /* no need to actually copy, these objects never change! */
1247 CRYPTO_add(&src->references, 1, CRYPTO_LOCK_EC_PRE_COMP);
1248
1249 return src_;
1250 }
1251
1252 static void nistp224_pre_comp_free(void *pre_)
1253 {
1254 int i;
1255 NISTP224_PRE_COMP *pre = pre_;
1256
1257 if (!pre)
1258 return;
1259
1260 i = CRYPTO_add(&pre->references, -1, CRYPTO_LOCK_EC_PRE_COMP);
1261 if (i > 0)
1262 return;
1263
1264 OPENSSL_free(pre);
1265 }
1266
1267 static void nistp224_pre_comp_clear_free(void *pre_)
1268 {
1269 int i;
1270 NISTP224_PRE_COMP *pre = pre_;
1271
1272 if (!pre)
1273 return;
1274
1275 i = CRYPTO_add(&pre->references, -1, CRYPTO_LOCK_EC_PRE_COMP);
1276 if (i > 0)
1277 return;
1278
1279 OPENSSL_cleanse(pre, sizeof *pre);
1280 OPENSSL_free(pre);
1281 }
1282
1283 /******************************************************************************/
1284 /*
1285 * OPENSSL EC_METHOD FUNCTIONS
1286 */
1287
1288 int ec_GFp_nistp224_group_init(EC_GROUP *group)
1289 {
1290 int ret;
1291 ret = ec_GFp_simple_group_init(group);
1292 group->a_is_minus3 = 1;
1293 return ret;
1294 }
1295
1296 int ec_GFp_nistp224_group_set_curve(EC_GROUP *group, const BIGNUM *p,
1297 const BIGNUM *a, const BIGNUM *b,
1298 BN_CTX *ctx)
1299 {
1300 int ret = 0;
1301 BN_CTX *new_ctx = NULL;
1302 BIGNUM *curve_p, *curve_a, *curve_b;
1303
1304 if (ctx == NULL)
1305 if ((ctx = new_ctx = BN_CTX_new()) == NULL)
1306 return 0;
1307 BN_CTX_start(ctx);
1308 if (((curve_p = BN_CTX_get(ctx)) == NULL) ||
1309 ((curve_a = BN_CTX_get(ctx)) == NULL) ||
1310 ((curve_b = BN_CTX_get(ctx)) == NULL))
1311 goto err;
1312 BN_bin2bn(nistp224_curve_params[0], sizeof(felem_bytearray), curve_p);
1313 BN_bin2bn(nistp224_curve_params[1], sizeof(felem_bytearray), curve_a);
1314 BN_bin2bn(nistp224_curve_params[2], sizeof(felem_bytearray), curve_b);
1315 if ((BN_cmp(curve_p, p)) || (BN_cmp(curve_a, a)) || (BN_cmp(curve_b, b))) {
1316 ECerr(EC_F_EC_GFP_NISTP224_GROUP_SET_CURVE,
1317 EC_R_WRONG_CURVE_PARAMETERS);
1318 goto err;
1319 }
1320 group->field_mod_func = BN_nist_mod_224;
1321 ret = ec_GFp_simple_group_set_curve(group, p, a, b, ctx);
1322 err:
1323 BN_CTX_end(ctx);
1324 if (new_ctx != NULL)
1325 BN_CTX_free(new_ctx);
1326 return ret;
1327 }
1328
1329 /*
1330 * Takes the Jacobian coordinates (X, Y, Z) of a point and returns (X', Y') =
1331 * (X/Z^2, Y/Z^3)
1332 */
int ec_GFp_nistp224_point_get_affine_coordinates(const EC_GROUP *group,
                                                 const EC_POINT *point,
                                                 BIGNUM *x, BIGNUM *y,
                                                 BN_CTX *ctx)
{
    felem z1, z2, x_in, y_in, x_out, y_out;
    widefelem tmp;

    /* the point at infinity has no affine representation */
    if (EC_POINT_is_at_infinity(group, point)) {
        ECerr(EC_F_EC_GFP_NISTP224_POINT_GET_AFFINE_COORDINATES,
              EC_R_POINT_AT_INFINITY);
        return 0;
    }
    if ((!BN_to_felem(x_in, &point->X)) || (!BN_to_felem(y_in, &point->Y)) ||
        (!BN_to_felem(z1, &point->Z)))
        return 0;
    /* z2 = Z^-1 */
    felem_inv(z2, z1);
    /* z1 = Z^-2 (z1 is reused as a scratch variable from here on) */
    felem_square(tmp, z2);
    felem_reduce(z1, tmp);
    /* x' = X * Z^-2 */
    felem_mul(tmp, x_in, z1);
    felem_reduce(x_in, tmp);
    /* reduce to the unique minimal representative before export */
    felem_contract(x_out, x_in);
    if (x != NULL) {
        if (!felem_to_BN(x, x_out)) {
            ECerr(EC_F_EC_GFP_NISTP224_POINT_GET_AFFINE_COORDINATES,
                  ERR_R_BN_LIB);
            return 0;
        }
    }
    /* z1 = Z^-3 */
    felem_mul(tmp, z1, z2);
    felem_reduce(z1, tmp);
    /* y' = Y * Z^-3 */
    felem_mul(tmp, y_in, z1);
    felem_reduce(y_in, tmp);
    felem_contract(y_out, y_in);
    if (y != NULL) {
        if (!felem_to_BN(y, y_out)) {
            ECerr(EC_F_EC_GFP_NISTP224_POINT_GET_AFFINE_COORDINATES,
                  ERR_R_BN_LIB);
            return 0;
        }
    }
    return 1;
}
1376
1377 static void make_points_affine(size_t num, felem points[ /* num */ ][3],
1378 felem tmp_felems[ /* num+1 */ ])
1379 {
1380 /*
1381 * Runs in constant time, unless an input is the point at infinity (which
1382 * normally shouldn't happen).
1383 */
1384 ec_GFp_nistp_points_make_affine_internal(num,
1385 points,
1386 sizeof(felem),
1387 tmp_felems,
1388 (void (*)(void *))felem_one,
1389 (int (*)(const void *))
1390 felem_is_zero_int,
1391 (void (*)(void *, const void *))
1392 felem_assign,
1393 (void (*)(void *, const void *))
1394 felem_square_reduce, (void (*)
1395 (void *,
1396 const void
1397 *,
1398 const void
1399 *))
1400 felem_mul_reduce,
1401 (void (*)(void *, const void *))
1402 felem_inv,
1403 (void (*)(void *, const void *))
1404 felem_contract);
1405 }
1406
1407 /*
1408 * Computes scalar*generator + \sum scalars[i]*points[i], ignoring NULL
1409 * values Result is stored in r (r can equal one of the inputs).
1410 */
int ec_GFp_nistp224_points_mul(const EC_GROUP *group, EC_POINT *r,
                               const BIGNUM *scalar, size_t num,
                               const EC_POINT *points[],
                               const BIGNUM *scalars[], BN_CTX *ctx)
{
    int ret = 0;
    int j;
    unsigned i;
    int mixed = 0;
    BN_CTX *new_ctx = NULL;
    BIGNUM *x, *y, *z, *tmp_scalar;
    felem_bytearray g_secret;
    felem_bytearray *secrets = NULL;
    felem(*pre_comp)[17][3] = NULL;
    felem *tmp_felems = NULL;
    felem_bytearray tmp;
    unsigned num_bytes;
    int have_pre_comp = 0;
    size_t num_points = num;
    felem x_in, y_in, z_in, x_out, y_out, z_out;
    NISTP224_PRE_COMP *pre = NULL;
    const felem(*g_pre_comp)[16][3] = NULL;
    EC_POINT *generator = NULL;
    const EC_POINT *p = NULL;
    const BIGNUM *p_scalar = NULL;

    if (ctx == NULL)
        if ((ctx = new_ctx = BN_CTX_new()) == NULL)
            return 0;
    BN_CTX_start(ctx);
    if (((x = BN_CTX_get(ctx)) == NULL) ||
        ((y = BN_CTX_get(ctx)) == NULL) ||
        ((z = BN_CTX_get(ctx)) == NULL) ||
        ((tmp_scalar = BN_CTX_get(ctx)) == NULL))
        goto err;

    if (scalar != NULL) {
        pre = EC_EX_DATA_get_data(group->extra_data,
                                  nistp224_pre_comp_dup,
                                  nistp224_pre_comp_free,
                                  nistp224_pre_comp_clear_free);
        if (pre)
            /* we have precomputation, try to use it */
            g_pre_comp = (const felem(*)[16][3])pre->g_pre_comp;
        else
            /* try to use the standard precomputation */
            g_pre_comp = &gmul[0];
        generator = EC_POINT_new(group);
        if (generator == NULL)
            goto err;
        /* get the generator from precomputation */
        if (!felem_to_BN(x, g_pre_comp[0][1][0]) ||
            !felem_to_BN(y, g_pre_comp[0][1][1]) ||
            !felem_to_BN(z, g_pre_comp[0][1][2])) {
            ECerr(EC_F_EC_GFP_NISTP224_POINTS_MUL, ERR_R_BN_LIB);
            goto err;
        }
        if (!EC_POINT_set_Jprojective_coordinates_GFp(group,
                                                      generator, x, y, z,
                                                      ctx))
            goto err;
        if (0 == EC_POINT_cmp(group, generator, group->generator, ctx))
            /* precomputation matches generator */
            have_pre_comp = 1;
        else
            /*
             * we don't have valid precomputation: treat the generator as a
             * random point
             */
            num_points = num_points + 1;
    }

    if (num_points > 0) {
        if (num_points >= 3) {
            /*
             * unless we precompute multiples for just one or two points,
             * converting those into affine form is time well spent
             */
            mixed = 1;
        }
        secrets = OPENSSL_malloc(num_points * sizeof(felem_bytearray));
        pre_comp = OPENSSL_malloc(num_points * 17 * 3 * sizeof(felem));
        if (mixed)
            tmp_felems =
                OPENSSL_malloc((num_points * 17 + 1) * sizeof(felem));
        if ((secrets == NULL) || (pre_comp == NULL)
            || (mixed && (tmp_felems == NULL))) {
            ECerr(EC_F_EC_GFP_NISTP224_POINTS_MUL, ERR_R_MALLOC_FAILURE);
            goto err;
        }

        /*
         * we treat NULL scalars as 0, and NULL points as points at infinity,
         * i.e., they contribute nothing to the linear combination
         */
        memset(secrets, 0, num_points * sizeof(felem_bytearray));
        memset(pre_comp, 0, num_points * 17 * 3 * sizeof(felem));
        for (i = 0; i < num_points; ++i) {
            if (i == num)
                /* the generator */
            {
                p = EC_GROUP_get0_generator(group);
                p_scalar = scalar;
            } else
                /* the i^th point */
            {
                p = points[i];
                p_scalar = scalars[i];
            }
            if ((p_scalar != NULL) && (p != NULL)) {
                /* reduce scalar to 0 <= scalar < 2^224 */
                if ((BN_num_bits(p_scalar) > 224)
                    || (BN_is_negative(p_scalar))) {
                    /*
                     * this is an unusual input, and we don't guarantee
                     * constant-timeness
                     */
                    if (!BN_nnmod(tmp_scalar, p_scalar, &group->order, ctx)) {
                        ECerr(EC_F_EC_GFP_NISTP224_POINTS_MUL, ERR_R_BN_LIB);
                        goto err;
                    }
                    num_bytes = BN_bn2bin(tmp_scalar, tmp);
                } else
                    num_bytes = BN_bn2bin(p_scalar, tmp);
                /* convert from big-endian BN output to little-endian bytes */
                flip_endian(secrets[i], tmp, num_bytes);
                /* precompute multiples 1*P .. 16*P for this point */
                if ((!BN_to_felem(x_out, &p->X)) ||
                    (!BN_to_felem(y_out, &p->Y)) ||
                    (!BN_to_felem(z_out, &p->Z)))
                    goto err;
                felem_assign(pre_comp[i][1][0], x_out);
                felem_assign(pre_comp[i][1][1], y_out);
                felem_assign(pre_comp[i][1][2], z_out);
                /* odd multiples by addition, even multiples by doubling */
                for (j = 2; j <= 16; ++j) {
                    if (j & 1) {
                        point_add(pre_comp[i][j][0], pre_comp[i][j][1],
                                  pre_comp[i][j][2], pre_comp[i][1][0],
                                  pre_comp[i][1][1], pre_comp[i][1][2], 0,
                                  pre_comp[i][j - 1][0],
                                  pre_comp[i][j - 1][1],
                                  pre_comp[i][j - 1][2]);
                    } else {
                        point_double(pre_comp[i][j][0], pre_comp[i][j][1],
                                     pre_comp[i][j][2], pre_comp[i][j / 2][0],
                                     pre_comp[i][j / 2][1],
                                     pre_comp[i][j / 2][2]);
                    }
                }
            }
        }
        if (mixed)
            make_points_affine(num_points * 17, pre_comp[0], tmp_felems);
    }

    /* the scalar for the generator */
    if ((scalar != NULL) && (have_pre_comp)) {
        memset(g_secret, 0, sizeof g_secret);
        /* reduce scalar to 0 <= scalar < 2^224 */
        if ((BN_num_bits(scalar) > 224) || (BN_is_negative(scalar))) {
            /*
             * this is an unusual input, and we don't guarantee
             * constant-timeness
             */
            if (!BN_nnmod(tmp_scalar, scalar, &group->order, ctx)) {
                ECerr(EC_F_EC_GFP_NISTP224_POINTS_MUL, ERR_R_BN_LIB);
                goto err;
            }
            num_bytes = BN_bn2bin(tmp_scalar, tmp);
        } else
            num_bytes = BN_bn2bin(scalar, tmp);
        flip_endian(g_secret, tmp, num_bytes);
        /* do the multiplication with generator precomputation */
        batch_mul(x_out, y_out, z_out,
                  (const felem_bytearray(*))secrets, num_points,
                  g_secret,
                  mixed, (const felem(*)[17][3])pre_comp, g_pre_comp);
    } else
        /* do the multiplication without generator precomputation */
        batch_mul(x_out, y_out, z_out,
                  (const felem_bytearray(*))secrets, num_points,
                  NULL, mixed, (const felem(*)[17][3])pre_comp, NULL);
    /* reduce the output to its unique minimal representation */
    felem_contract(x_in, x_out);
    felem_contract(y_in, y_out);
    felem_contract(z_in, z_out);
    if ((!felem_to_BN(x, x_in)) || (!felem_to_BN(y, y_in)) ||
        (!felem_to_BN(z, z_in))) {
        ECerr(EC_F_EC_GFP_NISTP224_POINTS_MUL, ERR_R_BN_LIB);
        goto err;
    }
    ret = EC_POINT_set_Jprojective_coordinates_GFp(group, r, x, y, z, ctx);

 err:
    BN_CTX_end(ctx);
    if (generator != NULL)
        EC_POINT_free(generator);
    if (new_ctx != NULL)
        BN_CTX_free(new_ctx);
    if (secrets != NULL)
        OPENSSL_free(secrets);
    if (pre_comp != NULL)
        OPENSSL_free(pre_comp);
    if (tmp_felems != NULL)
        OPENSSL_free(tmp_felems);
    return ret;
}
1617
/*
 * Build and attach precomputed multiples of the group generator for fast
 * fixed-base multiplication.  If the generator is the standard P-224 one,
 * the built-in table is copied; otherwise the two 16-entry tables are
 * computed from scratch.  Returns 1 on success, 0 on error.
 */
int ec_GFp_nistp224_precompute_mult(EC_GROUP *group, BN_CTX *ctx)
{
    int ret = 0;
    NISTP224_PRE_COMP *pre = NULL;
    int i, j;
    BN_CTX *new_ctx = NULL;
    BIGNUM *x, *y;
    EC_POINT *generator = NULL;
    felem tmp_felems[32];

    /* throw away old precomputation */
    EC_EX_DATA_free_data(&group->extra_data, nistp224_pre_comp_dup,
                         nistp224_pre_comp_free,
                         nistp224_pre_comp_clear_free);
    if (ctx == NULL)
        if ((ctx = new_ctx = BN_CTX_new()) == NULL)
            return 0;
    BN_CTX_start(ctx);
    if (((x = BN_CTX_get(ctx)) == NULL) || ((y = BN_CTX_get(ctx)) == NULL))
        goto err;
    /* get the generator */
    if (group->generator == NULL)
        goto err;
    generator = EC_POINT_new(group);
    if (generator == NULL)
        goto err;
    /* build the standard generator point from the curve parameter table */
    BN_bin2bn(nistp224_curve_params[3], sizeof(felem_bytearray), x);
    BN_bin2bn(nistp224_curve_params[4], sizeof(felem_bytearray), y);
    if (!EC_POINT_set_affine_coordinates_GFp(group, generator, x, y, ctx))
        goto err;
    if ((pre = nistp224_pre_comp_new()) == NULL)
        goto err;
    /*
     * if the generator is the standard one, use built-in precomputation
     */
    if (0 == EC_POINT_cmp(group, generator, group->generator, ctx)) {
        memcpy(pre->g_pre_comp, gmul, sizeof(pre->g_pre_comp));
        ret = 1;
        goto err;
    }
    if ((!BN_to_felem(pre->g_pre_comp[0][1][0], &group->generator->X)) ||
        (!BN_to_felem(pre->g_pre_comp[0][1][1], &group->generator->Y)) ||
        (!BN_to_felem(pre->g_pre_comp[0][1][2], &group->generator->Z)))
        goto err;
    /*
     * compute 2^56*G, 2^112*G, 2^168*G for the first table, 2^28*G, 2^84*G,
     * 2^140*G, 2^196*G for the second one
     */
    /* i walks the powers-of-two table slots 1, 2, 4, 8 */
    for (i = 1; i <= 8; i <<= 1) {
        /* table 1 entry = 2^28 * (table 0 entry): 1 + 27 doublings */
        point_double(pre->g_pre_comp[1][i][0], pre->g_pre_comp[1][i][1],
                     pre->g_pre_comp[1][i][2], pre->g_pre_comp[0][i][0],
                     pre->g_pre_comp[0][i][1], pre->g_pre_comp[0][i][2]);
        for (j = 0; j < 27; ++j) {
            point_double(pre->g_pre_comp[1][i][0], pre->g_pre_comp[1][i][1],
                         pre->g_pre_comp[1][i][2], pre->g_pre_comp[1][i][0],
                         pre->g_pre_comp[1][i][1], pre->g_pre_comp[1][i][2]);
        }
        if (i == 8)
            break;
        /* next table-0 slot = 2^28 * (current table-1 slot) */
        point_double(pre->g_pre_comp[0][2 * i][0],
                     pre->g_pre_comp[0][2 * i][1],
                     pre->g_pre_comp[0][2 * i][2], pre->g_pre_comp[1][i][0],
                     pre->g_pre_comp[1][i][1], pre->g_pre_comp[1][i][2]);
        for (j = 0; j < 27; ++j) {
            point_double(pre->g_pre_comp[0][2 * i][0],
                         pre->g_pre_comp[0][2 * i][1],
                         pre->g_pre_comp[0][2 * i][2],
                         pre->g_pre_comp[0][2 * i][0],
                         pre->g_pre_comp[0][2 * i][1],
                         pre->g_pre_comp[0][2 * i][2]);
        }
    }
    for (i = 0; i < 2; i++) {
        /* g_pre_comp[i][0] is the point at infinity */
        memset(pre->g_pre_comp[i][0], 0, sizeof(pre->g_pre_comp[i][0]));
        /* the remaining multiples */
        /* 2^56*G + 2^112*G resp. 2^84*G + 2^140*G */
        point_add(pre->g_pre_comp[i][6][0], pre->g_pre_comp[i][6][1],
                  pre->g_pre_comp[i][6][2], pre->g_pre_comp[i][4][0],
                  pre->g_pre_comp[i][4][1], pre->g_pre_comp[i][4][2],
                  0, pre->g_pre_comp[i][2][0], pre->g_pre_comp[i][2][1],
                  pre->g_pre_comp[i][2][2]);
        /* 2^56*G + 2^168*G resp. 2^84*G + 2^196*G */
        point_add(pre->g_pre_comp[i][10][0], pre->g_pre_comp[i][10][1],
                  pre->g_pre_comp[i][10][2], pre->g_pre_comp[i][8][0],
                  pre->g_pre_comp[i][8][1], pre->g_pre_comp[i][8][2],
                  0, pre->g_pre_comp[i][2][0], pre->g_pre_comp[i][2][1],
                  pre->g_pre_comp[i][2][2]);
        /* 2^112*G + 2^168*G resp. 2^140*G + 2^196*G */
        point_add(pre->g_pre_comp[i][12][0], pre->g_pre_comp[i][12][1],
                  pre->g_pre_comp[i][12][2], pre->g_pre_comp[i][8][0],
                  pre->g_pre_comp[i][8][1], pre->g_pre_comp[i][8][2],
                  0, pre->g_pre_comp[i][4][0], pre->g_pre_comp[i][4][1],
                  pre->g_pre_comp[i][4][2]);
        /*
         * 2^56*G + 2^112*G + 2^168*G resp. 2^84*G + 2^140*G + 2^196*G
         */
        point_add(pre->g_pre_comp[i][14][0], pre->g_pre_comp[i][14][1],
                  pre->g_pre_comp[i][14][2], pre->g_pre_comp[i][12][0],
                  pre->g_pre_comp[i][12][1], pre->g_pre_comp[i][12][2],
                  0, pre->g_pre_comp[i][2][0], pre->g_pre_comp[i][2][1],
                  pre->g_pre_comp[i][2][2]);
        for (j = 1; j < 8; ++j) {
            /* odd multiples: add G resp. 2^28*G */
            point_add(pre->g_pre_comp[i][2 * j + 1][0],
                      pre->g_pre_comp[i][2 * j + 1][1],
                      pre->g_pre_comp[i][2 * j + 1][2],
                      pre->g_pre_comp[i][2 * j][0],
                      pre->g_pre_comp[i][2 * j][1],
                      pre->g_pre_comp[i][2 * j][2], 0,
                      pre->g_pre_comp[i][1][0], pre->g_pre_comp[i][1][1],
                      pre->g_pre_comp[i][1][2]);
        }
    }
    /* convert all 31 non-infinity entries to affine form in one batch */
    make_points_affine(31, &(pre->g_pre_comp[0][1]), tmp_felems);

    if (!EC_EX_DATA_set_data(&group->extra_data, pre, nistp224_pre_comp_dup,
                             nistp224_pre_comp_free,
                             nistp224_pre_comp_clear_free))
        goto err;
    ret = 1;
    /* ownership transferred to group->extra_data; don't free below */
    pre = NULL;
 err:
    BN_CTX_end(ctx);
    if (generator != NULL)
        EC_POINT_free(generator);
    if (new_ctx != NULL)
        BN_CTX_free(new_ctx);
    if (pre)
        nistp224_pre_comp_free(pre);
    return ret;
}
1750
1751 int ec_GFp_nistp224_have_precompute_mult(const EC_GROUP *group)
1752 {
1753 if (EC_EX_DATA_get_data(group->extra_data, nistp224_pre_comp_dup,
1754 nistp224_pre_comp_free,
1755 nistp224_pre_comp_clear_free)
1756 != NULL)
1757 return 1;
1758 else
1759 return 0;
1760 }
1761
1762 #else
1763 static void *dummy = &dummy;
1764 #endif