]> git.ipfire.org Git - thirdparty/openssl.git/blob - crypto/ec/ecp_nistp224.c
Remove /* foo.c */ comments
[thirdparty/openssl.git] / crypto / ec / ecp_nistp224.c
1 /*
2 * Written by Emilia Kasper (Google) for the OpenSSL project.
3 */
4 /* Copyright 2011 Google Inc.
5 *
6 * Licensed under the Apache License, Version 2.0 (the "License");
7 *
8 * you may not use this file except in compliance with the License.
9 * You may obtain a copy of the License at
10 *
11 * http://www.apache.org/licenses/LICENSE-2.0
12 *
13 * Unless required by applicable law or agreed to in writing, software
14 * distributed under the License is distributed on an "AS IS" BASIS,
15 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 * See the License for the specific language governing permissions and
17 * limitations under the License.
18 */
19
20 /*
21 * A 64-bit implementation of the NIST P-224 elliptic curve point multiplication
22 *
23 * Inspired by Daniel J. Bernstein's public domain nistp224 implementation
24 * and Adam Langley's public domain 64-bit C implementation of curve25519
25 */
26
27 #include <openssl/opensslconf.h>
28 #ifndef OPENSSL_NO_EC_NISTP_64_GCC_128
29
30 # include <stdint.h>
31 # include <string.h>
32 # include <openssl/err.h>
33 # include "ec_lcl.h"
34
35 # if defined(__GNUC__) && (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 1))
36 /* even with gcc, the typedef won't work for 32-bit platforms */
37 typedef __uint128_t uint128_t; /* nonstandard; implemented by gcc on 64-bit
38 * platforms */
39 # else
40 # error "Need GCC 3.1 or later to define type uint128_t"
41 # endif
42
/* Short aliases matching the rest of the NISTP implementations */
typedef uint8_t u8;
typedef uint64_t u64;
typedef int64_t s64;

/******************************************************************************/
/*-
 * INTERNAL REPRESENTATION OF FIELD ELEMENTS
 *
 * Field elements are represented as a_0 + 2^56*a_1 + 2^112*a_2 + 2^168*a_3
 * using 64-bit coefficients called 'limbs',
 * and sometimes (for multiplication results) as
 * b_0 + 2^56*b_1 + 2^112*b_2 + 2^168*b_3 + 2^224*b_4 + 2^280*b_5 + 2^336*b_6
 * using 128-bit coefficients called 'widelimbs'.
 * A 4-limb representation is an 'felem';
 * a 7-widelimb representation is a 'widefelem'.
 * Even within felems, bits of adjacent limbs overlap, and we don't always
 * reduce the representations: we ensure that inputs to each felem
 * multiplication satisfy a_i < 2^60, so outputs satisfy b_i < 4*2^60*2^60,
 * and fit into a 128-bit word without overflow. The coefficients are then
 * again partially reduced to obtain an felem satisfying a_i < 2^57.
 * We only reduce to the unique minimal representation at the end of the
 * computation.
 */

typedef uint64_t limb;
typedef uint128_t widelimb;

typedef limb felem[4];
typedef widelimb widefelem[7];

/*
 * Field element represented as a byte array. 28*8 = 224 bits is also the
 * group order size for the elliptic curve, and we also use this type for
 * scalars for point multiplication.
 */
typedef u8 felem_bytearray[28];
79
/*
 * NIST P-224 domain parameters as big-endian byte arrays:
 * field prime p, curve coefficients a and b, and generator coordinates x, y.
 */
static const felem_bytearray nistp224_curve_params[5] = {
    {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, /* p */
     0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00,
     0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01},
    {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, /* a */
     0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFE, 0xFF, 0xFF, 0xFF, 0xFF,
     0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFE},
    {0xB4, 0x05, 0x0A, 0x85, 0x0C, 0x04, 0xB3, 0xAB, 0xF5, 0x41, /* b */
     0x32, 0x56, 0x50, 0x44, 0xB0, 0xB7, 0xD7, 0xBF, 0xD8, 0xBA,
     0x27, 0x0B, 0x39, 0x43, 0x23, 0x55, 0xFF, 0xB4},
    {0xB7, 0x0E, 0x0C, 0xBD, 0x6B, 0xB4, 0xBF, 0x7F, 0x32, 0x13, /* x */
     0x90, 0xB9, 0x4A, 0x03, 0xC1, 0xD3, 0x56, 0xC2, 0x11, 0x22,
     0x34, 0x32, 0x80, 0xD6, 0x11, 0x5C, 0x1D, 0x21},
    {0xbd, 0x37, 0x63, 0x88, 0xb5, 0xf7, 0x23, 0xfb, 0x4c, 0x22, /* y */
     0xdf, 0xe6, 0xcd, 0x43, 0x75, 0xa0, 0x5a, 0x07, 0x47, 0x64,
     0x44, 0xd5, 0x81, 0x99, 0x85, 0x00, 0x7e, 0x34}
};
97
/*-
 * Precomputed multiples of the standard generator
 * Points are given in coordinates (X, Y, Z) where Z normally is 1
 * (0 for the point at infinity).
 * For each field element, slice a_0 is word 0, etc.
 *
 * The table has 2 * 16 elements, starting with the following:
 * index | bits    | point
 * ------+---------+------------------------------
 *     0 | 0 0 0 0 | 0G
 *     1 | 0 0 0 1 | 1G
 *     2 | 0 0 1 0 | 2^56G
 *     3 | 0 0 1 1 | (2^56 + 1)G
 *     4 | 0 1 0 0 | 2^112G
 *     5 | 0 1 0 1 | (2^112 + 1)G
 *     6 | 0 1 1 0 | (2^112 + 2^56)G
 *     7 | 0 1 1 1 | (2^112 + 2^56 + 1)G
 *     8 | 1 0 0 0 | 2^168G
 *     9 | 1 0 0 1 | (2^168 + 1)G
 *    10 | 1 0 1 0 | (2^168 + 2^56)G
 *    11 | 1 0 1 1 | (2^168 + 2^56 + 1)G
 *    12 | 1 1 0 0 | (2^168 + 2^112)G
 *    13 | 1 1 0 1 | (2^168 + 2^112 + 1)G
 *    14 | 1 1 1 0 | (2^168 + 2^112 + 2^56)G
 *    15 | 1 1 1 1 | (2^168 + 2^112 + 2^56 + 1)G
 * followed by a copy of this with each element multiplied by 2^28.
 *
 * The reason for this is so that we can clock bits into four different
 * locations when doing simple scalar multiplies against the base point,
 * and then another four locations using the second 16 elements.
 */
static const felem gmul[2][16][3] = {
    {{{0, 0, 0, 0},
      {0, 0, 0, 0},
      {0, 0, 0, 0}},
     {{0x3280d6115c1d21, 0xc1d356c2112234, 0x7f321390b94a03, 0xb70e0cbd6bb4bf},
      {0xd5819985007e34, 0x75a05a07476444, 0xfb4c22dfe6cd43, 0xbd376388b5f723},
      {1, 0, 0, 0}},
     {{0xfd9675666ebbe9, 0xbca7664d40ce5e, 0x2242df8d8a2a43, 0x1f49bbb0f99bc5},
      {0x29e0b892dc9c43, 0xece8608436e662, 0xdc858f185310d0, 0x9812dd4eb8d321},
      {1, 0, 0, 0}},
     {{0x6d3e678d5d8eb8, 0x559eed1cb362f1, 0x16e9a3bbce8a3f, 0xeedcccd8c2a748},
      {0xf19f90ed50266d, 0xabf2b4bf65f9df, 0x313865468fafec, 0x5cb379ba910a17},
      {1, 0, 0, 0}},
     {{0x0641966cab26e3, 0x91fb2991fab0a0, 0xefec27a4e13a0b, 0x0499aa8a5f8ebe},
      {0x7510407766af5d, 0x84d929610d5450, 0x81d77aae82f706, 0x6916f6d4338c5b},
      {1, 0, 0, 0}},
     {{0xea95ac3b1f15c6, 0x086000905e82d4, 0xdd323ae4d1c8b1, 0x932b56be7685a3},
      {0x9ef93dea25dbbf, 0x41665960f390f0, 0xfdec76dbe2a8a7, 0x523e80f019062a},
      {1, 0, 0, 0}},
     {{0x822fdd26732c73, 0xa01c83531b5d0f, 0x363f37347c1ba4, 0xc391b45c84725c},
      {0xbbd5e1b2d6ad24, 0xddfbcde19dfaec, 0xc393da7e222a7f, 0x1efb7890ede244},
      {1, 0, 0, 0}},
     {{0x4c9e90ca217da1, 0xd11beca79159bb, 0xff8d33c2c98b7c, 0x2610b39409f849},
      {0x44d1352ac64da0, 0xcdbb7b2c46b4fb, 0x966c079b753c89, 0xfe67e4e820b112},
      {1, 0, 0, 0}},
     {{0xe28cae2df5312d, 0xc71b61d16f5c6e, 0x79b7619a3e7c4c, 0x05c73240899b47},
      {0x9f7f6382c73e3a, 0x18615165c56bda, 0x641fab2116fd56, 0x72855882b08394},
      {1, 0, 0, 0}},
     {{0x0469182f161c09, 0x74a98ca8d00fb5, 0xb89da93489a3e0, 0x41c98768fb0c1d},
      {0xe5ea05fb32da81, 0x3dce9ffbca6855, 0x1cfe2d3fbf59e6, 0x0e5e03408738a7},
      {1, 0, 0, 0}},
     {{0xdab22b2333e87f, 0x4430137a5dd2f6, 0xe03ab9f738beb8, 0xcb0c5d0dc34f24},
      {0x764a7df0c8fda5, 0x185ba5c3fa2044, 0x9281d688bcbe50, 0xc40331df893881},
      {1, 0, 0, 0}},
     {{0xb89530796f0f60, 0xade92bd26909a3, 0x1a0c83fb4884da, 0x1765bf22a5a984},
      {0x772a9ee75db09e, 0x23bc6c67cec16f, 0x4c1edba8b14e2f, 0xe2a215d9611369},
      {1, 0, 0, 0}},
     {{0x571e509fb5efb3, 0xade88696410552, 0xc8ae85fada74fe, 0x6c7e4be83bbde3},
      {0xff9f51160f4652, 0xb47ce2495a6539, 0xa2946c53b582f4, 0x286d2db3ee9a60},
      {1, 0, 0, 0}},
     {{0x40bbd5081a44af, 0x0995183b13926c, 0xbcefba6f47f6d0, 0x215619e9cc0057},
      {0x8bc94d3b0df45e, 0xf11c54a3694f6f, 0x8631b93cdfe8b5, 0xe7e3f4b0982db9},
      {1, 0, 0, 0}},
     {{0xb17048ab3e1c7b, 0xac38f36ff8a1d8, 0x1c29819435d2c6, 0xc813132f4c07e9},
      {0x2891425503b11f, 0x08781030579fea, 0xf5426ba5cc9674, 0x1e28ebf18562bc},
      {1, 0, 0, 0}},
     {{0x9f31997cc864eb, 0x06cd91d28b5e4c, 0xff17036691a973, 0xf1aef351497c58},
      {0xdd1f2d600564ff, 0xdead073b1402db, 0x74a684435bd693, 0xeea7471f962558},
      {1, 0, 0, 0}}},
    {{{0, 0, 0, 0},
      {0, 0, 0, 0},
      {0, 0, 0, 0}},
     {{0x9665266dddf554, 0x9613d78b60ef2d, 0xce27a34cdba417, 0xd35ab74d6afc31},
      {0x85ccdd22deb15e, 0x2137e5783a6aab, 0xa141cffd8c93c6, 0x355a1830e90f2d},
      {1, 0, 0, 0}},
     {{0x1a494eadaade65, 0xd6da4da77fe53c, 0xe7992996abec86, 0x65c3553c6090e3},
      {0xfa610b1fb09346, 0xf1c6540b8a4aaf, 0xc51a13ccd3cbab, 0x02995b1b18c28a},
      {1, 0, 0, 0}},
     {{0x7874568e7295ef, 0x86b419fbe38d04, 0xdc0690a7550d9a, 0xd3966a44beac33},
      {0x2b7280ec29132f, 0xbeaa3b6a032df3, 0xdc7dd88ae41200, 0xd25e2513e3a100},
      {1, 0, 0, 0}},
     {{0x924857eb2efafd, 0xac2bce41223190, 0x8edaa1445553fc, 0x825800fd3562d5},
      {0x8d79148ea96621, 0x23a01c3dd9ed8d, 0xaf8b219f9416b5, 0xd8db0cc277daea},
      {1, 0, 0, 0}},
     {{0x76a9c3b1a700f0, 0xe9acd29bc7e691, 0x69212d1a6b0327, 0x6322e97fe154be},
      {0x469fc5465d62aa, 0x8d41ed18883b05, 0x1f8eae66c52b88, 0xe4fcbe9325be51},
      {1, 0, 0, 0}},
     {{0x825fdf583cac16, 0x020b857c7b023a, 0x683c17744b0165, 0x14ffd0a2daf2f1},
      {0x323b36184218f9, 0x4944ec4e3b47d4, 0xc15b3080841acf, 0x0bced4b01a28bb},
      {1, 0, 0, 0}},
     {{0x92ac22230df5c4, 0x52f33b4063eda8, 0xcb3f19870c0c93, 0x40064f2ba65233},
      {0xfe16f0924f8992, 0x012da25af5b517, 0x1a57bb24f723a6, 0x06f8bc76760def},
      {1, 0, 0, 0}},
     {{0x4a7084f7817cb9, 0xbcab0738ee9a78, 0x3ec11e11d9c326, 0xdc0fe90e0f1aae},
      {0xcf639ea5f98390, 0x5c350aa22ffb74, 0x9afae98a4047b7, 0x956ec2d617fc45},
      {1, 0, 0, 0}},
     {{0x4306d648c1be6a, 0x9247cd8bc9a462, 0xf5595e377d2f2e, 0xbd1c3caff1a52e},
      {0x045e14472409d0, 0x29f3e17078f773, 0x745a602b2d4f7d, 0x191837685cdfbb},
      {1, 0, 0, 0}},
     {{0x5b6ee254a8cb79, 0x4953433f5e7026, 0xe21faeb1d1def4, 0xc4c225785c09de},
      {0x307ce7bba1e518, 0x31b125b1036db8, 0x47e91868839e8f, 0xc765866e33b9f3},
      {1, 0, 0, 0}},
     {{0x3bfece24f96906, 0x4794da641e5093, 0xde5df64f95db26, 0x297ecd89714b05},
      {0x701bd3ebb2c3aa, 0x7073b4f53cb1d5, 0x13c5665658af16, 0x9895089d66fe58},
      {1, 0, 0, 0}},
     {{0x0fef05f78c4790, 0x2d773633b05d2e, 0x94229c3a951c94, 0xbbbd70df4911bb},
      {0xb2c6963d2c1168, 0x105f47a72b0d73, 0x9fdf6111614080, 0x7b7e94b39e67b0},
      {1, 0, 0, 0}},
     {{0xad1a7d6efbe2b3, 0xf012482c0da69d, 0x6b3bdf12438345, 0x40d7558d7aa4d9},
      {0x8a09fffb5c6d3d, 0x9a356e5d9ffd38, 0x5973f15f4f9b1c, 0xdcd5f59f63c3ea},
      {1, 0, 0, 0}},
     {{0xacf39f4c5ca7ab, 0x4c8071cc5fd737, 0xc64e3602cd1184, 0x0acd4644c9abba},
      {0x6c011a36d8bf6e, 0xfecd87ba24e32a, 0x19f6f56574fad8, 0x050b204ced9405},
      {1, 0, 0, 0}},
     {{0xed4f1cae7d9a96, 0x5ceef7ad94c40a, 0x778e4a3bf3ef9b, 0x7405783dc3b55e},
      {0x32477c61b6e8c6, 0xb46a97570f018b, 0x91176d0a7e95d1, 0x3df90fbc4c7d0e},
      {1, 0, 0, 0}}}
};
227
/*
 * Precomputation for the group generator: same 2x16x3 layout as gmul above.
 * 'references' is a use count.  NOTE(review): no locking is visible in this
 * chunk -- confirm the count is only updated under the appropriate
 * CRYPTO lock elsewhere in the file.
 */
struct nistp224_pre_comp_st {
    felem g_pre_comp[2][16][3];
    int references;
};
233
/*
 * Return the singleton EC_METHOD for this 64-bit P-224 implementation.
 * Performance-critical entries (point multiplication, affine-coordinate
 * recovery, curve setup) use the nistp224-specific routines; everything
 * else falls through to the generic ec_GFp_simple_* / ec_GFp_nist_*
 * implementations.
 */
const EC_METHOD *EC_GFp_nistp224_method(void)
{
    static const EC_METHOD ret = {
        EC_FLAGS_DEFAULT_OCT,
        NID_X9_62_prime_field,
        ec_GFp_nistp224_group_init,
        ec_GFp_simple_group_finish,
        ec_GFp_simple_group_clear_finish,
        ec_GFp_nist_group_copy,
        ec_GFp_nistp224_group_set_curve,
        ec_GFp_simple_group_get_curve,
        ec_GFp_simple_group_get_degree,
        ec_GFp_simple_group_check_discriminant,
        ec_GFp_simple_point_init,
        ec_GFp_simple_point_finish,
        ec_GFp_simple_point_clear_finish,
        ec_GFp_simple_point_copy,
        ec_GFp_simple_point_set_to_infinity,
        ec_GFp_simple_set_Jprojective_coordinates_GFp,
        ec_GFp_simple_get_Jprojective_coordinates_GFp,
        ec_GFp_simple_point_set_affine_coordinates,
        ec_GFp_nistp224_point_get_affine_coordinates,
        0 /* point_set_compressed_coordinates */ ,
        0 /* point2oct */ ,
        0 /* oct2point */ ,
        ec_GFp_simple_add,
        ec_GFp_simple_dbl,
        ec_GFp_simple_invert,
        ec_GFp_simple_is_at_infinity,
        ec_GFp_simple_is_on_curve,
        ec_GFp_simple_cmp,
        ec_GFp_simple_make_affine,
        ec_GFp_simple_points_make_affine,
        ec_GFp_nistp224_points_mul,
        ec_GFp_nistp224_precompute_mult,
        ec_GFp_nistp224_have_precompute_mult,
        ec_GFp_nist_field_mul,
        ec_GFp_nist_field_sqr,
        0 /* field_div */ ,
        0 /* field_encode */ ,
        0 /* field_decode */ ,
        0 /* field_set_to_one */
    };

    return &ret;
}
280
281 /*
282 * Helper functions to convert field elements to/from internal representation
283 */
284 static void bin28_to_felem(felem out, const u8 in[28])
285 {
286 out[0] = *((const uint64_t *)(in)) & 0x00ffffffffffffff;
287 out[1] = (*((const uint64_t *)(in + 7))) & 0x00ffffffffffffff;
288 out[2] = (*((const uint64_t *)(in + 14))) & 0x00ffffffffffffff;
289 out[3] = (*((const uint64_t *)(in+20))) >> 8;
290 }
291
292 static void felem_to_bin28(u8 out[28], const felem in)
293 {
294 unsigned i;
295 for (i = 0; i < 7; ++i) {
296 out[i] = in[0] >> (8 * i);
297 out[i + 7] = in[1] >> (8 * i);
298 out[i + 14] = in[2] >> (8 * i);
299 out[i + 21] = in[3] >> (8 * i);
300 }
301 }
302
303 /* To preserve endianness when using BN_bn2bin and BN_bin2bn */
304 static void flip_endian(u8 *out, const u8 *in, unsigned len)
305 {
306 unsigned i;
307 for (i = 0; i < len; ++i)
308 out[i] = in[len - 1 - i];
309 }
310
311 /* From OpenSSL BIGNUM to internal representation */
312 static int BN_to_felem(felem out, const BIGNUM *bn)
313 {
314 felem_bytearray b_in;
315 felem_bytearray b_out;
316 unsigned num_bytes;
317
318 /* BN_bn2bin eats leading zeroes */
319 memset(b_out, 0, sizeof(b_out));
320 num_bytes = BN_num_bytes(bn);
321 if (num_bytes > sizeof b_out) {
322 ECerr(EC_F_BN_TO_FELEM, EC_R_BIGNUM_OUT_OF_RANGE);
323 return 0;
324 }
325 if (BN_is_negative(bn)) {
326 ECerr(EC_F_BN_TO_FELEM, EC_R_BIGNUM_OUT_OF_RANGE);
327 return 0;
328 }
329 num_bytes = BN_bn2bin(bn, b_in);
330 flip_endian(b_out, b_in, num_bytes);
331 bin28_to_felem(out, b_out);
332 return 1;
333 }
334
335 /* From internal representation to OpenSSL BIGNUM */
336 static BIGNUM *felem_to_BN(BIGNUM *out, const felem in)
337 {
338 felem_bytearray b_in, b_out;
339 felem_to_bin28(b_in, in);
340 flip_endian(b_out, b_in, sizeof b_out);
341 return BN_bin2bn(b_out, sizeof b_out, out);
342 }
343
344 /******************************************************************************/
345 /*-
346 * FIELD OPERATIONS
347 *
348 * Field operations, using the internal representation of field elements.
349 * NB! These operations are specific to our point multiplication and cannot be
350 * expected to be correct in general - e.g., multiplication with a large scalar
351 * will cause an overflow.
352 *
353 */
354
355 static void felem_one(felem out)
356 {
357 out[0] = 1;
358 out[1] = 0;
359 out[2] = 0;
360 out[3] = 0;
361 }
362
363 static void felem_assign(felem out, const felem in)
364 {
365 out[0] = in[0];
366 out[1] = in[1];
367 out[2] = in[2];
368 out[3] = in[3];
369 }
370
371 /* Sum two field elements: out += in */
372 static void felem_sum(felem out, const felem in)
373 {
374 out[0] += in[0];
375 out[1] += in[1];
376 out[2] += in[2];
377 out[3] += in[3];
378 }
379
380 /* Get negative value: out = -in */
381 /* Assumes in[i] < 2^57 */
382 static void felem_neg(felem out, const felem in)
383 {
384 static const limb two58p2 = (((limb) 1) << 58) + (((limb) 1) << 2);
385 static const limb two58m2 = (((limb) 1) << 58) - (((limb) 1) << 2);
386 static const limb two58m42m2 = (((limb) 1) << 58) -
387 (((limb) 1) << 42) - (((limb) 1) << 2);
388
389 /* Set to 0 mod 2^224-2^96+1 to ensure out > in */
390 out[0] = two58p2 - in[0];
391 out[1] = two58m42m2 - in[1];
392 out[2] = two58m2 - in[2];
393 out[3] = two58m2 - in[3];
394 }
395
396 /* Subtract field elements: out -= in */
397 /* Assumes in[i] < 2^57 */
398 static void felem_diff(felem out, const felem in)
399 {
400 static const limb two58p2 = (((limb) 1) << 58) + (((limb) 1) << 2);
401 static const limb two58m2 = (((limb) 1) << 58) - (((limb) 1) << 2);
402 static const limb two58m42m2 = (((limb) 1) << 58) -
403 (((limb) 1) << 42) - (((limb) 1) << 2);
404
405 /* Add 0 mod 2^224-2^96+1 to ensure out > in */
406 out[0] += two58p2;
407 out[1] += two58m42m2;
408 out[2] += two58m2;
409 out[3] += two58m2;
410
411 out[0] -= in[0];
412 out[1] -= in[1];
413 out[2] -= in[2];
414 out[3] -= in[3];
415 }
416
417 /* Subtract in unreduced 128-bit mode: out -= in */
418 /* Assumes in[i] < 2^119 */
419 static void widefelem_diff(widefelem out, const widefelem in)
420 {
421 static const widelimb two120 = ((widelimb) 1) << 120;
422 static const widelimb two120m64 = (((widelimb) 1) << 120) -
423 (((widelimb) 1) << 64);
424 static const widelimb two120m104m64 = (((widelimb) 1) << 120) -
425 (((widelimb) 1) << 104) - (((widelimb) 1) << 64);
426
427 /* Add 0 mod 2^224-2^96+1 to ensure out > in */
428 out[0] += two120;
429 out[1] += two120m64;
430 out[2] += two120m64;
431 out[3] += two120;
432 out[4] += two120m104m64;
433 out[5] += two120m64;
434 out[6] += two120m64;
435
436 out[0] -= in[0];
437 out[1] -= in[1];
438 out[2] -= in[2];
439 out[3] -= in[3];
440 out[4] -= in[4];
441 out[5] -= in[5];
442 out[6] -= in[6];
443 }
444
445 /* Subtract in mixed mode: out128 -= in64 */
446 /* in[i] < 2^63 */
447 static void felem_diff_128_64(widefelem out, const felem in)
448 {
449 static const widelimb two64p8 = (((widelimb) 1) << 64) +
450 (((widelimb) 1) << 8);
451 static const widelimb two64m8 = (((widelimb) 1) << 64) -
452 (((widelimb) 1) << 8);
453 static const widelimb two64m48m8 = (((widelimb) 1) << 64) -
454 (((widelimb) 1) << 48) - (((widelimb) 1) << 8);
455
456 /* Add 0 mod 2^224-2^96+1 to ensure out > in */
457 out[0] += two64p8;
458 out[1] += two64m48m8;
459 out[2] += two64m8;
460 out[3] += two64m8;
461
462 out[0] -= in[0];
463 out[1] -= in[1];
464 out[2] -= in[2];
465 out[3] -= in[3];
466 }
467
468 /*
469 * Multiply a field element by a scalar: out = out * scalar The scalars we
470 * actually use are small, so results fit without overflow
471 */
472 static void felem_scalar(felem out, const limb scalar)
473 {
474 out[0] *= scalar;
475 out[1] *= scalar;
476 out[2] *= scalar;
477 out[3] *= scalar;
478 }
479
480 /*
481 * Multiply an unreduced field element by a scalar: out = out * scalar The
482 * scalars we actually use are small, so results fit without overflow
483 */
484 static void widefelem_scalar(widefelem out, const widelimb scalar)
485 {
486 out[0] *= scalar;
487 out[1] *= scalar;
488 out[2] *= scalar;
489 out[3] *= scalar;
490 out[4] *= scalar;
491 out[5] *= scalar;
492 out[6] *= scalar;
493 }
494
/*
 * Square a field element: out = in^2 (schoolbook with the symmetric cross
 * terms folded via the doubled limbs tmp0..tmp2).
 * Per the representation notes above, inputs must satisfy in[i] < 2^60 so
 * every 128-bit output coefficient fits without overflow.
 */
static void felem_square(widefelem out, const felem in)
{
    limb tmp0, tmp1, tmp2;
    tmp0 = 2 * in[0];
    tmp1 = 2 * in[1];
    tmp2 = 2 * in[2];
    out[0] = ((widelimb) in[0]) * in[0];
    out[1] = ((widelimb) in[0]) * tmp1;
    out[2] = ((widelimb) in[0]) * tmp2 + ((widelimb) in[1]) * in[1];
    out[3] = ((widelimb) in[3]) * tmp0 + ((widelimb) in[1]) * tmp2;
    out[4] = ((widelimb) in[3]) * tmp1 + ((widelimb) in[2]) * in[2];
    out[5] = ((widelimb) in[3]) * tmp2;
    out[6] = ((widelimb) in[3]) * in[3];
}
510
/*
 * Multiply two field elements: out = in1 * in2 (schoolbook; out[k] sums
 * all products in1[i]*in2[j] with i+j == k).
 * Per the representation notes above, inputs must satisfy a_i < 2^60 so
 * each 128-bit coefficient (at most 4 products) fits without overflow.
 */
static void felem_mul(widefelem out, const felem in1, const felem in2)
{
    out[0] = ((widelimb) in1[0]) * in2[0];
    out[1] = ((widelimb) in1[0]) * in2[1] + ((widelimb) in1[1]) * in2[0];
    out[2] = ((widelimb) in1[0]) * in2[2] + ((widelimb) in1[1]) * in2[1] +
        ((widelimb) in1[2]) * in2[0];
    out[3] = ((widelimb) in1[0]) * in2[3] + ((widelimb) in1[1]) * in2[2] +
        ((widelimb) in1[2]) * in2[1] + ((widelimb) in1[3]) * in2[0];
    out[4] = ((widelimb) in1[1]) * in2[3] + ((widelimb) in1[2]) * in2[2] +
        ((widelimb) in1[3]) * in2[1];
    out[5] = ((widelimb) in1[2]) * in2[3] + ((widelimb) in1[3]) * in2[2];
    out[6] = ((widelimb) in1[3]) * in2[3];
}
525
/*-
 * Reduce seven 128-bit coefficients to four 64-bit coefficients.
 * Requires in[i] < 2^126,
 * ensures out[0] < 2^56, out[1] < 2^56, out[2] < 2^56, out[3] <= 2^56 + 2^16
 * (i.e. out < 2*p; call felem_contract for the unique representative).
 * Uses the identity 2^224 == 2^96 - 1 (mod p) to fold the high
 * coefficients down; the statement order below is load-bearing, as later
 * steps rely on the bounds established by earlier ones.
 */
static void felem_reduce(felem out, const widefelem in)
{
    static const widelimb two127p15 = (((widelimb) 1) << 127) +
        (((widelimb) 1) << 15);
    static const widelimb two127m71 = (((widelimb) 1) << 127) -
        (((widelimb) 1) << 71);
    static const widelimb two127m71m55 = (((widelimb) 1) << 127) -
        (((widelimb) 1) << 71) - (((widelimb) 1) << 55);
    widelimb output[5];

    /* Add 0 mod 2^224-2^96+1 to ensure all differences are positive */
    output[0] = in[0] + two127p15;
    output[1] = in[1] + two127m71m55;
    output[2] = in[2] + two127m71;
    output[3] = in[3];
    output[4] = in[4];

    /* Eliminate in[4], in[5], in[6] */
    output[4] += in[6] >> 16;
    output[3] += (in[6] & 0xffff) << 40;
    output[2] -= in[6];

    output[3] += in[5] >> 16;
    output[2] += (in[5] & 0xffff) << 40;
    output[1] -= in[5];

    output[2] += output[4] >> 16;
    output[1] += (output[4] & 0xffff) << 40;
    output[0] -= output[4];

    /* Carry 2 -> 3 -> 4 */
    output[3] += output[2] >> 56;
    output[2] &= 0x00ffffffffffffff;

    output[4] = output[3] >> 56;
    output[3] &= 0x00ffffffffffffff;

    /* Now output[2] < 2^56, output[3] < 2^56, output[4] < 2^72 */

    /* Eliminate output[4] */
    output[2] += output[4] >> 16;
    /* output[2] < 2^56 + 2^56 = 2^57 */
    output[1] += (output[4] & 0xffff) << 40;
    output[0] -= output[4];

    /* Carry 0 -> 1 -> 2 -> 3 */
    output[1] += output[0] >> 56;
    out[0] = output[0] & 0x00ffffffffffffff;

    output[2] += output[1] >> 56;
    /* output[2] < 2^57 + 2^72 */
    out[1] = output[1] & 0x00ffffffffffffff;
    output[3] += output[2] >> 56;
    /* output[3] <= 2^56 + 2^16 */
    out[2] = output[2] & 0x00ffffffffffffff;

    /*-
     * out[0] < 2^56, out[1] < 2^56, out[2] < 2^56,
     * out[3] <= 2^56 + 2^16 (due to final carry),
     * so out < 2*p
     */
    out[3] = output[3];
}
593
594 static void felem_square_reduce(felem out, const felem in)
595 {
596 widefelem tmp;
597 felem_square(tmp, in);
598 felem_reduce(out, tmp);
599 }
600
601 static void felem_mul_reduce(felem out, const felem in1, const felem in2)
602 {
603 widefelem tmp;
604 felem_mul(tmp, in1, in2);
605 felem_reduce(out, tmp);
606 }
607
/*
 * Reduce to the unique minimal representation in [0, p).
 * Requires 0 <= in < 2*p (always call felem_reduce first).
 * Branch-free: the conditional subtraction of p is done with masks derived
 * from sign bits, so the code is constant-time in the input value.
 */
static void felem_contract(felem out, const felem in)
{
    static const int64_t two56 = ((limb) 1) << 56;
    /* 0 <= in < 2*p, p = 2^224 - 2^96 + 1 */
    /* if in > p , reduce in = in - 2^224 + 2^96 - 1 */
    int64_t tmp[4], a;
    tmp[0] = in[0];
    tmp[1] = in[1];
    tmp[2] = in[2];
    tmp[3] = in[3];
    /* Case 1: a = 1 iff in >= 2^224 */
    a = (in[3] >> 56);
    tmp[0] -= a;
    tmp[1] += a << 40;
    tmp[3] &= 0x00ffffffffffffff;
    /*
     * Case 2: a = 0 iff p <= in < 2^224, i.e., the high 128 bits are all 1
     * and the lower part is non-zero
     */
    a = ((in[3] & in[2] & (in[1] | 0x000000ffffffffff)) + 1) |
        (((int64_t) (in[0] + (in[1] & 0x000000ffffffffff)) - 1) >> 63);
    a &= 0x00ffffffffffffff;
    /* turn a into an all-one mask (if a = 0) or an all-zero mask */
    a = (a - 1) >> 63;
    /* subtract 2^224 - 2^96 + 1 if a is all-one */
    tmp[3] &= a ^ 0xffffffffffffffff;
    tmp[2] &= a ^ 0xffffffffffffffff;
    tmp[1] &= (a ^ 0xffffffffffffffff) | 0x000000ffffffffff;
    tmp[0] -= 1 & a;

    /*
     * eliminate negative coefficients: if tmp[0] is negative, tmp[1] must be
     * non-zero, so we only need one step
     */
    a = tmp[0] >> 63;
    tmp[0] += two56 & a;
    tmp[1] -= 1 & a;

    /* carry 1 -> 2 -> 3 */
    tmp[2] += tmp[1] >> 56;
    tmp[1] &= 0x00ffffffffffffff;

    tmp[3] += tmp[2] >> 56;
    tmp[2] &= 0x00ffffffffffffff;

    /* Now 0 <= out < p */
    out[0] = tmp[0];
    out[1] = tmp[1];
    out[2] = tmp[2];
    out[3] = tmp[3];
}
663
/*
 * Zero-check: returns an all-ones-in-bit-0 style flag (1) if the input
 * represents 0 mod p, and 0 otherwise.  Constant-time: equality with each
 * candidate is computed by OR-ing XOR differences and extracting the sign
 * of (x - 1).  We know that field elements are reduced to in < 2^225, so
 * we only need to check three representatives of zero:
 * 0, p = 2^224 - 2^96 + 1, and 2*p = 2^225 - 2^97 + 2.
 */
static limb felem_is_zero(const felem in)
{
    limb zero, two224m96p1, two225m97p2;

    zero = in[0] | in[1] | in[2] | in[3];
    zero = (((int64_t) (zero) - 1) >> 63) & 1;
    two224m96p1 = (in[0] ^ 1) | (in[1] ^ 0x00ffff0000000000)
        | (in[2] ^ 0x00ffffffffffffff) | (in[3] ^ 0x00ffffffffffffff);
    two224m96p1 = (((int64_t) (two224m96p1) - 1) >> 63) & 1;
    two225m97p2 = (in[0] ^ 2) | (in[1] ^ 0x00fffe0000000000)
        | (in[2] ^ 0x00ffffffffffffff) | (in[3] ^ 0x01ffffffffffffff);
    two225m97p2 = (((int64_t) (two225m97p2) - 1) >> 63) & 1;
    return (zero | two224m96p1 | two225m97p2);
}
683
684 static limb felem_is_zero_int(const felem in)
685 {
686 return (int)(felem_is_zero(in) & ((limb) 1));
687 }
688
/*
 * Invert a field element: out = in^-1 mod p, via exponentiation by
 * p - 2 = 2^224 - 2^96 - 1 (Fermat's little theorem), so it is
 * constant-time for non-zero inputs.
 * Computation chain copied from djb's code; the comment after each step
 * gives the exponent of 'in' accumulated so far.
 */
static void felem_inv(felem out, const felem in)
{
    felem ftmp, ftmp2, ftmp3, ftmp4;
    widefelem tmp;
    unsigned i;

    felem_square(tmp, in);
    felem_reduce(ftmp, tmp);    /* 2 */
    felem_mul(tmp, in, ftmp);
    felem_reduce(ftmp, tmp);    /* 2^2 - 1 */
    felem_square(tmp, ftmp);
    felem_reduce(ftmp, tmp);    /* 2^3 - 2 */
    felem_mul(tmp, in, ftmp);
    felem_reduce(ftmp, tmp);    /* 2^3 - 1 */
    felem_square(tmp, ftmp);
    felem_reduce(ftmp2, tmp);   /* 2^4 - 2 */
    felem_square(tmp, ftmp2);
    felem_reduce(ftmp2, tmp);   /* 2^5 - 4 */
    felem_square(tmp, ftmp2);
    felem_reduce(ftmp2, tmp);   /* 2^6 - 8 */
    felem_mul(tmp, ftmp2, ftmp);
    felem_reduce(ftmp, tmp);    /* 2^6 - 1 */
    felem_square(tmp, ftmp);
    felem_reduce(ftmp2, tmp);   /* 2^7 - 2 */
    for (i = 0; i < 5; ++i) {   /* 2^12 - 2^6 */
        felem_square(tmp, ftmp2);
        felem_reduce(ftmp2, tmp);
    }
    felem_mul(tmp, ftmp2, ftmp);
    felem_reduce(ftmp2, tmp);   /* 2^12 - 1 */
    felem_square(tmp, ftmp2);
    felem_reduce(ftmp3, tmp);   /* 2^13 - 2 */
    for (i = 0; i < 11; ++i) {  /* 2^24 - 2^12 */
        felem_square(tmp, ftmp3);
        felem_reduce(ftmp3, tmp);
    }
    felem_mul(tmp, ftmp3, ftmp2);
    felem_reduce(ftmp2, tmp);   /* 2^24 - 1 */
    felem_square(tmp, ftmp2);
    felem_reduce(ftmp3, tmp);   /* 2^25 - 2 */
    for (i = 0; i < 23; ++i) {  /* 2^48 - 2^24 */
        felem_square(tmp, ftmp3);
        felem_reduce(ftmp3, tmp);
    }
    felem_mul(tmp, ftmp3, ftmp2);
    felem_reduce(ftmp3, tmp);   /* 2^48 - 1 */
    felem_square(tmp, ftmp3);
    felem_reduce(ftmp4, tmp);   /* 2^49 - 2 */
    for (i = 0; i < 47; ++i) {  /* 2^96 - 2^48 */
        felem_square(tmp, ftmp4);
        felem_reduce(ftmp4, tmp);
    }
    felem_mul(tmp, ftmp3, ftmp4);
    felem_reduce(ftmp3, tmp);   /* 2^96 - 1 */
    felem_square(tmp, ftmp3);
    felem_reduce(ftmp4, tmp);   /* 2^97 - 2 */
    for (i = 0; i < 23; ++i) {  /* 2^120 - 2^24 */
        felem_square(tmp, ftmp4);
        felem_reduce(ftmp4, tmp);
    }
    felem_mul(tmp, ftmp2, ftmp4);
    felem_reduce(ftmp2, tmp);   /* 2^120 - 1 */
    for (i = 0; i < 6; ++i) {   /* 2^126 - 2^6 */
        felem_square(tmp, ftmp2);
        felem_reduce(ftmp2, tmp);
    }
    felem_mul(tmp, ftmp2, ftmp);
    felem_reduce(ftmp, tmp);    /* 2^126 - 1 */
    felem_square(tmp, ftmp);
    felem_reduce(ftmp, tmp);    /* 2^127 - 2 */
    felem_mul(tmp, ftmp, in);
    felem_reduce(ftmp, tmp);    /* 2^127 - 1 */
    for (i = 0; i < 97; ++i) {  /* 2^224 - 2^97 */
        felem_square(tmp, ftmp);
        felem_reduce(ftmp, tmp);
    }
    felem_mul(tmp, ftmp, ftmp3);
    felem_reduce(out, tmp);     /* 2^224 - 2^96 - 1 */
}
770
771 /*
772 * Copy in constant time: if icopy == 1, copy in to out, if icopy == 0, copy
773 * out to itself.
774 */
775 static void copy_conditional(felem out, const felem in, limb icopy)
776 {
777 unsigned i;
778 /*
779 * icopy is a (64-bit) 0 or 1, so copy is either all-zero or all-one
780 */
781 const limb copy = -icopy;
782 for (i = 0; i < 4; ++i) {
783 const limb tmp = copy & (in[i] ^ out[i]);
784 out[i] ^= tmp;
785 }
786 }
787
788 /******************************************************************************/
789 /*-
790 * ELLIPTIC CURVE POINT OPERATIONS
791 *
792 * Points are represented in Jacobian projective coordinates:
793 * (X, Y, Z) corresponds to the affine point (X/Z^2, Y/Z^3),
794 * or to the point at infinity if Z == 0.
795 *
796 */
797
/*-
 * Double an elliptic curve point:
 * (X', Y', Z') = 2 * (X, Y, Z), where
 * X' = (3 * (X - Z^2) * (X + Z^2))^2 - 8 * X * Y^2
 * Y' = 3 * (X - Z^2) * (X + Z^2) * (4 * X * Y^2 - X') - 8 * Y^2
 * Z' = (Y + Z)^2 - Y^2 - Z^2 = 2 * Y * Z
 * Outputs can equal corresponding inputs, i.e., x_out == x_in is allowed,
 * while x_out == y_in is not (maybe this works, but it's not tested).
 * The per-line bound comments track limb magnitudes to show every
 * felem_mul/felem_square input stays below the 2^60 limit documented in
 * the representation notes.
 */
static void
point_double(felem x_out, felem y_out, felem z_out,
             const felem x_in, const felem y_in, const felem z_in)
{
    widefelem tmp, tmp2;
    felem delta, gamma, beta, alpha, ftmp, ftmp2;

    felem_assign(ftmp, x_in);
    felem_assign(ftmp2, x_in);

    /* delta = z^2 */
    felem_square(tmp, z_in);
    felem_reduce(delta, tmp);

    /* gamma = y^2 */
    felem_square(tmp, y_in);
    felem_reduce(gamma, tmp);

    /* beta = x*gamma */
    felem_mul(tmp, x_in, gamma);
    felem_reduce(beta, tmp);

    /* alpha = 3*(x-delta)*(x+delta) */
    felem_diff(ftmp, delta);
    /* ftmp[i] < 2^57 + 2^58 + 2 < 2^59 */
    felem_sum(ftmp2, delta);
    /* ftmp2[i] < 2^57 + 2^57 = 2^58 */
    felem_scalar(ftmp2, 3);
    /* ftmp2[i] < 3 * 2^58 < 2^60 */
    felem_mul(tmp, ftmp, ftmp2);
    /* tmp[i] < 2^60 * 2^59 * 4 = 2^121 */
    felem_reduce(alpha, tmp);

    /* x' = alpha^2 - 8*beta */
    felem_square(tmp, alpha);
    /* tmp[i] < 4 * 2^57 * 2^57 = 2^116 */
    felem_assign(ftmp, beta);
    felem_scalar(ftmp, 8);
    /* ftmp[i] < 8 * 2^57 = 2^60 */
    felem_diff_128_64(tmp, ftmp);
    /* tmp[i] < 2^116 + 2^64 + 8 < 2^117 */
    felem_reduce(x_out, tmp);

    /* z' = (y + z)^2 - gamma - delta */
    felem_sum(delta, gamma);
    /* delta[i] < 2^57 + 2^57 = 2^58 */
    felem_assign(ftmp, y_in);
    felem_sum(ftmp, z_in);
    /* ftmp[i] < 2^57 + 2^57 = 2^58 */
    felem_square(tmp, ftmp);
    /* tmp[i] < 4 * 2^58 * 2^58 = 2^118 */
    felem_diff_128_64(tmp, delta);
    /* tmp[i] < 2^118 + 2^64 + 8 < 2^119 */
    felem_reduce(z_out, tmp);

    /* y' = alpha*(4*beta - x') - 8*gamma^2 */
    felem_scalar(beta, 4);
    /* beta[i] < 4 * 2^57 = 2^59 */
    felem_diff(beta, x_out);
    /* beta[i] < 2^59 + 2^58 + 2 < 2^60 */
    felem_mul(tmp, alpha, beta);
    /* tmp[i] < 4 * 2^57 * 2^60 = 2^119 */
    felem_square(tmp2, gamma);
    /* tmp2[i] < 4 * 2^57 * 2^57 = 2^116 */
    widefelem_scalar(tmp2, 8);
    /* tmp2[i] < 8 * 2^116 = 2^119 */
    widefelem_diff(tmp, tmp2);
    /* tmp[i] < 2^119 + 2^120 < 2^121 */
    felem_reduce(y_out, tmp);
}
877
878 /*-
879 * Add two elliptic curve points:
880 * (X_1, Y_1, Z_1) + (X_2, Y_2, Z_2) = (X_3, Y_3, Z_3), where
881 * X_3 = (Z_1^3 * Y_2 - Z_2^3 * Y_1)^2 - (Z_1^2 * X_2 - Z_2^2 * X_1)^3 -
882 * 2 * Z_2^2 * X_1 * (Z_1^2 * X_2 - Z_2^2 * X_1)^2
883 * Y_3 = (Z_1^3 * Y_2 - Z_2^3 * Y_1) * (Z_2^2 * X_1 * (Z_1^2 * X_2 - Z_2^2 * X_1)^2 - X_3) -
884 * Z_2^3 * Y_1 * (Z_1^2 * X_2 - Z_2^2 * X_1)^3
885 * Z_3 = (Z_1^2 * X_2 - Z_2^2 * X_1) * (Z_1 * Z_2)
886 *
887 * This runs faster if 'mixed' is set, which requires Z_2 = 1 or Z_2 = 0.
888 */
889
890 /*
891 * This function is not entirely constant-time: it includes a branch for
892 * checking whether the two input points are equal, (while not equal to the
893 * point at infinity). This case never happens during single point
894 * multiplication, so there is no timing leak for ECDH or ECDSA signing.
895 */
static void point_add(felem x3, felem y3, felem z3,
                      const felem x1, const felem y1, const felem z1,
                      const int mixed, const felem x2, const felem y2,
                      const felem z2)
{
    felem ftmp, ftmp2, ftmp3, ftmp4, ftmp5, x_out, y_out, z_out;
    widefelem tmp, tmp2;
    limb z1_is_zero, z2_is_zero, x_equal, y_equal; /* flags; also used as copy masks below */

    /* Unless the caller guarantees z2 = 1 ("mixed"), compute z2^3*y1 and z2^2*x1. */
    if (!mixed) {
        /* ftmp2 = z2^2 */
        felem_square(tmp, z2);
        felem_reduce(ftmp2, tmp);

        /* ftmp4 = z2^3 */
        felem_mul(tmp, ftmp2, z2);
        felem_reduce(ftmp4, tmp);

        /* ftmp4 = z2^3*y1 */
        felem_mul(tmp2, ftmp4, y1);
        felem_reduce(ftmp4, tmp2);

        /* ftmp2 = z2^2*x1 */
        felem_mul(tmp2, ftmp2, x1);
        felem_reduce(ftmp2, tmp2);
    } else {
        /*
         * We'll assume z2 = 1 (special case z2 = 0 is handled later)
         */

        /* ftmp4 = z2^3*y1 */
        felem_assign(ftmp4, y1);

        /* ftmp2 = z2^2*x1 */
        felem_assign(ftmp2, x1);
    }

    /* ftmp = z1^2 */
    felem_square(tmp, z1);
    felem_reduce(ftmp, tmp);

    /* ftmp3 = z1^3 */
    felem_mul(tmp, ftmp, z1);
    felem_reduce(ftmp3, tmp);

    /* tmp = z1^3*y2 */
    felem_mul(tmp, ftmp3, y2);
    /* tmp[i] < 4 * 2^57 * 2^57 = 2^116 */

    /* ftmp3 = z1^3*y2 - z2^3*y1 */
    felem_diff_128_64(tmp, ftmp4);
    /* tmp[i] < 2^116 + 2^64 + 8 < 2^117 */
    felem_reduce(ftmp3, tmp);

    /* tmp = z1^2*x2 */
    felem_mul(tmp, ftmp, x2);
    /* tmp[i] < 4 * 2^57 * 2^57 = 2^116 */

    /* ftmp = z1^2*x2 - z2^2*x1 */
    felem_diff_128_64(tmp, ftmp2);
    /* tmp[i] < 2^116 + 2^64 + 8 < 2^117 */
    felem_reduce(ftmp, tmp);

    /*
     * the formulae are incorrect if the points are equal so we check for
     * this and do doubling if this happens
     */
    x_equal = felem_is_zero(ftmp);   /* z1^2*x2 == z2^2*x1 */
    y_equal = felem_is_zero(ftmp3);  /* z1^3*y2 == z2^3*y1 */
    z1_is_zero = felem_is_zero(z1);
    z2_is_zero = felem_is_zero(z2);
    /* In affine coordinates, (X_1, Y_1) == (X_2, Y_2) */
    /* NOTE: this branch is deliberate; see the non-constant-time note above the function. */
    if (x_equal && y_equal && !z1_is_zero && !z2_is_zero) {
        point_double(x3, y3, z3, x1, y1, z1);
        return;
    }

    /* ftmp5 = z1*z2 */
    if (!mixed) {
        felem_mul(tmp, z1, z2);
        felem_reduce(ftmp5, tmp);
    } else {
        /* special case z2 = 0 is handled later */
        felem_assign(ftmp5, z1);
    }

    /* z_out = (z1^2*x2 - z2^2*x1)*(z1*z2) */
    felem_mul(tmp, ftmp, ftmp5);
    felem_reduce(z_out, tmp);

    /* ftmp = (z1^2*x2 - z2^2*x1)^2 */
    felem_assign(ftmp5, ftmp);
    felem_square(tmp, ftmp);
    felem_reduce(ftmp, tmp);

    /* ftmp5 = (z1^2*x2 - z2^2*x1)^3 */
    felem_mul(tmp, ftmp, ftmp5);
    felem_reduce(ftmp5, tmp);

    /* ftmp2 = z2^2*x1*(z1^2*x2 - z2^2*x1)^2 */
    felem_mul(tmp, ftmp2, ftmp);
    felem_reduce(ftmp2, tmp);

    /* tmp = z2^3*y1*(z1^2*x2 - z2^2*x1)^3 */
    felem_mul(tmp, ftmp4, ftmp5);
    /* tmp[i] < 4 * 2^57 * 2^57 = 2^116 */

    /* tmp2 = (z1^3*y2 - z2^3*y1)^2 */
    felem_square(tmp2, ftmp3);
    /* tmp2[i] < 4 * 2^57 * 2^57 < 2^116 */

    /* tmp2 = (z1^3*y2 - z2^3*y1)^2 - (z1^2*x2 - z2^2*x1)^3 */
    felem_diff_128_64(tmp2, ftmp5);
    /* tmp2[i] < 2^116 + 2^64 + 8 < 2^117 */

    /* ftmp5 = 2*z2^2*x1*(z1^2*x2 - z2^2*x1)^2 */
    felem_assign(ftmp5, ftmp2);
    felem_scalar(ftmp5, 2);
    /* ftmp5[i] < 2 * 2^57 = 2^58 */

    /*-
     * x_out = (z1^3*y2 - z2^3*y1)^2 - (z1^2*x2 - z2^2*x1)^3 -
     *  2*z2^2*x1*(z1^2*x2 - z2^2*x1)^2
     */
    felem_diff_128_64(tmp2, ftmp5);
    /* tmp2[i] < 2^117 + 2^64 + 8 < 2^118 */
    felem_reduce(x_out, tmp2);

    /* ftmp2 = z2^2*x1*(z1^2*x2 - z2^2*x1)^2 - x_out */
    felem_diff(ftmp2, x_out);
    /* ftmp2[i] < 2^57 + 2^58 + 2 < 2^59 */

    /*
     * tmp2 = (z1^3*y2 - z2^3*y1)*(z2^2*x1*(z1^2*x2 - z2^2*x1)^2 - x_out)
     */
    felem_mul(tmp2, ftmp3, ftmp2);
    /* tmp2[i] < 4 * 2^57 * 2^59 = 2^118 */

    /*-
     * y_out = (z1^3*y2 - z2^3*y1)*(z2^2*x1*(z1^2*x2 - z2^2*x1)^2 - x_out) -
     *  z2^3*y1*(z1^2*x2 - z2^2*x1)^3
     */
    widefelem_diff(tmp2, tmp);
    /* tmp2[i] < 2^118 + 2^120 < 2^121 */
    felem_reduce(y_out, tmp2);

    /*
     * the result (x_out, y_out, z_out) is incorrect if one of the inputs is
     * the point at infinity, so we need to check for this separately
     */

    /*
     * if point 1 is at infinity, copy point 2 to output, and vice versa
     * (constant-time conditional copies, keyed on the z*_is_zero masks)
     */
    copy_conditional(x_out, x2, z1_is_zero);
    copy_conditional(x_out, x1, z2_is_zero);
    copy_conditional(y_out, y2, z1_is_zero);
    copy_conditional(y_out, y1, z2_is_zero);
    copy_conditional(z_out, z2, z1_is_zero);
    copy_conditional(z_out, z1, z2_is_zero);
    felem_assign(x3, x_out);
    felem_assign(y3, y_out);
    felem_assign(z3, z_out);
}
1060
1061 /*
1062 * select_point selects the |idx|th point from a precomputation table and
1063 * copies it to out.
 * The pre_comp array argument must contain at least |size| entries
1065 */
static void select_point(const u64 idx, unsigned int size,
                         const felem pre_comp[][3], felem out[3])
{
    unsigned i, j;
    limb *outlimbs = &out[0][0];

    memset(out, 0, sizeof(*out) * 3);
    /*
     * Constant-time lookup: every table entry is read regardless of |idx|,
     * so the memory access pattern does not depend on the secret index.
     */
    for (i = 0; i < size; i++) {
        const limb *inlimbs = &pre_comp[i][0][0];
        u64 mask = i ^ idx;     /* zero iff this is the requested entry */
        /* fold the set bits of the low byte down into bit 0 */
        mask |= mask >> 4;
        mask |= mask >> 2;
        mask |= mask >> 1;
        mask &= 1;
        mask--;                 /* 0 -> all-ones, 1 -> all-zeros */
        for (j = 0; j < 4 * 3; j++) /* 4 limbs x 3 coordinates */
            outlimbs[j] |= inlimbs[j] & mask;
    }
}
1085
1086 /* get_bit returns the |i|th bit in |in| */
1087 static char get_bit(const felem_bytearray in, unsigned i)
1088 {
1089 if (i >= 224)
1090 return 0;
1091 return (in[i >> 3] >> (i & 7)) & 1;
1092 }
1093
1094 /*
1095 * Interleaved point multiplication using precomputed point multiples: The
1096 * small point multiples 0*P, 1*P, ..., 16*P are in pre_comp[], the scalars
1097 * in scalars[]. If g_scalar is non-NULL, we also add this multiple of the
1098 * generator, using certain (large) precomputed multiples in g_pre_comp.
1099 * Output point (X, Y, Z) is stored in x_out, y_out, z_out
1100 */
static void batch_mul(felem x_out, felem y_out, felem z_out,
                      const felem_bytearray scalars[],
                      const unsigned num_points, const u8 *g_scalar,
                      const int mixed, const felem pre_comp[][17][3],
                      const felem g_pre_comp[2][16][3])
{
    int i, skip;
    unsigned num;
    unsigned gen_mul = (g_scalar != NULL); /* include a generator multiple? */
    felem nq[3], tmp[4];
    u64 bits;
    u8 sign, digit;

    /* set nq to the point at infinity */
    memset(nq, 0, sizeof(nq));

    /*
     * Loop over all scalars msb-to-lsb, interleaving additions of multiples
     * of the generator (two in each of the last 28 rounds) and additions of
     * other points multiples (every 5th round).
     */
    skip = 1;                   /* save two point operations in the first
                                 * round */
    for (i = (num_points ? 220 : 27); i >= 0; --i) {
        /* double */
        if (!skip)
            point_double(nq[0], nq[1], nq[2], nq[0], nq[1], nq[2]);

        /* add multiples of the generator */
        if (gen_mul && (i <= 27)) {
            /*
             * first, look 28 bits upwards: gather one bit from each 56-bit
             * quarter of the scalar to form a 4-bit table index
             */
            bits = get_bit(g_scalar, i + 196) << 3;
            bits |= get_bit(g_scalar, i + 140) << 2;
            bits |= get_bit(g_scalar, i + 84) << 1;
            bits |= get_bit(g_scalar, i + 28);
            /* select the point to add, in constant time */
            select_point(bits, 16, g_pre_comp[1], tmp);

            if (!skip) {
                /* value 1 below is argument for "mixed" */
                point_add(nq[0], nq[1], nq[2],
                          nq[0], nq[1], nq[2], 1, tmp[0], tmp[1], tmp[2]);
            } else {
                /* first addition: just take the selected point as-is */
                memcpy(nq, tmp, 3 * sizeof(felem));
                skip = 0;
            }

            /* second, look at the current position */
            bits = get_bit(g_scalar, i + 168) << 3;
            bits |= get_bit(g_scalar, i + 112) << 2;
            bits |= get_bit(g_scalar, i + 56) << 1;
            bits |= get_bit(g_scalar, i);
            /* select the point to add, in constant time */
            select_point(bits, 16, g_pre_comp[0], tmp);
            point_add(nq[0], nq[1], nq[2],
                      nq[0], nq[1], nq[2],
                      1 /* mixed */ , tmp[0], tmp[1], tmp[2]);
        }

        /* do other additions every 5 doublings */
        if (num_points && (i % 5 == 0)) {
            /* loop over all scalars */
            for (num = 0; num < num_points; ++num) {
                /* six consecutive bits form a signed-window digit */
                bits = get_bit(scalars[num], i + 4) << 5;
                bits |= get_bit(scalars[num], i + 3) << 4;
                bits |= get_bit(scalars[num], i + 2) << 3;
                bits |= get_bit(scalars[num], i + 1) << 2;
                bits |= get_bit(scalars[num], i) << 1;
                bits |= get_bit(scalars[num], i - 1);
                ec_GFp_nistp_recode_scalar_bits(&sign, &digit, bits);

                /* select the point to add or subtract */
                select_point(digit, 17, pre_comp[num], tmp);
                felem_neg(tmp[3], tmp[1]); /* (X, -Y, Z) is the negative
                                            * point */
                copy_conditional(tmp[1], tmp[3], sign); /* constant-time negate */

                if (!skip) {
                    point_add(nq[0], nq[1], nq[2],
                              nq[0], nq[1], nq[2],
                              mixed, tmp[0], tmp[1], tmp[2]);
                } else {
                    memcpy(nq, tmp, 3 * sizeof(felem));
                    skip = 0;
                }
            }
        }
    }
    felem_assign(x_out, nq[0]);
    felem_assign(y_out, nq[1]);
    felem_assign(z_out, nq[2]);
}
1193
1194 /******************************************************************************/
1195 /*
1196 * FUNCTIONS TO MANAGE PRECOMPUTATION
1197 */
1198
1199 static NISTP224_PRE_COMP *nistp224_pre_comp_new()
1200 {
1201 NISTP224_PRE_COMP *ret = OPENSSL_zalloc(sizeof(*ret));
1202
1203 if (!ret) {
1204 ECerr(EC_F_NISTP224_PRE_COMP_NEW, ERR_R_MALLOC_FAILURE);
1205 return ret;
1206 }
1207 ret->references = 1;
1208 return ret;
1209 }
1210
1211 NISTP224_PRE_COMP *EC_nistp224_pre_comp_dup(NISTP224_PRE_COMP *p)
1212 {
1213 if (p != NULL)
1214 CRYPTO_add(&p->references, 1, CRYPTO_LOCK_EC_PRE_COMP);
1215 return p;
1216 }
1217
1218 void EC_nistp224_pre_comp_free(NISTP224_PRE_COMP *p)
1219 {
1220 if (p == NULL
1221 || CRYPTO_add(&p->references, -1, CRYPTO_LOCK_EC_PRE_COMP) > 0)
1222 return;
1223 OPENSSL_free(p);
1224 }
1225
1226 /******************************************************************************/
1227 /*
1228 * OPENSSL EC_METHOD FUNCTIONS
1229 */
1230
1231 int ec_GFp_nistp224_group_init(EC_GROUP *group)
1232 {
1233 int ret;
1234 ret = ec_GFp_simple_group_init(group);
1235 group->a_is_minus3 = 1;
1236 return ret;
1237 }
1238
/*
 * Set the curve parameters, rejecting any (p, a, b) that does not match the
 * built-in NIST P-224 constants: this implementation is specific to that
 * curve.  Returns 1 on success, 0 on error.
 */
int ec_GFp_nistp224_group_set_curve(EC_GROUP *group, const BIGNUM *p,
                                    const BIGNUM *a, const BIGNUM *b,
                                    BN_CTX *ctx)
{
    int ret = 0;
    BN_CTX *new_ctx = NULL;     /* only set if we allocated ctx ourselves */
    BIGNUM *curve_p, *curve_a, *curve_b;

    if (ctx == NULL)
        if ((ctx = new_ctx = BN_CTX_new()) == NULL)
            return 0;
    BN_CTX_start(ctx);
    if (((curve_p = BN_CTX_get(ctx)) == NULL) ||
        ((curve_a = BN_CTX_get(ctx)) == NULL) ||
        ((curve_b = BN_CTX_get(ctx)) == NULL))
        goto err;
    /* load the canonical P-224 p, a, b for comparison */
    BN_bin2bn(nistp224_curve_params[0], sizeof(felem_bytearray), curve_p);
    BN_bin2bn(nistp224_curve_params[1], sizeof(felem_bytearray), curve_a);
    BN_bin2bn(nistp224_curve_params[2], sizeof(felem_bytearray), curve_b);
    if ((BN_cmp(curve_p, p)) || (BN_cmp(curve_a, a)) || (BN_cmp(curve_b, b))) {
        ECerr(EC_F_EC_GFP_NISTP224_GROUP_SET_CURVE,
              EC_R_WRONG_CURVE_PARAMETERS);
        goto err;
    }
    group->field_mod_func = BN_nist_mod_224;
    ret = ec_GFp_simple_group_set_curve(group, p, a, b, ctx);
 err:
    BN_CTX_end(ctx);
    BN_CTX_free(new_ctx);       /* no-op unless we created the context */
    return ret;
}
1270
1271 /*
1272 * Takes the Jacobian coordinates (X, Y, Z) of a point and returns (X', Y') =
1273 * (X/Z^2, Y/Z^3)
1274 */
int ec_GFp_nistp224_point_get_affine_coordinates(const EC_GROUP *group,
                                                 const EC_POINT *point,
                                                 BIGNUM *x, BIGNUM *y,
                                                 BN_CTX *ctx)
{
    felem z1, z2, x_in, y_in, x_out, y_out;
    widefelem tmp;

    /* the point at infinity has no affine representation */
    if (EC_POINT_is_at_infinity(group, point)) {
        ECerr(EC_F_EC_GFP_NISTP224_POINT_GET_AFFINE_COORDINATES,
              EC_R_POINT_AT_INFINITY);
        return 0;
    }
    if ((!BN_to_felem(x_in, point->X)) || (!BN_to_felem(y_in, point->Y)) ||
        (!BN_to_felem(z1, point->Z)))
        return 0;
    felem_inv(z2, z1);          /* z2 = Z^-1 */
    felem_square(tmp, z2);
    felem_reduce(z1, tmp);      /* z1 is reused: now Z^-2 */
    felem_mul(tmp, x_in, z1);
    felem_reduce(x_in, tmp);    /* x_in = X * Z^-2 */
    felem_contract(x_out, x_in); /* unique minimal representation */
    if (x != NULL) {
        if (!felem_to_BN(x, x_out)) {
            ECerr(EC_F_EC_GFP_NISTP224_POINT_GET_AFFINE_COORDINATES,
                  ERR_R_BN_LIB);
            return 0;
        }
    }
    felem_mul(tmp, z1, z2);
    felem_reduce(z1, tmp);      /* z1 = Z^-3 */
    felem_mul(tmp, y_in, z1);
    felem_reduce(y_in, tmp);    /* y_in = Y * Z^-3 */
    felem_contract(y_out, y_in);
    if (y != NULL) {
        if (!felem_to_BN(y, y_out)) {
            ECerr(EC_F_EC_GFP_NISTP224_POINT_GET_AFFINE_COORDINATES,
                  ERR_R_BN_LIB);
            return 0;
        }
    }
    return 1;
}
1318
1319 static void make_points_affine(size_t num, felem points[ /* num */ ][3],
1320 felem tmp_felems[ /* num+1 */ ])
1321 {
1322 /*
1323 * Runs in constant time, unless an input is the point at infinity (which
1324 * normally shouldn't happen).
1325 */
1326 ec_GFp_nistp_points_make_affine_internal(num,
1327 points,
1328 sizeof(felem),
1329 tmp_felems,
1330 (void (*)(void *))felem_one,
1331 (int (*)(const void *))
1332 felem_is_zero_int,
1333 (void (*)(void *, const void *))
1334 felem_assign,
1335 (void (*)(void *, const void *))
1336 felem_square_reduce, (void (*)
1337 (void *,
1338 const void
1339 *,
1340 const void
1341 *))
1342 felem_mul_reduce,
1343 (void (*)(void *, const void *))
1344 felem_inv,
1345 (void (*)(void *, const void *))
1346 felem_contract);
1347 }
1348
1349 /*
1350 * Computes scalar*generator + \sum scalars[i]*points[i], ignoring NULL
1351 * values Result is stored in r (r can equal one of the inputs).
1352 */
int ec_GFp_nistp224_points_mul(const EC_GROUP *group, EC_POINT *r,
                               const BIGNUM *scalar, size_t num,
                               const EC_POINT *points[],
                               const BIGNUM *scalars[], BN_CTX *ctx)
{
    int ret = 0;
    int j;
    unsigned i;
    int mixed = 0;              /* use affine precomputed multiples? */
    BN_CTX *new_ctx = NULL;
    BIGNUM *x, *y, *z, *tmp_scalar;
    felem_bytearray g_secret;   /* little-endian generator scalar */
    felem_bytearray *secrets = NULL; /* little-endian per-point scalars */
    felem (*pre_comp)[17][3] = NULL; /* 0*P .. 16*P per input point */
    felem *tmp_felems = NULL;
    felem_bytearray tmp;
    unsigned num_bytes;
    int have_pre_comp = 0;      /* generator precomputation usable? */
    size_t num_points = num;
    felem x_in, y_in, z_in, x_out, y_out, z_out;
    NISTP224_PRE_COMP *pre = NULL;
    const felem(*g_pre_comp)[16][3] = NULL;
    EC_POINT *generator = NULL;
    const EC_POINT *p = NULL;
    const BIGNUM *p_scalar = NULL;

    if (ctx == NULL)
        if ((ctx = new_ctx = BN_CTX_new()) == NULL)
            return 0;
    BN_CTX_start(ctx);
    if (((x = BN_CTX_get(ctx)) == NULL) ||
        ((y = BN_CTX_get(ctx)) == NULL) ||
        ((z = BN_CTX_get(ctx)) == NULL) ||
        ((tmp_scalar = BN_CTX_get(ctx)) == NULL))
        goto err;

    if (scalar != NULL) {
        pre = group->pre_comp.nistp224;
        if (pre)
            /* we have precomputation, try to use it */
            g_pre_comp = (const felem(*)[16][3])pre->g_pre_comp;
        else
            /* try to use the standard precomputation */
            g_pre_comp = &gmul[0];
        generator = EC_POINT_new(group);
        if (generator == NULL)
            goto err;
        /* get the generator from precomputation */
        if (!felem_to_BN(x, g_pre_comp[0][1][0]) ||
            !felem_to_BN(y, g_pre_comp[0][1][1]) ||
            !felem_to_BN(z, g_pre_comp[0][1][2])) {
            ECerr(EC_F_EC_GFP_NISTP224_POINTS_MUL, ERR_R_BN_LIB);
            goto err;
        }
        if (!EC_POINT_set_Jprojective_coordinates_GFp(group,
                                                      generator, x, y, z,
                                                      ctx))
            goto err;
        if (0 == EC_POINT_cmp(group, generator, group->generator, ctx))
            /* precomputation matches generator */
            have_pre_comp = 1;
        else
            /*
             * we don't have valid precomputation: treat the generator as a
             * random point
             */
            num_points = num_points + 1;
    }

    if (num_points > 0) {
        if (num_points >= 3) {
            /*
             * unless we precompute multiples for just one or two points,
             * converting those into affine form is time well spent
             */
            mixed = 1;
        }
        secrets = OPENSSL_zalloc(sizeof(*secrets) * num_points);
        pre_comp = OPENSSL_zalloc(sizeof(*pre_comp) * num_points);
        if (mixed)
            tmp_felems =
                OPENSSL_malloc(sizeof(felem) * (num_points * 17 + 1));
        if ((secrets == NULL) || (pre_comp == NULL)
            || (mixed && (tmp_felems == NULL))) {
            ECerr(EC_F_EC_GFP_NISTP224_POINTS_MUL, ERR_R_MALLOC_FAILURE);
            goto err;
        }

        /*
         * we treat NULL scalars as 0, and NULL points as points at infinity,
         * i.e., they contribute nothing to the linear combination
         */
        for (i = 0; i < num_points; ++i) {
            if (i == num)
                /* the generator */
            {
                p = EC_GROUP_get0_generator(group);
                p_scalar = scalar;
            } else
                /* the i^th point */
            {
                p = points[i];
                p_scalar = scalars[i];
            }
            if ((p_scalar != NULL) && (p != NULL)) {
                /* reduce scalar to 0 <= scalar < 2^224 */
                if ((BN_num_bits(p_scalar) > 224)
                    || (BN_is_negative(p_scalar))) {
                    /*
                     * this is an unusual input, and we don't guarantee
                     * constant-timeness
                     */
                    if (!BN_nnmod(tmp_scalar, p_scalar, group->order, ctx)) {
                        ECerr(EC_F_EC_GFP_NISTP224_POINTS_MUL, ERR_R_BN_LIB);
                        goto err;
                    }
                    num_bytes = BN_bn2bin(tmp_scalar, tmp);
                } else
                    num_bytes = BN_bn2bin(p_scalar, tmp);
                flip_endian(secrets[i], tmp, num_bytes);
                /* precompute multiples */
                if ((!BN_to_felem(x_out, p->X)) ||
                    (!BN_to_felem(y_out, p->Y)) ||
                    (!BN_to_felem(z_out, p->Z)))
                    goto err;
                felem_assign(pre_comp[i][1][0], x_out);
                felem_assign(pre_comp[i][1][1], y_out);
                felem_assign(pre_comp[i][1][2], z_out);
                /* odd j: (j-1)*P + P; even j: double (j/2)*P */
                for (j = 2; j <= 16; ++j) {
                    if (j & 1) {
                        point_add(pre_comp[i][j][0], pre_comp[i][j][1],
                                  pre_comp[i][j][2], pre_comp[i][1][0],
                                  pre_comp[i][1][1], pre_comp[i][1][2], 0,
                                  pre_comp[i][j - 1][0],
                                  pre_comp[i][j - 1][1],
                                  pre_comp[i][j - 1][2]);
                    } else {
                        point_double(pre_comp[i][j][0], pre_comp[i][j][1],
                                     pre_comp[i][j][2], pre_comp[i][j / 2][0],
                                     pre_comp[i][j / 2][1],
                                     pre_comp[i][j / 2][2]);
                    }
                }
            }
        }
        if (mixed)
            make_points_affine(num_points * 17, pre_comp[0], tmp_felems);
    }

    /* the scalar for the generator */
    if ((scalar != NULL) && (have_pre_comp)) {
        memset(g_secret, 0, sizeof(g_secret));
        /* reduce scalar to 0 <= scalar < 2^224 */
        if ((BN_num_bits(scalar) > 224) || (BN_is_negative(scalar))) {
            /*
             * this is an unusual input, and we don't guarantee
             * constant-timeness
             */
            if (!BN_nnmod(tmp_scalar, scalar, group->order, ctx)) {
                ECerr(EC_F_EC_GFP_NISTP224_POINTS_MUL, ERR_R_BN_LIB);
                goto err;
            }
            num_bytes = BN_bn2bin(tmp_scalar, tmp);
        } else
            num_bytes = BN_bn2bin(scalar, tmp);
        flip_endian(g_secret, tmp, num_bytes);
        /* do the multiplication with generator precomputation */
        batch_mul(x_out, y_out, z_out,
                  (const felem_bytearray(*))secrets, num_points,
                  g_secret,
                  mixed, (const felem(*)[17][3])pre_comp, g_pre_comp);
    } else
        /* do the multiplication without generator precomputation */
        batch_mul(x_out, y_out, z_out,
                  (const felem_bytearray(*))secrets, num_points,
                  NULL, mixed, (const felem(*)[17][3])pre_comp, NULL);
    /* reduce the output to its unique minimal representation */
    felem_contract(x_in, x_out);
    felem_contract(y_in, y_out);
    felem_contract(z_in, z_out);
    if ((!felem_to_BN(x, x_in)) || (!felem_to_BN(y, y_in)) ||
        (!felem_to_BN(z, z_in))) {
        ECerr(EC_F_EC_GFP_NISTP224_POINTS_MUL, ERR_R_BN_LIB);
        goto err;
    }
    ret = EC_POINT_set_Jprojective_coordinates_GFp(group, r, x, y, z, ctx);

 err:
    BN_CTX_end(ctx);
    EC_POINT_free(generator);
    BN_CTX_free(new_ctx);
    OPENSSL_free(secrets);
    OPENSSL_free(pre_comp);
    OPENSSL_free(tmp_felems);
    return ret;
}
1549
/*
 * Build and attach generator precomputation for the group.  If the group's
 * generator is the standard P-224 generator, the built-in table |gmul| is
 * copied; otherwise the two 16-entry tables are computed from scratch.
 * Returns 1 on success, 0 on error.
 */
int ec_GFp_nistp224_precompute_mult(EC_GROUP *group, BN_CTX *ctx)
{
    int ret = 0;
    NISTP224_PRE_COMP *pre = NULL;
    int i, j;
    BN_CTX *new_ctx = NULL;
    BIGNUM *x, *y;
    EC_POINT *generator = NULL;
    felem tmp_felems[32];       /* scratch for make_points_affine(31, ...) */

    /* throw away old precomputation */
    EC_pre_comp_free(group);
    if (ctx == NULL)
        if ((ctx = new_ctx = BN_CTX_new()) == NULL)
            return 0;
    BN_CTX_start(ctx);
    if (((x = BN_CTX_get(ctx)) == NULL) || ((y = BN_CTX_get(ctx)) == NULL))
        goto err;
    /* get the generator */
    if (group->generator == NULL)
        goto err;
    generator = EC_POINT_new(group);
    if (generator == NULL)
        goto err;
    BN_bin2bn(nistp224_curve_params[3], sizeof(felem_bytearray), x);
    BN_bin2bn(nistp224_curve_params[4], sizeof(felem_bytearray), y);
    if (!EC_POINT_set_affine_coordinates_GFp(group, generator, x, y, ctx))
        goto err;
    if ((pre = nistp224_pre_comp_new()) == NULL)
        goto err;
    /*
     * if the generator is the standard one, use built-in precomputation
     */
    if (0 == EC_POINT_cmp(group, generator, group->generator, ctx)) {
        memcpy(pre->g_pre_comp, gmul, sizeof(pre->g_pre_comp));
        ret = 1;
        goto err;               /* success path shares the cleanup below */
    }
    if ((!BN_to_felem(pre->g_pre_comp[0][1][0], group->generator->X)) ||
        (!BN_to_felem(pre->g_pre_comp[0][1][1], group->generator->Y)) ||
        (!BN_to_felem(pre->g_pre_comp[0][1][2], group->generator->Z)))
        goto err;
    /*
     * compute 2^56*G, 2^112*G, 2^168*G for the first table, 2^28*G, 2^84*G,
     * 2^140*G, 2^196*G for the second one
     */
    for (i = 1; i <= 8; i <<= 1) {
        /* table[1][i] = 2^28 * table[0][i]: one doubling plus 27 more */
        point_double(pre->g_pre_comp[1][i][0], pre->g_pre_comp[1][i][1],
                     pre->g_pre_comp[1][i][2], pre->g_pre_comp[0][i][0],
                     pre->g_pre_comp[0][i][1], pre->g_pre_comp[0][i][2]);
        for (j = 0; j < 27; ++j) {
            point_double(pre->g_pre_comp[1][i][0], pre->g_pre_comp[1][i][1],
                         pre->g_pre_comp[1][i][2], pre->g_pre_comp[1][i][0],
                         pre->g_pre_comp[1][i][1], pre->g_pre_comp[1][i][2]);
        }
        if (i == 8)
            break;
        /* table[0][2i] = 2^28 * table[1][i]: another 28 doublings */
        point_double(pre->g_pre_comp[0][2 * i][0],
                     pre->g_pre_comp[0][2 * i][1],
                     pre->g_pre_comp[0][2 * i][2], pre->g_pre_comp[1][i][0],
                     pre->g_pre_comp[1][i][1], pre->g_pre_comp[1][i][2]);
        for (j = 0; j < 27; ++j) {
            point_double(pre->g_pre_comp[0][2 * i][0],
                         pre->g_pre_comp[0][2 * i][1],
                         pre->g_pre_comp[0][2 * i][2],
                         pre->g_pre_comp[0][2 * i][0],
                         pre->g_pre_comp[0][2 * i][1],
                         pre->g_pre_comp[0][2 * i][2]);
        }
    }
    for (i = 0; i < 2; i++) {
        /* g_pre_comp[i][0] is the point at infinity */
        memset(pre->g_pre_comp[i][0], 0, sizeof(pre->g_pre_comp[i][0]));
        /* the remaining multiples */
        /* 2^56*G + 2^112*G resp. 2^84*G + 2^140*G */
        point_add(pre->g_pre_comp[i][6][0], pre->g_pre_comp[i][6][1],
                  pre->g_pre_comp[i][6][2], pre->g_pre_comp[i][4][0],
                  pre->g_pre_comp[i][4][1], pre->g_pre_comp[i][4][2],
                  0, pre->g_pre_comp[i][2][0], pre->g_pre_comp[i][2][1],
                  pre->g_pre_comp[i][2][2]);
        /* 2^56*G + 2^168*G resp. 2^84*G + 2^196*G */
        point_add(pre->g_pre_comp[i][10][0], pre->g_pre_comp[i][10][1],
                  pre->g_pre_comp[i][10][2], pre->g_pre_comp[i][8][0],
                  pre->g_pre_comp[i][8][1], pre->g_pre_comp[i][8][2],
                  0, pre->g_pre_comp[i][2][0], pre->g_pre_comp[i][2][1],
                  pre->g_pre_comp[i][2][2]);
        /* 2^112*G + 2^168*G resp. 2^140*G + 2^196*G */
        point_add(pre->g_pre_comp[i][12][0], pre->g_pre_comp[i][12][1],
                  pre->g_pre_comp[i][12][2], pre->g_pre_comp[i][8][0],
                  pre->g_pre_comp[i][8][1], pre->g_pre_comp[i][8][2],
                  0, pre->g_pre_comp[i][4][0], pre->g_pre_comp[i][4][1],
                  pre->g_pre_comp[i][4][2]);
        /*
         * 2^56*G + 2^112*G + 2^168*G resp. 2^84*G + 2^140*G + 2^196*G
         */
        point_add(pre->g_pre_comp[i][14][0], pre->g_pre_comp[i][14][1],
                  pre->g_pre_comp[i][14][2], pre->g_pre_comp[i][12][0],
                  pre->g_pre_comp[i][12][1], pre->g_pre_comp[i][12][2],
                  0, pre->g_pre_comp[i][2][0], pre->g_pre_comp[i][2][1],
                  pre->g_pre_comp[i][2][2]);
        for (j = 1; j < 8; ++j) {
            /* odd multiples: add G resp. 2^28*G */
            point_add(pre->g_pre_comp[i][2 * j + 1][0],
                      pre->g_pre_comp[i][2 * j + 1][1],
                      pre->g_pre_comp[i][2 * j + 1][2],
                      pre->g_pre_comp[i][2 * j][0],
                      pre->g_pre_comp[i][2 * j][1],
                      pre->g_pre_comp[i][2 * j][2], 0,
                      pre->g_pre_comp[i][1][0], pre->g_pre_comp[i][1][1],
                      pre->g_pre_comp[i][1][2]);
        }
    }
    /* convert everything except the two infinity entries to affine form */
    make_points_affine(31, &(pre->g_pre_comp[0][1]), tmp_felems);

    SETPRECOMP(group, nistp224, pre);
    pre = NULL;                 /* ownership transferred to the group */
    ret = 1;
 err:
    BN_CTX_end(ctx);
    EC_POINT_free(generator);
    BN_CTX_free(new_ctx);
    EC_nistp224_pre_comp_free(pre);
    return ret;
}
1674
/* Report whether nistp224 generator precomputation is attached to |group|. */
int ec_GFp_nistp224_have_precompute_mult(const EC_GROUP *group)
{
    return HAVEPRECOMP(group, nistp224);
}
1679
1680 #else
1681 static void *dummy = &dummy;
1682 #endif