]> git.ipfire.org Git - thirdparty/openssl.git/blame - crypto/ec/ecp_nistp224.c
Fix a SCA leak using BN_bn2bin()
[thirdparty/openssl.git] / crypto / ec / ecp_nistp224.c
CommitLineData
04daec86 1/*
0d664759 2 * Copyright 2010-2018 The OpenSSL Project Authors. All Rights Reserved.
4f22f405 3 *
a7f182b7 4 * Licensed under the Apache License 2.0 (the "License"). You may not use
4f22f405
RS
5 * this file except in compliance with the License. You can obtain a copy
6 * in the file LICENSE in the source distribution or at
7 * https://www.openssl.org/source/license.html
04daec86 8 */
4f22f405 9
3e00b4c9 10/* Copyright 2011 Google Inc.
04daec86 11 *
3e00b4c9 12 * Licensed under the Apache License, Version 2.0 (the "License");
04daec86 13 *
3e00b4c9
BM
14 * you may not use this file except in compliance with the License.
15 * You may obtain a copy of the License at
04daec86 16 *
3e00b4c9 17 * http://www.apache.org/licenses/LICENSE-2.0
04daec86 18 *
3e00b4c9
BM
19 * Unless required by applicable law or agreed to in writing, software
20 * distributed under the License is distributed on an "AS IS" BASIS,
21 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
22 * See the License for the specific language governing permissions and
23 * limitations under the License.
04daec86
BM
24 */
25
26/*
27 * A 64-bit implementation of the NIST P-224 elliptic curve point multiplication
28 *
29 * Inspired by Daniel J. Bernstein's public domain nistp224 implementation
30 * and Adam Langley's public domain 64-bit C implementation of curve25519
31 */
e0d6132b
BM
32
33#include <openssl/opensslconf.h>
effaf4de
RS
34#ifdef OPENSSL_NO_EC_NISTP_64_GCC_128
35NON_EMPTY_TRANSLATION_UNIT
36#else
e0d6132b 37
0f113f3e
MC
38# include <stdint.h>
39# include <string.h>
40# include <openssl/err.h>
41# include "ec_lcl.h"
04daec86 42
6afed267 43# if defined(__SIZEOF_INT128__) && __SIZEOF_INT128__==16
396cb565 44 /* even with gcc, the typedef won't work for 32-bit platforms */
0f113f3e
MC
45typedef __uint128_t uint128_t; /* nonstandard; implemented by gcc on 64-bit
46 * platforms */
47# else
8cc1dc36 48# error "Your compiler doesn't appear to support 128-bit integer types"
0f113f3e 49# endif
04daec86
BM
50
51typedef uint8_t u8;
3e00b4c9 52typedef uint64_t u64;
04daec86 53
04daec86 54/******************************************************************************/
1d97c843
TH
55/*-
56 * INTERNAL REPRESENTATION OF FIELD ELEMENTS
04daec86
BM
57 *
58 * Field elements are represented as a_0 + 2^56*a_1 + 2^112*a_2 + 2^168*a_3
3e00b4c9
BM
59 * using 64-bit coefficients called 'limbs',
60 * and sometimes (for multiplication results) as
04daec86 61 * b_0 + 2^56*b_1 + 2^112*b_2 + 2^168*b_3 + 2^224*b_4 + 2^280*b_5 + 2^336*b_6
3e00b4c9
BM
62 * using 128-bit coefficients called 'widelimbs'.
63 * A 4-limb representation is an 'felem';
64 * a 7-widelimb representation is a 'widefelem'.
65 * Even within felems, bits of adjacent limbs overlap, and we don't always
66 * reduce the representations: we ensure that inputs to each felem
04daec86
BM
67 * multiplication satisfy a_i < 2^60, so outputs satisfy b_i < 4*2^60*2^60,
68 * and fit into a 128-bit word without overflow. The coefficients are then
3e00b4c9
BM
69 * again partially reduced to obtain an felem satisfying a_i < 2^57.
70 * We only reduce to the unique minimal representation at the end of the
71 * computation.
04daec86
BM
72 */
73
3e00b4c9
BM
74typedef uint64_t limb;
75typedef uint128_t widelimb;
76
77typedef limb felem[4];
78typedef widelimb widefelem[7];
04daec86 79
0f113f3e 80/*
46f4e1be 81 * Field element represented as a byte array. 28*8 = 224 bits is also the
0f113f3e
MC
82 * group order size for the elliptic curve, and we also use this type for
83 * scalars for point multiplication.
84 */
396cb565
BM
85typedef u8 felem_bytearray[28];
86
87static const felem_bytearray nistp224_curve_params[5] = {
0f113f3e
MC
88 {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, /* p */
89 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00,
90 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01},
91 {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, /* a */
92 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFE, 0xFF, 0xFF, 0xFF, 0xFF,
93 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFE},
94 {0xB4, 0x05, 0x0A, 0x85, 0x0C, 0x04, 0xB3, 0xAB, 0xF5, 0x41, /* b */
95 0x32, 0x56, 0x50, 0x44, 0xB0, 0xB7, 0xD7, 0xBF, 0xD8, 0xBA,
96 0x27, 0x0B, 0x39, 0x43, 0x23, 0x55, 0xFF, 0xB4},
97 {0xB7, 0x0E, 0x0C, 0xBD, 0x6B, 0xB4, 0xBF, 0x7F, 0x32, 0x13, /* x */
98 0x90, 0xB9, 0x4A, 0x03, 0xC1, 0xD3, 0x56, 0xC2, 0x11, 0x22,
99 0x34, 0x32, 0x80, 0xD6, 0x11, 0x5C, 0x1D, 0x21},
100 {0xbd, 0x37, 0x63, 0x88, 0xb5, 0xf7, 0x23, 0xfb, 0x4c, 0x22, /* y */
101 0xdf, 0xe6, 0xcd, 0x43, 0x75, 0xa0, 0x5a, 0x07, 0x47, 0x64,
102 0x44, 0xd5, 0x81, 0x99, 0x85, 0x00, 0x7e, 0x34}
396cb565 103};
04daec86 104
1d97c843
TH
105/*-
106 * Precomputed multiples of the standard generator
3e00b4c9
BM
107 * Points are given in coordinates (X, Y, Z) where Z normally is 1
108 * (0 for the point at infinity).
109 * For each field element, slice a_0 is word 0, etc.
110 *
111 * The table has 2 * 16 elements, starting with the following:
112 * index | bits | point
113 * ------+---------+------------------------------
114 * 0 | 0 0 0 0 | 0G
115 * 1 | 0 0 0 1 | 1G
116 * 2 | 0 0 1 0 | 2^56G
117 * 3 | 0 0 1 1 | (2^56 + 1)G
118 * 4 | 0 1 0 0 | 2^112G
119 * 5 | 0 1 0 1 | (2^112 + 1)G
120 * 6 | 0 1 1 0 | (2^112 + 2^56)G
121 * 7 | 0 1 1 1 | (2^112 + 2^56 + 1)G
122 * 8 | 1 0 0 0 | 2^168G
123 * 9 | 1 0 0 1 | (2^168 + 1)G
124 * 10 | 1 0 1 0 | (2^168 + 2^56)G
125 * 11 | 1 0 1 1 | (2^168 + 2^56 + 1)G
126 * 12 | 1 1 0 0 | (2^168 + 2^112)G
127 * 13 | 1 1 0 1 | (2^168 + 2^112 + 1)G
128 * 14 | 1 1 1 0 | (2^168 + 2^112 + 2^56)G
129 * 15 | 1 1 1 1 | (2^168 + 2^112 + 2^56 + 1)G
130 * followed by a copy of this with each element multiplied by 2^28.
131 *
132 * The reason for this is so that we can clock bits into four different
133 * locations when doing simple scalar multiplies against the base point,
134 * and then another four locations using the second 16 elements.
135 */
4eb504ae
AP
136static const felem gmul[2][16][3] = {
137{{{0, 0, 0, 0},
138 {0, 0, 0, 0},
139 {0, 0, 0, 0}},
140 {{0x3280d6115c1d21, 0xc1d356c2112234, 0x7f321390b94a03, 0xb70e0cbd6bb4bf},
141 {0xd5819985007e34, 0x75a05a07476444, 0xfb4c22dfe6cd43, 0xbd376388b5f723},
142 {1, 0, 0, 0}},
143 {{0xfd9675666ebbe9, 0xbca7664d40ce5e, 0x2242df8d8a2a43, 0x1f49bbb0f99bc5},
144 {0x29e0b892dc9c43, 0xece8608436e662, 0xdc858f185310d0, 0x9812dd4eb8d321},
145 {1, 0, 0, 0}},
146 {{0x6d3e678d5d8eb8, 0x559eed1cb362f1, 0x16e9a3bbce8a3f, 0xeedcccd8c2a748},
147 {0xf19f90ed50266d, 0xabf2b4bf65f9df, 0x313865468fafec, 0x5cb379ba910a17},
148 {1, 0, 0, 0}},
149 {{0x0641966cab26e3, 0x91fb2991fab0a0, 0xefec27a4e13a0b, 0x0499aa8a5f8ebe},
150 {0x7510407766af5d, 0x84d929610d5450, 0x81d77aae82f706, 0x6916f6d4338c5b},
151 {1, 0, 0, 0}},
152 {{0xea95ac3b1f15c6, 0x086000905e82d4, 0xdd323ae4d1c8b1, 0x932b56be7685a3},
153 {0x9ef93dea25dbbf, 0x41665960f390f0, 0xfdec76dbe2a8a7, 0x523e80f019062a},
154 {1, 0, 0, 0}},
155 {{0x822fdd26732c73, 0xa01c83531b5d0f, 0x363f37347c1ba4, 0xc391b45c84725c},
156 {0xbbd5e1b2d6ad24, 0xddfbcde19dfaec, 0xc393da7e222a7f, 0x1efb7890ede244},
157 {1, 0, 0, 0}},
158 {{0x4c9e90ca217da1, 0xd11beca79159bb, 0xff8d33c2c98b7c, 0x2610b39409f849},
159 {0x44d1352ac64da0, 0xcdbb7b2c46b4fb, 0x966c079b753c89, 0xfe67e4e820b112},
160 {1, 0, 0, 0}},
161 {{0xe28cae2df5312d, 0xc71b61d16f5c6e, 0x79b7619a3e7c4c, 0x05c73240899b47},
162 {0x9f7f6382c73e3a, 0x18615165c56bda, 0x641fab2116fd56, 0x72855882b08394},
163 {1, 0, 0, 0}},
164 {{0x0469182f161c09, 0x74a98ca8d00fb5, 0xb89da93489a3e0, 0x41c98768fb0c1d},
165 {0xe5ea05fb32da81, 0x3dce9ffbca6855, 0x1cfe2d3fbf59e6, 0x0e5e03408738a7},
166 {1, 0, 0, 0}},
167 {{0xdab22b2333e87f, 0x4430137a5dd2f6, 0xe03ab9f738beb8, 0xcb0c5d0dc34f24},
168 {0x764a7df0c8fda5, 0x185ba5c3fa2044, 0x9281d688bcbe50, 0xc40331df893881},
169 {1, 0, 0, 0}},
170 {{0xb89530796f0f60, 0xade92bd26909a3, 0x1a0c83fb4884da, 0x1765bf22a5a984},
171 {0x772a9ee75db09e, 0x23bc6c67cec16f, 0x4c1edba8b14e2f, 0xe2a215d9611369},
172 {1, 0, 0, 0}},
173 {{0x571e509fb5efb3, 0xade88696410552, 0xc8ae85fada74fe, 0x6c7e4be83bbde3},
174 {0xff9f51160f4652, 0xb47ce2495a6539, 0xa2946c53b582f4, 0x286d2db3ee9a60},
175 {1, 0, 0, 0}},
176 {{0x40bbd5081a44af, 0x0995183b13926c, 0xbcefba6f47f6d0, 0x215619e9cc0057},
177 {0x8bc94d3b0df45e, 0xf11c54a3694f6f, 0x8631b93cdfe8b5, 0xe7e3f4b0982db9},
178 {1, 0, 0, 0}},
179 {{0xb17048ab3e1c7b, 0xac38f36ff8a1d8, 0x1c29819435d2c6, 0xc813132f4c07e9},
180 {0x2891425503b11f, 0x08781030579fea, 0xf5426ba5cc9674, 0x1e28ebf18562bc},
181 {1, 0, 0, 0}},
182 {{0x9f31997cc864eb, 0x06cd91d28b5e4c, 0xff17036691a973, 0xf1aef351497c58},
183 {0xdd1f2d600564ff, 0xdead073b1402db, 0x74a684435bd693, 0xeea7471f962558},
184 {1, 0, 0, 0}}},
0f113f3e
MC
185{{{0, 0, 0, 0},
186 {0, 0, 0, 0},
187 {0, 0, 0, 0}},
188 {{0x9665266dddf554, 0x9613d78b60ef2d, 0xce27a34cdba417, 0xd35ab74d6afc31},
189 {0x85ccdd22deb15e, 0x2137e5783a6aab, 0xa141cffd8c93c6, 0x355a1830e90f2d},
190 {1, 0, 0, 0}},
191 {{0x1a494eadaade65, 0xd6da4da77fe53c, 0xe7992996abec86, 0x65c3553c6090e3},
192 {0xfa610b1fb09346, 0xf1c6540b8a4aaf, 0xc51a13ccd3cbab, 0x02995b1b18c28a},
193 {1, 0, 0, 0}},
194 {{0x7874568e7295ef, 0x86b419fbe38d04, 0xdc0690a7550d9a, 0xd3966a44beac33},
195 {0x2b7280ec29132f, 0xbeaa3b6a032df3, 0xdc7dd88ae41200, 0xd25e2513e3a100},
196 {1, 0, 0, 0}},
197 {{0x924857eb2efafd, 0xac2bce41223190, 0x8edaa1445553fc, 0x825800fd3562d5},
198 {0x8d79148ea96621, 0x23a01c3dd9ed8d, 0xaf8b219f9416b5, 0xd8db0cc277daea},
199 {1, 0, 0, 0}},
200 {{0x76a9c3b1a700f0, 0xe9acd29bc7e691, 0x69212d1a6b0327, 0x6322e97fe154be},
201 {0x469fc5465d62aa, 0x8d41ed18883b05, 0x1f8eae66c52b88, 0xe4fcbe9325be51},
202 {1, 0, 0, 0}},
203 {{0x825fdf583cac16, 0x020b857c7b023a, 0x683c17744b0165, 0x14ffd0a2daf2f1},
204 {0x323b36184218f9, 0x4944ec4e3b47d4, 0xc15b3080841acf, 0x0bced4b01a28bb},
205 {1, 0, 0, 0}},
206 {{0x92ac22230df5c4, 0x52f33b4063eda8, 0xcb3f19870c0c93, 0x40064f2ba65233},
207 {0xfe16f0924f8992, 0x012da25af5b517, 0x1a57bb24f723a6, 0x06f8bc76760def},
208 {1, 0, 0, 0}},
209 {{0x4a7084f7817cb9, 0xbcab0738ee9a78, 0x3ec11e11d9c326, 0xdc0fe90e0f1aae},
210 {0xcf639ea5f98390, 0x5c350aa22ffb74, 0x9afae98a4047b7, 0x956ec2d617fc45},
211 {1, 0, 0, 0}},
212 {{0x4306d648c1be6a, 0x9247cd8bc9a462, 0xf5595e377d2f2e, 0xbd1c3caff1a52e},
213 {0x045e14472409d0, 0x29f3e17078f773, 0x745a602b2d4f7d, 0x191837685cdfbb},
214 {1, 0, 0, 0}},
215 {{0x5b6ee254a8cb79, 0x4953433f5e7026, 0xe21faeb1d1def4, 0xc4c225785c09de},
216 {0x307ce7bba1e518, 0x31b125b1036db8, 0x47e91868839e8f, 0xc765866e33b9f3},
217 {1, 0, 0, 0}},
218 {{0x3bfece24f96906, 0x4794da641e5093, 0xde5df64f95db26, 0x297ecd89714b05},
219 {0x701bd3ebb2c3aa, 0x7073b4f53cb1d5, 0x13c5665658af16, 0x9895089d66fe58},
220 {1, 0, 0, 0}},
221 {{0x0fef05f78c4790, 0x2d773633b05d2e, 0x94229c3a951c94, 0xbbbd70df4911bb},
222 {0xb2c6963d2c1168, 0x105f47a72b0d73, 0x9fdf6111614080, 0x7b7e94b39e67b0},
223 {1, 0, 0, 0}},
224 {{0xad1a7d6efbe2b3, 0xf012482c0da69d, 0x6b3bdf12438345, 0x40d7558d7aa4d9},
225 {0x8a09fffb5c6d3d, 0x9a356e5d9ffd38, 0x5973f15f4f9b1c, 0xdcd5f59f63c3ea},
226 {1, 0, 0, 0}},
227 {{0xacf39f4c5ca7ab, 0x4c8071cc5fd737, 0xc64e3602cd1184, 0x0acd4644c9abba},
228 {0x6c011a36d8bf6e, 0xfecd87ba24e32a, 0x19f6f56574fad8, 0x050b204ced9405},
229 {1, 0, 0, 0}},
230 {{0xed4f1cae7d9a96, 0x5ceef7ad94c40a, 0x778e4a3bf3ef9b, 0x7405783dc3b55e},
231 {0x32477c61b6e8c6, 0xb46a97570f018b, 0x91176d0a7e95d1, 0x3df90fbc4c7d0e},
232 {1, 0, 0, 0}}}
233};
04daec86
BM
234
235/* Precomputation for the group generator. */
3aef36ff 236struct nistp224_pre_comp_st {
0f113f3e 237 felem g_pre_comp[2][16][3];
2f545ae4 238 CRYPTO_REF_COUNT references;
9b398ef2 239 CRYPTO_RWLOCK *lock;
3aef36ff 240};
04daec86
BM
241
242const EC_METHOD *EC_GFp_nistp224_method(void)
0f113f3e
MC
243{
244 static const EC_METHOD ret = {
245 EC_FLAGS_DEFAULT_OCT,
246 NID_X9_62_prime_field,
247 ec_GFp_nistp224_group_init,
248 ec_GFp_simple_group_finish,
249 ec_GFp_simple_group_clear_finish,
250 ec_GFp_nist_group_copy,
251 ec_GFp_nistp224_group_set_curve,
252 ec_GFp_simple_group_get_curve,
253 ec_GFp_simple_group_get_degree,
9ff9bccc 254 ec_group_simple_order_bits,
0f113f3e
MC
255 ec_GFp_simple_group_check_discriminant,
256 ec_GFp_simple_point_init,
257 ec_GFp_simple_point_finish,
258 ec_GFp_simple_point_clear_finish,
259 ec_GFp_simple_point_copy,
260 ec_GFp_simple_point_set_to_infinity,
261 ec_GFp_simple_set_Jprojective_coordinates_GFp,
262 ec_GFp_simple_get_Jprojective_coordinates_GFp,
263 ec_GFp_simple_point_set_affine_coordinates,
264 ec_GFp_nistp224_point_get_affine_coordinates,
265 0 /* point_set_compressed_coordinates */ ,
266 0 /* point2oct */ ,
267 0 /* oct2point */ ,
268 ec_GFp_simple_add,
269 ec_GFp_simple_dbl,
270 ec_GFp_simple_invert,
271 ec_GFp_simple_is_at_infinity,
272 ec_GFp_simple_is_on_curve,
273 ec_GFp_simple_cmp,
274 ec_GFp_simple_make_affine,
275 ec_GFp_simple_points_make_affine,
276 ec_GFp_nistp224_points_mul,
277 ec_GFp_nistp224_precompute_mult,
278 ec_GFp_nistp224_have_precompute_mult,
279 ec_GFp_nist_field_mul,
280 ec_GFp_nist_field_sqr,
281 0 /* field_div */ ,
e0033efc 282 ec_GFp_simple_field_inv,
0f113f3e
MC
283 0 /* field_encode */ ,
284 0 /* field_decode */ ,
9ff9bccc
DSH
285 0, /* field_set_to_one */
286 ec_key_simple_priv2oct,
287 ec_key_simple_oct2priv,
288 0, /* set private */
289 ec_key_simple_generate_key,
290 ec_key_simple_check_key,
291 ec_key_simple_generate_public_key,
292 0, /* keycopy */
293 0, /* keyfinish */
653b883b 294 ecdh_simple_compute_key,
9bf682f6
PS
295 ecdsa_simple_sign_setup,
296 ecdsa_simple_sign_sig,
297 ecdsa_simple_verify_sig,
f667820c 298 0, /* field_inverse_mod_ord */
37124360
NT
299 0, /* blind_coordinates */
300 0, /* ladder_pre */
301 0, /* ladder_step */
302 0 /* ladder_post */
0f113f3e
MC
303 };
304
305 return &ret;
306}
307
308/*
309 * Helper functions to convert field elements to/from internal representation
310 */
3e00b4c9 311static void bin28_to_felem(felem out, const u8 in[28])
0f113f3e
MC
312{
313 out[0] = *((const uint64_t *)(in)) & 0x00ffffffffffffff;
314 out[1] = (*((const uint64_t *)(in + 7))) & 0x00ffffffffffffff;
315 out[2] = (*((const uint64_t *)(in + 14))) & 0x00ffffffffffffff;
9fbbdd73 316 out[3] = (*((const uint64_t *)(in+20))) >> 8;
0f113f3e 317}
04daec86 318
3e00b4c9 319static void felem_to_bin28(u8 out[28], const felem in)
0f113f3e
MC
320{
321 unsigned i;
322 for (i = 0; i < 7; ++i) {
323 out[i] = in[0] >> (8 * i);
324 out[i + 7] = in[1] >> (8 * i);
325 out[i + 14] = in[2] >> (8 * i);
326 out[i + 21] = in[3] >> (8 * i);
327 }
328}
04daec86
BM
329
330/* To preserve endianness when using BN_bn2bin and BN_bin2bn */
331static void flip_endian(u8 *out, const u8 *in, unsigned len)
0f113f3e
MC
332{
333 unsigned i;
334 for (i = 0; i < len; ++i)
335 out[i] = in[len - 1 - i];
336}
04daec86
BM
337
338/* From OpenSSL BIGNUM to internal representation */
3e00b4c9 339static int BN_to_felem(felem out, const BIGNUM *bn)
0f113f3e
MC
340{
341 felem_bytearray b_in;
342 felem_bytearray b_out;
343 unsigned num_bytes;
344
0f113f3e 345 num_bytes = BN_num_bytes(bn);
cbe29648 346 if (num_bytes > sizeof(b_out)) {
0f113f3e
MC
347 ECerr(EC_F_BN_TO_FELEM, EC_R_BIGNUM_OUT_OF_RANGE);
348 return 0;
349 }
350 if (BN_is_negative(bn)) {
351 ECerr(EC_F_BN_TO_FELEM, EC_R_BIGNUM_OUT_OF_RANGE);
352 return 0;
353 }
805315d3 354 num_bytes = BN_bn2binpad(bn, b_in, sizeof(b_in));
0f113f3e
MC
355 flip_endian(b_out, b_in, num_bytes);
356 bin28_to_felem(out, b_out);
357 return 1;
358}
04daec86
BM
359
360/* From internal representation to OpenSSL BIGNUM */
3e00b4c9 361static BIGNUM *felem_to_BN(BIGNUM *out, const felem in)
0f113f3e
MC
362{
363 felem_bytearray b_in, b_out;
364 felem_to_bin28(b_in, in);
cbe29648
RS
365 flip_endian(b_out, b_in, sizeof(b_out));
366 return BN_bin2bn(b_out, sizeof(b_out), out);
0f113f3e 367}
04daec86
BM
368
369/******************************************************************************/
3a83462d 370/*-
0f113f3e 371 * FIELD OPERATIONS
04daec86
BM
372 *
373 * Field operations, using the internal representation of field elements.
374 * NB! These operations are specific to our point multiplication and cannot be
375 * expected to be correct in general - e.g., multiplication with a large scalar
376 * will cause an overflow.
377 *
378 */
379
3e00b4c9 380static void felem_one(felem out)
0f113f3e
MC
381{
382 out[0] = 1;
383 out[1] = 0;
384 out[2] = 0;
385 out[3] = 0;
386}
3e00b4c9
BM
387
388static void felem_assign(felem out, const felem in)
0f113f3e
MC
389{
390 out[0] = in[0];
391 out[1] = in[1];
392 out[2] = in[2];
393 out[3] = in[3];
394}
3e00b4c9 395
04daec86 396/* Sum two field elements: out += in */
3e00b4c9 397static void felem_sum(felem out, const felem in)
0f113f3e
MC
398{
399 out[0] += in[0];
400 out[1] += in[1];
401 out[2] += in[2];
402 out[3] += in[3];
403}
04daec86
BM
404
405/* Subtract field elements: out -= in */
406/* Assumes in[i] < 2^57 */
3e00b4c9 407static void felem_diff(felem out, const felem in)
0f113f3e
MC
408{
409 static const limb two58p2 = (((limb) 1) << 58) + (((limb) 1) << 2);
410 static const limb two58m2 = (((limb) 1) << 58) - (((limb) 1) << 2);
411 static const limb two58m42m2 = (((limb) 1) << 58) -
412 (((limb) 1) << 42) - (((limb) 1) << 2);
413
414 /* Add 0 mod 2^224-2^96+1 to ensure out > in */
415 out[0] += two58p2;
416 out[1] += two58m42m2;
417 out[2] += two58m2;
418 out[3] += two58m2;
419
420 out[0] -= in[0];
421 out[1] -= in[1];
422 out[2] -= in[2];
423 out[3] -= in[3];
424}
04daec86 425
3e00b4c9 426/* Subtract in unreduced 128-bit mode: out -= in */
04daec86 427/* Assumes in[i] < 2^119 */
3e00b4c9 428static void widefelem_diff(widefelem out, const widefelem in)
0f113f3e
MC
429{
430 static const widelimb two120 = ((widelimb) 1) << 120;
431 static const widelimb two120m64 = (((widelimb) 1) << 120) -
432 (((widelimb) 1) << 64);
433 static const widelimb two120m104m64 = (((widelimb) 1) << 120) -
434 (((widelimb) 1) << 104) - (((widelimb) 1) << 64);
435
436 /* Add 0 mod 2^224-2^96+1 to ensure out > in */
437 out[0] += two120;
438 out[1] += two120m64;
439 out[2] += two120m64;
440 out[3] += two120;
441 out[4] += two120m104m64;
442 out[5] += two120m64;
443 out[6] += two120m64;
444
445 out[0] -= in[0];
446 out[1] -= in[1];
447 out[2] -= in[2];
448 out[3] -= in[3];
449 out[4] -= in[4];
450 out[5] -= in[5];
451 out[6] -= in[6];
452}
04daec86
BM
453
454/* Subtract in mixed mode: out128 -= in64 */
455/* in[i] < 2^63 */
3e00b4c9 456static void felem_diff_128_64(widefelem out, const felem in)
0f113f3e
MC
457{
458 static const widelimb two64p8 = (((widelimb) 1) << 64) +
459 (((widelimb) 1) << 8);
460 static const widelimb two64m8 = (((widelimb) 1) << 64) -
461 (((widelimb) 1) << 8);
462 static const widelimb two64m48m8 = (((widelimb) 1) << 64) -
463 (((widelimb) 1) << 48) - (((widelimb) 1) << 8);
464
465 /* Add 0 mod 2^224-2^96+1 to ensure out > in */
466 out[0] += two64p8;
467 out[1] += two64m48m8;
468 out[2] += two64m8;
469 out[3] += two64m8;
470
471 out[0] -= in[0];
472 out[1] -= in[1];
473 out[2] -= in[2];
474 out[3] -= in[3];
475}
476
477/*
478 * Multiply a field element by a scalar: out = out * scalar The scalars we
479 * actually use are small, so results fit without overflow
480 */
3e00b4c9 481static void felem_scalar(felem out, const limb scalar)
0f113f3e
MC
482{
483 out[0] *= scalar;
484 out[1] *= scalar;
485 out[2] *= scalar;
486 out[3] *= scalar;
487}
488
489/*
490 * Multiply an unreduced field element by a scalar: out = out * scalar The
491 * scalars we actually use are small, so results fit without overflow
492 */
3e00b4c9 493static void widefelem_scalar(widefelem out, const widelimb scalar)
0f113f3e
MC
494{
495 out[0] *= scalar;
496 out[1] *= scalar;
497 out[2] *= scalar;
498 out[3] *= scalar;
499 out[4] *= scalar;
500 out[5] *= scalar;
501 out[6] *= scalar;
502}
04daec86
BM
503
504/* Square a field element: out = in^2 */
3e00b4c9 505static void felem_square(widefelem out, const felem in)
0f113f3e
MC
506{
507 limb tmp0, tmp1, tmp2;
508 tmp0 = 2 * in[0];
509 tmp1 = 2 * in[1];
510 tmp2 = 2 * in[2];
511 out[0] = ((widelimb) in[0]) * in[0];
512 out[1] = ((widelimb) in[0]) * tmp1;
513 out[2] = ((widelimb) in[0]) * tmp2 + ((widelimb) in[1]) * in[1];
514 out[3] = ((widelimb) in[3]) * tmp0 + ((widelimb) in[1]) * tmp2;
515 out[4] = ((widelimb) in[3]) * tmp1 + ((widelimb) in[2]) * in[2];
516 out[5] = ((widelimb) in[3]) * tmp2;
517 out[6] = ((widelimb) in[3]) * in[3];
518}
04daec86
BM
519
520/* Multiply two field elements: out = in1 * in2 */
3e00b4c9 521static void felem_mul(widefelem out, const felem in1, const felem in2)
0f113f3e
MC
522{
523 out[0] = ((widelimb) in1[0]) * in2[0];
524 out[1] = ((widelimb) in1[0]) * in2[1] + ((widelimb) in1[1]) * in2[0];
525 out[2] = ((widelimb) in1[0]) * in2[2] + ((widelimb) in1[1]) * in2[1] +
4eb504ae 526 ((widelimb) in1[2]) * in2[0];
0f113f3e 527 out[3] = ((widelimb) in1[0]) * in2[3] + ((widelimb) in1[1]) * in2[2] +
4eb504ae 528 ((widelimb) in1[2]) * in2[1] + ((widelimb) in1[3]) * in2[0];
0f113f3e 529 out[4] = ((widelimb) in1[1]) * in2[3] + ((widelimb) in1[2]) * in2[2] +
4eb504ae 530 ((widelimb) in1[3]) * in2[1];
0f113f3e
MC
531 out[5] = ((widelimb) in1[2]) * in2[3] + ((widelimb) in1[3]) * in2[2];
532 out[6] = ((widelimb) in1[3]) * in2[3];
533}
04daec86 534
3a83462d
MC
535/*-
536 * Reduce seven 128-bit coefficients to four 64-bit coefficients.
3e00b4c9
BM
537 * Requires in[i] < 2^126,
538 * ensures out[0] < 2^56, out[1] < 2^56, out[2] < 2^56, out[3] <= 2^56 + 2^16 */
539static void felem_reduce(felem out, const widefelem in)
0f113f3e
MC
540{
541 static const widelimb two127p15 = (((widelimb) 1) << 127) +
542 (((widelimb) 1) << 15);
543 static const widelimb two127m71 = (((widelimb) 1) << 127) -
544 (((widelimb) 1) << 71);
545 static const widelimb two127m71m55 = (((widelimb) 1) << 127) -
546 (((widelimb) 1) << 71) - (((widelimb) 1) << 55);
547 widelimb output[5];
548
549 /* Add 0 mod 2^224-2^96+1 to ensure all differences are positive */
550 output[0] = in[0] + two127p15;
551 output[1] = in[1] + two127m71m55;
552 output[2] = in[2] + two127m71;
553 output[3] = in[3];
554 output[4] = in[4];
555
556 /* Eliminate in[4], in[5], in[6] */
557 output[4] += in[6] >> 16;
558 output[3] += (in[6] & 0xffff) << 40;
559 output[2] -= in[6];
560
561 output[3] += in[5] >> 16;
562 output[2] += (in[5] & 0xffff) << 40;
563 output[1] -= in[5];
564
565 output[2] += output[4] >> 16;
566 output[1] += (output[4] & 0xffff) << 40;
567 output[0] -= output[4];
568
569 /* Carry 2 -> 3 -> 4 */
570 output[3] += output[2] >> 56;
571 output[2] &= 0x00ffffffffffffff;
572
573 output[4] = output[3] >> 56;
574 output[3] &= 0x00ffffffffffffff;
575
576 /* Now output[2] < 2^56, output[3] < 2^56, output[4] < 2^72 */
577
578 /* Eliminate output[4] */
579 output[2] += output[4] >> 16;
580 /* output[2] < 2^56 + 2^56 = 2^57 */
581 output[1] += (output[4] & 0xffff) << 40;
582 output[0] -= output[4];
583
584 /* Carry 0 -> 1 -> 2 -> 3 */
585 output[1] += output[0] >> 56;
586 out[0] = output[0] & 0x00ffffffffffffff;
587
588 output[2] += output[1] >> 56;
589 /* output[2] < 2^57 + 2^72 */
590 out[1] = output[1] & 0x00ffffffffffffff;
591 output[3] += output[2] >> 56;
592 /* output[3] <= 2^56 + 2^16 */
593 out[2] = output[2] & 0x00ffffffffffffff;
594
50e735f9
MC
595 /*-
596 * out[0] < 2^56, out[1] < 2^56, out[2] < 2^56,
597 * out[3] <= 2^56 + 2^16 (due to final carry),
598 * so out < 2*p
599 */
0f113f3e
MC
600 out[3] = output[3];
601}
04daec86 602
3e00b4c9 603static void felem_square_reduce(felem out, const felem in)
0f113f3e
MC
604{
605 widefelem tmp;
606 felem_square(tmp, in);
607 felem_reduce(out, tmp);
608}
04daec86 609
3e00b4c9 610static void felem_mul_reduce(felem out, const felem in1, const felem in2)
0f113f3e
MC
611{
612 widefelem tmp;
613 felem_mul(tmp, in1, in2);
614 felem_reduce(out, tmp);
615}
616
617/*
618 * Reduce to unique minimal representation. Requires 0 <= in < 2*p (always
619 * call felem_reduce first)
620 */
3e00b4c9 621static void felem_contract(felem out, const felem in)
0f113f3e
MC
622{
623 static const int64_t two56 = ((limb) 1) << 56;
624 /* 0 <= in < 2*p, p = 2^224 - 2^96 + 1 */
625 /* if in > p , reduce in = in - 2^224 + 2^96 - 1 */
626 int64_t tmp[4], a;
627 tmp[0] = in[0];
628 tmp[1] = in[1];
629 tmp[2] = in[2];
630 tmp[3] = in[3];
631 /* Case 1: a = 1 iff in >= 2^224 */
632 a = (in[3] >> 56);
633 tmp[0] -= a;
634 tmp[1] += a << 40;
635 tmp[3] &= 0x00ffffffffffffff;
636 /*
637 * Case 2: a = 0 iff p <= in < 2^224, i.e., the high 128 bits are all 1
638 * and the lower part is non-zero
639 */
640 a = ((in[3] & in[2] & (in[1] | 0x000000ffffffffff)) + 1) |
641 (((int64_t) (in[0] + (in[1] & 0x000000ffffffffff)) - 1) >> 63);
642 a &= 0x00ffffffffffffff;
643 /* turn a into an all-one mask (if a = 0) or an all-zero mask */
644 a = (a - 1) >> 63;
645 /* subtract 2^224 - 2^96 + 1 if a is all-one */
646 tmp[3] &= a ^ 0xffffffffffffffff;
647 tmp[2] &= a ^ 0xffffffffffffffff;
648 tmp[1] &= (a ^ 0xffffffffffffffff) | 0x000000ffffffffff;
649 tmp[0] -= 1 & a;
650
651 /*
652 * eliminate negative coefficients: if tmp[0] is negative, tmp[1] must be
653 * non-zero, so we only need one step
654 */
655 a = tmp[0] >> 63;
656 tmp[0] += two56 & a;
657 tmp[1] -= 1 & a;
658
659 /* carry 1 -> 2 -> 3 */
660 tmp[2] += tmp[1] >> 56;
661 tmp[1] &= 0x00ffffffffffffff;
662
663 tmp[3] += tmp[2] >> 56;
664 tmp[2] &= 0x00ffffffffffffff;
665
666 /* Now 0 <= out < p */
667 out[0] = tmp[0];
668 out[1] = tmp[1];
669 out[2] = tmp[2];
670 out[3] = tmp[3];
671}
672
dc55e4f7
DB
673/*
674 * Get negative value: out = -in
675 * Requires in[i] < 2^63,
676 * ensures out[0] < 2^56, out[1] < 2^56, out[2] < 2^56, out[3] <= 2^56 + 2^16
677 */
678static void felem_neg(felem out, const felem in)
679{
80de1742
P
680 widefelem tmp;
681
682 memset(tmp, 0, sizeof(tmp));
dc55e4f7
DB
683 felem_diff_128_64(tmp, in);
684 felem_reduce(out, tmp);
685}
686
0f113f3e
MC
687/*
688 * Zero-check: returns 1 if input is 0, and 0 otherwise. We know that field
689 * elements are reduced to in < 2^225, so we only need to check three cases:
690 * 0, 2^224 - 2^96 + 1, and 2^225 - 2^97 + 2
691 */
3e00b4c9 692static limb felem_is_zero(const felem in)
0f113f3e
MC
693{
694 limb zero, two224m96p1, two225m97p2;
695
696 zero = in[0] | in[1] | in[2] | in[3];
697 zero = (((int64_t) (zero) - 1) >> 63) & 1;
698 two224m96p1 = (in[0] ^ 1) | (in[1] ^ 0x00ffff0000000000)
699 | (in[2] ^ 0x00ffffffffffffff) | (in[3] ^ 0x00ffffffffffffff);
700 two224m96p1 = (((int64_t) (two224m96p1) - 1) >> 63) & 1;
701 two225m97p2 = (in[0] ^ 2) | (in[1] ^ 0x00fffe0000000000)
702 | (in[2] ^ 0x00ffffffffffffff) | (in[3] ^ 0x01ffffffffffffff);
703 two225m97p2 = (((int64_t) (two225m97p2) - 1) >> 63) & 1;
704 return (zero | two224m96p1 | two225m97p2);
705}
04daec86 706
c55b786a 707static int felem_is_zero_int(const void *in)
0f113f3e
MC
708{
709 return (int)(felem_is_zero(in) & ((limb) 1));
710}
3e00b4c9 711
04daec86
BM
712/* Invert a field element */
713/* Computation chain copied from djb's code */
3e00b4c9 714static void felem_inv(felem out, const felem in)
0f113f3e
MC
715{
716 felem ftmp, ftmp2, ftmp3, ftmp4;
717 widefelem tmp;
718 unsigned i;
719
720 felem_square(tmp, in);
721 felem_reduce(ftmp, tmp); /* 2 */
722 felem_mul(tmp, in, ftmp);
723 felem_reduce(ftmp, tmp); /* 2^2 - 1 */
724 felem_square(tmp, ftmp);
725 felem_reduce(ftmp, tmp); /* 2^3 - 2 */
726 felem_mul(tmp, in, ftmp);
727 felem_reduce(ftmp, tmp); /* 2^3 - 1 */
728 felem_square(tmp, ftmp);
729 felem_reduce(ftmp2, tmp); /* 2^4 - 2 */
730 felem_square(tmp, ftmp2);
731 felem_reduce(ftmp2, tmp); /* 2^5 - 4 */
732 felem_square(tmp, ftmp2);
733 felem_reduce(ftmp2, tmp); /* 2^6 - 8 */
734 felem_mul(tmp, ftmp2, ftmp);
735 felem_reduce(ftmp, tmp); /* 2^6 - 1 */
736 felem_square(tmp, ftmp);
737 felem_reduce(ftmp2, tmp); /* 2^7 - 2 */
738 for (i = 0; i < 5; ++i) { /* 2^12 - 2^6 */
739 felem_square(tmp, ftmp2);
740 felem_reduce(ftmp2, tmp);
741 }
742 felem_mul(tmp, ftmp2, ftmp);
743 felem_reduce(ftmp2, tmp); /* 2^12 - 1 */
744 felem_square(tmp, ftmp2);
745 felem_reduce(ftmp3, tmp); /* 2^13 - 2 */
746 for (i = 0; i < 11; ++i) { /* 2^24 - 2^12 */
747 felem_square(tmp, ftmp3);
748 felem_reduce(ftmp3, tmp);
749 }
750 felem_mul(tmp, ftmp3, ftmp2);
751 felem_reduce(ftmp2, tmp); /* 2^24 - 1 */
752 felem_square(tmp, ftmp2);
753 felem_reduce(ftmp3, tmp); /* 2^25 - 2 */
754 for (i = 0; i < 23; ++i) { /* 2^48 - 2^24 */
755 felem_square(tmp, ftmp3);
756 felem_reduce(ftmp3, tmp);
757 }
758 felem_mul(tmp, ftmp3, ftmp2);
759 felem_reduce(ftmp3, tmp); /* 2^48 - 1 */
760 felem_square(tmp, ftmp3);
761 felem_reduce(ftmp4, tmp); /* 2^49 - 2 */
762 for (i = 0; i < 47; ++i) { /* 2^96 - 2^48 */
763 felem_square(tmp, ftmp4);
764 felem_reduce(ftmp4, tmp);
765 }
766 felem_mul(tmp, ftmp3, ftmp4);
767 felem_reduce(ftmp3, tmp); /* 2^96 - 1 */
768 felem_square(tmp, ftmp3);
769 felem_reduce(ftmp4, tmp); /* 2^97 - 2 */
770 for (i = 0; i < 23; ++i) { /* 2^120 - 2^24 */
771 felem_square(tmp, ftmp4);
772 felem_reduce(ftmp4, tmp);
773 }
774 felem_mul(tmp, ftmp2, ftmp4);
775 felem_reduce(ftmp2, tmp); /* 2^120 - 1 */
776 for (i = 0; i < 6; ++i) { /* 2^126 - 2^6 */
777 felem_square(tmp, ftmp2);
778 felem_reduce(ftmp2, tmp);
779 }
780 felem_mul(tmp, ftmp2, ftmp);
781 felem_reduce(ftmp, tmp); /* 2^126 - 1 */
782 felem_square(tmp, ftmp);
783 felem_reduce(ftmp, tmp); /* 2^127 - 2 */
784 felem_mul(tmp, ftmp, in);
785 felem_reduce(ftmp, tmp); /* 2^127 - 1 */
786 for (i = 0; i < 97; ++i) { /* 2^224 - 2^97 */
787 felem_square(tmp, ftmp);
788 felem_reduce(ftmp, tmp);
789 }
790 felem_mul(tmp, ftmp, ftmp3);
791 felem_reduce(out, tmp); /* 2^224 - 2^96 - 1 */
792}
793
794/*
795 * Copy in constant time: if icopy == 1, copy in to out, if icopy == 0, copy
796 * out to itself.
797 */
798static void copy_conditional(felem out, const felem in, limb icopy)
799{
800 unsigned i;
801 /*
802 * icopy is a (64-bit) 0 or 1, so copy is either all-zero or all-one
803 */
804 const limb copy = -icopy;
805 for (i = 0; i < 4; ++i) {
806 const limb tmp = copy & (in[i] ^ out[i]);
807 out[i] ^= tmp;
808 }
809}
04daec86 810
04daec86 811/******************************************************************************/
3a83462d 812/*-
0f113f3e 813 * ELLIPTIC CURVE POINT OPERATIONS
04daec86
BM
814 *
815 * Points are represented in Jacobian projective coordinates:
816 * (X, Y, Z) corresponds to the affine point (X/Z^2, Y/Z^3),
817 * or to the point at infinity if Z == 0.
818 *
819 */
820
1d97c843
TH
821/*-
822 * Double an elliptic curve point:
04daec86
BM
823 * (X', Y', Z') = 2 * (X, Y, Z), where
824 * X' = (3 * (X - Z^2) * (X + Z^2))^2 - 8 * X * Y^2
eb8e052c 825 * Y' = 3 * (X - Z^2) * (X + Z^2) * (4 * X * Y^2 - X') - 8 * Y^4
04daec86
BM
826 * Z' = (Y + Z)^2 - Y^2 - Z^2 = 2 * Y * Z
827 * Outputs can equal corresponding inputs, i.e., x_out == x_in is allowed,
0f113f3e 828 * while x_out == y_in is not (maybe this works, but it's not tested).
1d97c843 829 */
04daec86 830static void
3e00b4c9
BM
831point_double(felem x_out, felem y_out, felem z_out,
832 const felem x_in, const felem y_in, const felem z_in)
0f113f3e
MC
833{
834 widefelem tmp, tmp2;
835 felem delta, gamma, beta, alpha, ftmp, ftmp2;
836
837 felem_assign(ftmp, x_in);
838 felem_assign(ftmp2, x_in);
839
840 /* delta = z^2 */
841 felem_square(tmp, z_in);
842 felem_reduce(delta, tmp);
843
844 /* gamma = y^2 */
845 felem_square(tmp, y_in);
846 felem_reduce(gamma, tmp);
847
848 /* beta = x*gamma */
849 felem_mul(tmp, x_in, gamma);
850 felem_reduce(beta, tmp);
851
852 /* alpha = 3*(x-delta)*(x+delta) */
853 felem_diff(ftmp, delta);
854 /* ftmp[i] < 2^57 + 2^58 + 2 < 2^59 */
855 felem_sum(ftmp2, delta);
856 /* ftmp2[i] < 2^57 + 2^57 = 2^58 */
857 felem_scalar(ftmp2, 3);
858 /* ftmp2[i] < 3 * 2^58 < 2^60 */
859 felem_mul(tmp, ftmp, ftmp2);
860 /* tmp[i] < 2^60 * 2^59 * 4 = 2^121 */
861 felem_reduce(alpha, tmp);
862
863 /* x' = alpha^2 - 8*beta */
864 felem_square(tmp, alpha);
865 /* tmp[i] < 4 * 2^57 * 2^57 = 2^116 */
866 felem_assign(ftmp, beta);
867 felem_scalar(ftmp, 8);
868 /* ftmp[i] < 8 * 2^57 = 2^60 */
869 felem_diff_128_64(tmp, ftmp);
870 /* tmp[i] < 2^116 + 2^64 + 8 < 2^117 */
871 felem_reduce(x_out, tmp);
872
873 /* z' = (y + z)^2 - gamma - delta */
874 felem_sum(delta, gamma);
875 /* delta[i] < 2^57 + 2^57 = 2^58 */
876 felem_assign(ftmp, y_in);
877 felem_sum(ftmp, z_in);
878 /* ftmp[i] < 2^57 + 2^57 = 2^58 */
879 felem_square(tmp, ftmp);
880 /* tmp[i] < 4 * 2^58 * 2^58 = 2^118 */
881 felem_diff_128_64(tmp, delta);
882 /* tmp[i] < 2^118 + 2^64 + 8 < 2^119 */
883 felem_reduce(z_out, tmp);
884
885 /* y' = alpha*(4*beta - x') - 8*gamma^2 */
886 felem_scalar(beta, 4);
887 /* beta[i] < 4 * 2^57 = 2^59 */
888 felem_diff(beta, x_out);
889 /* beta[i] < 2^59 + 2^58 + 2 < 2^60 */
890 felem_mul(tmp, alpha, beta);
891 /* tmp[i] < 4 * 2^57 * 2^60 = 2^119 */
892 felem_square(tmp2, gamma);
893 /* tmp2[i] < 4 * 2^57 * 2^57 = 2^116 */
894 widefelem_scalar(tmp2, 8);
895 /* tmp2[i] < 8 * 2^116 = 2^119 */
896 widefelem_diff(tmp, tmp2);
897 /* tmp[i] < 2^119 + 2^120 < 2^121 */
898 felem_reduce(y_out, tmp);
899}
04daec86 900
1d97c843
TH
901/*-
902 * Add two elliptic curve points:
04daec86
BM
903 * (X_1, Y_1, Z_1) + (X_2, Y_2, Z_2) = (X_3, Y_3, Z_3), where
904 * X_3 = (Z_1^3 * Y_2 - Z_2^3 * Y_1)^2 - (Z_1^2 * X_2 - Z_2^2 * X_1)^3 -
905 * 2 * Z_2^2 * X_1 * (Z_1^2 * X_2 - Z_2^2 * X_1)^2
906 * Y_3 = (Z_1^3 * Y_2 - Z_2^3 * Y_1) * (Z_2^2 * X_1 * (Z_1^2 * X_2 - Z_2^2 * X_1)^2 - X_3) -
907 * Z_2^3 * Y_1 * (Z_1^2 * X_2 - Z_2^2 * X_1)^3
3e00b4c9
BM
908 * Z_3 = (Z_1^2 * X_2 - Z_2^2 * X_1) * (Z_1 * Z_2)
909 *
910 * This runs faster if 'mixed' is set, which requires Z_2 = 1 or Z_2 = 0.
911 */
04daec86 912
0f113f3e
MC
913/*
914 * This function is not entirely constant-time: it includes a branch for
915 * checking whether the two input points are equal, (while not equal to the
916 * point at infinity). This case never happens during single point
917 * multiplication, so there is no timing leak for ECDH or ECDSA signing.
918 */
3e00b4c9 919static void point_add(felem x3, felem y3, felem z3,
0f113f3e
MC
920 const felem x1, const felem y1, const felem z1,
921 const int mixed, const felem x2, const felem y2,
922 const felem z2)
923{
924 felem ftmp, ftmp2, ftmp3, ftmp4, ftmp5, x_out, y_out, z_out;
925 widefelem tmp, tmp2;
926 limb z1_is_zero, z2_is_zero, x_equal, y_equal;
927
928 if (!mixed) {
929 /* ftmp2 = z2^2 */
930 felem_square(tmp, z2);
931 felem_reduce(ftmp2, tmp);
932
933 /* ftmp4 = z2^3 */
934 felem_mul(tmp, ftmp2, z2);
935 felem_reduce(ftmp4, tmp);
936
937 /* ftmp4 = z2^3*y1 */
938 felem_mul(tmp2, ftmp4, y1);
939 felem_reduce(ftmp4, tmp2);
940
941 /* ftmp2 = z2^2*x1 */
942 felem_mul(tmp2, ftmp2, x1);
943 felem_reduce(ftmp2, tmp2);
944 } else {
945 /*
946 * We'll assume z2 = 1 (special case z2 = 0 is handled later)
947 */
948
949 /* ftmp4 = z2^3*y1 */
950 felem_assign(ftmp4, y1);
951
952 /* ftmp2 = z2^2*x1 */
953 felem_assign(ftmp2, x1);
954 }
955
956 /* ftmp = z1^2 */
957 felem_square(tmp, z1);
958 felem_reduce(ftmp, tmp);
959
960 /* ftmp3 = z1^3 */
961 felem_mul(tmp, ftmp, z1);
962 felem_reduce(ftmp3, tmp);
963
964 /* tmp = z1^3*y2 */
965 felem_mul(tmp, ftmp3, y2);
966 /* tmp[i] < 4 * 2^57 * 2^57 = 2^116 */
967
968 /* ftmp3 = z1^3*y2 - z2^3*y1 */
969 felem_diff_128_64(tmp, ftmp4);
970 /* tmp[i] < 2^116 + 2^64 + 8 < 2^117 */
971 felem_reduce(ftmp3, tmp);
972
973 /* tmp = z1^2*x2 */
974 felem_mul(tmp, ftmp, x2);
975 /* tmp[i] < 4 * 2^57 * 2^57 = 2^116 */
976
977 /* ftmp = z1^2*x2 - z2^2*x1 */
978 felem_diff_128_64(tmp, ftmp2);
979 /* tmp[i] < 2^116 + 2^64 + 8 < 2^117 */
980 felem_reduce(ftmp, tmp);
981
982 /*
983 * the formulae are incorrect if the points are equal so we check for
984 * this and do doubling if this happens
985 */
986 x_equal = felem_is_zero(ftmp);
987 y_equal = felem_is_zero(ftmp3);
988 z1_is_zero = felem_is_zero(z1);
989 z2_is_zero = felem_is_zero(z2);
990 /* In affine coordinates, (X_1, Y_1) == (X_2, Y_2) */
991 if (x_equal && y_equal && !z1_is_zero && !z2_is_zero) {
992 point_double(x3, y3, z3, x1, y1, z1);
993 return;
994 }
995
996 /* ftmp5 = z1*z2 */
997 if (!mixed) {
998 felem_mul(tmp, z1, z2);
999 felem_reduce(ftmp5, tmp);
1000 } else {
1001 /* special case z2 = 0 is handled later */
1002 felem_assign(ftmp5, z1);
1003 }
1004
1005 /* z_out = (z1^2*x2 - z2^2*x1)*(z1*z2) */
1006 felem_mul(tmp, ftmp, ftmp5);
1007 felem_reduce(z_out, tmp);
1008
1009 /* ftmp = (z1^2*x2 - z2^2*x1)^2 */
1010 felem_assign(ftmp5, ftmp);
1011 felem_square(tmp, ftmp);
1012 felem_reduce(ftmp, tmp);
1013
1014 /* ftmp5 = (z1^2*x2 - z2^2*x1)^3 */
1015 felem_mul(tmp, ftmp, ftmp5);
1016 felem_reduce(ftmp5, tmp);
1017
1018 /* ftmp2 = z2^2*x1*(z1^2*x2 - z2^2*x1)^2 */
1019 felem_mul(tmp, ftmp2, ftmp);
1020 felem_reduce(ftmp2, tmp);
1021
1022 /* tmp = z2^3*y1*(z1^2*x2 - z2^2*x1)^3 */
1023 felem_mul(tmp, ftmp4, ftmp5);
1024 /* tmp[i] < 4 * 2^57 * 2^57 = 2^116 */
1025
1026 /* tmp2 = (z1^3*y2 - z2^3*y1)^2 */
1027 felem_square(tmp2, ftmp3);
1028 /* tmp2[i] < 4 * 2^57 * 2^57 < 2^116 */
1029
1030 /* tmp2 = (z1^3*y2 - z2^3*y1)^2 - (z1^2*x2 - z2^2*x1)^3 */
1031 felem_diff_128_64(tmp2, ftmp5);
1032 /* tmp2[i] < 2^116 + 2^64 + 8 < 2^117 */
1033
1034 /* ftmp5 = 2*z2^2*x1*(z1^2*x2 - z2^2*x1)^2 */
1035 felem_assign(ftmp5, ftmp2);
1036 felem_scalar(ftmp5, 2);
1037 /* ftmp5[i] < 2 * 2^57 = 2^58 */
1038
50e735f9
MC
1039 /*-
1040 * x_out = (z1^3*y2 - z2^3*y1)^2 - (z1^2*x2 - z2^2*x1)^3 -
1041 * 2*z2^2*x1*(z1^2*x2 - z2^2*x1)^2
1042 */
0f113f3e
MC
1043 felem_diff_128_64(tmp2, ftmp5);
1044 /* tmp2[i] < 2^117 + 2^64 + 8 < 2^118 */
1045 felem_reduce(x_out, tmp2);
1046
1047 /* ftmp2 = z2^2*x1*(z1^2*x2 - z2^2*x1)^2 - x_out */
1048 felem_diff(ftmp2, x_out);
1049 /* ftmp2[i] < 2^57 + 2^58 + 2 < 2^59 */
1050
1051 /*
1052 * tmp2 = (z1^3*y2 - z2^3*y1)*(z2^2*x1*(z1^2*x2 - z2^2*x1)^2 - x_out)
1053 */
1054 felem_mul(tmp2, ftmp3, ftmp2);
1055 /* tmp2[i] < 4 * 2^57 * 2^59 = 2^118 */
1056
50e735f9
MC
1057 /*-
1058 * y_out = (z1^3*y2 - z2^3*y1)*(z2^2*x1*(z1^2*x2 - z2^2*x1)^2 - x_out) -
1059 * z2^3*y1*(z1^2*x2 - z2^2*x1)^3
1060 */
0f113f3e
MC
1061 widefelem_diff(tmp2, tmp);
1062 /* tmp2[i] < 2^118 + 2^120 < 2^121 */
1063 felem_reduce(y_out, tmp2);
1064
1065 /*
1066 * the result (x_out, y_out, z_out) is incorrect if one of the inputs is
1067 * the point at infinity, so we need to check for this separately
1068 */
1069
1070 /*
1071 * if point 1 is at infinity, copy point 2 to output, and vice versa
1072 */
1073 copy_conditional(x_out, x2, z1_is_zero);
1074 copy_conditional(x_out, x1, z2_is_zero);
1075 copy_conditional(y_out, y2, z1_is_zero);
1076 copy_conditional(y_out, y1, z2_is_zero);
1077 copy_conditional(z_out, z2, z1_is_zero);
1078 copy_conditional(z_out, z1, z2_is_zero);
1079 felem_assign(x3, x_out);
1080 felem_assign(y3, y_out);
1081 felem_assign(z3, z_out);
1082}
04daec86 1083
dbd87ffc
MC
1084/*
1085 * select_point selects the |idx|th point from a precomputation table and
1086 * copies it to out.
1087 * The pre_comp array argument should be size of |size| argument
1088 */
0f113f3e
MC
1089static void select_point(const u64 idx, unsigned int size,
1090 const felem pre_comp[][3], felem out[3])
1091{
1092 unsigned i, j;
1093 limb *outlimbs = &out[0][0];
0f113f3e 1094
88f4c6f3 1095 memset(out, 0, sizeof(*out) * 3);
0f113f3e
MC
1096 for (i = 0; i < size; i++) {
1097 const limb *inlimbs = &pre_comp[i][0][0];
1098 u64 mask = i ^ idx;
1099 mask |= mask >> 4;
1100 mask |= mask >> 2;
1101 mask |= mask >> 1;
1102 mask &= 1;
1103 mask--;
1104 for (j = 0; j < 4 * 3; j++)
1105 outlimbs[j] |= inlimbs[j] & mask;
1106 }
1107}
3e00b4c9
BM
1108
1109/* get_bit returns the |i|th bit in |in| */
1110static char get_bit(const felem_bytearray in, unsigned i)
0f113f3e
MC
1111{
1112 if (i >= 224)
1113 return 0;
1114 return (in[i >> 3] >> (i & 7)) & 1;
1115}
1116
1117/*
1118 * Interleaved point multiplication using precomputed point multiples: The
1119 * small point multiples 0*P, 1*P, ..., 16*P are in pre_comp[], the scalars
1120 * in scalars[]. If g_scalar is non-NULL, we also add this multiple of the
1121 * generator, using certain (large) precomputed multiples in g_pre_comp.
1122 * Output point (X, Y, Z) is stored in x_out, y_out, z_out
1123 */
3e00b4c9 1124static void batch_mul(felem x_out, felem y_out, felem z_out,
0f113f3e
MC
1125 const felem_bytearray scalars[],
1126 const unsigned num_points, const u8 *g_scalar,
1127 const int mixed, const felem pre_comp[][17][3],
1128 const felem g_pre_comp[2][16][3])
1129{
1130 int i, skip;
1131 unsigned num;
1132 unsigned gen_mul = (g_scalar != NULL);
1133 felem nq[3], tmp[4];
1134 u64 bits;
1135 u8 sign, digit;
1136
1137 /* set nq to the point at infinity */
16f8d4eb 1138 memset(nq, 0, sizeof(nq));
0f113f3e
MC
1139
1140 /*
1141 * Loop over all scalars msb-to-lsb, interleaving additions of multiples
1142 * of the generator (two in each of the last 28 rounds) and additions of
1143 * other points multiples (every 5th round).
1144 */
1145 skip = 1; /* save two point operations in the first
1146 * round */
1147 for (i = (num_points ? 220 : 27); i >= 0; --i) {
1148 /* double */
1149 if (!skip)
1150 point_double(nq[0], nq[1], nq[2], nq[0], nq[1], nq[2]);
1151
1152 /* add multiples of the generator */
1153 if (gen_mul && (i <= 27)) {
1154 /* first, look 28 bits upwards */
1155 bits = get_bit(g_scalar, i + 196) << 3;
1156 bits |= get_bit(g_scalar, i + 140) << 2;
1157 bits |= get_bit(g_scalar, i + 84) << 1;
1158 bits |= get_bit(g_scalar, i + 28);
1159 /* select the point to add, in constant time */
1160 select_point(bits, 16, g_pre_comp[1], tmp);
1161
1162 if (!skip) {
1163 /* value 1 below is argument for "mixed" */
1164 point_add(nq[0], nq[1], nq[2],
1165 nq[0], nq[1], nq[2], 1, tmp[0], tmp[1], tmp[2]);
1166 } else {
1167 memcpy(nq, tmp, 3 * sizeof(felem));
1168 skip = 0;
1169 }
1170
1171 /* second, look at the current position */
1172 bits = get_bit(g_scalar, i + 168) << 3;
1173 bits |= get_bit(g_scalar, i + 112) << 2;
1174 bits |= get_bit(g_scalar, i + 56) << 1;
1175 bits |= get_bit(g_scalar, i);
1176 /* select the point to add, in constant time */
1177 select_point(bits, 16, g_pre_comp[0], tmp);
1178 point_add(nq[0], nq[1], nq[2],
1179 nq[0], nq[1], nq[2],
1180 1 /* mixed */ , tmp[0], tmp[1], tmp[2]);
1181 }
1182
1183 /* do other additions every 5 doublings */
1184 if (num_points && (i % 5 == 0)) {
1185 /* loop over all scalars */
1186 for (num = 0; num < num_points; ++num) {
1187 bits = get_bit(scalars[num], i + 4) << 5;
1188 bits |= get_bit(scalars[num], i + 3) << 4;
1189 bits |= get_bit(scalars[num], i + 2) << 3;
1190 bits |= get_bit(scalars[num], i + 1) << 2;
1191 bits |= get_bit(scalars[num], i) << 1;
1192 bits |= get_bit(scalars[num], i - 1);
1193 ec_GFp_nistp_recode_scalar_bits(&sign, &digit, bits);
1194
1195 /* select the point to add or subtract */
1196 select_point(digit, 17, pre_comp[num], tmp);
1197 felem_neg(tmp[3], tmp[1]); /* (X, -Y, Z) is the negative
1198 * point */
1199 copy_conditional(tmp[1], tmp[3], sign);
1200
1201 if (!skip) {
1202 point_add(nq[0], nq[1], nq[2],
1203 nq[0], nq[1], nq[2],
1204 mixed, tmp[0], tmp[1], tmp[2]);
1205 } else {
1206 memcpy(nq, tmp, 3 * sizeof(felem));
1207 skip = 0;
1208 }
1209 }
1210 }
1211 }
1212 felem_assign(x_out, nq[0]);
1213 felem_assign(y_out, nq[1]);
1214 felem_assign(z_out, nq[2]);
1215}
04daec86
BM
1216
1217/******************************************************************************/
0f113f3e
MC
1218/*
1219 * FUNCTIONS TO MANAGE PRECOMPUTATION
04daec86
BM
1220 */
1221
3f5abab9 1222static NISTP224_PRE_COMP *nistp224_pre_comp_new(void)
0f113f3e 1223{
b51bce94
RS
1224 NISTP224_PRE_COMP *ret = OPENSSL_zalloc(sizeof(*ret));
1225
0f113f3e
MC
1226 if (!ret) {
1227 ECerr(EC_F_NISTP224_PRE_COMP_NEW, ERR_R_MALLOC_FAILURE);
1228 return ret;
1229 }
9b398ef2 1230
0f113f3e 1231 ret->references = 1;
9b398ef2
AG
1232
1233 ret->lock = CRYPTO_THREAD_lock_new();
1234 if (ret->lock == NULL) {
1235 ECerr(EC_F_NISTP224_PRE_COMP_NEW, ERR_R_MALLOC_FAILURE);
1236 OPENSSL_free(ret);
1237 return NULL;
1238 }
0f113f3e
MC
1239 return ret;
1240}
04daec86 1241
3aef36ff 1242NISTP224_PRE_COMP *EC_nistp224_pre_comp_dup(NISTP224_PRE_COMP *p)
0f113f3e 1243{
9b398ef2 1244 int i;
3aef36ff 1245 if (p != NULL)
2f545ae4 1246 CRYPTO_UP_REF(&p->references, &i, p->lock);
3aef36ff 1247 return p;
0f113f3e 1248}
04daec86 1249
3aef36ff 1250void EC_nistp224_pre_comp_free(NISTP224_PRE_COMP *p)
0f113f3e 1251{
9b398ef2
AG
1252 int i;
1253
1254 if (p == NULL)
1255 return;
1256
2f545ae4 1257 CRYPTO_DOWN_REF(&p->references, &i, p->lock);
9b398ef2
AG
1258 REF_PRINT_COUNT("EC_nistp224", x);
1259 if (i > 0)
0f113f3e 1260 return;
9b398ef2
AG
1261 REF_ASSERT_ISNT(i < 0);
1262
1263 CRYPTO_THREAD_lock_free(p->lock);
3aef36ff 1264 OPENSSL_free(p);
0f113f3e 1265}
04daec86
BM
1266
1267/******************************************************************************/
0f113f3e
MC
1268/*
1269 * OPENSSL EC_METHOD FUNCTIONS
04daec86
BM
1270 */
1271
1272int ec_GFp_nistp224_group_init(EC_GROUP *group)
0f113f3e
MC
1273{
1274 int ret;
1275 ret = ec_GFp_simple_group_init(group);
1276 group->a_is_minus3 = 1;
1277 return ret;
1278}
04daec86
BM
1279
1280int ec_GFp_nistp224_group_set_curve(EC_GROUP *group, const BIGNUM *p,
0f113f3e
MC
1281 const BIGNUM *a, const BIGNUM *b,
1282 BN_CTX *ctx)
1283{
1284 int ret = 0;
0f113f3e 1285 BIGNUM *curve_p, *curve_a, *curve_b;
a9612d6c
MC
1286#ifndef FIPS_MODE
1287 BN_CTX *new_ctx = NULL;
0f113f3e
MC
1288
1289 if (ctx == NULL)
a6482df0 1290 ctx = new_ctx = BN_CTX_new();
a9612d6c
MC
1291#endif
1292 if (ctx == NULL)
1293 return 0;
1294
0f113f3e 1295 BN_CTX_start(ctx);
edea42c6
PY
1296 curve_p = BN_CTX_get(ctx);
1297 curve_a = BN_CTX_get(ctx);
1298 curve_b = BN_CTX_get(ctx);
1299 if (curve_b == NULL)
0f113f3e
MC
1300 goto err;
1301 BN_bin2bn(nistp224_curve_params[0], sizeof(felem_bytearray), curve_p);
1302 BN_bin2bn(nistp224_curve_params[1], sizeof(felem_bytearray), curve_a);
1303 BN_bin2bn(nistp224_curve_params[2], sizeof(felem_bytearray), curve_b);
1304 if ((BN_cmp(curve_p, p)) || (BN_cmp(curve_a, a)) || (BN_cmp(curve_b, b))) {
1305 ECerr(EC_F_EC_GFP_NISTP224_GROUP_SET_CURVE,
1306 EC_R_WRONG_CURVE_PARAMETERS);
1307 goto err;
1308 }
1309 group->field_mod_func = BN_nist_mod_224;
1310 ret = ec_GFp_simple_group_set_curve(group, p, a, b, ctx);
1311 err:
1312 BN_CTX_end(ctx);
a9612d6c 1313#ifndef FIPS_MODE
23a1d5e9 1314 BN_CTX_free(new_ctx);
a9612d6c 1315#endif
0f113f3e
MC
1316 return ret;
1317}
1318
1319/*
1320 * Takes the Jacobian coordinates (X, Y, Z) of a point and returns (X', Y') =
1321 * (X/Z^2, Y/Z^3)
1322 */
04daec86 1323int ec_GFp_nistp224_point_get_affine_coordinates(const EC_GROUP *group,
0f113f3e
MC
1324 const EC_POINT *point,
1325 BIGNUM *x, BIGNUM *y,
1326 BN_CTX *ctx)
1327{
1328 felem z1, z2, x_in, y_in, x_out, y_out;
1329 widefelem tmp;
1330
1331 if (EC_POINT_is_at_infinity(group, point)) {
1332 ECerr(EC_F_EC_GFP_NISTP224_POINT_GET_AFFINE_COORDINATES,
1333 EC_R_POINT_AT_INFINITY);
1334 return 0;
1335 }
ace8f546
AP
1336 if ((!BN_to_felem(x_in, point->X)) || (!BN_to_felem(y_in, point->Y)) ||
1337 (!BN_to_felem(z1, point->Z)))
0f113f3e
MC
1338 return 0;
1339 felem_inv(z2, z1);
1340 felem_square(tmp, z2);
1341 felem_reduce(z1, tmp);
1342 felem_mul(tmp, x_in, z1);
1343 felem_reduce(x_in, tmp);
1344 felem_contract(x_out, x_in);
1345 if (x != NULL) {
1346 if (!felem_to_BN(x, x_out)) {
1347 ECerr(EC_F_EC_GFP_NISTP224_POINT_GET_AFFINE_COORDINATES,
1348 ERR_R_BN_LIB);
1349 return 0;
1350 }
1351 }
1352 felem_mul(tmp, z1, z2);
1353 felem_reduce(z1, tmp);
1354 felem_mul(tmp, y_in, z1);
1355 felem_reduce(y_in, tmp);
1356 felem_contract(y_out, y_in);
1357 if (y != NULL) {
1358 if (!felem_to_BN(y, y_out)) {
1359 ECerr(EC_F_EC_GFP_NISTP224_POINT_GET_AFFINE_COORDINATES,
1360 ERR_R_BN_LIB);
1361 return 0;
1362 }
1363 }
1364 return 1;
1365}
1366
1367static void make_points_affine(size_t num, felem points[ /* num */ ][3],
1368 felem tmp_felems[ /* num+1 */ ])
1369{
1370 /*
1371 * Runs in constant time, unless an input is the point at infinity (which
1372 * normally shouldn't happen).
1373 */
1374 ec_GFp_nistp_points_make_affine_internal(num,
1375 points,
1376 sizeof(felem),
1377 tmp_felems,
1378 (void (*)(void *))felem_one,
0f113f3e
MC
1379 felem_is_zero_int,
1380 (void (*)(void *, const void *))
1381 felem_assign,
1382 (void (*)(void *, const void *))
1383 felem_square_reduce, (void (*)
1384 (void *,
1385 const void
1386 *,
1387 const void
1388 *))
1389 felem_mul_reduce,
1390 (void (*)(void *, const void *))
1391 felem_inv,
1392 (void (*)(void *, const void *))
1393 felem_contract);
1394}
1395
1396/*
1397 * Computes scalar*generator + \sum scalars[i]*points[i], ignoring NULL
1398 * values Result is stored in r (r can equal one of the inputs).
1399 */
04daec86 1400int ec_GFp_nistp224_points_mul(const EC_GROUP *group, EC_POINT *r,
0f113f3e
MC
1401 const BIGNUM *scalar, size_t num,
1402 const EC_POINT *points[],
1403 const BIGNUM *scalars[], BN_CTX *ctx)
1404{
1405 int ret = 0;
1406 int j;
1407 unsigned i;
1408 int mixed = 0;
0f113f3e
MC
1409 BIGNUM *x, *y, *z, *tmp_scalar;
1410 felem_bytearray g_secret;
1411 felem_bytearray *secrets = NULL;
16f8d4eb 1412 felem (*pre_comp)[17][3] = NULL;
0f113f3e
MC
1413 felem *tmp_felems = NULL;
1414 felem_bytearray tmp;
1415 unsigned num_bytes;
1416 int have_pre_comp = 0;
1417 size_t num_points = num;
1418 felem x_in, y_in, z_in, x_out, y_out, z_out;
1419 NISTP224_PRE_COMP *pre = NULL;
1420 const felem(*g_pre_comp)[16][3] = NULL;
1421 EC_POINT *generator = NULL;
1422 const EC_POINT *p = NULL;
1423 const BIGNUM *p_scalar = NULL;
1424
0f113f3e 1425 BN_CTX_start(ctx);
edea42c6
PY
1426 x = BN_CTX_get(ctx);
1427 y = BN_CTX_get(ctx);
1428 z = BN_CTX_get(ctx);
1429 tmp_scalar = BN_CTX_get(ctx);
1430 if (tmp_scalar == NULL)
0f113f3e
MC
1431 goto err;
1432
1433 if (scalar != NULL) {
3aef36ff 1434 pre = group->pre_comp.nistp224;
0f113f3e
MC
1435 if (pre)
1436 /* we have precomputation, try to use it */
1437 g_pre_comp = (const felem(*)[16][3])pre->g_pre_comp;
1438 else
1439 /* try to use the standard precomputation */
1440 g_pre_comp = &gmul[0];
1441 generator = EC_POINT_new(group);
1442 if (generator == NULL)
1443 goto err;
1444 /* get the generator from precomputation */
1445 if (!felem_to_BN(x, g_pre_comp[0][1][0]) ||
1446 !felem_to_BN(y, g_pre_comp[0][1][1]) ||
1447 !felem_to_BN(z, g_pre_comp[0][1][2])) {
1448 ECerr(EC_F_EC_GFP_NISTP224_POINTS_MUL, ERR_R_BN_LIB);
1449 goto err;
1450 }
1451 if (!EC_POINT_set_Jprojective_coordinates_GFp(group,
1452 generator, x, y, z,
1453 ctx))
1454 goto err;
1455 if (0 == EC_POINT_cmp(group, generator, group->generator, ctx))
1456 /* precomputation matches generator */
1457 have_pre_comp = 1;
1458 else
1459 /*
1460 * we don't have valid precomputation: treat the generator as a
1461 * random point
1462 */
1463 num_points = num_points + 1;
1464 }
1465
1466 if (num_points > 0) {
1467 if (num_points >= 3) {
1468 /*
1469 * unless we precompute multiples for just one or two points,
1470 * converting those into affine form is time well spent
1471 */
1472 mixed = 1;
1473 }
b51bce94
RS
1474 secrets = OPENSSL_zalloc(sizeof(*secrets) * num_points);
1475 pre_comp = OPENSSL_zalloc(sizeof(*pre_comp) * num_points);
0f113f3e
MC
1476 if (mixed)
1477 tmp_felems =
16f8d4eb 1478 OPENSSL_malloc(sizeof(felem) * (num_points * 17 + 1));
0f113f3e
MC
1479 if ((secrets == NULL) || (pre_comp == NULL)
1480 || (mixed && (tmp_felems == NULL))) {
1481 ECerr(EC_F_EC_GFP_NISTP224_POINTS_MUL, ERR_R_MALLOC_FAILURE);
1482 goto err;
1483 }
1484
1485 /*
1486 * we treat NULL scalars as 0, and NULL points as points at infinity,
1487 * i.e., they contribute nothing to the linear combination
1488 */
0f113f3e
MC
1489 for (i = 0; i < num_points; ++i) {
1490 if (i == num)
1491 /* the generator */
1492 {
1493 p = EC_GROUP_get0_generator(group);
1494 p_scalar = scalar;
1495 } else
1496 /* the i^th point */
1497 {
1498 p = points[i];
1499 p_scalar = scalars[i];
1500 }
1501 if ((p_scalar != NULL) && (p != NULL)) {
1502 /* reduce scalar to 0 <= scalar < 2^224 */
1503 if ((BN_num_bits(p_scalar) > 224)
1504 || (BN_is_negative(p_scalar))) {
1505 /*
1506 * this is an unusual input, and we don't guarantee
1507 * constant-timeness
1508 */
ace8f546 1509 if (!BN_nnmod(tmp_scalar, p_scalar, group->order, ctx)) {
0f113f3e
MC
1510 ECerr(EC_F_EC_GFP_NISTP224_POINTS_MUL, ERR_R_BN_LIB);
1511 goto err;
1512 }
805315d3 1513 num_bytes = BN_bn2binpad(tmp_scalar, tmp, sizeof(tmp));
0f113f3e 1514 } else
805315d3 1515 num_bytes = BN_bn2binpad(p_scalar, tmp, sizeof(tmp));
0f113f3e
MC
1516 flip_endian(secrets[i], tmp, num_bytes);
1517 /* precompute multiples */
ace8f546
AP
1518 if ((!BN_to_felem(x_out, p->X)) ||
1519 (!BN_to_felem(y_out, p->Y)) ||
1520 (!BN_to_felem(z_out, p->Z)))
0f113f3e
MC
1521 goto err;
1522 felem_assign(pre_comp[i][1][0], x_out);
1523 felem_assign(pre_comp[i][1][1], y_out);
1524 felem_assign(pre_comp[i][1][2], z_out);
1525 for (j = 2; j <= 16; ++j) {
1526 if (j & 1) {
1527 point_add(pre_comp[i][j][0], pre_comp[i][j][1],
1528 pre_comp[i][j][2], pre_comp[i][1][0],
1529 pre_comp[i][1][1], pre_comp[i][1][2], 0,
1530 pre_comp[i][j - 1][0],
1531 pre_comp[i][j - 1][1],
1532 pre_comp[i][j - 1][2]);
1533 } else {
1534 point_double(pre_comp[i][j][0], pre_comp[i][j][1],
1535 pre_comp[i][j][2], pre_comp[i][j / 2][0],
1536 pre_comp[i][j / 2][1],
1537 pre_comp[i][j / 2][2]);
1538 }
1539 }
1540 }
1541 }
1542 if (mixed)
1543 make_points_affine(num_points * 17, pre_comp[0], tmp_felems);
1544 }
1545
1546 /* the scalar for the generator */
1547 if ((scalar != NULL) && (have_pre_comp)) {
16f8d4eb 1548 memset(g_secret, 0, sizeof(g_secret));
0f113f3e
MC
1549 /* reduce scalar to 0 <= scalar < 2^224 */
1550 if ((BN_num_bits(scalar) > 224) || (BN_is_negative(scalar))) {
1551 /*
1552 * this is an unusual input, and we don't guarantee
1553 * constant-timeness
1554 */
ace8f546 1555 if (!BN_nnmod(tmp_scalar, scalar, group->order, ctx)) {
0f113f3e
MC
1556 ECerr(EC_F_EC_GFP_NISTP224_POINTS_MUL, ERR_R_BN_LIB);
1557 goto err;
1558 }
805315d3 1559 num_bytes = BN_bn2binpad(tmp_scalar, tmp, sizeof(tmp));
0f113f3e 1560 } else
805315d3 1561 num_bytes = BN_bn2binpad(scalar, tmp, sizeof(tmp));
0f113f3e
MC
1562 flip_endian(g_secret, tmp, num_bytes);
1563 /* do the multiplication with generator precomputation */
1564 batch_mul(x_out, y_out, z_out,
1565 (const felem_bytearray(*))secrets, num_points,
1566 g_secret,
1567 mixed, (const felem(*)[17][3])pre_comp, g_pre_comp);
1568 } else
1569 /* do the multiplication without generator precomputation */
1570 batch_mul(x_out, y_out, z_out,
1571 (const felem_bytearray(*))secrets, num_points,
1572 NULL, mixed, (const felem(*)[17][3])pre_comp, NULL);
1573 /* reduce the output to its unique minimal representation */
1574 felem_contract(x_in, x_out);
1575 felem_contract(y_in, y_out);
1576 felem_contract(z_in, z_out);
1577 if ((!felem_to_BN(x, x_in)) || (!felem_to_BN(y, y_in)) ||
1578 (!felem_to_BN(z, z_in))) {
1579 ECerr(EC_F_EC_GFP_NISTP224_POINTS_MUL, ERR_R_BN_LIB);
1580 goto err;
1581 }
1582 ret = EC_POINT_set_Jprojective_coordinates_GFp(group, r, x, y, z, ctx);
1583
1584 err:
1585 BN_CTX_end(ctx);
8fdc3734 1586 EC_POINT_free(generator);
b548a1f1
RS
1587 OPENSSL_free(secrets);
1588 OPENSSL_free(pre_comp);
1589 OPENSSL_free(tmp_felems);
0f113f3e
MC
1590 return ret;
1591}
04daec86
BM
1592
1593int ec_GFp_nistp224_precompute_mult(EC_GROUP *group, BN_CTX *ctx)
0f113f3e
MC
1594{
1595 int ret = 0;
1596 NISTP224_PRE_COMP *pre = NULL;
1597 int i, j;
0f113f3e
MC
1598 BIGNUM *x, *y;
1599 EC_POINT *generator = NULL;
1600 felem tmp_felems[32];
a9612d6c
MC
1601#ifndef FIPS_MODE
1602 BN_CTX *new_ctx = NULL;
1603#endif
0f113f3e
MC
1604
1605 /* throw away old precomputation */
2c52ac9b 1606 EC_pre_comp_free(group);
a9612d6c
MC
1607
1608#ifndef FIPS_MODE
0f113f3e 1609 if (ctx == NULL)
a6482df0 1610 ctx = new_ctx = BN_CTX_new();
a9612d6c
MC
1611#endif
1612 if (ctx == NULL)
1613 return 0;
1614
0f113f3e 1615 BN_CTX_start(ctx);
edea42c6
PY
1616 x = BN_CTX_get(ctx);
1617 y = BN_CTX_get(ctx);
1618 if (y == NULL)
0f113f3e
MC
1619 goto err;
1620 /* get the generator */
1621 if (group->generator == NULL)
1622 goto err;
1623 generator = EC_POINT_new(group);
1624 if (generator == NULL)
1625 goto err;
1626 BN_bin2bn(nistp224_curve_params[3], sizeof(felem_bytearray), x);
1627 BN_bin2bn(nistp224_curve_params[4], sizeof(felem_bytearray), y);
9cc570d4 1628 if (!EC_POINT_set_affine_coordinates(group, generator, x, y, ctx))
0f113f3e
MC
1629 goto err;
1630 if ((pre = nistp224_pre_comp_new()) == NULL)
1631 goto err;
1632 /*
1633 * if the generator is the standard one, use built-in precomputation
1634 */
1635 if (0 == EC_POINT_cmp(group, generator, group->generator, ctx)) {
1636 memcpy(pre->g_pre_comp, gmul, sizeof(pre->g_pre_comp));
615614c8 1637 goto done;
0f113f3e 1638 }
ace8f546
AP
1639 if ((!BN_to_felem(pre->g_pre_comp[0][1][0], group->generator->X)) ||
1640 (!BN_to_felem(pre->g_pre_comp[0][1][1], group->generator->Y)) ||
1641 (!BN_to_felem(pre->g_pre_comp[0][1][2], group->generator->Z)))
0f113f3e
MC
1642 goto err;
1643 /*
1644 * compute 2^56*G, 2^112*G, 2^168*G for the first table, 2^28*G, 2^84*G,
1645 * 2^140*G, 2^196*G for the second one
1646 */
1647 for (i = 1; i <= 8; i <<= 1) {
1648 point_double(pre->g_pre_comp[1][i][0], pre->g_pre_comp[1][i][1],
1649 pre->g_pre_comp[1][i][2], pre->g_pre_comp[0][i][0],
1650 pre->g_pre_comp[0][i][1], pre->g_pre_comp[0][i][2]);
1651 for (j = 0; j < 27; ++j) {
1652 point_double(pre->g_pre_comp[1][i][0], pre->g_pre_comp[1][i][1],
1653 pre->g_pre_comp[1][i][2], pre->g_pre_comp[1][i][0],
1654 pre->g_pre_comp[1][i][1], pre->g_pre_comp[1][i][2]);
1655 }
1656 if (i == 8)
1657 break;
1658 point_double(pre->g_pre_comp[0][2 * i][0],
1659 pre->g_pre_comp[0][2 * i][1],
1660 pre->g_pre_comp[0][2 * i][2], pre->g_pre_comp[1][i][0],
1661 pre->g_pre_comp[1][i][1], pre->g_pre_comp[1][i][2]);
1662 for (j = 0; j < 27; ++j) {
1663 point_double(pre->g_pre_comp[0][2 * i][0],
1664 pre->g_pre_comp[0][2 * i][1],
1665 pre->g_pre_comp[0][2 * i][2],
1666 pre->g_pre_comp[0][2 * i][0],
1667 pre->g_pre_comp[0][2 * i][1],
1668 pre->g_pre_comp[0][2 * i][2]);
1669 }
1670 }
1671 for (i = 0; i < 2; i++) {
1672 /* g_pre_comp[i][0] is the point at infinity */
1673 memset(pre->g_pre_comp[i][0], 0, sizeof(pre->g_pre_comp[i][0]));
1674 /* the remaining multiples */
1675 /* 2^56*G + 2^112*G resp. 2^84*G + 2^140*G */
1676 point_add(pre->g_pre_comp[i][6][0], pre->g_pre_comp[i][6][1],
1677 pre->g_pre_comp[i][6][2], pre->g_pre_comp[i][4][0],
1678 pre->g_pre_comp[i][4][1], pre->g_pre_comp[i][4][2],
1679 0, pre->g_pre_comp[i][2][0], pre->g_pre_comp[i][2][1],
1680 pre->g_pre_comp[i][2][2]);
1681 /* 2^56*G + 2^168*G resp. 2^84*G + 2^196*G */
1682 point_add(pre->g_pre_comp[i][10][0], pre->g_pre_comp[i][10][1],
1683 pre->g_pre_comp[i][10][2], pre->g_pre_comp[i][8][0],
1684 pre->g_pre_comp[i][8][1], pre->g_pre_comp[i][8][2],
1685 0, pre->g_pre_comp[i][2][0], pre->g_pre_comp[i][2][1],
1686 pre->g_pre_comp[i][2][2]);
1687 /* 2^112*G + 2^168*G resp. 2^140*G + 2^196*G */
1688 point_add(pre->g_pre_comp[i][12][0], pre->g_pre_comp[i][12][1],
1689 pre->g_pre_comp[i][12][2], pre->g_pre_comp[i][8][0],
1690 pre->g_pre_comp[i][8][1], pre->g_pre_comp[i][8][2],
1691 0, pre->g_pre_comp[i][4][0], pre->g_pre_comp[i][4][1],
1692 pre->g_pre_comp[i][4][2]);
1693 /*
1694 * 2^56*G + 2^112*G + 2^168*G resp. 2^84*G + 2^140*G + 2^196*G
1695 */
1696 point_add(pre->g_pre_comp[i][14][0], pre->g_pre_comp[i][14][1],
1697 pre->g_pre_comp[i][14][2], pre->g_pre_comp[i][12][0],
1698 pre->g_pre_comp[i][12][1], pre->g_pre_comp[i][12][2],
1699 0, pre->g_pre_comp[i][2][0], pre->g_pre_comp[i][2][1],
1700 pre->g_pre_comp[i][2][2]);
1701 for (j = 1; j < 8; ++j) {
1702 /* odd multiples: add G resp. 2^28*G */
1703 point_add(pre->g_pre_comp[i][2 * j + 1][0],
1704 pre->g_pre_comp[i][2 * j + 1][1],
1705 pre->g_pre_comp[i][2 * j + 1][2],
1706 pre->g_pre_comp[i][2 * j][0],
1707 pre->g_pre_comp[i][2 * j][1],
1708 pre->g_pre_comp[i][2 * j][2], 0,
1709 pre->g_pre_comp[i][1][0], pre->g_pre_comp[i][1][1],
1710 pre->g_pre_comp[i][1][2]);
1711 }
1712 }
1713 make_points_affine(31, &(pre->g_pre_comp[0][1]), tmp_felems);
1714
615614c8 1715 done:
3aef36ff 1716 SETPRECOMP(group, nistp224, pre);
0f113f3e 1717 pre = NULL;
3aef36ff 1718 ret = 1;
04daec86 1719 err:
0f113f3e 1720 BN_CTX_end(ctx);
8fdc3734 1721 EC_POINT_free(generator);
a9612d6c 1722#ifndef FIPS_MODE
23a1d5e9 1723 BN_CTX_free(new_ctx);
a9612d6c 1724#endif
3aef36ff 1725 EC_nistp224_pre_comp_free(pre);
0f113f3e
MC
1726 return ret;
1727}
04daec86
BM
1728
1729int ec_GFp_nistp224_have_precompute_mult(const EC_GROUP *group)
0f113f3e 1730{
3aef36ff 1731 return HAVEPRECOMP(group, nistp224);
0f113f3e 1732}
396cb565 1733
04daec86 1734#endif