]> git.ipfire.org Git - thirdparty/openssl.git/blame - crypto/ec/ecp_nistp224.c
Implement coordinate blinding for EC_POINT
[thirdparty/openssl.git] / crypto / ec / ecp_nistp224.c
CommitLineData
04daec86 1/*
0d664759 2 * Copyright 2010-2018 The OpenSSL Project Authors. All Rights Reserved.
4f22f405
RS
3 *
4 * Licensed under the OpenSSL license (the "License"). You may not use
5 * this file except in compliance with the License. You can obtain a copy
6 * in the file LICENSE in the source distribution or at
7 * https://www.openssl.org/source/license.html
04daec86 8 */
4f22f405 9
3e00b4c9 10/* Copyright 2011 Google Inc.
04daec86 11 *
3e00b4c9 12 * Licensed under the Apache License, Version 2.0 (the "License");
04daec86 13 *
3e00b4c9
BM
14 * you may not use this file except in compliance with the License.
15 * You may obtain a copy of the License at
04daec86 16 *
3e00b4c9 17 * http://www.apache.org/licenses/LICENSE-2.0
04daec86 18 *
3e00b4c9
BM
19 * Unless required by applicable law or agreed to in writing, software
20 * distributed under the License is distributed on an "AS IS" BASIS,
21 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
22 * See the License for the specific language governing permissions and
23 * limitations under the License.
04daec86
BM
24 */
25
26/*
27 * A 64-bit implementation of the NIST P-224 elliptic curve point multiplication
28 *
29 * Inspired by Daniel J. Bernstein's public domain nistp224 implementation
30 * and Adam Langley's public domain 64-bit C implementation of curve25519
31 */
e0d6132b
BM
32
33#include <openssl/opensslconf.h>
effaf4de
RS
34#ifdef OPENSSL_NO_EC_NISTP_64_GCC_128
35NON_EMPTY_TRANSLATION_UNIT
36#else
e0d6132b 37
0f113f3e
MC
38# include <stdint.h>
39# include <string.h>
40# include <openssl/err.h>
41# include "ec_lcl.h"
04daec86 42
6afed267 43# if defined(__SIZEOF_INT128__) && __SIZEOF_INT128__==16
396cb565 44 /* even with gcc, the typedef won't work for 32-bit platforms */
0f113f3e
MC
45typedef __uint128_t uint128_t; /* nonstandard; implemented by gcc on 64-bit
46 * platforms */
47# else
8cc1dc36 48# error "Your compiler doesn't appear to support 128-bit integer types"
0f113f3e 49# endif
04daec86
BM
50
51typedef uint8_t u8;
3e00b4c9 52typedef uint64_t u64;
04daec86 53
04daec86 54/******************************************************************************/
1d97c843
TH
55/*-
56 * INTERNAL REPRESENTATION OF FIELD ELEMENTS
04daec86
BM
57 *
58 * Field elements are represented as a_0 + 2^56*a_1 + 2^112*a_2 + 2^168*a_3
3e00b4c9
BM
59 * using 64-bit coefficients called 'limbs',
60 * and sometimes (for multiplication results) as
04daec86 61 * b_0 + 2^56*b_1 + 2^112*b_2 + 2^168*b_3 + 2^224*b_4 + 2^280*b_5 + 2^336*b_6
3e00b4c9
BM
62 * using 128-bit coefficients called 'widelimbs'.
63 * A 4-limb representation is an 'felem';
64 * a 7-widelimb representation is a 'widefelem'.
65 * Even within felems, bits of adjacent limbs overlap, and we don't always
66 * reduce the representations: we ensure that inputs to each felem
04daec86
BM
67 * multiplication satisfy a_i < 2^60, so outputs satisfy b_i < 4*2^60*2^60,
68 * and fit into a 128-bit word without overflow. The coefficients are then
3e00b4c9
BM
69 * again partially reduced to obtain an felem satisfying a_i < 2^57.
70 * We only reduce to the unique minimal representation at the end of the
71 * computation.
04daec86
BM
72 */
73
3e00b4c9
BM
74typedef uint64_t limb;
75typedef uint128_t widelimb;
76
77typedef limb felem[4];
78typedef widelimb widefelem[7];
04daec86 79
0f113f3e 80/*
46f4e1be 81 * Field element represented as a byte array. 28*8 = 224 bits is also the
0f113f3e
MC
82 * group order size for the elliptic curve, and we also use this type for
83 * scalars for point multiplication.
84 */
396cb565
BM
85typedef u8 felem_bytearray[28];
86
87static const felem_bytearray nistp224_curve_params[5] = {
0f113f3e
MC
88 {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, /* p */
89 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00,
90 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01},
91 {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, /* a */
92 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFE, 0xFF, 0xFF, 0xFF, 0xFF,
93 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFE},
94 {0xB4, 0x05, 0x0A, 0x85, 0x0C, 0x04, 0xB3, 0xAB, 0xF5, 0x41, /* b */
95 0x32, 0x56, 0x50, 0x44, 0xB0, 0xB7, 0xD7, 0xBF, 0xD8, 0xBA,
96 0x27, 0x0B, 0x39, 0x43, 0x23, 0x55, 0xFF, 0xB4},
97 {0xB7, 0x0E, 0x0C, 0xBD, 0x6B, 0xB4, 0xBF, 0x7F, 0x32, 0x13, /* x */
98 0x90, 0xB9, 0x4A, 0x03, 0xC1, 0xD3, 0x56, 0xC2, 0x11, 0x22,
99 0x34, 0x32, 0x80, 0xD6, 0x11, 0x5C, 0x1D, 0x21},
100 {0xbd, 0x37, 0x63, 0x88, 0xb5, 0xf7, 0x23, 0xfb, 0x4c, 0x22, /* y */
101 0xdf, 0xe6, 0xcd, 0x43, 0x75, 0xa0, 0x5a, 0x07, 0x47, 0x64,
102 0x44, 0xd5, 0x81, 0x99, 0x85, 0x00, 0x7e, 0x34}
396cb565 103};
04daec86 104
1d97c843
TH
105/*-
106 * Precomputed multiples of the standard generator
3e00b4c9
BM
107 * Points are given in coordinates (X, Y, Z) where Z normally is 1
108 * (0 for the point at infinity).
109 * For each field element, slice a_0 is word 0, etc.
110 *
111 * The table has 2 * 16 elements, starting with the following:
112 * index | bits | point
113 * ------+---------+------------------------------
114 * 0 | 0 0 0 0 | 0G
115 * 1 | 0 0 0 1 | 1G
116 * 2 | 0 0 1 0 | 2^56G
117 * 3 | 0 0 1 1 | (2^56 + 1)G
118 * 4 | 0 1 0 0 | 2^112G
119 * 5 | 0 1 0 1 | (2^112 + 1)G
120 * 6 | 0 1 1 0 | (2^112 + 2^56)G
121 * 7 | 0 1 1 1 | (2^112 + 2^56 + 1)G
122 * 8 | 1 0 0 0 | 2^168G
123 * 9 | 1 0 0 1 | (2^168 + 1)G
124 * 10 | 1 0 1 0 | (2^168 + 2^56)G
125 * 11 | 1 0 1 1 | (2^168 + 2^56 + 1)G
126 * 12 | 1 1 0 0 | (2^168 + 2^112)G
127 * 13 | 1 1 0 1 | (2^168 + 2^112 + 1)G
128 * 14 | 1 1 1 0 | (2^168 + 2^112 + 2^56)G
129 * 15 | 1 1 1 1 | (2^168 + 2^112 + 2^56 + 1)G
130 * followed by a copy of this with each element multiplied by 2^28.
131 *
132 * The reason for this is so that we can clock bits into four different
133 * locations when doing simple scalar multiplies against the base point,
134 * and then another four locations using the second 16 elements.
135 */
4eb504ae
AP
136static const felem gmul[2][16][3] = {
137{{{0, 0, 0, 0},
138 {0, 0, 0, 0},
139 {0, 0, 0, 0}},
140 {{0x3280d6115c1d21, 0xc1d356c2112234, 0x7f321390b94a03, 0xb70e0cbd6bb4bf},
141 {0xd5819985007e34, 0x75a05a07476444, 0xfb4c22dfe6cd43, 0xbd376388b5f723},
142 {1, 0, 0, 0}},
143 {{0xfd9675666ebbe9, 0xbca7664d40ce5e, 0x2242df8d8a2a43, 0x1f49bbb0f99bc5},
144 {0x29e0b892dc9c43, 0xece8608436e662, 0xdc858f185310d0, 0x9812dd4eb8d321},
145 {1, 0, 0, 0}},
146 {{0x6d3e678d5d8eb8, 0x559eed1cb362f1, 0x16e9a3bbce8a3f, 0xeedcccd8c2a748},
147 {0xf19f90ed50266d, 0xabf2b4bf65f9df, 0x313865468fafec, 0x5cb379ba910a17},
148 {1, 0, 0, 0}},
149 {{0x0641966cab26e3, 0x91fb2991fab0a0, 0xefec27a4e13a0b, 0x0499aa8a5f8ebe},
150 {0x7510407766af5d, 0x84d929610d5450, 0x81d77aae82f706, 0x6916f6d4338c5b},
151 {1, 0, 0, 0}},
152 {{0xea95ac3b1f15c6, 0x086000905e82d4, 0xdd323ae4d1c8b1, 0x932b56be7685a3},
153 {0x9ef93dea25dbbf, 0x41665960f390f0, 0xfdec76dbe2a8a7, 0x523e80f019062a},
154 {1, 0, 0, 0}},
155 {{0x822fdd26732c73, 0xa01c83531b5d0f, 0x363f37347c1ba4, 0xc391b45c84725c},
156 {0xbbd5e1b2d6ad24, 0xddfbcde19dfaec, 0xc393da7e222a7f, 0x1efb7890ede244},
157 {1, 0, 0, 0}},
158 {{0x4c9e90ca217da1, 0xd11beca79159bb, 0xff8d33c2c98b7c, 0x2610b39409f849},
159 {0x44d1352ac64da0, 0xcdbb7b2c46b4fb, 0x966c079b753c89, 0xfe67e4e820b112},
160 {1, 0, 0, 0}},
161 {{0xe28cae2df5312d, 0xc71b61d16f5c6e, 0x79b7619a3e7c4c, 0x05c73240899b47},
162 {0x9f7f6382c73e3a, 0x18615165c56bda, 0x641fab2116fd56, 0x72855882b08394},
163 {1, 0, 0, 0}},
164 {{0x0469182f161c09, 0x74a98ca8d00fb5, 0xb89da93489a3e0, 0x41c98768fb0c1d},
165 {0xe5ea05fb32da81, 0x3dce9ffbca6855, 0x1cfe2d3fbf59e6, 0x0e5e03408738a7},
166 {1, 0, 0, 0}},
167 {{0xdab22b2333e87f, 0x4430137a5dd2f6, 0xe03ab9f738beb8, 0xcb0c5d0dc34f24},
168 {0x764a7df0c8fda5, 0x185ba5c3fa2044, 0x9281d688bcbe50, 0xc40331df893881},
169 {1, 0, 0, 0}},
170 {{0xb89530796f0f60, 0xade92bd26909a3, 0x1a0c83fb4884da, 0x1765bf22a5a984},
171 {0x772a9ee75db09e, 0x23bc6c67cec16f, 0x4c1edba8b14e2f, 0xe2a215d9611369},
172 {1, 0, 0, 0}},
173 {{0x571e509fb5efb3, 0xade88696410552, 0xc8ae85fada74fe, 0x6c7e4be83bbde3},
174 {0xff9f51160f4652, 0xb47ce2495a6539, 0xa2946c53b582f4, 0x286d2db3ee9a60},
175 {1, 0, 0, 0}},
176 {{0x40bbd5081a44af, 0x0995183b13926c, 0xbcefba6f47f6d0, 0x215619e9cc0057},
177 {0x8bc94d3b0df45e, 0xf11c54a3694f6f, 0x8631b93cdfe8b5, 0xe7e3f4b0982db9},
178 {1, 0, 0, 0}},
179 {{0xb17048ab3e1c7b, 0xac38f36ff8a1d8, 0x1c29819435d2c6, 0xc813132f4c07e9},
180 {0x2891425503b11f, 0x08781030579fea, 0xf5426ba5cc9674, 0x1e28ebf18562bc},
181 {1, 0, 0, 0}},
182 {{0x9f31997cc864eb, 0x06cd91d28b5e4c, 0xff17036691a973, 0xf1aef351497c58},
183 {0xdd1f2d600564ff, 0xdead073b1402db, 0x74a684435bd693, 0xeea7471f962558},
184 {1, 0, 0, 0}}},
0f113f3e
MC
185{{{0, 0, 0, 0},
186 {0, 0, 0, 0},
187 {0, 0, 0, 0}},
188 {{0x9665266dddf554, 0x9613d78b60ef2d, 0xce27a34cdba417, 0xd35ab74d6afc31},
189 {0x85ccdd22deb15e, 0x2137e5783a6aab, 0xa141cffd8c93c6, 0x355a1830e90f2d},
190 {1, 0, 0, 0}},
191 {{0x1a494eadaade65, 0xd6da4da77fe53c, 0xe7992996abec86, 0x65c3553c6090e3},
192 {0xfa610b1fb09346, 0xf1c6540b8a4aaf, 0xc51a13ccd3cbab, 0x02995b1b18c28a},
193 {1, 0, 0, 0}},
194 {{0x7874568e7295ef, 0x86b419fbe38d04, 0xdc0690a7550d9a, 0xd3966a44beac33},
195 {0x2b7280ec29132f, 0xbeaa3b6a032df3, 0xdc7dd88ae41200, 0xd25e2513e3a100},
196 {1, 0, 0, 0}},
197 {{0x924857eb2efafd, 0xac2bce41223190, 0x8edaa1445553fc, 0x825800fd3562d5},
198 {0x8d79148ea96621, 0x23a01c3dd9ed8d, 0xaf8b219f9416b5, 0xd8db0cc277daea},
199 {1, 0, 0, 0}},
200 {{0x76a9c3b1a700f0, 0xe9acd29bc7e691, 0x69212d1a6b0327, 0x6322e97fe154be},
201 {0x469fc5465d62aa, 0x8d41ed18883b05, 0x1f8eae66c52b88, 0xe4fcbe9325be51},
202 {1, 0, 0, 0}},
203 {{0x825fdf583cac16, 0x020b857c7b023a, 0x683c17744b0165, 0x14ffd0a2daf2f1},
204 {0x323b36184218f9, 0x4944ec4e3b47d4, 0xc15b3080841acf, 0x0bced4b01a28bb},
205 {1, 0, 0, 0}},
206 {{0x92ac22230df5c4, 0x52f33b4063eda8, 0xcb3f19870c0c93, 0x40064f2ba65233},
207 {0xfe16f0924f8992, 0x012da25af5b517, 0x1a57bb24f723a6, 0x06f8bc76760def},
208 {1, 0, 0, 0}},
209 {{0x4a7084f7817cb9, 0xbcab0738ee9a78, 0x3ec11e11d9c326, 0xdc0fe90e0f1aae},
210 {0xcf639ea5f98390, 0x5c350aa22ffb74, 0x9afae98a4047b7, 0x956ec2d617fc45},
211 {1, 0, 0, 0}},
212 {{0x4306d648c1be6a, 0x9247cd8bc9a462, 0xf5595e377d2f2e, 0xbd1c3caff1a52e},
213 {0x045e14472409d0, 0x29f3e17078f773, 0x745a602b2d4f7d, 0x191837685cdfbb},
214 {1, 0, 0, 0}},
215 {{0x5b6ee254a8cb79, 0x4953433f5e7026, 0xe21faeb1d1def4, 0xc4c225785c09de},
216 {0x307ce7bba1e518, 0x31b125b1036db8, 0x47e91868839e8f, 0xc765866e33b9f3},
217 {1, 0, 0, 0}},
218 {{0x3bfece24f96906, 0x4794da641e5093, 0xde5df64f95db26, 0x297ecd89714b05},
219 {0x701bd3ebb2c3aa, 0x7073b4f53cb1d5, 0x13c5665658af16, 0x9895089d66fe58},
220 {1, 0, 0, 0}},
221 {{0x0fef05f78c4790, 0x2d773633b05d2e, 0x94229c3a951c94, 0xbbbd70df4911bb},
222 {0xb2c6963d2c1168, 0x105f47a72b0d73, 0x9fdf6111614080, 0x7b7e94b39e67b0},
223 {1, 0, 0, 0}},
224 {{0xad1a7d6efbe2b3, 0xf012482c0da69d, 0x6b3bdf12438345, 0x40d7558d7aa4d9},
225 {0x8a09fffb5c6d3d, 0x9a356e5d9ffd38, 0x5973f15f4f9b1c, 0xdcd5f59f63c3ea},
226 {1, 0, 0, 0}},
227 {{0xacf39f4c5ca7ab, 0x4c8071cc5fd737, 0xc64e3602cd1184, 0x0acd4644c9abba},
228 {0x6c011a36d8bf6e, 0xfecd87ba24e32a, 0x19f6f56574fad8, 0x050b204ced9405},
229 {1, 0, 0, 0}},
230 {{0xed4f1cae7d9a96, 0x5ceef7ad94c40a, 0x778e4a3bf3ef9b, 0x7405783dc3b55e},
231 {0x32477c61b6e8c6, 0xb46a97570f018b, 0x91176d0a7e95d1, 0x3df90fbc4c7d0e},
232 {1, 0, 0, 0}}}
233};
04daec86
BM
234
235/* Precomputation for the group generator. */
3aef36ff 236struct nistp224_pre_comp_st {
0f113f3e 237 felem g_pre_comp[2][16][3];
2f545ae4 238 CRYPTO_REF_COUNT references;
9b398ef2 239 CRYPTO_RWLOCK *lock;
3aef36ff 240};
04daec86
BM
241
242const EC_METHOD *EC_GFp_nistp224_method(void)
0f113f3e
MC
243{
244 static const EC_METHOD ret = {
245 EC_FLAGS_DEFAULT_OCT,
246 NID_X9_62_prime_field,
247 ec_GFp_nistp224_group_init,
248 ec_GFp_simple_group_finish,
249 ec_GFp_simple_group_clear_finish,
250 ec_GFp_nist_group_copy,
251 ec_GFp_nistp224_group_set_curve,
252 ec_GFp_simple_group_get_curve,
253 ec_GFp_simple_group_get_degree,
9ff9bccc 254 ec_group_simple_order_bits,
0f113f3e
MC
255 ec_GFp_simple_group_check_discriminant,
256 ec_GFp_simple_point_init,
257 ec_GFp_simple_point_finish,
258 ec_GFp_simple_point_clear_finish,
259 ec_GFp_simple_point_copy,
260 ec_GFp_simple_point_set_to_infinity,
261 ec_GFp_simple_set_Jprojective_coordinates_GFp,
262 ec_GFp_simple_get_Jprojective_coordinates_GFp,
263 ec_GFp_simple_point_set_affine_coordinates,
264 ec_GFp_nistp224_point_get_affine_coordinates,
265 0 /* point_set_compressed_coordinates */ ,
266 0 /* point2oct */ ,
267 0 /* oct2point */ ,
268 ec_GFp_simple_add,
269 ec_GFp_simple_dbl,
270 ec_GFp_simple_invert,
271 ec_GFp_simple_is_at_infinity,
272 ec_GFp_simple_is_on_curve,
273 ec_GFp_simple_cmp,
274 ec_GFp_simple_make_affine,
275 ec_GFp_simple_points_make_affine,
276 ec_GFp_nistp224_points_mul,
277 ec_GFp_nistp224_precompute_mult,
278 ec_GFp_nistp224_have_precompute_mult,
279 ec_GFp_nist_field_mul,
280 ec_GFp_nist_field_sqr,
281 0 /* field_div */ ,
282 0 /* field_encode */ ,
283 0 /* field_decode */ ,
9ff9bccc
DSH
284 0, /* field_set_to_one */
285 ec_key_simple_priv2oct,
286 ec_key_simple_oct2priv,
287 0, /* set private */
288 ec_key_simple_generate_key,
289 ec_key_simple_check_key,
290 ec_key_simple_generate_public_key,
291 0, /* keycopy */
292 0, /* keyfinish */
f667820c
SH
293 ecdh_simple_compute_key,
294 0, /* field_inverse_mod_ord */
295 0 /* blind_coordinates */
0f113f3e
MC
296 };
297
298 return &ret;
299}
300
301/*
302 * Helper functions to convert field elements to/from internal representation
303 */
3e00b4c9 304static void bin28_to_felem(felem out, const u8 in[28])
0f113f3e
MC
305{
306 out[0] = *((const uint64_t *)(in)) & 0x00ffffffffffffff;
307 out[1] = (*((const uint64_t *)(in + 7))) & 0x00ffffffffffffff;
308 out[2] = (*((const uint64_t *)(in + 14))) & 0x00ffffffffffffff;
9fbbdd73 309 out[3] = (*((const uint64_t *)(in+20))) >> 8;
0f113f3e 310}
04daec86 311
3e00b4c9 312static void felem_to_bin28(u8 out[28], const felem in)
0f113f3e
MC
313{
314 unsigned i;
315 for (i = 0; i < 7; ++i) {
316 out[i] = in[0] >> (8 * i);
317 out[i + 7] = in[1] >> (8 * i);
318 out[i + 14] = in[2] >> (8 * i);
319 out[i + 21] = in[3] >> (8 * i);
320 }
321}
04daec86
BM
322
323/* To preserve endianness when using BN_bn2bin and BN_bin2bn */
324static void flip_endian(u8 *out, const u8 *in, unsigned len)
0f113f3e
MC
325{
326 unsigned i;
327 for (i = 0; i < len; ++i)
328 out[i] = in[len - 1 - i];
329}
04daec86
BM
330
331/* From OpenSSL BIGNUM to internal representation */
3e00b4c9 332static int BN_to_felem(felem out, const BIGNUM *bn)
0f113f3e
MC
333{
334 felem_bytearray b_in;
335 felem_bytearray b_out;
336 unsigned num_bytes;
337
338 /* BN_bn2bin eats leading zeroes */
16f8d4eb 339 memset(b_out, 0, sizeof(b_out));
0f113f3e 340 num_bytes = BN_num_bytes(bn);
cbe29648 341 if (num_bytes > sizeof(b_out)) {
0f113f3e
MC
342 ECerr(EC_F_BN_TO_FELEM, EC_R_BIGNUM_OUT_OF_RANGE);
343 return 0;
344 }
345 if (BN_is_negative(bn)) {
346 ECerr(EC_F_BN_TO_FELEM, EC_R_BIGNUM_OUT_OF_RANGE);
347 return 0;
348 }
349 num_bytes = BN_bn2bin(bn, b_in);
350 flip_endian(b_out, b_in, num_bytes);
351 bin28_to_felem(out, b_out);
352 return 1;
353}
04daec86
BM
354
355/* From internal representation to OpenSSL BIGNUM */
3e00b4c9 356static BIGNUM *felem_to_BN(BIGNUM *out, const felem in)
0f113f3e
MC
357{
358 felem_bytearray b_in, b_out;
359 felem_to_bin28(b_in, in);
cbe29648
RS
360 flip_endian(b_out, b_in, sizeof(b_out));
361 return BN_bin2bn(b_out, sizeof(b_out), out);
0f113f3e 362}
04daec86
BM
363
364/******************************************************************************/
3a83462d 365/*-
0f113f3e 366 * FIELD OPERATIONS
04daec86
BM
367 *
368 * Field operations, using the internal representation of field elements.
369 * NB! These operations are specific to our point multiplication and cannot be
370 * expected to be correct in general - e.g., multiplication with a large scalar
371 * will cause an overflow.
372 *
373 */
374
3e00b4c9 375static void felem_one(felem out)
0f113f3e
MC
376{
377 out[0] = 1;
378 out[1] = 0;
379 out[2] = 0;
380 out[3] = 0;
381}
3e00b4c9
BM
382
383static void felem_assign(felem out, const felem in)
0f113f3e
MC
384{
385 out[0] = in[0];
386 out[1] = in[1];
387 out[2] = in[2];
388 out[3] = in[3];
389}
3e00b4c9 390
04daec86 391/* Sum two field elements: out += in */
3e00b4c9 392static void felem_sum(felem out, const felem in)
0f113f3e
MC
393{
394 out[0] += in[0];
395 out[1] += in[1];
396 out[2] += in[2];
397 out[3] += in[3];
398}
04daec86
BM
399
400/* Subtract field elements: out -= in */
401/* Assumes in[i] < 2^57 */
3e00b4c9 402static void felem_diff(felem out, const felem in)
0f113f3e
MC
403{
404 static const limb two58p2 = (((limb) 1) << 58) + (((limb) 1) << 2);
405 static const limb two58m2 = (((limb) 1) << 58) - (((limb) 1) << 2);
406 static const limb two58m42m2 = (((limb) 1) << 58) -
407 (((limb) 1) << 42) - (((limb) 1) << 2);
408
409 /* Add 0 mod 2^224-2^96+1 to ensure out > in */
410 out[0] += two58p2;
411 out[1] += two58m42m2;
412 out[2] += two58m2;
413 out[3] += two58m2;
414
415 out[0] -= in[0];
416 out[1] -= in[1];
417 out[2] -= in[2];
418 out[3] -= in[3];
419}
04daec86 420
3e00b4c9 421/* Subtract in unreduced 128-bit mode: out -= in */
04daec86 422/* Assumes in[i] < 2^119 */
3e00b4c9 423static void widefelem_diff(widefelem out, const widefelem in)
0f113f3e
MC
424{
425 static const widelimb two120 = ((widelimb) 1) << 120;
426 static const widelimb two120m64 = (((widelimb) 1) << 120) -
427 (((widelimb) 1) << 64);
428 static const widelimb two120m104m64 = (((widelimb) 1) << 120) -
429 (((widelimb) 1) << 104) - (((widelimb) 1) << 64);
430
431 /* Add 0 mod 2^224-2^96+1 to ensure out > in */
432 out[0] += two120;
433 out[1] += two120m64;
434 out[2] += two120m64;
435 out[3] += two120;
436 out[4] += two120m104m64;
437 out[5] += two120m64;
438 out[6] += two120m64;
439
440 out[0] -= in[0];
441 out[1] -= in[1];
442 out[2] -= in[2];
443 out[3] -= in[3];
444 out[4] -= in[4];
445 out[5] -= in[5];
446 out[6] -= in[6];
447}
04daec86
BM
448
449/* Subtract in mixed mode: out128 -= in64 */
450/* in[i] < 2^63 */
3e00b4c9 451static void felem_diff_128_64(widefelem out, const felem in)
0f113f3e
MC
452{
453 static const widelimb two64p8 = (((widelimb) 1) << 64) +
454 (((widelimb) 1) << 8);
455 static const widelimb two64m8 = (((widelimb) 1) << 64) -
456 (((widelimb) 1) << 8);
457 static const widelimb two64m48m8 = (((widelimb) 1) << 64) -
458 (((widelimb) 1) << 48) - (((widelimb) 1) << 8);
459
460 /* Add 0 mod 2^224-2^96+1 to ensure out > in */
461 out[0] += two64p8;
462 out[1] += two64m48m8;
463 out[2] += two64m8;
464 out[3] += two64m8;
465
466 out[0] -= in[0];
467 out[1] -= in[1];
468 out[2] -= in[2];
469 out[3] -= in[3];
470}
471
472/*
473 * Multiply a field element by a scalar: out = out * scalar The scalars we
474 * actually use are small, so results fit without overflow
475 */
3e00b4c9 476static void felem_scalar(felem out, const limb scalar)
0f113f3e
MC
477{
478 out[0] *= scalar;
479 out[1] *= scalar;
480 out[2] *= scalar;
481 out[3] *= scalar;
482}
483
484/*
485 * Multiply an unreduced field element by a scalar: out = out * scalar The
486 * scalars we actually use are small, so results fit without overflow
487 */
3e00b4c9 488static void widefelem_scalar(widefelem out, const widelimb scalar)
0f113f3e
MC
489{
490 out[0] *= scalar;
491 out[1] *= scalar;
492 out[2] *= scalar;
493 out[3] *= scalar;
494 out[4] *= scalar;
495 out[5] *= scalar;
496 out[6] *= scalar;
497}
04daec86
BM
498
499/* Square a field element: out = in^2 */
3e00b4c9 500static void felem_square(widefelem out, const felem in)
0f113f3e
MC
501{
502 limb tmp0, tmp1, tmp2;
503 tmp0 = 2 * in[0];
504 tmp1 = 2 * in[1];
505 tmp2 = 2 * in[2];
506 out[0] = ((widelimb) in[0]) * in[0];
507 out[1] = ((widelimb) in[0]) * tmp1;
508 out[2] = ((widelimb) in[0]) * tmp2 + ((widelimb) in[1]) * in[1];
509 out[3] = ((widelimb) in[3]) * tmp0 + ((widelimb) in[1]) * tmp2;
510 out[4] = ((widelimb) in[3]) * tmp1 + ((widelimb) in[2]) * in[2];
511 out[5] = ((widelimb) in[3]) * tmp2;
512 out[6] = ((widelimb) in[3]) * in[3];
513}
04daec86
BM
514
515/* Multiply two field elements: out = in1 * in2 */
3e00b4c9 516static void felem_mul(widefelem out, const felem in1, const felem in2)
0f113f3e
MC
517{
518 out[0] = ((widelimb) in1[0]) * in2[0];
519 out[1] = ((widelimb) in1[0]) * in2[1] + ((widelimb) in1[1]) * in2[0];
520 out[2] = ((widelimb) in1[0]) * in2[2] + ((widelimb) in1[1]) * in2[1] +
4eb504ae 521 ((widelimb) in1[2]) * in2[0];
0f113f3e 522 out[3] = ((widelimb) in1[0]) * in2[3] + ((widelimb) in1[1]) * in2[2] +
4eb504ae 523 ((widelimb) in1[2]) * in2[1] + ((widelimb) in1[3]) * in2[0];
0f113f3e 524 out[4] = ((widelimb) in1[1]) * in2[3] + ((widelimb) in1[2]) * in2[2] +
4eb504ae 525 ((widelimb) in1[3]) * in2[1];
0f113f3e
MC
526 out[5] = ((widelimb) in1[2]) * in2[3] + ((widelimb) in1[3]) * in2[2];
527 out[6] = ((widelimb) in1[3]) * in2[3];
528}
04daec86 529
3a83462d
MC
530/*-
531 * Reduce seven 128-bit coefficients to four 64-bit coefficients.
3e00b4c9
BM
532 * Requires in[i] < 2^126,
533 * ensures out[0] < 2^56, out[1] < 2^56, out[2] < 2^56, out[3] <= 2^56 + 2^16 */
534static void felem_reduce(felem out, const widefelem in)
0f113f3e
MC
535{
536 static const widelimb two127p15 = (((widelimb) 1) << 127) +
537 (((widelimb) 1) << 15);
538 static const widelimb two127m71 = (((widelimb) 1) << 127) -
539 (((widelimb) 1) << 71);
540 static const widelimb two127m71m55 = (((widelimb) 1) << 127) -
541 (((widelimb) 1) << 71) - (((widelimb) 1) << 55);
542 widelimb output[5];
543
544 /* Add 0 mod 2^224-2^96+1 to ensure all differences are positive */
545 output[0] = in[0] + two127p15;
546 output[1] = in[1] + two127m71m55;
547 output[2] = in[2] + two127m71;
548 output[3] = in[3];
549 output[4] = in[4];
550
551 /* Eliminate in[4], in[5], in[6] */
552 output[4] += in[6] >> 16;
553 output[3] += (in[6] & 0xffff) << 40;
554 output[2] -= in[6];
555
556 output[3] += in[5] >> 16;
557 output[2] += (in[5] & 0xffff) << 40;
558 output[1] -= in[5];
559
560 output[2] += output[4] >> 16;
561 output[1] += (output[4] & 0xffff) << 40;
562 output[0] -= output[4];
563
564 /* Carry 2 -> 3 -> 4 */
565 output[3] += output[2] >> 56;
566 output[2] &= 0x00ffffffffffffff;
567
568 output[4] = output[3] >> 56;
569 output[3] &= 0x00ffffffffffffff;
570
571 /* Now output[2] < 2^56, output[3] < 2^56, output[4] < 2^72 */
572
573 /* Eliminate output[4] */
574 output[2] += output[4] >> 16;
575 /* output[2] < 2^56 + 2^56 = 2^57 */
576 output[1] += (output[4] & 0xffff) << 40;
577 output[0] -= output[4];
578
579 /* Carry 0 -> 1 -> 2 -> 3 */
580 output[1] += output[0] >> 56;
581 out[0] = output[0] & 0x00ffffffffffffff;
582
583 output[2] += output[1] >> 56;
584 /* output[2] < 2^57 + 2^72 */
585 out[1] = output[1] & 0x00ffffffffffffff;
586 output[3] += output[2] >> 56;
587 /* output[3] <= 2^56 + 2^16 */
588 out[2] = output[2] & 0x00ffffffffffffff;
589
50e735f9
MC
590 /*-
591 * out[0] < 2^56, out[1] < 2^56, out[2] < 2^56,
592 * out[3] <= 2^56 + 2^16 (due to final carry),
593 * so out < 2*p
594 */
0f113f3e
MC
595 out[3] = output[3];
596}
04daec86 597
3e00b4c9 598static void felem_square_reduce(felem out, const felem in)
0f113f3e
MC
599{
600 widefelem tmp;
601 felem_square(tmp, in);
602 felem_reduce(out, tmp);
603}
04daec86 604
3e00b4c9 605static void felem_mul_reduce(felem out, const felem in1, const felem in2)
0f113f3e
MC
606{
607 widefelem tmp;
608 felem_mul(tmp, in1, in2);
609 felem_reduce(out, tmp);
610}
611
612/*
613 * Reduce to unique minimal representation. Requires 0 <= in < 2*p (always
614 * call felem_reduce first)
615 */
3e00b4c9 616static void felem_contract(felem out, const felem in)
0f113f3e
MC
617{
618 static const int64_t two56 = ((limb) 1) << 56;
619 /* 0 <= in < 2*p, p = 2^224 - 2^96 + 1 */
620 /* if in > p , reduce in = in - 2^224 + 2^96 - 1 */
621 int64_t tmp[4], a;
622 tmp[0] = in[0];
623 tmp[1] = in[1];
624 tmp[2] = in[2];
625 tmp[3] = in[3];
626 /* Case 1: a = 1 iff in >= 2^224 */
627 a = (in[3] >> 56);
628 tmp[0] -= a;
629 tmp[1] += a << 40;
630 tmp[3] &= 0x00ffffffffffffff;
631 /*
632 * Case 2: a = 0 iff p <= in < 2^224, i.e., the high 128 bits are all 1
633 * and the lower part is non-zero
634 */
635 a = ((in[3] & in[2] & (in[1] | 0x000000ffffffffff)) + 1) |
636 (((int64_t) (in[0] + (in[1] & 0x000000ffffffffff)) - 1) >> 63);
637 a &= 0x00ffffffffffffff;
638 /* turn a into an all-one mask (if a = 0) or an all-zero mask */
639 a = (a - 1) >> 63;
640 /* subtract 2^224 - 2^96 + 1 if a is all-one */
641 tmp[3] &= a ^ 0xffffffffffffffff;
642 tmp[2] &= a ^ 0xffffffffffffffff;
643 tmp[1] &= (a ^ 0xffffffffffffffff) | 0x000000ffffffffff;
644 tmp[0] -= 1 & a;
645
646 /*
647 * eliminate negative coefficients: if tmp[0] is negative, tmp[1] must be
648 * non-zero, so we only need one step
649 */
650 a = tmp[0] >> 63;
651 tmp[0] += two56 & a;
652 tmp[1] -= 1 & a;
653
654 /* carry 1 -> 2 -> 3 */
655 tmp[2] += tmp[1] >> 56;
656 tmp[1] &= 0x00ffffffffffffff;
657
658 tmp[3] += tmp[2] >> 56;
659 tmp[2] &= 0x00ffffffffffffff;
660
661 /* Now 0 <= out < p */
662 out[0] = tmp[0];
663 out[1] = tmp[1];
664 out[2] = tmp[2];
665 out[3] = tmp[3];
666}
667
dc55e4f7
DB
668/*
669 * Get negative value: out = -in
670 * Requires in[i] < 2^63,
671 * ensures out[0] < 2^56, out[1] < 2^56, out[2] < 2^56, out[3] <= 2^56 + 2^16
672 */
673static void felem_neg(felem out, const felem in)
674{
675 widefelem tmp = {0};
676 felem_diff_128_64(tmp, in);
677 felem_reduce(out, tmp);
678}
679
0f113f3e
MC
680/*
681 * Zero-check: returns 1 if input is 0, and 0 otherwise. We know that field
682 * elements are reduced to in < 2^225, so we only need to check three cases:
683 * 0, 2^224 - 2^96 + 1, and 2^225 - 2^97 + 2
684 */
3e00b4c9 685static limb felem_is_zero(const felem in)
0f113f3e
MC
686{
687 limb zero, two224m96p1, two225m97p2;
688
689 zero = in[0] | in[1] | in[2] | in[3];
690 zero = (((int64_t) (zero) - 1) >> 63) & 1;
691 two224m96p1 = (in[0] ^ 1) | (in[1] ^ 0x00ffff0000000000)
692 | (in[2] ^ 0x00ffffffffffffff) | (in[3] ^ 0x00ffffffffffffff);
693 two224m96p1 = (((int64_t) (two224m96p1) - 1) >> 63) & 1;
694 two225m97p2 = (in[0] ^ 2) | (in[1] ^ 0x00fffe0000000000)
695 | (in[2] ^ 0x00ffffffffffffff) | (in[3] ^ 0x01ffffffffffffff);
696 two225m97p2 = (((int64_t) (two225m97p2) - 1) >> 63) & 1;
697 return (zero | two224m96p1 | two225m97p2);
698}
04daec86 699
c55b786a 700static int felem_is_zero_int(const void *in)
0f113f3e
MC
701{
702 return (int)(felem_is_zero(in) & ((limb) 1));
703}
3e00b4c9 704
04daec86
BM
705/* Invert a field element */
706/* Computation chain copied from djb's code */
3e00b4c9 707static void felem_inv(felem out, const felem in)
0f113f3e
MC
708{
709 felem ftmp, ftmp2, ftmp3, ftmp4;
710 widefelem tmp;
711 unsigned i;
712
713 felem_square(tmp, in);
714 felem_reduce(ftmp, tmp); /* 2 */
715 felem_mul(tmp, in, ftmp);
716 felem_reduce(ftmp, tmp); /* 2^2 - 1 */
717 felem_square(tmp, ftmp);
718 felem_reduce(ftmp, tmp); /* 2^3 - 2 */
719 felem_mul(tmp, in, ftmp);
720 felem_reduce(ftmp, tmp); /* 2^3 - 1 */
721 felem_square(tmp, ftmp);
722 felem_reduce(ftmp2, tmp); /* 2^4 - 2 */
723 felem_square(tmp, ftmp2);
724 felem_reduce(ftmp2, tmp); /* 2^5 - 4 */
725 felem_square(tmp, ftmp2);
726 felem_reduce(ftmp2, tmp); /* 2^6 - 8 */
727 felem_mul(tmp, ftmp2, ftmp);
728 felem_reduce(ftmp, tmp); /* 2^6 - 1 */
729 felem_square(tmp, ftmp);
730 felem_reduce(ftmp2, tmp); /* 2^7 - 2 */
731 for (i = 0; i < 5; ++i) { /* 2^12 - 2^6 */
732 felem_square(tmp, ftmp2);
733 felem_reduce(ftmp2, tmp);
734 }
735 felem_mul(tmp, ftmp2, ftmp);
736 felem_reduce(ftmp2, tmp); /* 2^12 - 1 */
737 felem_square(tmp, ftmp2);
738 felem_reduce(ftmp3, tmp); /* 2^13 - 2 */
739 for (i = 0; i < 11; ++i) { /* 2^24 - 2^12 */
740 felem_square(tmp, ftmp3);
741 felem_reduce(ftmp3, tmp);
742 }
743 felem_mul(tmp, ftmp3, ftmp2);
744 felem_reduce(ftmp2, tmp); /* 2^24 - 1 */
745 felem_square(tmp, ftmp2);
746 felem_reduce(ftmp3, tmp); /* 2^25 - 2 */
747 for (i = 0; i < 23; ++i) { /* 2^48 - 2^24 */
748 felem_square(tmp, ftmp3);
749 felem_reduce(ftmp3, tmp);
750 }
751 felem_mul(tmp, ftmp3, ftmp2);
752 felem_reduce(ftmp3, tmp); /* 2^48 - 1 */
753 felem_square(tmp, ftmp3);
754 felem_reduce(ftmp4, tmp); /* 2^49 - 2 */
755 for (i = 0; i < 47; ++i) { /* 2^96 - 2^48 */
756 felem_square(tmp, ftmp4);
757 felem_reduce(ftmp4, tmp);
758 }
759 felem_mul(tmp, ftmp3, ftmp4);
760 felem_reduce(ftmp3, tmp); /* 2^96 - 1 */
761 felem_square(tmp, ftmp3);
762 felem_reduce(ftmp4, tmp); /* 2^97 - 2 */
763 for (i = 0; i < 23; ++i) { /* 2^120 - 2^24 */
764 felem_square(tmp, ftmp4);
765 felem_reduce(ftmp4, tmp);
766 }
767 felem_mul(tmp, ftmp2, ftmp4);
768 felem_reduce(ftmp2, tmp); /* 2^120 - 1 */
769 for (i = 0; i < 6; ++i) { /* 2^126 - 2^6 */
770 felem_square(tmp, ftmp2);
771 felem_reduce(ftmp2, tmp);
772 }
773 felem_mul(tmp, ftmp2, ftmp);
774 felem_reduce(ftmp, tmp); /* 2^126 - 1 */
775 felem_square(tmp, ftmp);
776 felem_reduce(ftmp, tmp); /* 2^127 - 2 */
777 felem_mul(tmp, ftmp, in);
778 felem_reduce(ftmp, tmp); /* 2^127 - 1 */
779 for (i = 0; i < 97; ++i) { /* 2^224 - 2^97 */
780 felem_square(tmp, ftmp);
781 felem_reduce(ftmp, tmp);
782 }
783 felem_mul(tmp, ftmp, ftmp3);
784 felem_reduce(out, tmp); /* 2^224 - 2^96 - 1 */
785}
786
787/*
788 * Copy in constant time: if icopy == 1, copy in to out, if icopy == 0, copy
789 * out to itself.
790 */
791static void copy_conditional(felem out, const felem in, limb icopy)
792{
793 unsigned i;
794 /*
795 * icopy is a (64-bit) 0 or 1, so copy is either all-zero or all-one
796 */
797 const limb copy = -icopy;
798 for (i = 0; i < 4; ++i) {
799 const limb tmp = copy & (in[i] ^ out[i]);
800 out[i] ^= tmp;
801 }
802}
04daec86 803
04daec86 804/******************************************************************************/
3a83462d 805/*-
0f113f3e 806 * ELLIPTIC CURVE POINT OPERATIONS
04daec86
BM
807 *
808 * Points are represented in Jacobian projective coordinates:
809 * (X, Y, Z) corresponds to the affine point (X/Z^2, Y/Z^3),
810 * or to the point at infinity if Z == 0.
811 *
812 */
813
1d97c843
TH
814/*-
815 * Double an elliptic curve point:
04daec86
BM
816 * (X', Y', Z') = 2 * (X, Y, Z), where
817 * X' = (3 * (X - Z^2) * (X + Z^2))^2 - 8 * X * Y^2
eb8e052c 818 * Y' = 3 * (X - Z^2) * (X + Z^2) * (4 * X * Y^2 - X') - 8 * Y^4
04daec86
BM
819 * Z' = (Y + Z)^2 - Y^2 - Z^2 = 2 * Y * Z
820 * Outputs can equal corresponding inputs, i.e., x_out == x_in is allowed,
0f113f3e 821 * while x_out == y_in is not (maybe this works, but it's not tested).
1d97c843 822 */
04daec86 823static void
3e00b4c9
BM
824point_double(felem x_out, felem y_out, felem z_out,
825 const felem x_in, const felem y_in, const felem z_in)
0f113f3e
MC
826{
827 widefelem tmp, tmp2;
828 felem delta, gamma, beta, alpha, ftmp, ftmp2;
829
830 felem_assign(ftmp, x_in);
831 felem_assign(ftmp2, x_in);
832
833 /* delta = z^2 */
834 felem_square(tmp, z_in);
835 felem_reduce(delta, tmp);
836
837 /* gamma = y^2 */
838 felem_square(tmp, y_in);
839 felem_reduce(gamma, tmp);
840
841 /* beta = x*gamma */
842 felem_mul(tmp, x_in, gamma);
843 felem_reduce(beta, tmp);
844
845 /* alpha = 3*(x-delta)*(x+delta) */
846 felem_diff(ftmp, delta);
847 /* ftmp[i] < 2^57 + 2^58 + 2 < 2^59 */
848 felem_sum(ftmp2, delta);
849 /* ftmp2[i] < 2^57 + 2^57 = 2^58 */
850 felem_scalar(ftmp2, 3);
851 /* ftmp2[i] < 3 * 2^58 < 2^60 */
852 felem_mul(tmp, ftmp, ftmp2);
853 /* tmp[i] < 2^60 * 2^59 * 4 = 2^121 */
854 felem_reduce(alpha, tmp);
855
856 /* x' = alpha^2 - 8*beta */
857 felem_square(tmp, alpha);
858 /* tmp[i] < 4 * 2^57 * 2^57 = 2^116 */
859 felem_assign(ftmp, beta);
860 felem_scalar(ftmp, 8);
861 /* ftmp[i] < 8 * 2^57 = 2^60 */
862 felem_diff_128_64(tmp, ftmp);
863 /* tmp[i] < 2^116 + 2^64 + 8 < 2^117 */
864 felem_reduce(x_out, tmp);
865
866 /* z' = (y + z)^2 - gamma - delta */
867 felem_sum(delta, gamma);
868 /* delta[i] < 2^57 + 2^57 = 2^58 */
869 felem_assign(ftmp, y_in);
870 felem_sum(ftmp, z_in);
871 /* ftmp[i] < 2^57 + 2^57 = 2^58 */
872 felem_square(tmp, ftmp);
873 /* tmp[i] < 4 * 2^58 * 2^58 = 2^118 */
874 felem_diff_128_64(tmp, delta);
875 /* tmp[i] < 2^118 + 2^64 + 8 < 2^119 */
876 felem_reduce(z_out, tmp);
877
878 /* y' = alpha*(4*beta - x') - 8*gamma^2 */
879 felem_scalar(beta, 4);
880 /* beta[i] < 4 * 2^57 = 2^59 */
881 felem_diff(beta, x_out);
882 /* beta[i] < 2^59 + 2^58 + 2 < 2^60 */
883 felem_mul(tmp, alpha, beta);
884 /* tmp[i] < 4 * 2^57 * 2^60 = 2^119 */
885 felem_square(tmp2, gamma);
886 /* tmp2[i] < 4 * 2^57 * 2^57 = 2^116 */
887 widefelem_scalar(tmp2, 8);
888 /* tmp2[i] < 8 * 2^116 = 2^119 */
889 widefelem_diff(tmp, tmp2);
890 /* tmp[i] < 2^119 + 2^120 < 2^121 */
891 felem_reduce(y_out, tmp);
892}
04daec86 893
1d97c843
TH
894/*-
895 * Add two elliptic curve points:
04daec86
BM
896 * (X_1, Y_1, Z_1) + (X_2, Y_2, Z_2) = (X_3, Y_3, Z_3), where
897 * X_3 = (Z_1^3 * Y_2 - Z_2^3 * Y_1)^2 - (Z_1^2 * X_2 - Z_2^2 * X_1)^3 -
898 * 2 * Z_2^2 * X_1 * (Z_1^2 * X_2 - Z_2^2 * X_1)^2
899 * Y_3 = (Z_1^3 * Y_2 - Z_2^3 * Y_1) * (Z_2^2 * X_1 * (Z_1^2 * X_2 - Z_2^2 * X_1)^2 - X_3) -
900 * Z_2^3 * Y_1 * (Z_1^2 * X_2 - Z_2^2 * X_1)^3
3e00b4c9
BM
901 * Z_3 = (Z_1^2 * X_2 - Z_2^2 * X_1) * (Z_1 * Z_2)
902 *
903 * This runs faster if 'mixed' is set, which requires Z_2 = 1 or Z_2 = 0.
904 */
04daec86 905
0f113f3e
MC
906/*
907 * This function is not entirely constant-time: it includes a branch for
908 * checking whether the two input points are equal, (while not equal to the
909 * point at infinity). This case never happens during single point
910 * multiplication, so there is no timing leak for ECDH or ECDSA signing.
911 */
3e00b4c9 912static void point_add(felem x3, felem y3, felem z3,
0f113f3e
MC
913 const felem x1, const felem y1, const felem z1,
914 const int mixed, const felem x2, const felem y2,
915 const felem z2)
916{
917 felem ftmp, ftmp2, ftmp3, ftmp4, ftmp5, x_out, y_out, z_out;
918 widefelem tmp, tmp2;
919 limb z1_is_zero, z2_is_zero, x_equal, y_equal;
920
921 if (!mixed) {
922 /* ftmp2 = z2^2 */
923 felem_square(tmp, z2);
924 felem_reduce(ftmp2, tmp);
925
926 /* ftmp4 = z2^3 */
927 felem_mul(tmp, ftmp2, z2);
928 felem_reduce(ftmp4, tmp);
929
930 /* ftmp4 = z2^3*y1 */
931 felem_mul(tmp2, ftmp4, y1);
932 felem_reduce(ftmp4, tmp2);
933
934 /* ftmp2 = z2^2*x1 */
935 felem_mul(tmp2, ftmp2, x1);
936 felem_reduce(ftmp2, tmp2);
937 } else {
938 /*
939 * We'll assume z2 = 1 (special case z2 = 0 is handled later)
940 */
941
942 /* ftmp4 = z2^3*y1 */
943 felem_assign(ftmp4, y1);
944
945 /* ftmp2 = z2^2*x1 */
946 felem_assign(ftmp2, x1);
947 }
948
949 /* ftmp = z1^2 */
950 felem_square(tmp, z1);
951 felem_reduce(ftmp, tmp);
952
953 /* ftmp3 = z1^3 */
954 felem_mul(tmp, ftmp, z1);
955 felem_reduce(ftmp3, tmp);
956
957 /* tmp = z1^3*y2 */
958 felem_mul(tmp, ftmp3, y2);
959 /* tmp[i] < 4 * 2^57 * 2^57 = 2^116 */
960
961 /* ftmp3 = z1^3*y2 - z2^3*y1 */
962 felem_diff_128_64(tmp, ftmp4);
963 /* tmp[i] < 2^116 + 2^64 + 8 < 2^117 */
964 felem_reduce(ftmp3, tmp);
965
966 /* tmp = z1^2*x2 */
967 felem_mul(tmp, ftmp, x2);
968 /* tmp[i] < 4 * 2^57 * 2^57 = 2^116 */
969
970 /* ftmp = z1^2*x2 - z2^2*x1 */
971 felem_diff_128_64(tmp, ftmp2);
972 /* tmp[i] < 2^116 + 2^64 + 8 < 2^117 */
973 felem_reduce(ftmp, tmp);
974
975 /*
976 * the formulae are incorrect if the points are equal so we check for
977 * this and do doubling if this happens
978 */
979 x_equal = felem_is_zero(ftmp);
980 y_equal = felem_is_zero(ftmp3);
981 z1_is_zero = felem_is_zero(z1);
982 z2_is_zero = felem_is_zero(z2);
983 /* In affine coordinates, (X_1, Y_1) == (X_2, Y_2) */
984 if (x_equal && y_equal && !z1_is_zero && !z2_is_zero) {
985 point_double(x3, y3, z3, x1, y1, z1);
986 return;
987 }
988
989 /* ftmp5 = z1*z2 */
990 if (!mixed) {
991 felem_mul(tmp, z1, z2);
992 felem_reduce(ftmp5, tmp);
993 } else {
994 /* special case z2 = 0 is handled later */
995 felem_assign(ftmp5, z1);
996 }
997
998 /* z_out = (z1^2*x2 - z2^2*x1)*(z1*z2) */
999 felem_mul(tmp, ftmp, ftmp5);
1000 felem_reduce(z_out, tmp);
1001
1002 /* ftmp = (z1^2*x2 - z2^2*x1)^2 */
1003 felem_assign(ftmp5, ftmp);
1004 felem_square(tmp, ftmp);
1005 felem_reduce(ftmp, tmp);
1006
1007 /* ftmp5 = (z1^2*x2 - z2^2*x1)^3 */
1008 felem_mul(tmp, ftmp, ftmp5);
1009 felem_reduce(ftmp5, tmp);
1010
1011 /* ftmp2 = z2^2*x1*(z1^2*x2 - z2^2*x1)^2 */
1012 felem_mul(tmp, ftmp2, ftmp);
1013 felem_reduce(ftmp2, tmp);
1014
1015 /* tmp = z2^3*y1*(z1^2*x2 - z2^2*x1)^3 */
1016 felem_mul(tmp, ftmp4, ftmp5);
1017 /* tmp[i] < 4 * 2^57 * 2^57 = 2^116 */
1018
1019 /* tmp2 = (z1^3*y2 - z2^3*y1)^2 */
1020 felem_square(tmp2, ftmp3);
1021 /* tmp2[i] < 4 * 2^57 * 2^57 < 2^116 */
1022
1023 /* tmp2 = (z1^3*y2 - z2^3*y1)^2 - (z1^2*x2 - z2^2*x1)^3 */
1024 felem_diff_128_64(tmp2, ftmp5);
1025 /* tmp2[i] < 2^116 + 2^64 + 8 < 2^117 */
1026
1027 /* ftmp5 = 2*z2^2*x1*(z1^2*x2 - z2^2*x1)^2 */
1028 felem_assign(ftmp5, ftmp2);
1029 felem_scalar(ftmp5, 2);
1030 /* ftmp5[i] < 2 * 2^57 = 2^58 */
1031
50e735f9
MC
1032 /*-
1033 * x_out = (z1^3*y2 - z2^3*y1)^2 - (z1^2*x2 - z2^2*x1)^3 -
1034 * 2*z2^2*x1*(z1^2*x2 - z2^2*x1)^2
1035 */
0f113f3e
MC
1036 felem_diff_128_64(tmp2, ftmp5);
1037 /* tmp2[i] < 2^117 + 2^64 + 8 < 2^118 */
1038 felem_reduce(x_out, tmp2);
1039
1040 /* ftmp2 = z2^2*x1*(z1^2*x2 - z2^2*x1)^2 - x_out */
1041 felem_diff(ftmp2, x_out);
1042 /* ftmp2[i] < 2^57 + 2^58 + 2 < 2^59 */
1043
1044 /*
1045 * tmp2 = (z1^3*y2 - z2^3*y1)*(z2^2*x1*(z1^2*x2 - z2^2*x1)^2 - x_out)
1046 */
1047 felem_mul(tmp2, ftmp3, ftmp2);
1048 /* tmp2[i] < 4 * 2^57 * 2^59 = 2^118 */
1049
50e735f9
MC
1050 /*-
1051 * y_out = (z1^3*y2 - z2^3*y1)*(z2^2*x1*(z1^2*x2 - z2^2*x1)^2 - x_out) -
1052 * z2^3*y1*(z1^2*x2 - z2^2*x1)^3
1053 */
0f113f3e
MC
1054 widefelem_diff(tmp2, tmp);
1055 /* tmp2[i] < 2^118 + 2^120 < 2^121 */
1056 felem_reduce(y_out, tmp2);
1057
1058 /*
1059 * the result (x_out, y_out, z_out) is incorrect if one of the inputs is
1060 * the point at infinity, so we need to check for this separately
1061 */
1062
1063 /*
1064 * if point 1 is at infinity, copy point 2 to output, and vice versa
1065 */
1066 copy_conditional(x_out, x2, z1_is_zero);
1067 copy_conditional(x_out, x1, z2_is_zero);
1068 copy_conditional(y_out, y2, z1_is_zero);
1069 copy_conditional(y_out, y1, z2_is_zero);
1070 copy_conditional(z_out, z2, z1_is_zero);
1071 copy_conditional(z_out, z1, z2_is_zero);
1072 felem_assign(x3, x_out);
1073 felem_assign(y3, y_out);
1074 felem_assign(z3, z_out);
1075}
04daec86 1076
dbd87ffc
MC
1077/*
1078 * select_point selects the |idx|th point from a precomputation table and
1079 * copies it to out.
1080 * The pre_comp array argument should be size of |size| argument
1081 */
0f113f3e
MC
1082static void select_point(const u64 idx, unsigned int size,
1083 const felem pre_comp[][3], felem out[3])
1084{
1085 unsigned i, j;
1086 limb *outlimbs = &out[0][0];
0f113f3e 1087
88f4c6f3 1088 memset(out, 0, sizeof(*out) * 3);
0f113f3e
MC
1089 for (i = 0; i < size; i++) {
1090 const limb *inlimbs = &pre_comp[i][0][0];
1091 u64 mask = i ^ idx;
1092 mask |= mask >> 4;
1093 mask |= mask >> 2;
1094 mask |= mask >> 1;
1095 mask &= 1;
1096 mask--;
1097 for (j = 0; j < 4 * 3; j++)
1098 outlimbs[j] |= inlimbs[j] & mask;
1099 }
1100}
3e00b4c9
BM
1101
1102/* get_bit returns the |i|th bit in |in| */
1103static char get_bit(const felem_bytearray in, unsigned i)
0f113f3e
MC
1104{
1105 if (i >= 224)
1106 return 0;
1107 return (in[i >> 3] >> (i & 7)) & 1;
1108}
1109
1110/*
1111 * Interleaved point multiplication using precomputed point multiples: The
1112 * small point multiples 0*P, 1*P, ..., 16*P are in pre_comp[], the scalars
1113 * in scalars[]. If g_scalar is non-NULL, we also add this multiple of the
1114 * generator, using certain (large) precomputed multiples in g_pre_comp.
1115 * Output point (X, Y, Z) is stored in x_out, y_out, z_out
1116 */
3e00b4c9 1117static void batch_mul(felem x_out, felem y_out, felem z_out,
0f113f3e
MC
1118 const felem_bytearray scalars[],
1119 const unsigned num_points, const u8 *g_scalar,
1120 const int mixed, const felem pre_comp[][17][3],
1121 const felem g_pre_comp[2][16][3])
1122{
1123 int i, skip;
1124 unsigned num;
1125 unsigned gen_mul = (g_scalar != NULL);
1126 felem nq[3], tmp[4];
1127 u64 bits;
1128 u8 sign, digit;
1129
1130 /* set nq to the point at infinity */
16f8d4eb 1131 memset(nq, 0, sizeof(nq));
0f113f3e
MC
1132
1133 /*
1134 * Loop over all scalars msb-to-lsb, interleaving additions of multiples
1135 * of the generator (two in each of the last 28 rounds) and additions of
1136 * other points multiples (every 5th round).
1137 */
1138 skip = 1; /* save two point operations in the first
1139 * round */
1140 for (i = (num_points ? 220 : 27); i >= 0; --i) {
1141 /* double */
1142 if (!skip)
1143 point_double(nq[0], nq[1], nq[2], nq[0], nq[1], nq[2]);
1144
1145 /* add multiples of the generator */
1146 if (gen_mul && (i <= 27)) {
1147 /* first, look 28 bits upwards */
1148 bits = get_bit(g_scalar, i + 196) << 3;
1149 bits |= get_bit(g_scalar, i + 140) << 2;
1150 bits |= get_bit(g_scalar, i + 84) << 1;
1151 bits |= get_bit(g_scalar, i + 28);
1152 /* select the point to add, in constant time */
1153 select_point(bits, 16, g_pre_comp[1], tmp);
1154
1155 if (!skip) {
1156 /* value 1 below is argument for "mixed" */
1157 point_add(nq[0], nq[1], nq[2],
1158 nq[0], nq[1], nq[2], 1, tmp[0], tmp[1], tmp[2]);
1159 } else {
1160 memcpy(nq, tmp, 3 * sizeof(felem));
1161 skip = 0;
1162 }
1163
1164 /* second, look at the current position */
1165 bits = get_bit(g_scalar, i + 168) << 3;
1166 bits |= get_bit(g_scalar, i + 112) << 2;
1167 bits |= get_bit(g_scalar, i + 56) << 1;
1168 bits |= get_bit(g_scalar, i);
1169 /* select the point to add, in constant time */
1170 select_point(bits, 16, g_pre_comp[0], tmp);
1171 point_add(nq[0], nq[1], nq[2],
1172 nq[0], nq[1], nq[2],
1173 1 /* mixed */ , tmp[0], tmp[1], tmp[2]);
1174 }
1175
1176 /* do other additions every 5 doublings */
1177 if (num_points && (i % 5 == 0)) {
1178 /* loop over all scalars */
1179 for (num = 0; num < num_points; ++num) {
1180 bits = get_bit(scalars[num], i + 4) << 5;
1181 bits |= get_bit(scalars[num], i + 3) << 4;
1182 bits |= get_bit(scalars[num], i + 2) << 3;
1183 bits |= get_bit(scalars[num], i + 1) << 2;
1184 bits |= get_bit(scalars[num], i) << 1;
1185 bits |= get_bit(scalars[num], i - 1);
1186 ec_GFp_nistp_recode_scalar_bits(&sign, &digit, bits);
1187
1188 /* select the point to add or subtract */
1189 select_point(digit, 17, pre_comp[num], tmp);
1190 felem_neg(tmp[3], tmp[1]); /* (X, -Y, Z) is the negative
1191 * point */
1192 copy_conditional(tmp[1], tmp[3], sign);
1193
1194 if (!skip) {
1195 point_add(nq[0], nq[1], nq[2],
1196 nq[0], nq[1], nq[2],
1197 mixed, tmp[0], tmp[1], tmp[2]);
1198 } else {
1199 memcpy(nq, tmp, 3 * sizeof(felem));
1200 skip = 0;
1201 }
1202 }
1203 }
1204 }
1205 felem_assign(x_out, nq[0]);
1206 felem_assign(y_out, nq[1]);
1207 felem_assign(z_out, nq[2]);
1208}
04daec86
BM
1209
1210/******************************************************************************/
0f113f3e
MC
1211/*
1212 * FUNCTIONS TO MANAGE PRECOMPUTATION
04daec86
BM
1213 */
1214
1215static NISTP224_PRE_COMP *nistp224_pre_comp_new()
0f113f3e 1216{
b51bce94
RS
1217 NISTP224_PRE_COMP *ret = OPENSSL_zalloc(sizeof(*ret));
1218
0f113f3e
MC
1219 if (!ret) {
1220 ECerr(EC_F_NISTP224_PRE_COMP_NEW, ERR_R_MALLOC_FAILURE);
1221 return ret;
1222 }
9b398ef2 1223
0f113f3e 1224 ret->references = 1;
9b398ef2
AG
1225
1226 ret->lock = CRYPTO_THREAD_lock_new();
1227 if (ret->lock == NULL) {
1228 ECerr(EC_F_NISTP224_PRE_COMP_NEW, ERR_R_MALLOC_FAILURE);
1229 OPENSSL_free(ret);
1230 return NULL;
1231 }
0f113f3e
MC
1232 return ret;
1233}
04daec86 1234
3aef36ff 1235NISTP224_PRE_COMP *EC_nistp224_pre_comp_dup(NISTP224_PRE_COMP *p)
0f113f3e 1236{
9b398ef2 1237 int i;
3aef36ff 1238 if (p != NULL)
2f545ae4 1239 CRYPTO_UP_REF(&p->references, &i, p->lock);
3aef36ff 1240 return p;
0f113f3e 1241}
04daec86 1242
3aef36ff 1243void EC_nistp224_pre_comp_free(NISTP224_PRE_COMP *p)
0f113f3e 1244{
9b398ef2
AG
1245 int i;
1246
1247 if (p == NULL)
1248 return;
1249
2f545ae4 1250 CRYPTO_DOWN_REF(&p->references, &i, p->lock);
9b398ef2
AG
1251 REF_PRINT_COUNT("EC_nistp224", x);
1252 if (i > 0)
0f113f3e 1253 return;
9b398ef2
AG
1254 REF_ASSERT_ISNT(i < 0);
1255
1256 CRYPTO_THREAD_lock_free(p->lock);
3aef36ff 1257 OPENSSL_free(p);
0f113f3e 1258}
04daec86
BM
1259
1260/******************************************************************************/
0f113f3e
MC
1261/*
1262 * OPENSSL EC_METHOD FUNCTIONS
04daec86
BM
1263 */
1264
1265int ec_GFp_nistp224_group_init(EC_GROUP *group)
0f113f3e
MC
1266{
1267 int ret;
1268 ret = ec_GFp_simple_group_init(group);
1269 group->a_is_minus3 = 1;
1270 return ret;
1271}
04daec86
BM
1272
1273int ec_GFp_nistp224_group_set_curve(EC_GROUP *group, const BIGNUM *p,
0f113f3e
MC
1274 const BIGNUM *a, const BIGNUM *b,
1275 BN_CTX *ctx)
1276{
1277 int ret = 0;
1278 BN_CTX *new_ctx = NULL;
1279 BIGNUM *curve_p, *curve_a, *curve_b;
1280
1281 if (ctx == NULL)
1282 if ((ctx = new_ctx = BN_CTX_new()) == NULL)
1283 return 0;
1284 BN_CTX_start(ctx);
edea42c6
PY
1285 curve_p = BN_CTX_get(ctx);
1286 curve_a = BN_CTX_get(ctx);
1287 curve_b = BN_CTX_get(ctx);
1288 if (curve_b == NULL)
0f113f3e
MC
1289 goto err;
1290 BN_bin2bn(nistp224_curve_params[0], sizeof(felem_bytearray), curve_p);
1291 BN_bin2bn(nistp224_curve_params[1], sizeof(felem_bytearray), curve_a);
1292 BN_bin2bn(nistp224_curve_params[2], sizeof(felem_bytearray), curve_b);
1293 if ((BN_cmp(curve_p, p)) || (BN_cmp(curve_a, a)) || (BN_cmp(curve_b, b))) {
1294 ECerr(EC_F_EC_GFP_NISTP224_GROUP_SET_CURVE,
1295 EC_R_WRONG_CURVE_PARAMETERS);
1296 goto err;
1297 }
1298 group->field_mod_func = BN_nist_mod_224;
1299 ret = ec_GFp_simple_group_set_curve(group, p, a, b, ctx);
1300 err:
1301 BN_CTX_end(ctx);
23a1d5e9 1302 BN_CTX_free(new_ctx);
0f113f3e
MC
1303 return ret;
1304}
1305
1306/*
1307 * Takes the Jacobian coordinates (X, Y, Z) of a point and returns (X', Y') =
1308 * (X/Z^2, Y/Z^3)
1309 */
04daec86 1310int ec_GFp_nistp224_point_get_affine_coordinates(const EC_GROUP *group,
0f113f3e
MC
1311 const EC_POINT *point,
1312 BIGNUM *x, BIGNUM *y,
1313 BN_CTX *ctx)
1314{
1315 felem z1, z2, x_in, y_in, x_out, y_out;
1316 widefelem tmp;
1317
1318 if (EC_POINT_is_at_infinity(group, point)) {
1319 ECerr(EC_F_EC_GFP_NISTP224_POINT_GET_AFFINE_COORDINATES,
1320 EC_R_POINT_AT_INFINITY);
1321 return 0;
1322 }
ace8f546
AP
1323 if ((!BN_to_felem(x_in, point->X)) || (!BN_to_felem(y_in, point->Y)) ||
1324 (!BN_to_felem(z1, point->Z)))
0f113f3e
MC
1325 return 0;
1326 felem_inv(z2, z1);
1327 felem_square(tmp, z2);
1328 felem_reduce(z1, tmp);
1329 felem_mul(tmp, x_in, z1);
1330 felem_reduce(x_in, tmp);
1331 felem_contract(x_out, x_in);
1332 if (x != NULL) {
1333 if (!felem_to_BN(x, x_out)) {
1334 ECerr(EC_F_EC_GFP_NISTP224_POINT_GET_AFFINE_COORDINATES,
1335 ERR_R_BN_LIB);
1336 return 0;
1337 }
1338 }
1339 felem_mul(tmp, z1, z2);
1340 felem_reduce(z1, tmp);
1341 felem_mul(tmp, y_in, z1);
1342 felem_reduce(y_in, tmp);
1343 felem_contract(y_out, y_in);
1344 if (y != NULL) {
1345 if (!felem_to_BN(y, y_out)) {
1346 ECerr(EC_F_EC_GFP_NISTP224_POINT_GET_AFFINE_COORDINATES,
1347 ERR_R_BN_LIB);
1348 return 0;
1349 }
1350 }
1351 return 1;
1352}
1353
1354static void make_points_affine(size_t num, felem points[ /* num */ ][3],
1355 felem tmp_felems[ /* num+1 */ ])
1356{
1357 /*
1358 * Runs in constant time, unless an input is the point at infinity (which
1359 * normally shouldn't happen).
1360 */
1361 ec_GFp_nistp_points_make_affine_internal(num,
1362 points,
1363 sizeof(felem),
1364 tmp_felems,
1365 (void (*)(void *))felem_one,
0f113f3e
MC
1366 felem_is_zero_int,
1367 (void (*)(void *, const void *))
1368 felem_assign,
1369 (void (*)(void *, const void *))
1370 felem_square_reduce, (void (*)
1371 (void *,
1372 const void
1373 *,
1374 const void
1375 *))
1376 felem_mul_reduce,
1377 (void (*)(void *, const void *))
1378 felem_inv,
1379 (void (*)(void *, const void *))
1380 felem_contract);
1381}
1382
1383/*
1384 * Computes scalar*generator + \sum scalars[i]*points[i], ignoring NULL
1385 * values Result is stored in r (r can equal one of the inputs).
1386 */
04daec86 1387int ec_GFp_nistp224_points_mul(const EC_GROUP *group, EC_POINT *r,
0f113f3e
MC
1388 const BIGNUM *scalar, size_t num,
1389 const EC_POINT *points[],
1390 const BIGNUM *scalars[], BN_CTX *ctx)
1391{
1392 int ret = 0;
1393 int j;
1394 unsigned i;
1395 int mixed = 0;
1396 BN_CTX *new_ctx = NULL;
1397 BIGNUM *x, *y, *z, *tmp_scalar;
1398 felem_bytearray g_secret;
1399 felem_bytearray *secrets = NULL;
16f8d4eb 1400 felem (*pre_comp)[17][3] = NULL;
0f113f3e
MC
1401 felem *tmp_felems = NULL;
1402 felem_bytearray tmp;
1403 unsigned num_bytes;
1404 int have_pre_comp = 0;
1405 size_t num_points = num;
1406 felem x_in, y_in, z_in, x_out, y_out, z_out;
1407 NISTP224_PRE_COMP *pre = NULL;
1408 const felem(*g_pre_comp)[16][3] = NULL;
1409 EC_POINT *generator = NULL;
1410 const EC_POINT *p = NULL;
1411 const BIGNUM *p_scalar = NULL;
1412
1413 if (ctx == NULL)
1414 if ((ctx = new_ctx = BN_CTX_new()) == NULL)
1415 return 0;
1416 BN_CTX_start(ctx);
edea42c6
PY
1417 x = BN_CTX_get(ctx);
1418 y = BN_CTX_get(ctx);
1419 z = BN_CTX_get(ctx);
1420 tmp_scalar = BN_CTX_get(ctx);
1421 if (tmp_scalar == NULL)
0f113f3e
MC
1422 goto err;
1423
1424 if (scalar != NULL) {
3aef36ff 1425 pre = group->pre_comp.nistp224;
0f113f3e
MC
1426 if (pre)
1427 /* we have precomputation, try to use it */
1428 g_pre_comp = (const felem(*)[16][3])pre->g_pre_comp;
1429 else
1430 /* try to use the standard precomputation */
1431 g_pre_comp = &gmul[0];
1432 generator = EC_POINT_new(group);
1433 if (generator == NULL)
1434 goto err;
1435 /* get the generator from precomputation */
1436 if (!felem_to_BN(x, g_pre_comp[0][1][0]) ||
1437 !felem_to_BN(y, g_pre_comp[0][1][1]) ||
1438 !felem_to_BN(z, g_pre_comp[0][1][2])) {
1439 ECerr(EC_F_EC_GFP_NISTP224_POINTS_MUL, ERR_R_BN_LIB);
1440 goto err;
1441 }
1442 if (!EC_POINT_set_Jprojective_coordinates_GFp(group,
1443 generator, x, y, z,
1444 ctx))
1445 goto err;
1446 if (0 == EC_POINT_cmp(group, generator, group->generator, ctx))
1447 /* precomputation matches generator */
1448 have_pre_comp = 1;
1449 else
1450 /*
1451 * we don't have valid precomputation: treat the generator as a
1452 * random point
1453 */
1454 num_points = num_points + 1;
1455 }
1456
1457 if (num_points > 0) {
1458 if (num_points >= 3) {
1459 /*
1460 * unless we precompute multiples for just one or two points,
1461 * converting those into affine form is time well spent
1462 */
1463 mixed = 1;
1464 }
b51bce94
RS
1465 secrets = OPENSSL_zalloc(sizeof(*secrets) * num_points);
1466 pre_comp = OPENSSL_zalloc(sizeof(*pre_comp) * num_points);
0f113f3e
MC
1467 if (mixed)
1468 tmp_felems =
16f8d4eb 1469 OPENSSL_malloc(sizeof(felem) * (num_points * 17 + 1));
0f113f3e
MC
1470 if ((secrets == NULL) || (pre_comp == NULL)
1471 || (mixed && (tmp_felems == NULL))) {
1472 ECerr(EC_F_EC_GFP_NISTP224_POINTS_MUL, ERR_R_MALLOC_FAILURE);
1473 goto err;
1474 }
1475
1476 /*
1477 * we treat NULL scalars as 0, and NULL points as points at infinity,
1478 * i.e., they contribute nothing to the linear combination
1479 */
0f113f3e
MC
1480 for (i = 0; i < num_points; ++i) {
1481 if (i == num)
1482 /* the generator */
1483 {
1484 p = EC_GROUP_get0_generator(group);
1485 p_scalar = scalar;
1486 } else
1487 /* the i^th point */
1488 {
1489 p = points[i];
1490 p_scalar = scalars[i];
1491 }
1492 if ((p_scalar != NULL) && (p != NULL)) {
1493 /* reduce scalar to 0 <= scalar < 2^224 */
1494 if ((BN_num_bits(p_scalar) > 224)
1495 || (BN_is_negative(p_scalar))) {
1496 /*
1497 * this is an unusual input, and we don't guarantee
1498 * constant-timeness
1499 */
ace8f546 1500 if (!BN_nnmod(tmp_scalar, p_scalar, group->order, ctx)) {
0f113f3e
MC
1501 ECerr(EC_F_EC_GFP_NISTP224_POINTS_MUL, ERR_R_BN_LIB);
1502 goto err;
1503 }
1504 num_bytes = BN_bn2bin(tmp_scalar, tmp);
1505 } else
1506 num_bytes = BN_bn2bin(p_scalar, tmp);
1507 flip_endian(secrets[i], tmp, num_bytes);
1508 /* precompute multiples */
ace8f546
AP
1509 if ((!BN_to_felem(x_out, p->X)) ||
1510 (!BN_to_felem(y_out, p->Y)) ||
1511 (!BN_to_felem(z_out, p->Z)))
0f113f3e
MC
1512 goto err;
1513 felem_assign(pre_comp[i][1][0], x_out);
1514 felem_assign(pre_comp[i][1][1], y_out);
1515 felem_assign(pre_comp[i][1][2], z_out);
1516 for (j = 2; j <= 16; ++j) {
1517 if (j & 1) {
1518 point_add(pre_comp[i][j][0], pre_comp[i][j][1],
1519 pre_comp[i][j][2], pre_comp[i][1][0],
1520 pre_comp[i][1][1], pre_comp[i][1][2], 0,
1521 pre_comp[i][j - 1][0],
1522 pre_comp[i][j - 1][1],
1523 pre_comp[i][j - 1][2]);
1524 } else {
1525 point_double(pre_comp[i][j][0], pre_comp[i][j][1],
1526 pre_comp[i][j][2], pre_comp[i][j / 2][0],
1527 pre_comp[i][j / 2][1],
1528 pre_comp[i][j / 2][2]);
1529 }
1530 }
1531 }
1532 }
1533 if (mixed)
1534 make_points_affine(num_points * 17, pre_comp[0], tmp_felems);
1535 }
1536
1537 /* the scalar for the generator */
1538 if ((scalar != NULL) && (have_pre_comp)) {
16f8d4eb 1539 memset(g_secret, 0, sizeof(g_secret));
0f113f3e
MC
1540 /* reduce scalar to 0 <= scalar < 2^224 */
1541 if ((BN_num_bits(scalar) > 224) || (BN_is_negative(scalar))) {
1542 /*
1543 * this is an unusual input, and we don't guarantee
1544 * constant-timeness
1545 */
ace8f546 1546 if (!BN_nnmod(tmp_scalar, scalar, group->order, ctx)) {
0f113f3e
MC
1547 ECerr(EC_F_EC_GFP_NISTP224_POINTS_MUL, ERR_R_BN_LIB);
1548 goto err;
1549 }
1550 num_bytes = BN_bn2bin(tmp_scalar, tmp);
1551 } else
1552 num_bytes = BN_bn2bin(scalar, tmp);
1553 flip_endian(g_secret, tmp, num_bytes);
1554 /* do the multiplication with generator precomputation */
1555 batch_mul(x_out, y_out, z_out,
1556 (const felem_bytearray(*))secrets, num_points,
1557 g_secret,
1558 mixed, (const felem(*)[17][3])pre_comp, g_pre_comp);
1559 } else
1560 /* do the multiplication without generator precomputation */
1561 batch_mul(x_out, y_out, z_out,
1562 (const felem_bytearray(*))secrets, num_points,
1563 NULL, mixed, (const felem(*)[17][3])pre_comp, NULL);
1564 /* reduce the output to its unique minimal representation */
1565 felem_contract(x_in, x_out);
1566 felem_contract(y_in, y_out);
1567 felem_contract(z_in, z_out);
1568 if ((!felem_to_BN(x, x_in)) || (!felem_to_BN(y, y_in)) ||
1569 (!felem_to_BN(z, z_in))) {
1570 ECerr(EC_F_EC_GFP_NISTP224_POINTS_MUL, ERR_R_BN_LIB);
1571 goto err;
1572 }
1573 ret = EC_POINT_set_Jprojective_coordinates_GFp(group, r, x, y, z, ctx);
1574
1575 err:
1576 BN_CTX_end(ctx);
8fdc3734 1577 EC_POINT_free(generator);
23a1d5e9 1578 BN_CTX_free(new_ctx);
b548a1f1
RS
1579 OPENSSL_free(secrets);
1580 OPENSSL_free(pre_comp);
1581 OPENSSL_free(tmp_felems);
0f113f3e
MC
1582 return ret;
1583}
04daec86
BM
1584
1585int ec_GFp_nistp224_precompute_mult(EC_GROUP *group, BN_CTX *ctx)
0f113f3e
MC
1586{
1587 int ret = 0;
1588 NISTP224_PRE_COMP *pre = NULL;
1589 int i, j;
1590 BN_CTX *new_ctx = NULL;
1591 BIGNUM *x, *y;
1592 EC_POINT *generator = NULL;
1593 felem tmp_felems[32];
1594
1595 /* throw away old precomputation */
2c52ac9b 1596 EC_pre_comp_free(group);
0f113f3e
MC
1597 if (ctx == NULL)
1598 if ((ctx = new_ctx = BN_CTX_new()) == NULL)
1599 return 0;
1600 BN_CTX_start(ctx);
edea42c6
PY
1601 x = BN_CTX_get(ctx);
1602 y = BN_CTX_get(ctx);
1603 if (y == NULL)
0f113f3e
MC
1604 goto err;
1605 /* get the generator */
1606 if (group->generator == NULL)
1607 goto err;
1608 generator = EC_POINT_new(group);
1609 if (generator == NULL)
1610 goto err;
1611 BN_bin2bn(nistp224_curve_params[3], sizeof(felem_bytearray), x);
1612 BN_bin2bn(nistp224_curve_params[4], sizeof(felem_bytearray), y);
1613 if (!EC_POINT_set_affine_coordinates_GFp(group, generator, x, y, ctx))
1614 goto err;
1615 if ((pre = nistp224_pre_comp_new()) == NULL)
1616 goto err;
1617 /*
1618 * if the generator is the standard one, use built-in precomputation
1619 */
1620 if (0 == EC_POINT_cmp(group, generator, group->generator, ctx)) {
1621 memcpy(pre->g_pre_comp, gmul, sizeof(pre->g_pre_comp));
615614c8 1622 goto done;
0f113f3e 1623 }
ace8f546
AP
1624 if ((!BN_to_felem(pre->g_pre_comp[0][1][0], group->generator->X)) ||
1625 (!BN_to_felem(pre->g_pre_comp[0][1][1], group->generator->Y)) ||
1626 (!BN_to_felem(pre->g_pre_comp[0][1][2], group->generator->Z)))
0f113f3e
MC
1627 goto err;
1628 /*
1629 * compute 2^56*G, 2^112*G, 2^168*G for the first table, 2^28*G, 2^84*G,
1630 * 2^140*G, 2^196*G for the second one
1631 */
1632 for (i = 1; i <= 8; i <<= 1) {
1633 point_double(pre->g_pre_comp[1][i][0], pre->g_pre_comp[1][i][1],
1634 pre->g_pre_comp[1][i][2], pre->g_pre_comp[0][i][0],
1635 pre->g_pre_comp[0][i][1], pre->g_pre_comp[0][i][2]);
1636 for (j = 0; j < 27; ++j) {
1637 point_double(pre->g_pre_comp[1][i][0], pre->g_pre_comp[1][i][1],
1638 pre->g_pre_comp[1][i][2], pre->g_pre_comp[1][i][0],
1639 pre->g_pre_comp[1][i][1], pre->g_pre_comp[1][i][2]);
1640 }
1641 if (i == 8)
1642 break;
1643 point_double(pre->g_pre_comp[0][2 * i][0],
1644 pre->g_pre_comp[0][2 * i][1],
1645 pre->g_pre_comp[0][2 * i][2], pre->g_pre_comp[1][i][0],
1646 pre->g_pre_comp[1][i][1], pre->g_pre_comp[1][i][2]);
1647 for (j = 0; j < 27; ++j) {
1648 point_double(pre->g_pre_comp[0][2 * i][0],
1649 pre->g_pre_comp[0][2 * i][1],
1650 pre->g_pre_comp[0][2 * i][2],
1651 pre->g_pre_comp[0][2 * i][0],
1652 pre->g_pre_comp[0][2 * i][1],
1653 pre->g_pre_comp[0][2 * i][2]);
1654 }
1655 }
1656 for (i = 0; i < 2; i++) {
1657 /* g_pre_comp[i][0] is the point at infinity */
1658 memset(pre->g_pre_comp[i][0], 0, sizeof(pre->g_pre_comp[i][0]));
1659 /* the remaining multiples */
1660 /* 2^56*G + 2^112*G resp. 2^84*G + 2^140*G */
1661 point_add(pre->g_pre_comp[i][6][0], pre->g_pre_comp[i][6][1],
1662 pre->g_pre_comp[i][6][2], pre->g_pre_comp[i][4][0],
1663 pre->g_pre_comp[i][4][1], pre->g_pre_comp[i][4][2],
1664 0, pre->g_pre_comp[i][2][0], pre->g_pre_comp[i][2][1],
1665 pre->g_pre_comp[i][2][2]);
1666 /* 2^56*G + 2^168*G resp. 2^84*G + 2^196*G */
1667 point_add(pre->g_pre_comp[i][10][0], pre->g_pre_comp[i][10][1],
1668 pre->g_pre_comp[i][10][2], pre->g_pre_comp[i][8][0],
1669 pre->g_pre_comp[i][8][1], pre->g_pre_comp[i][8][2],
1670 0, pre->g_pre_comp[i][2][0], pre->g_pre_comp[i][2][1],
1671 pre->g_pre_comp[i][2][2]);
1672 /* 2^112*G + 2^168*G resp. 2^140*G + 2^196*G */
1673 point_add(pre->g_pre_comp[i][12][0], pre->g_pre_comp[i][12][1],
1674 pre->g_pre_comp[i][12][2], pre->g_pre_comp[i][8][0],
1675 pre->g_pre_comp[i][8][1], pre->g_pre_comp[i][8][2],
1676 0, pre->g_pre_comp[i][4][0], pre->g_pre_comp[i][4][1],
1677 pre->g_pre_comp[i][4][2]);
1678 /*
1679 * 2^56*G + 2^112*G + 2^168*G resp. 2^84*G + 2^140*G + 2^196*G
1680 */
1681 point_add(pre->g_pre_comp[i][14][0], pre->g_pre_comp[i][14][1],
1682 pre->g_pre_comp[i][14][2], pre->g_pre_comp[i][12][0],
1683 pre->g_pre_comp[i][12][1], pre->g_pre_comp[i][12][2],
1684 0, pre->g_pre_comp[i][2][0], pre->g_pre_comp[i][2][1],
1685 pre->g_pre_comp[i][2][2]);
1686 for (j = 1; j < 8; ++j) {
1687 /* odd multiples: add G resp. 2^28*G */
1688 point_add(pre->g_pre_comp[i][2 * j + 1][0],
1689 pre->g_pre_comp[i][2 * j + 1][1],
1690 pre->g_pre_comp[i][2 * j + 1][2],
1691 pre->g_pre_comp[i][2 * j][0],
1692 pre->g_pre_comp[i][2 * j][1],
1693 pre->g_pre_comp[i][2 * j][2], 0,
1694 pre->g_pre_comp[i][1][0], pre->g_pre_comp[i][1][1],
1695 pre->g_pre_comp[i][1][2]);
1696 }
1697 }
1698 make_points_affine(31, &(pre->g_pre_comp[0][1]), tmp_felems);
1699
615614c8 1700 done:
3aef36ff 1701 SETPRECOMP(group, nistp224, pre);
0f113f3e 1702 pre = NULL;
3aef36ff 1703 ret = 1;
04daec86 1704 err:
0f113f3e 1705 BN_CTX_end(ctx);
8fdc3734 1706 EC_POINT_free(generator);
23a1d5e9 1707 BN_CTX_free(new_ctx);
3aef36ff 1708 EC_nistp224_pre_comp_free(pre);
0f113f3e
MC
1709 return ret;
1710}
04daec86
BM
1711
1712int ec_GFp_nistp224_have_precompute_mult(const EC_GROUP *group)
0f113f3e 1713{
3aef36ff 1714 return HAVEPRECOMP(group, nistp224);
0f113f3e 1715}
396cb565 1716
04daec86 1717#endif