]> git.ipfire.org Git - thirdparty/openssl.git/blame - crypto/ec/ecp_nistp224.c
Replace overrides.
[thirdparty/openssl.git] / crypto / ec / ecp_nistp224.c
CommitLineData
04daec86
BM
1/*
2 * Written by Emilia Kasper (Google) for the OpenSSL project.
3 */
3e00b4c9 4/* Copyright 2011 Google Inc.
04daec86 5 *
3e00b4c9 6 * Licensed under the Apache License, Version 2.0 (the "License");
04daec86 7 *
3e00b4c9
BM
8 * you may not use this file except in compliance with the License.
9 * You may obtain a copy of the License at
04daec86 10 *
3e00b4c9 11 * http://www.apache.org/licenses/LICENSE-2.0
04daec86 12 *
3e00b4c9
BM
13 * Unless required by applicable law or agreed to in writing, software
14 * distributed under the License is distributed on an "AS IS" BASIS,
15 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 * See the License for the specific language governing permissions and
17 * limitations under the License.
04daec86
BM
18 */
19
20/*
21 * A 64-bit implementation of the NIST P-224 elliptic curve point multiplication
22 *
23 * Inspired by Daniel J. Bernstein's public domain nistp224 implementation
24 * and Adam Langley's public domain 64-bit C implementation of curve25519
25 */
e0d6132b
BM
26
27#include <openssl/opensslconf.h>
effaf4de
RS
28#ifdef OPENSSL_NO_EC_NISTP_64_GCC_128
29NON_EMPTY_TRANSLATION_UNIT
30#else
e0d6132b 31
0f113f3e
MC
32# include <stdint.h>
33# include <string.h>
34# include <openssl/err.h>
35# include "ec_lcl.h"
04daec86 36
0f113f3e 37# if defined(__GNUC__) && (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 1))
396cb565 38 /* even with gcc, the typedef won't work for 32-bit platforms */
0f113f3e
MC
39typedef __uint128_t uint128_t; /* nonstandard; implemented by gcc on 64-bit
40 * platforms */
41# else
42# error "Need GCC 3.1 or later to define type uint128_t"
43# endif
04daec86
BM
44
45typedef uint8_t u8;
3e00b4c9
BM
46typedef uint64_t u64;
47typedef int64_t s64;
04daec86 48
04daec86 49/******************************************************************************/
1d97c843
TH
50/*-
51 * INTERNAL REPRESENTATION OF FIELD ELEMENTS
04daec86
BM
52 *
53 * Field elements are represented as a_0 + 2^56*a_1 + 2^112*a_2 + 2^168*a_3
3e00b4c9
BM
54 * using 64-bit coefficients called 'limbs',
55 * and sometimes (for multiplication results) as
04daec86 56 * b_0 + 2^56*b_1 + 2^112*b_2 + 2^168*b_3 + 2^224*b_4 + 2^280*b_5 + 2^336*b_6
3e00b4c9
BM
57 * using 128-bit coefficients called 'widelimbs'.
58 * A 4-limb representation is an 'felem';
59 * a 7-widelimb representation is a 'widefelem'.
60 * Even within felems, bits of adjacent limbs overlap, and we don't always
61 * reduce the representations: we ensure that inputs to each felem
04daec86
BM
62 * multiplication satisfy a_i < 2^60, so outputs satisfy b_i < 4*2^60*2^60,
63 * and fit into a 128-bit word without overflow. The coefficients are then
3e00b4c9
BM
64 * again partially reduced to obtain an felem satisfying a_i < 2^57.
65 * We only reduce to the unique minimal representation at the end of the
66 * computation.
04daec86
BM
67 */
68
3e00b4c9
BM
69typedef uint64_t limb;
70typedef uint128_t widelimb;
71
72typedef limb felem[4];
73typedef widelimb widefelem[7];
04daec86 74
0f113f3e
MC
75/*
76 * Field element represented as a byte arrary. 28*8 = 224 bits is also the
77 * group order size for the elliptic curve, and we also use this type for
78 * scalars for point multiplication.
79 */
396cb565
BM
80typedef u8 felem_bytearray[28];
81
82static const felem_bytearray nistp224_curve_params[5] = {
0f113f3e
MC
83 {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, /* p */
84 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00,
85 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01},
86 {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, /* a */
87 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFE, 0xFF, 0xFF, 0xFF, 0xFF,
88 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFE},
89 {0xB4, 0x05, 0x0A, 0x85, 0x0C, 0x04, 0xB3, 0xAB, 0xF5, 0x41, /* b */
90 0x32, 0x56, 0x50, 0x44, 0xB0, 0xB7, 0xD7, 0xBF, 0xD8, 0xBA,
91 0x27, 0x0B, 0x39, 0x43, 0x23, 0x55, 0xFF, 0xB4},
92 {0xB7, 0x0E, 0x0C, 0xBD, 0x6B, 0xB4, 0xBF, 0x7F, 0x32, 0x13, /* x */
93 0x90, 0xB9, 0x4A, 0x03, 0xC1, 0xD3, 0x56, 0xC2, 0x11, 0x22,
94 0x34, 0x32, 0x80, 0xD6, 0x11, 0x5C, 0x1D, 0x21},
95 {0xbd, 0x37, 0x63, 0x88, 0xb5, 0xf7, 0x23, 0xfb, 0x4c, 0x22, /* y */
96 0xdf, 0xe6, 0xcd, 0x43, 0x75, 0xa0, 0x5a, 0x07, 0x47, 0x64,
97 0x44, 0xd5, 0x81, 0x99, 0x85, 0x00, 0x7e, 0x34}
396cb565 98};
04daec86 99
1d97c843
TH
100/*-
101 * Precomputed multiples of the standard generator
3e00b4c9
BM
102 * Points are given in coordinates (X, Y, Z) where Z normally is 1
103 * (0 for the point at infinity).
104 * For each field element, slice a_0 is word 0, etc.
105 *
106 * The table has 2 * 16 elements, starting with the following:
107 * index | bits | point
108 * ------+---------+------------------------------
109 * 0 | 0 0 0 0 | 0G
110 * 1 | 0 0 0 1 | 1G
111 * 2 | 0 0 1 0 | 2^56G
112 * 3 | 0 0 1 1 | (2^56 + 1)G
113 * 4 | 0 1 0 0 | 2^112G
114 * 5 | 0 1 0 1 | (2^112 + 1)G
115 * 6 | 0 1 1 0 | (2^112 + 2^56)G
116 * 7 | 0 1 1 1 | (2^112 + 2^56 + 1)G
117 * 8 | 1 0 0 0 | 2^168G
118 * 9 | 1 0 0 1 | (2^168 + 1)G
119 * 10 | 1 0 1 0 | (2^168 + 2^56)G
120 * 11 | 1 0 1 1 | (2^168 + 2^56 + 1)G
121 * 12 | 1 1 0 0 | (2^168 + 2^112)G
122 * 13 | 1 1 0 1 | (2^168 + 2^112 + 1)G
123 * 14 | 1 1 1 0 | (2^168 + 2^112 + 2^56)G
124 * 15 | 1 1 1 1 | (2^168 + 2^112 + 2^56 + 1)G
125 * followed by a copy of this with each element multiplied by 2^28.
126 *
127 * The reason for this is so that we can clock bits into four different
128 * locations when doing simple scalar multiplies against the base point,
129 * and then another four locations using the second 16 elements.
130 */
4eb504ae
AP
131static const felem gmul[2][16][3] = {
132{{{0, 0, 0, 0},
133 {0, 0, 0, 0},
134 {0, 0, 0, 0}},
135 {{0x3280d6115c1d21, 0xc1d356c2112234, 0x7f321390b94a03, 0xb70e0cbd6bb4bf},
136 {0xd5819985007e34, 0x75a05a07476444, 0xfb4c22dfe6cd43, 0xbd376388b5f723},
137 {1, 0, 0, 0}},
138 {{0xfd9675666ebbe9, 0xbca7664d40ce5e, 0x2242df8d8a2a43, 0x1f49bbb0f99bc5},
139 {0x29e0b892dc9c43, 0xece8608436e662, 0xdc858f185310d0, 0x9812dd4eb8d321},
140 {1, 0, 0, 0}},
141 {{0x6d3e678d5d8eb8, 0x559eed1cb362f1, 0x16e9a3bbce8a3f, 0xeedcccd8c2a748},
142 {0xf19f90ed50266d, 0xabf2b4bf65f9df, 0x313865468fafec, 0x5cb379ba910a17},
143 {1, 0, 0, 0}},
144 {{0x0641966cab26e3, 0x91fb2991fab0a0, 0xefec27a4e13a0b, 0x0499aa8a5f8ebe},
145 {0x7510407766af5d, 0x84d929610d5450, 0x81d77aae82f706, 0x6916f6d4338c5b},
146 {1, 0, 0, 0}},
147 {{0xea95ac3b1f15c6, 0x086000905e82d4, 0xdd323ae4d1c8b1, 0x932b56be7685a3},
148 {0x9ef93dea25dbbf, 0x41665960f390f0, 0xfdec76dbe2a8a7, 0x523e80f019062a},
149 {1, 0, 0, 0}},
150 {{0x822fdd26732c73, 0xa01c83531b5d0f, 0x363f37347c1ba4, 0xc391b45c84725c},
151 {0xbbd5e1b2d6ad24, 0xddfbcde19dfaec, 0xc393da7e222a7f, 0x1efb7890ede244},
152 {1, 0, 0, 0}},
153 {{0x4c9e90ca217da1, 0xd11beca79159bb, 0xff8d33c2c98b7c, 0x2610b39409f849},
154 {0x44d1352ac64da0, 0xcdbb7b2c46b4fb, 0x966c079b753c89, 0xfe67e4e820b112},
155 {1, 0, 0, 0}},
156 {{0xe28cae2df5312d, 0xc71b61d16f5c6e, 0x79b7619a3e7c4c, 0x05c73240899b47},
157 {0x9f7f6382c73e3a, 0x18615165c56bda, 0x641fab2116fd56, 0x72855882b08394},
158 {1, 0, 0, 0}},
159 {{0x0469182f161c09, 0x74a98ca8d00fb5, 0xb89da93489a3e0, 0x41c98768fb0c1d},
160 {0xe5ea05fb32da81, 0x3dce9ffbca6855, 0x1cfe2d3fbf59e6, 0x0e5e03408738a7},
161 {1, 0, 0, 0}},
162 {{0xdab22b2333e87f, 0x4430137a5dd2f6, 0xe03ab9f738beb8, 0xcb0c5d0dc34f24},
163 {0x764a7df0c8fda5, 0x185ba5c3fa2044, 0x9281d688bcbe50, 0xc40331df893881},
164 {1, 0, 0, 0}},
165 {{0xb89530796f0f60, 0xade92bd26909a3, 0x1a0c83fb4884da, 0x1765bf22a5a984},
166 {0x772a9ee75db09e, 0x23bc6c67cec16f, 0x4c1edba8b14e2f, 0xe2a215d9611369},
167 {1, 0, 0, 0}},
168 {{0x571e509fb5efb3, 0xade88696410552, 0xc8ae85fada74fe, 0x6c7e4be83bbde3},
169 {0xff9f51160f4652, 0xb47ce2495a6539, 0xa2946c53b582f4, 0x286d2db3ee9a60},
170 {1, 0, 0, 0}},
171 {{0x40bbd5081a44af, 0x0995183b13926c, 0xbcefba6f47f6d0, 0x215619e9cc0057},
172 {0x8bc94d3b0df45e, 0xf11c54a3694f6f, 0x8631b93cdfe8b5, 0xe7e3f4b0982db9},
173 {1, 0, 0, 0}},
174 {{0xb17048ab3e1c7b, 0xac38f36ff8a1d8, 0x1c29819435d2c6, 0xc813132f4c07e9},
175 {0x2891425503b11f, 0x08781030579fea, 0xf5426ba5cc9674, 0x1e28ebf18562bc},
176 {1, 0, 0, 0}},
177 {{0x9f31997cc864eb, 0x06cd91d28b5e4c, 0xff17036691a973, 0xf1aef351497c58},
178 {0xdd1f2d600564ff, 0xdead073b1402db, 0x74a684435bd693, 0xeea7471f962558},
179 {1, 0, 0, 0}}},
0f113f3e
MC
180{{{0, 0, 0, 0},
181 {0, 0, 0, 0},
182 {0, 0, 0, 0}},
183 {{0x9665266dddf554, 0x9613d78b60ef2d, 0xce27a34cdba417, 0xd35ab74d6afc31},
184 {0x85ccdd22deb15e, 0x2137e5783a6aab, 0xa141cffd8c93c6, 0x355a1830e90f2d},
185 {1, 0, 0, 0}},
186 {{0x1a494eadaade65, 0xd6da4da77fe53c, 0xe7992996abec86, 0x65c3553c6090e3},
187 {0xfa610b1fb09346, 0xf1c6540b8a4aaf, 0xc51a13ccd3cbab, 0x02995b1b18c28a},
188 {1, 0, 0, 0}},
189 {{0x7874568e7295ef, 0x86b419fbe38d04, 0xdc0690a7550d9a, 0xd3966a44beac33},
190 {0x2b7280ec29132f, 0xbeaa3b6a032df3, 0xdc7dd88ae41200, 0xd25e2513e3a100},
191 {1, 0, 0, 0}},
192 {{0x924857eb2efafd, 0xac2bce41223190, 0x8edaa1445553fc, 0x825800fd3562d5},
193 {0x8d79148ea96621, 0x23a01c3dd9ed8d, 0xaf8b219f9416b5, 0xd8db0cc277daea},
194 {1, 0, 0, 0}},
195 {{0x76a9c3b1a700f0, 0xe9acd29bc7e691, 0x69212d1a6b0327, 0x6322e97fe154be},
196 {0x469fc5465d62aa, 0x8d41ed18883b05, 0x1f8eae66c52b88, 0xe4fcbe9325be51},
197 {1, 0, 0, 0}},
198 {{0x825fdf583cac16, 0x020b857c7b023a, 0x683c17744b0165, 0x14ffd0a2daf2f1},
199 {0x323b36184218f9, 0x4944ec4e3b47d4, 0xc15b3080841acf, 0x0bced4b01a28bb},
200 {1, 0, 0, 0}},
201 {{0x92ac22230df5c4, 0x52f33b4063eda8, 0xcb3f19870c0c93, 0x40064f2ba65233},
202 {0xfe16f0924f8992, 0x012da25af5b517, 0x1a57bb24f723a6, 0x06f8bc76760def},
203 {1, 0, 0, 0}},
204 {{0x4a7084f7817cb9, 0xbcab0738ee9a78, 0x3ec11e11d9c326, 0xdc0fe90e0f1aae},
205 {0xcf639ea5f98390, 0x5c350aa22ffb74, 0x9afae98a4047b7, 0x956ec2d617fc45},
206 {1, 0, 0, 0}},
207 {{0x4306d648c1be6a, 0x9247cd8bc9a462, 0xf5595e377d2f2e, 0xbd1c3caff1a52e},
208 {0x045e14472409d0, 0x29f3e17078f773, 0x745a602b2d4f7d, 0x191837685cdfbb},
209 {1, 0, 0, 0}},
210 {{0x5b6ee254a8cb79, 0x4953433f5e7026, 0xe21faeb1d1def4, 0xc4c225785c09de},
211 {0x307ce7bba1e518, 0x31b125b1036db8, 0x47e91868839e8f, 0xc765866e33b9f3},
212 {1, 0, 0, 0}},
213 {{0x3bfece24f96906, 0x4794da641e5093, 0xde5df64f95db26, 0x297ecd89714b05},
214 {0x701bd3ebb2c3aa, 0x7073b4f53cb1d5, 0x13c5665658af16, 0x9895089d66fe58},
215 {1, 0, 0, 0}},
216 {{0x0fef05f78c4790, 0x2d773633b05d2e, 0x94229c3a951c94, 0xbbbd70df4911bb},
217 {0xb2c6963d2c1168, 0x105f47a72b0d73, 0x9fdf6111614080, 0x7b7e94b39e67b0},
218 {1, 0, 0, 0}},
219 {{0xad1a7d6efbe2b3, 0xf012482c0da69d, 0x6b3bdf12438345, 0x40d7558d7aa4d9},
220 {0x8a09fffb5c6d3d, 0x9a356e5d9ffd38, 0x5973f15f4f9b1c, 0xdcd5f59f63c3ea},
221 {1, 0, 0, 0}},
222 {{0xacf39f4c5ca7ab, 0x4c8071cc5fd737, 0xc64e3602cd1184, 0x0acd4644c9abba},
223 {0x6c011a36d8bf6e, 0xfecd87ba24e32a, 0x19f6f56574fad8, 0x050b204ced9405},
224 {1, 0, 0, 0}},
225 {{0xed4f1cae7d9a96, 0x5ceef7ad94c40a, 0x778e4a3bf3ef9b, 0x7405783dc3b55e},
226 {0x32477c61b6e8c6, 0xb46a97570f018b, 0x91176d0a7e95d1, 0x3df90fbc4c7d0e},
227 {1, 0, 0, 0}}}
228};
04daec86
BM
229
230/* Precomputation for the group generator. */
3aef36ff 231struct nistp224_pre_comp_st {
0f113f3e
MC
232 felem g_pre_comp[2][16][3];
233 int references;
3aef36ff 234};
04daec86
BM
235
236const EC_METHOD *EC_GFp_nistp224_method(void)
0f113f3e
MC
237{
238 static const EC_METHOD ret = {
239 EC_FLAGS_DEFAULT_OCT,
240 NID_X9_62_prime_field,
241 ec_GFp_nistp224_group_init,
242 ec_GFp_simple_group_finish,
243 ec_GFp_simple_group_clear_finish,
244 ec_GFp_nist_group_copy,
245 ec_GFp_nistp224_group_set_curve,
246 ec_GFp_simple_group_get_curve,
247 ec_GFp_simple_group_get_degree,
e5b2ea0a 248 0, /* group_order_bits */
0f113f3e
MC
249 ec_GFp_simple_group_check_discriminant,
250 ec_GFp_simple_point_init,
251 ec_GFp_simple_point_finish,
252 ec_GFp_simple_point_clear_finish,
253 ec_GFp_simple_point_copy,
254 ec_GFp_simple_point_set_to_infinity,
255 ec_GFp_simple_set_Jprojective_coordinates_GFp,
256 ec_GFp_simple_get_Jprojective_coordinates_GFp,
257 ec_GFp_simple_point_set_affine_coordinates,
258 ec_GFp_nistp224_point_get_affine_coordinates,
259 0 /* point_set_compressed_coordinates */ ,
260 0 /* point2oct */ ,
261 0 /* oct2point */ ,
262 ec_GFp_simple_add,
263 ec_GFp_simple_dbl,
264 ec_GFp_simple_invert,
265 ec_GFp_simple_is_at_infinity,
266 ec_GFp_simple_is_on_curve,
267 ec_GFp_simple_cmp,
268 ec_GFp_simple_make_affine,
269 ec_GFp_simple_points_make_affine,
270 ec_GFp_nistp224_points_mul,
271 ec_GFp_nistp224_precompute_mult,
272 ec_GFp_nistp224_have_precompute_mult,
273 ec_GFp_nist_field_mul,
274 ec_GFp_nist_field_sqr,
275 0 /* field_div */ ,
276 0 /* field_encode */ ,
277 0 /* field_decode */ ,
278 0 /* field_set_to_one */
279 };
280
281 return &ret;
282}
283
284/*
285 * Helper functions to convert field elements to/from internal representation
286 */
3e00b4c9 287static void bin28_to_felem(felem out, const u8 in[28])
0f113f3e
MC
288{
289 out[0] = *((const uint64_t *)(in)) & 0x00ffffffffffffff;
290 out[1] = (*((const uint64_t *)(in + 7))) & 0x00ffffffffffffff;
291 out[2] = (*((const uint64_t *)(in + 14))) & 0x00ffffffffffffff;
9fbbdd73 292 out[3] = (*((const uint64_t *)(in+20))) >> 8;
0f113f3e 293}
04daec86 294
3e00b4c9 295static void felem_to_bin28(u8 out[28], const felem in)
0f113f3e
MC
296{
297 unsigned i;
298 for (i = 0; i < 7; ++i) {
299 out[i] = in[0] >> (8 * i);
300 out[i + 7] = in[1] >> (8 * i);
301 out[i + 14] = in[2] >> (8 * i);
302 out[i + 21] = in[3] >> (8 * i);
303 }
304}
04daec86
BM
305
306/* To preserve endianness when using BN_bn2bin and BN_bin2bn */
307static void flip_endian(u8 *out, const u8 *in, unsigned len)
0f113f3e
MC
308{
309 unsigned i;
310 for (i = 0; i < len; ++i)
311 out[i] = in[len - 1 - i];
312}
04daec86
BM
313
314/* From OpenSSL BIGNUM to internal representation */
3e00b4c9 315static int BN_to_felem(felem out, const BIGNUM *bn)
0f113f3e
MC
316{
317 felem_bytearray b_in;
318 felem_bytearray b_out;
319 unsigned num_bytes;
320
321 /* BN_bn2bin eats leading zeroes */
16f8d4eb 322 memset(b_out, 0, sizeof(b_out));
0f113f3e
MC
323 num_bytes = BN_num_bytes(bn);
324 if (num_bytes > sizeof b_out) {
325 ECerr(EC_F_BN_TO_FELEM, EC_R_BIGNUM_OUT_OF_RANGE);
326 return 0;
327 }
328 if (BN_is_negative(bn)) {
329 ECerr(EC_F_BN_TO_FELEM, EC_R_BIGNUM_OUT_OF_RANGE);
330 return 0;
331 }
332 num_bytes = BN_bn2bin(bn, b_in);
333 flip_endian(b_out, b_in, num_bytes);
334 bin28_to_felem(out, b_out);
335 return 1;
336}
04daec86
BM
337
338/* From internal representation to OpenSSL BIGNUM */
3e00b4c9 339static BIGNUM *felem_to_BN(BIGNUM *out, const felem in)
0f113f3e
MC
340{
341 felem_bytearray b_in, b_out;
342 felem_to_bin28(b_in, in);
343 flip_endian(b_out, b_in, sizeof b_out);
344 return BN_bin2bn(b_out, sizeof b_out, out);
345}
04daec86
BM
346
347/******************************************************************************/
3a83462d 348/*-
0f113f3e 349 * FIELD OPERATIONS
04daec86
BM
350 *
351 * Field operations, using the internal representation of field elements.
352 * NB! These operations are specific to our point multiplication and cannot be
353 * expected to be correct in general - e.g., multiplication with a large scalar
354 * will cause an overflow.
355 *
356 */
357
3e00b4c9 358static void felem_one(felem out)
0f113f3e
MC
359{
360 out[0] = 1;
361 out[1] = 0;
362 out[2] = 0;
363 out[3] = 0;
364}
3e00b4c9
BM
365
366static void felem_assign(felem out, const felem in)
0f113f3e
MC
367{
368 out[0] = in[0];
369 out[1] = in[1];
370 out[2] = in[2];
371 out[3] = in[3];
372}
3e00b4c9 373
04daec86 374/* Sum two field elements: out += in */
3e00b4c9 375static void felem_sum(felem out, const felem in)
0f113f3e
MC
376{
377 out[0] += in[0];
378 out[1] += in[1];
379 out[2] += in[2];
380 out[3] += in[3];
381}
04daec86 382
3e00b4c9
BM
383/* Get negative value: out = -in */
384/* Assumes in[i] < 2^57 */
385static void felem_neg(felem out, const felem in)
0f113f3e
MC
386{
387 static const limb two58p2 = (((limb) 1) << 58) + (((limb) 1) << 2);
388 static const limb two58m2 = (((limb) 1) << 58) - (((limb) 1) << 2);
389 static const limb two58m42m2 = (((limb) 1) << 58) -
390 (((limb) 1) << 42) - (((limb) 1) << 2);
391
392 /* Set to 0 mod 2^224-2^96+1 to ensure out > in */
393 out[0] = two58p2 - in[0];
394 out[1] = two58m42m2 - in[1];
395 out[2] = two58m2 - in[2];
396 out[3] = two58m2 - in[3];
397}
3e00b4c9 398
04daec86
BM
399/* Subtract field elements: out -= in */
400/* Assumes in[i] < 2^57 */
3e00b4c9 401static void felem_diff(felem out, const felem in)
0f113f3e
MC
402{
403 static const limb two58p2 = (((limb) 1) << 58) + (((limb) 1) << 2);
404 static const limb two58m2 = (((limb) 1) << 58) - (((limb) 1) << 2);
405 static const limb two58m42m2 = (((limb) 1) << 58) -
406 (((limb) 1) << 42) - (((limb) 1) << 2);
407
408 /* Add 0 mod 2^224-2^96+1 to ensure out > in */
409 out[0] += two58p2;
410 out[1] += two58m42m2;
411 out[2] += two58m2;
412 out[3] += two58m2;
413
414 out[0] -= in[0];
415 out[1] -= in[1];
416 out[2] -= in[2];
417 out[3] -= in[3];
418}
04daec86 419
3e00b4c9 420/* Subtract in unreduced 128-bit mode: out -= in */
04daec86 421/* Assumes in[i] < 2^119 */
3e00b4c9 422static void widefelem_diff(widefelem out, const widefelem in)
0f113f3e
MC
423{
424 static const widelimb two120 = ((widelimb) 1) << 120;
425 static const widelimb two120m64 = (((widelimb) 1) << 120) -
426 (((widelimb) 1) << 64);
427 static const widelimb two120m104m64 = (((widelimb) 1) << 120) -
428 (((widelimb) 1) << 104) - (((widelimb) 1) << 64);
429
430 /* Add 0 mod 2^224-2^96+1 to ensure out > in */
431 out[0] += two120;
432 out[1] += two120m64;
433 out[2] += two120m64;
434 out[3] += two120;
435 out[4] += two120m104m64;
436 out[5] += two120m64;
437 out[6] += two120m64;
438
439 out[0] -= in[0];
440 out[1] -= in[1];
441 out[2] -= in[2];
442 out[3] -= in[3];
443 out[4] -= in[4];
444 out[5] -= in[5];
445 out[6] -= in[6];
446}
04daec86
BM
447
448/* Subtract in mixed mode: out128 -= in64 */
449/* in[i] < 2^63 */
3e00b4c9 450static void felem_diff_128_64(widefelem out, const felem in)
0f113f3e
MC
451{
452 static const widelimb two64p8 = (((widelimb) 1) << 64) +
453 (((widelimb) 1) << 8);
454 static const widelimb two64m8 = (((widelimb) 1) << 64) -
455 (((widelimb) 1) << 8);
456 static const widelimb two64m48m8 = (((widelimb) 1) << 64) -
457 (((widelimb) 1) << 48) - (((widelimb) 1) << 8);
458
459 /* Add 0 mod 2^224-2^96+1 to ensure out > in */
460 out[0] += two64p8;
461 out[1] += two64m48m8;
462 out[2] += two64m8;
463 out[3] += two64m8;
464
465 out[0] -= in[0];
466 out[1] -= in[1];
467 out[2] -= in[2];
468 out[3] -= in[3];
469}
470
471/*
472 * Multiply a field element by a scalar: out = out * scalar The scalars we
473 * actually use are small, so results fit without overflow
474 */
3e00b4c9 475static void felem_scalar(felem out, const limb scalar)
0f113f3e
MC
476{
477 out[0] *= scalar;
478 out[1] *= scalar;
479 out[2] *= scalar;
480 out[3] *= scalar;
481}
482
483/*
484 * Multiply an unreduced field element by a scalar: out = out * scalar The
485 * scalars we actually use are small, so results fit without overflow
486 */
3e00b4c9 487static void widefelem_scalar(widefelem out, const widelimb scalar)
0f113f3e
MC
488{
489 out[0] *= scalar;
490 out[1] *= scalar;
491 out[2] *= scalar;
492 out[3] *= scalar;
493 out[4] *= scalar;
494 out[5] *= scalar;
495 out[6] *= scalar;
496}
04daec86
BM
497
498/* Square a field element: out = in^2 */
3e00b4c9 499static void felem_square(widefelem out, const felem in)
0f113f3e
MC
500{
501 limb tmp0, tmp1, tmp2;
502 tmp0 = 2 * in[0];
503 tmp1 = 2 * in[1];
504 tmp2 = 2 * in[2];
505 out[0] = ((widelimb) in[0]) * in[0];
506 out[1] = ((widelimb) in[0]) * tmp1;
507 out[2] = ((widelimb) in[0]) * tmp2 + ((widelimb) in[1]) * in[1];
508 out[3] = ((widelimb) in[3]) * tmp0 + ((widelimb) in[1]) * tmp2;
509 out[4] = ((widelimb) in[3]) * tmp1 + ((widelimb) in[2]) * in[2];
510 out[5] = ((widelimb) in[3]) * tmp2;
511 out[6] = ((widelimb) in[3]) * in[3];
512}
04daec86
BM
513
514/* Multiply two field elements: out = in1 * in2 */
3e00b4c9 515static void felem_mul(widefelem out, const felem in1, const felem in2)
0f113f3e
MC
516{
517 out[0] = ((widelimb) in1[0]) * in2[0];
518 out[1] = ((widelimb) in1[0]) * in2[1] + ((widelimb) in1[1]) * in2[0];
519 out[2] = ((widelimb) in1[0]) * in2[2] + ((widelimb) in1[1]) * in2[1] +
4eb504ae 520 ((widelimb) in1[2]) * in2[0];
0f113f3e 521 out[3] = ((widelimb) in1[0]) * in2[3] + ((widelimb) in1[1]) * in2[2] +
4eb504ae 522 ((widelimb) in1[2]) * in2[1] + ((widelimb) in1[3]) * in2[0];
0f113f3e 523 out[4] = ((widelimb) in1[1]) * in2[3] + ((widelimb) in1[2]) * in2[2] +
4eb504ae 524 ((widelimb) in1[3]) * in2[1];
0f113f3e
MC
525 out[5] = ((widelimb) in1[2]) * in2[3] + ((widelimb) in1[3]) * in2[2];
526 out[6] = ((widelimb) in1[3]) * in2[3];
527}
04daec86 528
3a83462d
MC
529/*-
530 * Reduce seven 128-bit coefficients to four 64-bit coefficients.
3e00b4c9
BM
531 * Requires in[i] < 2^126,
532 * ensures out[0] < 2^56, out[1] < 2^56, out[2] < 2^56, out[3] <= 2^56 + 2^16 */
533static void felem_reduce(felem out, const widefelem in)
0f113f3e
MC
534{
535 static const widelimb two127p15 = (((widelimb) 1) << 127) +
536 (((widelimb) 1) << 15);
537 static const widelimb two127m71 = (((widelimb) 1) << 127) -
538 (((widelimb) 1) << 71);
539 static const widelimb two127m71m55 = (((widelimb) 1) << 127) -
540 (((widelimb) 1) << 71) - (((widelimb) 1) << 55);
541 widelimb output[5];
542
543 /* Add 0 mod 2^224-2^96+1 to ensure all differences are positive */
544 output[0] = in[0] + two127p15;
545 output[1] = in[1] + two127m71m55;
546 output[2] = in[2] + two127m71;
547 output[3] = in[3];
548 output[4] = in[4];
549
550 /* Eliminate in[4], in[5], in[6] */
551 output[4] += in[6] >> 16;
552 output[3] += (in[6] & 0xffff) << 40;
553 output[2] -= in[6];
554
555 output[3] += in[5] >> 16;
556 output[2] += (in[5] & 0xffff) << 40;
557 output[1] -= in[5];
558
559 output[2] += output[4] >> 16;
560 output[1] += (output[4] & 0xffff) << 40;
561 output[0] -= output[4];
562
563 /* Carry 2 -> 3 -> 4 */
564 output[3] += output[2] >> 56;
565 output[2] &= 0x00ffffffffffffff;
566
567 output[4] = output[3] >> 56;
568 output[3] &= 0x00ffffffffffffff;
569
570 /* Now output[2] < 2^56, output[3] < 2^56, output[4] < 2^72 */
571
572 /* Eliminate output[4] */
573 output[2] += output[4] >> 16;
574 /* output[2] < 2^56 + 2^56 = 2^57 */
575 output[1] += (output[4] & 0xffff) << 40;
576 output[0] -= output[4];
577
578 /* Carry 0 -> 1 -> 2 -> 3 */
579 output[1] += output[0] >> 56;
580 out[0] = output[0] & 0x00ffffffffffffff;
581
582 output[2] += output[1] >> 56;
583 /* output[2] < 2^57 + 2^72 */
584 out[1] = output[1] & 0x00ffffffffffffff;
585 output[3] += output[2] >> 56;
586 /* output[3] <= 2^56 + 2^16 */
587 out[2] = output[2] & 0x00ffffffffffffff;
588
50e735f9
MC
589 /*-
590 * out[0] < 2^56, out[1] < 2^56, out[2] < 2^56,
591 * out[3] <= 2^56 + 2^16 (due to final carry),
592 * so out < 2*p
593 */
0f113f3e
MC
594 out[3] = output[3];
595}
04daec86 596
3e00b4c9 597static void felem_square_reduce(felem out, const felem in)
0f113f3e
MC
598{
599 widefelem tmp;
600 felem_square(tmp, in);
601 felem_reduce(out, tmp);
602}
04daec86 603
3e00b4c9 604static void felem_mul_reduce(felem out, const felem in1, const felem in2)
0f113f3e
MC
605{
606 widefelem tmp;
607 felem_mul(tmp, in1, in2);
608 felem_reduce(out, tmp);
609}
610
611/*
612 * Reduce to unique minimal representation. Requires 0 <= in < 2*p (always
613 * call felem_reduce first)
614 */
3e00b4c9 615static void felem_contract(felem out, const felem in)
0f113f3e
MC
616{
617 static const int64_t two56 = ((limb) 1) << 56;
618 /* 0 <= in < 2*p, p = 2^224 - 2^96 + 1 */
619 /* if in > p , reduce in = in - 2^224 + 2^96 - 1 */
620 int64_t tmp[4], a;
621 tmp[0] = in[0];
622 tmp[1] = in[1];
623 tmp[2] = in[2];
624 tmp[3] = in[3];
625 /* Case 1: a = 1 iff in >= 2^224 */
626 a = (in[3] >> 56);
627 tmp[0] -= a;
628 tmp[1] += a << 40;
629 tmp[3] &= 0x00ffffffffffffff;
630 /*
631 * Case 2: a = 0 iff p <= in < 2^224, i.e., the high 128 bits are all 1
632 * and the lower part is non-zero
633 */
634 a = ((in[3] & in[2] & (in[1] | 0x000000ffffffffff)) + 1) |
635 (((int64_t) (in[0] + (in[1] & 0x000000ffffffffff)) - 1) >> 63);
636 a &= 0x00ffffffffffffff;
637 /* turn a into an all-one mask (if a = 0) or an all-zero mask */
638 a = (a - 1) >> 63;
639 /* subtract 2^224 - 2^96 + 1 if a is all-one */
640 tmp[3] &= a ^ 0xffffffffffffffff;
641 tmp[2] &= a ^ 0xffffffffffffffff;
642 tmp[1] &= (a ^ 0xffffffffffffffff) | 0x000000ffffffffff;
643 tmp[0] -= 1 & a;
644
645 /*
646 * eliminate negative coefficients: if tmp[0] is negative, tmp[1] must be
647 * non-zero, so we only need one step
648 */
649 a = tmp[0] >> 63;
650 tmp[0] += two56 & a;
651 tmp[1] -= 1 & a;
652
653 /* carry 1 -> 2 -> 3 */
654 tmp[2] += tmp[1] >> 56;
655 tmp[1] &= 0x00ffffffffffffff;
656
657 tmp[3] += tmp[2] >> 56;
658 tmp[2] &= 0x00ffffffffffffff;
659
660 /* Now 0 <= out < p */
661 out[0] = tmp[0];
662 out[1] = tmp[1];
663 out[2] = tmp[2];
664 out[3] = tmp[3];
665}
666
667/*
668 * Zero-check: returns 1 if input is 0, and 0 otherwise. We know that field
669 * elements are reduced to in < 2^225, so we only need to check three cases:
670 * 0, 2^224 - 2^96 + 1, and 2^225 - 2^97 + 2
671 */
3e00b4c9 672static limb felem_is_zero(const felem in)
0f113f3e
MC
673{
674 limb zero, two224m96p1, two225m97p2;
675
676 zero = in[0] | in[1] | in[2] | in[3];
677 zero = (((int64_t) (zero) - 1) >> 63) & 1;
678 two224m96p1 = (in[0] ^ 1) | (in[1] ^ 0x00ffff0000000000)
679 | (in[2] ^ 0x00ffffffffffffff) | (in[3] ^ 0x00ffffffffffffff);
680 two224m96p1 = (((int64_t) (two224m96p1) - 1) >> 63) & 1;
681 two225m97p2 = (in[0] ^ 2) | (in[1] ^ 0x00fffe0000000000)
682 | (in[2] ^ 0x00ffffffffffffff) | (in[3] ^ 0x01ffffffffffffff);
683 two225m97p2 = (((int64_t) (two225m97p2) - 1) >> 63) & 1;
684 return (zero | two224m96p1 | two225m97p2);
685}
04daec86 686
3e00b4c9 687static limb felem_is_zero_int(const felem in)
0f113f3e
MC
688{
689 return (int)(felem_is_zero(in) & ((limb) 1));
690}
3e00b4c9 691
04daec86
BM
692/* Invert a field element */
693/* Computation chain copied from djb's code */
3e00b4c9 694static void felem_inv(felem out, const felem in)
0f113f3e
MC
695{
696 felem ftmp, ftmp2, ftmp3, ftmp4;
697 widefelem tmp;
698 unsigned i;
699
700 felem_square(tmp, in);
701 felem_reduce(ftmp, tmp); /* 2 */
702 felem_mul(tmp, in, ftmp);
703 felem_reduce(ftmp, tmp); /* 2^2 - 1 */
704 felem_square(tmp, ftmp);
705 felem_reduce(ftmp, tmp); /* 2^3 - 2 */
706 felem_mul(tmp, in, ftmp);
707 felem_reduce(ftmp, tmp); /* 2^3 - 1 */
708 felem_square(tmp, ftmp);
709 felem_reduce(ftmp2, tmp); /* 2^4 - 2 */
710 felem_square(tmp, ftmp2);
711 felem_reduce(ftmp2, tmp); /* 2^5 - 4 */
712 felem_square(tmp, ftmp2);
713 felem_reduce(ftmp2, tmp); /* 2^6 - 8 */
714 felem_mul(tmp, ftmp2, ftmp);
715 felem_reduce(ftmp, tmp); /* 2^6 - 1 */
716 felem_square(tmp, ftmp);
717 felem_reduce(ftmp2, tmp); /* 2^7 - 2 */
718 for (i = 0; i < 5; ++i) { /* 2^12 - 2^6 */
719 felem_square(tmp, ftmp2);
720 felem_reduce(ftmp2, tmp);
721 }
722 felem_mul(tmp, ftmp2, ftmp);
723 felem_reduce(ftmp2, tmp); /* 2^12 - 1 */
724 felem_square(tmp, ftmp2);
725 felem_reduce(ftmp3, tmp); /* 2^13 - 2 */
726 for (i = 0; i < 11; ++i) { /* 2^24 - 2^12 */
727 felem_square(tmp, ftmp3);
728 felem_reduce(ftmp3, tmp);
729 }
730 felem_mul(tmp, ftmp3, ftmp2);
731 felem_reduce(ftmp2, tmp); /* 2^24 - 1 */
732 felem_square(tmp, ftmp2);
733 felem_reduce(ftmp3, tmp); /* 2^25 - 2 */
734 for (i = 0; i < 23; ++i) { /* 2^48 - 2^24 */
735 felem_square(tmp, ftmp3);
736 felem_reduce(ftmp3, tmp);
737 }
738 felem_mul(tmp, ftmp3, ftmp2);
739 felem_reduce(ftmp3, tmp); /* 2^48 - 1 */
740 felem_square(tmp, ftmp3);
741 felem_reduce(ftmp4, tmp); /* 2^49 - 2 */
742 for (i = 0; i < 47; ++i) { /* 2^96 - 2^48 */
743 felem_square(tmp, ftmp4);
744 felem_reduce(ftmp4, tmp);
745 }
746 felem_mul(tmp, ftmp3, ftmp4);
747 felem_reduce(ftmp3, tmp); /* 2^96 - 1 */
748 felem_square(tmp, ftmp3);
749 felem_reduce(ftmp4, tmp); /* 2^97 - 2 */
750 for (i = 0; i < 23; ++i) { /* 2^120 - 2^24 */
751 felem_square(tmp, ftmp4);
752 felem_reduce(ftmp4, tmp);
753 }
754 felem_mul(tmp, ftmp2, ftmp4);
755 felem_reduce(ftmp2, tmp); /* 2^120 - 1 */
756 for (i = 0; i < 6; ++i) { /* 2^126 - 2^6 */
757 felem_square(tmp, ftmp2);
758 felem_reduce(ftmp2, tmp);
759 }
760 felem_mul(tmp, ftmp2, ftmp);
761 felem_reduce(ftmp, tmp); /* 2^126 - 1 */
762 felem_square(tmp, ftmp);
763 felem_reduce(ftmp, tmp); /* 2^127 - 2 */
764 felem_mul(tmp, ftmp, in);
765 felem_reduce(ftmp, tmp); /* 2^127 - 1 */
766 for (i = 0; i < 97; ++i) { /* 2^224 - 2^97 */
767 felem_square(tmp, ftmp);
768 felem_reduce(ftmp, tmp);
769 }
770 felem_mul(tmp, ftmp, ftmp3);
771 felem_reduce(out, tmp); /* 2^224 - 2^96 - 1 */
772}
773
774/*
775 * Copy in constant time: if icopy == 1, copy in to out, if icopy == 0, copy
776 * out to itself.
777 */
778static void copy_conditional(felem out, const felem in, limb icopy)
779{
780 unsigned i;
781 /*
782 * icopy is a (64-bit) 0 or 1, so copy is either all-zero or all-one
783 */
784 const limb copy = -icopy;
785 for (i = 0; i < 4; ++i) {
786 const limb tmp = copy & (in[i] ^ out[i]);
787 out[i] ^= tmp;
788 }
789}
04daec86 790
04daec86 791/******************************************************************************/
3a83462d 792/*-
0f113f3e 793 * ELLIPTIC CURVE POINT OPERATIONS
04daec86
BM
794 *
795 * Points are represented in Jacobian projective coordinates:
796 * (X, Y, Z) corresponds to the affine point (X/Z^2, Y/Z^3),
797 * or to the point at infinity if Z == 0.
798 *
799 */
800
1d97c843
TH
801/*-
802 * Double an elliptic curve point:
04daec86
BM
803 * (X', Y', Z') = 2 * (X, Y, Z), where
804 * X' = (3 * (X - Z^2) * (X + Z^2))^2 - 8 * X * Y^2
805 * Y' = 3 * (X - Z^2) * (X + Z^2) * (4 * X * Y^2 - X') - 8 * Y^2
806 * Z' = (Y + Z)^2 - Y^2 - Z^2 = 2 * Y * Z
807 * Outputs can equal corresponding inputs, i.e., x_out == x_in is allowed,
0f113f3e 808 * while x_out == y_in is not (maybe this works, but it's not tested).
1d97c843 809 */
04daec86 810static void
3e00b4c9
BM
811point_double(felem x_out, felem y_out, felem z_out,
812 const felem x_in, const felem y_in, const felem z_in)
0f113f3e
MC
813{
814 widefelem tmp, tmp2;
815 felem delta, gamma, beta, alpha, ftmp, ftmp2;
816
817 felem_assign(ftmp, x_in);
818 felem_assign(ftmp2, x_in);
819
820 /* delta = z^2 */
821 felem_square(tmp, z_in);
822 felem_reduce(delta, tmp);
823
824 /* gamma = y^2 */
825 felem_square(tmp, y_in);
826 felem_reduce(gamma, tmp);
827
828 /* beta = x*gamma */
829 felem_mul(tmp, x_in, gamma);
830 felem_reduce(beta, tmp);
831
832 /* alpha = 3*(x-delta)*(x+delta) */
833 felem_diff(ftmp, delta);
834 /* ftmp[i] < 2^57 + 2^58 + 2 < 2^59 */
835 felem_sum(ftmp2, delta);
836 /* ftmp2[i] < 2^57 + 2^57 = 2^58 */
837 felem_scalar(ftmp2, 3);
838 /* ftmp2[i] < 3 * 2^58 < 2^60 */
839 felem_mul(tmp, ftmp, ftmp2);
840 /* tmp[i] < 2^60 * 2^59 * 4 = 2^121 */
841 felem_reduce(alpha, tmp);
842
843 /* x' = alpha^2 - 8*beta */
844 felem_square(tmp, alpha);
845 /* tmp[i] < 4 * 2^57 * 2^57 = 2^116 */
846 felem_assign(ftmp, beta);
847 felem_scalar(ftmp, 8);
848 /* ftmp[i] < 8 * 2^57 = 2^60 */
849 felem_diff_128_64(tmp, ftmp);
850 /* tmp[i] < 2^116 + 2^64 + 8 < 2^117 */
851 felem_reduce(x_out, tmp);
852
853 /* z' = (y + z)^2 - gamma - delta */
854 felem_sum(delta, gamma);
855 /* delta[i] < 2^57 + 2^57 = 2^58 */
856 felem_assign(ftmp, y_in);
857 felem_sum(ftmp, z_in);
858 /* ftmp[i] < 2^57 + 2^57 = 2^58 */
859 felem_square(tmp, ftmp);
860 /* tmp[i] < 4 * 2^58 * 2^58 = 2^118 */
861 felem_diff_128_64(tmp, delta);
862 /* tmp[i] < 2^118 + 2^64 + 8 < 2^119 */
863 felem_reduce(z_out, tmp);
864
865 /* y' = alpha*(4*beta - x') - 8*gamma^2 */
866 felem_scalar(beta, 4);
867 /* beta[i] < 4 * 2^57 = 2^59 */
868 felem_diff(beta, x_out);
869 /* beta[i] < 2^59 + 2^58 + 2 < 2^60 */
870 felem_mul(tmp, alpha, beta);
871 /* tmp[i] < 4 * 2^57 * 2^60 = 2^119 */
872 felem_square(tmp2, gamma);
873 /* tmp2[i] < 4 * 2^57 * 2^57 = 2^116 */
874 widefelem_scalar(tmp2, 8);
875 /* tmp2[i] < 8 * 2^116 = 2^119 */
876 widefelem_diff(tmp, tmp2);
877 /* tmp[i] < 2^119 + 2^120 < 2^121 */
878 felem_reduce(y_out, tmp);
879}
04daec86 880
1d97c843
TH
881/*-
882 * Add two elliptic curve points:
04daec86
BM
883 * (X_1, Y_1, Z_1) + (X_2, Y_2, Z_2) = (X_3, Y_3, Z_3), where
884 * X_3 = (Z_1^3 * Y_2 - Z_2^3 * Y_1)^2 - (Z_1^2 * X_2 - Z_2^2 * X_1)^3 -
885 * 2 * Z_2^2 * X_1 * (Z_1^2 * X_2 - Z_2^2 * X_1)^2
886 * Y_3 = (Z_1^3 * Y_2 - Z_2^3 * Y_1) * (Z_2^2 * X_1 * (Z_1^2 * X_2 - Z_2^2 * X_1)^2 - X_3) -
887 * Z_2^3 * Y_1 * (Z_1^2 * X_2 - Z_2^2 * X_1)^3
3e00b4c9
BM
888 * Z_3 = (Z_1^2 * X_2 - Z_2^2 * X_1) * (Z_1 * Z_2)
889 *
890 * This runs faster if 'mixed' is set, which requires Z_2 = 1 or Z_2 = 0.
891 */
04daec86 892
0f113f3e
MC
893/*
894 * This function is not entirely constant-time: it includes a branch for
895 * checking whether the two input points are equal, (while not equal to the
896 * point at infinity). This case never happens during single point
897 * multiplication, so there is no timing leak for ECDH or ECDSA signing.
898 */
3e00b4c9 899static void point_add(felem x3, felem y3, felem z3,
0f113f3e
MC
900 const felem x1, const felem y1, const felem z1,
901 const int mixed, const felem x2, const felem y2,
902 const felem z2)
903{
904 felem ftmp, ftmp2, ftmp3, ftmp4, ftmp5, x_out, y_out, z_out;
905 widefelem tmp, tmp2;
906 limb z1_is_zero, z2_is_zero, x_equal, y_equal;
907
908 if (!mixed) {
909 /* ftmp2 = z2^2 */
910 felem_square(tmp, z2);
911 felem_reduce(ftmp2, tmp);
912
913 /* ftmp4 = z2^3 */
914 felem_mul(tmp, ftmp2, z2);
915 felem_reduce(ftmp4, tmp);
916
917 /* ftmp4 = z2^3*y1 */
918 felem_mul(tmp2, ftmp4, y1);
919 felem_reduce(ftmp4, tmp2);
920
921 /* ftmp2 = z2^2*x1 */
922 felem_mul(tmp2, ftmp2, x1);
923 felem_reduce(ftmp2, tmp2);
924 } else {
925 /*
926 * We'll assume z2 = 1 (special case z2 = 0 is handled later)
927 */
928
929 /* ftmp4 = z2^3*y1 */
930 felem_assign(ftmp4, y1);
931
932 /* ftmp2 = z2^2*x1 */
933 felem_assign(ftmp2, x1);
934 }
935
936 /* ftmp = z1^2 */
937 felem_square(tmp, z1);
938 felem_reduce(ftmp, tmp);
939
940 /* ftmp3 = z1^3 */
941 felem_mul(tmp, ftmp, z1);
942 felem_reduce(ftmp3, tmp);
943
944 /* tmp = z1^3*y2 */
945 felem_mul(tmp, ftmp3, y2);
946 /* tmp[i] < 4 * 2^57 * 2^57 = 2^116 */
947
948 /* ftmp3 = z1^3*y2 - z2^3*y1 */
949 felem_diff_128_64(tmp, ftmp4);
950 /* tmp[i] < 2^116 + 2^64 + 8 < 2^117 */
951 felem_reduce(ftmp3, tmp);
952
953 /* tmp = z1^2*x2 */
954 felem_mul(tmp, ftmp, x2);
955 /* tmp[i] < 4 * 2^57 * 2^57 = 2^116 */
956
957 /* ftmp = z1^2*x2 - z2^2*x1 */
958 felem_diff_128_64(tmp, ftmp2);
959 /* tmp[i] < 2^116 + 2^64 + 8 < 2^117 */
960 felem_reduce(ftmp, tmp);
961
962 /*
963 * the formulae are incorrect if the points are equal so we check for
964 * this and do doubling if this happens
965 */
966 x_equal = felem_is_zero(ftmp);
967 y_equal = felem_is_zero(ftmp3);
968 z1_is_zero = felem_is_zero(z1);
969 z2_is_zero = felem_is_zero(z2);
970 /* In affine coordinates, (X_1, Y_1) == (X_2, Y_2) */
971 if (x_equal && y_equal && !z1_is_zero && !z2_is_zero) {
972 point_double(x3, y3, z3, x1, y1, z1);
973 return;
974 }
975
976 /* ftmp5 = z1*z2 */
977 if (!mixed) {
978 felem_mul(tmp, z1, z2);
979 felem_reduce(ftmp5, tmp);
980 } else {
981 /* special case z2 = 0 is handled later */
982 felem_assign(ftmp5, z1);
983 }
984
985 /* z_out = (z1^2*x2 - z2^2*x1)*(z1*z2) */
986 felem_mul(tmp, ftmp, ftmp5);
987 felem_reduce(z_out, tmp);
988
989 /* ftmp = (z1^2*x2 - z2^2*x1)^2 */
990 felem_assign(ftmp5, ftmp);
991 felem_square(tmp, ftmp);
992 felem_reduce(ftmp, tmp);
993
994 /* ftmp5 = (z1^2*x2 - z2^2*x1)^3 */
995 felem_mul(tmp, ftmp, ftmp5);
996 felem_reduce(ftmp5, tmp);
997
998 /* ftmp2 = z2^2*x1*(z1^2*x2 - z2^2*x1)^2 */
999 felem_mul(tmp, ftmp2, ftmp);
1000 felem_reduce(ftmp2, tmp);
1001
1002 /* tmp = z2^3*y1*(z1^2*x2 - z2^2*x1)^3 */
1003 felem_mul(tmp, ftmp4, ftmp5);
1004 /* tmp[i] < 4 * 2^57 * 2^57 = 2^116 */
1005
1006 /* tmp2 = (z1^3*y2 - z2^3*y1)^2 */
1007 felem_square(tmp2, ftmp3);
1008 /* tmp2[i] < 4 * 2^57 * 2^57 < 2^116 */
1009
1010 /* tmp2 = (z1^3*y2 - z2^3*y1)^2 - (z1^2*x2 - z2^2*x1)^3 */
1011 felem_diff_128_64(tmp2, ftmp5);
1012 /* tmp2[i] < 2^116 + 2^64 + 8 < 2^117 */
1013
1014 /* ftmp5 = 2*z2^2*x1*(z1^2*x2 - z2^2*x1)^2 */
1015 felem_assign(ftmp5, ftmp2);
1016 felem_scalar(ftmp5, 2);
1017 /* ftmp5[i] < 2 * 2^57 = 2^58 */
1018
50e735f9
MC
1019 /*-
1020 * x_out = (z1^3*y2 - z2^3*y1)^2 - (z1^2*x2 - z2^2*x1)^3 -
1021 * 2*z2^2*x1*(z1^2*x2 - z2^2*x1)^2
1022 */
0f113f3e
MC
1023 felem_diff_128_64(tmp2, ftmp5);
1024 /* tmp2[i] < 2^117 + 2^64 + 8 < 2^118 */
1025 felem_reduce(x_out, tmp2);
1026
1027 /* ftmp2 = z2^2*x1*(z1^2*x2 - z2^2*x1)^2 - x_out */
1028 felem_diff(ftmp2, x_out);
1029 /* ftmp2[i] < 2^57 + 2^58 + 2 < 2^59 */
1030
1031 /*
1032 * tmp2 = (z1^3*y2 - z2^3*y1)*(z2^2*x1*(z1^2*x2 - z2^2*x1)^2 - x_out)
1033 */
1034 felem_mul(tmp2, ftmp3, ftmp2);
1035 /* tmp2[i] < 4 * 2^57 * 2^59 = 2^118 */
1036
50e735f9
MC
1037 /*-
1038 * y_out = (z1^3*y2 - z2^3*y1)*(z2^2*x1*(z1^2*x2 - z2^2*x1)^2 - x_out) -
1039 * z2^3*y1*(z1^2*x2 - z2^2*x1)^3
1040 */
0f113f3e
MC
1041 widefelem_diff(tmp2, tmp);
1042 /* tmp2[i] < 2^118 + 2^120 < 2^121 */
1043 felem_reduce(y_out, tmp2);
1044
1045 /*
1046 * the result (x_out, y_out, z_out) is incorrect if one of the inputs is
1047 * the point at infinity, so we need to check for this separately
1048 */
1049
1050 /*
1051 * if point 1 is at infinity, copy point 2 to output, and vice versa
1052 */
1053 copy_conditional(x_out, x2, z1_is_zero);
1054 copy_conditional(x_out, x1, z2_is_zero);
1055 copy_conditional(y_out, y2, z1_is_zero);
1056 copy_conditional(y_out, y1, z2_is_zero);
1057 copy_conditional(z_out, z2, z1_is_zero);
1058 copy_conditional(z_out, z1, z2_is_zero);
1059 felem_assign(x3, x_out);
1060 felem_assign(y3, y_out);
1061 felem_assign(z3, z_out);
1062}
04daec86 1063
dbd87ffc
MC
1064/*
1065 * select_point selects the |idx|th point from a precomputation table and
1066 * copies it to out.
1067 * The pre_comp array argument should be size of |size| argument
1068 */
0f113f3e
MC
1069static void select_point(const u64 idx, unsigned int size,
1070 const felem pre_comp[][3], felem out[3])
1071{
1072 unsigned i, j;
1073 limb *outlimbs = &out[0][0];
0f113f3e 1074
88f4c6f3 1075 memset(out, 0, sizeof(*out) * 3);
0f113f3e
MC
1076 for (i = 0; i < size; i++) {
1077 const limb *inlimbs = &pre_comp[i][0][0];
1078 u64 mask = i ^ idx;
1079 mask |= mask >> 4;
1080 mask |= mask >> 2;
1081 mask |= mask >> 1;
1082 mask &= 1;
1083 mask--;
1084 for (j = 0; j < 4 * 3; j++)
1085 outlimbs[j] |= inlimbs[j] & mask;
1086 }
1087}
3e00b4c9
BM
1088
1089/* get_bit returns the |i|th bit in |in| */
1090static char get_bit(const felem_bytearray in, unsigned i)
0f113f3e
MC
1091{
1092 if (i >= 224)
1093 return 0;
1094 return (in[i >> 3] >> (i & 7)) & 1;
1095}
1096
1097/*
1098 * Interleaved point multiplication using precomputed point multiples: The
1099 * small point multiples 0*P, 1*P, ..., 16*P are in pre_comp[], the scalars
1100 * in scalars[]. If g_scalar is non-NULL, we also add this multiple of the
1101 * generator, using certain (large) precomputed multiples in g_pre_comp.
1102 * Output point (X, Y, Z) is stored in x_out, y_out, z_out
1103 */
3e00b4c9 1104static void batch_mul(felem x_out, felem y_out, felem z_out,
0f113f3e
MC
1105 const felem_bytearray scalars[],
1106 const unsigned num_points, const u8 *g_scalar,
1107 const int mixed, const felem pre_comp[][17][3],
1108 const felem g_pre_comp[2][16][3])
1109{
1110 int i, skip;
1111 unsigned num;
1112 unsigned gen_mul = (g_scalar != NULL);
1113 felem nq[3], tmp[4];
1114 u64 bits;
1115 u8 sign, digit;
1116
1117 /* set nq to the point at infinity */
16f8d4eb 1118 memset(nq, 0, sizeof(nq));
0f113f3e
MC
1119
1120 /*
1121 * Loop over all scalars msb-to-lsb, interleaving additions of multiples
1122 * of the generator (two in each of the last 28 rounds) and additions of
1123 * other points multiples (every 5th round).
1124 */
1125 skip = 1; /* save two point operations in the first
1126 * round */
1127 for (i = (num_points ? 220 : 27); i >= 0; --i) {
1128 /* double */
1129 if (!skip)
1130 point_double(nq[0], nq[1], nq[2], nq[0], nq[1], nq[2]);
1131
1132 /* add multiples of the generator */
1133 if (gen_mul && (i <= 27)) {
1134 /* first, look 28 bits upwards */
1135 bits = get_bit(g_scalar, i + 196) << 3;
1136 bits |= get_bit(g_scalar, i + 140) << 2;
1137 bits |= get_bit(g_scalar, i + 84) << 1;
1138 bits |= get_bit(g_scalar, i + 28);
1139 /* select the point to add, in constant time */
1140 select_point(bits, 16, g_pre_comp[1], tmp);
1141
1142 if (!skip) {
1143 /* value 1 below is argument for "mixed" */
1144 point_add(nq[0], nq[1], nq[2],
1145 nq[0], nq[1], nq[2], 1, tmp[0], tmp[1], tmp[2]);
1146 } else {
1147 memcpy(nq, tmp, 3 * sizeof(felem));
1148 skip = 0;
1149 }
1150
1151 /* second, look at the current position */
1152 bits = get_bit(g_scalar, i + 168) << 3;
1153 bits |= get_bit(g_scalar, i + 112) << 2;
1154 bits |= get_bit(g_scalar, i + 56) << 1;
1155 bits |= get_bit(g_scalar, i);
1156 /* select the point to add, in constant time */
1157 select_point(bits, 16, g_pre_comp[0], tmp);
1158 point_add(nq[0], nq[1], nq[2],
1159 nq[0], nq[1], nq[2],
1160 1 /* mixed */ , tmp[0], tmp[1], tmp[2]);
1161 }
1162
1163 /* do other additions every 5 doublings */
1164 if (num_points && (i % 5 == 0)) {
1165 /* loop over all scalars */
1166 for (num = 0; num < num_points; ++num) {
1167 bits = get_bit(scalars[num], i + 4) << 5;
1168 bits |= get_bit(scalars[num], i + 3) << 4;
1169 bits |= get_bit(scalars[num], i + 2) << 3;
1170 bits |= get_bit(scalars[num], i + 1) << 2;
1171 bits |= get_bit(scalars[num], i) << 1;
1172 bits |= get_bit(scalars[num], i - 1);
1173 ec_GFp_nistp_recode_scalar_bits(&sign, &digit, bits);
1174
1175 /* select the point to add or subtract */
1176 select_point(digit, 17, pre_comp[num], tmp);
1177 felem_neg(tmp[3], tmp[1]); /* (X, -Y, Z) is the negative
1178 * point */
1179 copy_conditional(tmp[1], tmp[3], sign);
1180
1181 if (!skip) {
1182 point_add(nq[0], nq[1], nq[2],
1183 nq[0], nq[1], nq[2],
1184 mixed, tmp[0], tmp[1], tmp[2]);
1185 } else {
1186 memcpy(nq, tmp, 3 * sizeof(felem));
1187 skip = 0;
1188 }
1189 }
1190 }
1191 }
1192 felem_assign(x_out, nq[0]);
1193 felem_assign(y_out, nq[1]);
1194 felem_assign(z_out, nq[2]);
1195}
04daec86
BM
1196
1197/******************************************************************************/
0f113f3e
MC
1198/*
1199 * FUNCTIONS TO MANAGE PRECOMPUTATION
04daec86
BM
1200 */
1201
1202static NISTP224_PRE_COMP *nistp224_pre_comp_new()
0f113f3e 1203{
b51bce94
RS
1204 NISTP224_PRE_COMP *ret = OPENSSL_zalloc(sizeof(*ret));
1205
0f113f3e
MC
1206 if (!ret) {
1207 ECerr(EC_F_NISTP224_PRE_COMP_NEW, ERR_R_MALLOC_FAILURE);
1208 return ret;
1209 }
0f113f3e
MC
1210 ret->references = 1;
1211 return ret;
1212}
04daec86 1213
3aef36ff 1214NISTP224_PRE_COMP *EC_nistp224_pre_comp_dup(NISTP224_PRE_COMP *p)
0f113f3e 1215{
3aef36ff
RS
1216 if (p != NULL)
1217 CRYPTO_add(&p->references, 1, CRYPTO_LOCK_EC_PRE_COMP);
1218 return p;
0f113f3e 1219}
04daec86 1220
3aef36ff 1221void EC_nistp224_pre_comp_free(NISTP224_PRE_COMP *p)
0f113f3e 1222{
3aef36ff
RS
1223 if (p == NULL
1224 || CRYPTO_add(&p->references, -1, CRYPTO_LOCK_EC_PRE_COMP) > 0)
0f113f3e 1225 return;
3aef36ff 1226 OPENSSL_free(p);
0f113f3e 1227}
04daec86
BM
1228
1229/******************************************************************************/
0f113f3e
MC
1230/*
1231 * OPENSSL EC_METHOD FUNCTIONS
04daec86
BM
1232 */
1233
1234int ec_GFp_nistp224_group_init(EC_GROUP *group)
0f113f3e
MC
1235{
1236 int ret;
1237 ret = ec_GFp_simple_group_init(group);
1238 group->a_is_minus3 = 1;
1239 return ret;
1240}
04daec86
BM
1241
1242int ec_GFp_nistp224_group_set_curve(EC_GROUP *group, const BIGNUM *p,
0f113f3e
MC
1243 const BIGNUM *a, const BIGNUM *b,
1244 BN_CTX *ctx)
1245{
1246 int ret = 0;
1247 BN_CTX *new_ctx = NULL;
1248 BIGNUM *curve_p, *curve_a, *curve_b;
1249
1250 if (ctx == NULL)
1251 if ((ctx = new_ctx = BN_CTX_new()) == NULL)
1252 return 0;
1253 BN_CTX_start(ctx);
1254 if (((curve_p = BN_CTX_get(ctx)) == NULL) ||
1255 ((curve_a = BN_CTX_get(ctx)) == NULL) ||
1256 ((curve_b = BN_CTX_get(ctx)) == NULL))
1257 goto err;
1258 BN_bin2bn(nistp224_curve_params[0], sizeof(felem_bytearray), curve_p);
1259 BN_bin2bn(nistp224_curve_params[1], sizeof(felem_bytearray), curve_a);
1260 BN_bin2bn(nistp224_curve_params[2], sizeof(felem_bytearray), curve_b);
1261 if ((BN_cmp(curve_p, p)) || (BN_cmp(curve_a, a)) || (BN_cmp(curve_b, b))) {
1262 ECerr(EC_F_EC_GFP_NISTP224_GROUP_SET_CURVE,
1263 EC_R_WRONG_CURVE_PARAMETERS);
1264 goto err;
1265 }
1266 group->field_mod_func = BN_nist_mod_224;
1267 ret = ec_GFp_simple_group_set_curve(group, p, a, b, ctx);
1268 err:
1269 BN_CTX_end(ctx);
23a1d5e9 1270 BN_CTX_free(new_ctx);
0f113f3e
MC
1271 return ret;
1272}
1273
1274/*
1275 * Takes the Jacobian coordinates (X, Y, Z) of a point and returns (X', Y') =
1276 * (X/Z^2, Y/Z^3)
1277 */
04daec86 1278int ec_GFp_nistp224_point_get_affine_coordinates(const EC_GROUP *group,
0f113f3e
MC
1279 const EC_POINT *point,
1280 BIGNUM *x, BIGNUM *y,
1281 BN_CTX *ctx)
1282{
1283 felem z1, z2, x_in, y_in, x_out, y_out;
1284 widefelem tmp;
1285
1286 if (EC_POINT_is_at_infinity(group, point)) {
1287 ECerr(EC_F_EC_GFP_NISTP224_POINT_GET_AFFINE_COORDINATES,
1288 EC_R_POINT_AT_INFINITY);
1289 return 0;
1290 }
ace8f546
AP
1291 if ((!BN_to_felem(x_in, point->X)) || (!BN_to_felem(y_in, point->Y)) ||
1292 (!BN_to_felem(z1, point->Z)))
0f113f3e
MC
1293 return 0;
1294 felem_inv(z2, z1);
1295 felem_square(tmp, z2);
1296 felem_reduce(z1, tmp);
1297 felem_mul(tmp, x_in, z1);
1298 felem_reduce(x_in, tmp);
1299 felem_contract(x_out, x_in);
1300 if (x != NULL) {
1301 if (!felem_to_BN(x, x_out)) {
1302 ECerr(EC_F_EC_GFP_NISTP224_POINT_GET_AFFINE_COORDINATES,
1303 ERR_R_BN_LIB);
1304 return 0;
1305 }
1306 }
1307 felem_mul(tmp, z1, z2);
1308 felem_reduce(z1, tmp);
1309 felem_mul(tmp, y_in, z1);
1310 felem_reduce(y_in, tmp);
1311 felem_contract(y_out, y_in);
1312 if (y != NULL) {
1313 if (!felem_to_BN(y, y_out)) {
1314 ECerr(EC_F_EC_GFP_NISTP224_POINT_GET_AFFINE_COORDINATES,
1315 ERR_R_BN_LIB);
1316 return 0;
1317 }
1318 }
1319 return 1;
1320}
1321
1322static void make_points_affine(size_t num, felem points[ /* num */ ][3],
1323 felem tmp_felems[ /* num+1 */ ])
1324{
1325 /*
1326 * Runs in constant time, unless an input is the point at infinity (which
1327 * normally shouldn't happen).
1328 */
1329 ec_GFp_nistp_points_make_affine_internal(num,
1330 points,
1331 sizeof(felem),
1332 tmp_felems,
1333 (void (*)(void *))felem_one,
1334 (int (*)(const void *))
1335 felem_is_zero_int,
1336 (void (*)(void *, const void *))
1337 felem_assign,
1338 (void (*)(void *, const void *))
1339 felem_square_reduce, (void (*)
1340 (void *,
1341 const void
1342 *,
1343 const void
1344 *))
1345 felem_mul_reduce,
1346 (void (*)(void *, const void *))
1347 felem_inv,
1348 (void (*)(void *, const void *))
1349 felem_contract);
1350}
1351
1352/*
1353 * Computes scalar*generator + \sum scalars[i]*points[i], ignoring NULL
1354 * values Result is stored in r (r can equal one of the inputs).
1355 */
04daec86 1356int ec_GFp_nistp224_points_mul(const EC_GROUP *group, EC_POINT *r,
0f113f3e
MC
1357 const BIGNUM *scalar, size_t num,
1358 const EC_POINT *points[],
1359 const BIGNUM *scalars[], BN_CTX *ctx)
1360{
1361 int ret = 0;
1362 int j;
1363 unsigned i;
1364 int mixed = 0;
1365 BN_CTX *new_ctx = NULL;
1366 BIGNUM *x, *y, *z, *tmp_scalar;
1367 felem_bytearray g_secret;
1368 felem_bytearray *secrets = NULL;
16f8d4eb 1369 felem (*pre_comp)[17][3] = NULL;
0f113f3e
MC
1370 felem *tmp_felems = NULL;
1371 felem_bytearray tmp;
1372 unsigned num_bytes;
1373 int have_pre_comp = 0;
1374 size_t num_points = num;
1375 felem x_in, y_in, z_in, x_out, y_out, z_out;
1376 NISTP224_PRE_COMP *pre = NULL;
1377 const felem(*g_pre_comp)[16][3] = NULL;
1378 EC_POINT *generator = NULL;
1379 const EC_POINT *p = NULL;
1380 const BIGNUM *p_scalar = NULL;
1381
1382 if (ctx == NULL)
1383 if ((ctx = new_ctx = BN_CTX_new()) == NULL)
1384 return 0;
1385 BN_CTX_start(ctx);
1386 if (((x = BN_CTX_get(ctx)) == NULL) ||
1387 ((y = BN_CTX_get(ctx)) == NULL) ||
1388 ((z = BN_CTX_get(ctx)) == NULL) ||
1389 ((tmp_scalar = BN_CTX_get(ctx)) == NULL))
1390 goto err;
1391
1392 if (scalar != NULL) {
3aef36ff 1393 pre = group->pre_comp.nistp224;
0f113f3e
MC
1394 if (pre)
1395 /* we have precomputation, try to use it */
1396 g_pre_comp = (const felem(*)[16][3])pre->g_pre_comp;
1397 else
1398 /* try to use the standard precomputation */
1399 g_pre_comp = &gmul[0];
1400 generator = EC_POINT_new(group);
1401 if (generator == NULL)
1402 goto err;
1403 /* get the generator from precomputation */
1404 if (!felem_to_BN(x, g_pre_comp[0][1][0]) ||
1405 !felem_to_BN(y, g_pre_comp[0][1][1]) ||
1406 !felem_to_BN(z, g_pre_comp[0][1][2])) {
1407 ECerr(EC_F_EC_GFP_NISTP224_POINTS_MUL, ERR_R_BN_LIB);
1408 goto err;
1409 }
1410 if (!EC_POINT_set_Jprojective_coordinates_GFp(group,
1411 generator, x, y, z,
1412 ctx))
1413 goto err;
1414 if (0 == EC_POINT_cmp(group, generator, group->generator, ctx))
1415 /* precomputation matches generator */
1416 have_pre_comp = 1;
1417 else
1418 /*
1419 * we don't have valid precomputation: treat the generator as a
1420 * random point
1421 */
1422 num_points = num_points + 1;
1423 }
1424
1425 if (num_points > 0) {
1426 if (num_points >= 3) {
1427 /*
1428 * unless we precompute multiples for just one or two points,
1429 * converting those into affine form is time well spent
1430 */
1431 mixed = 1;
1432 }
b51bce94
RS
1433 secrets = OPENSSL_zalloc(sizeof(*secrets) * num_points);
1434 pre_comp = OPENSSL_zalloc(sizeof(*pre_comp) * num_points);
0f113f3e
MC
1435 if (mixed)
1436 tmp_felems =
16f8d4eb 1437 OPENSSL_malloc(sizeof(felem) * (num_points * 17 + 1));
0f113f3e
MC
1438 if ((secrets == NULL) || (pre_comp == NULL)
1439 || (mixed && (tmp_felems == NULL))) {
1440 ECerr(EC_F_EC_GFP_NISTP224_POINTS_MUL, ERR_R_MALLOC_FAILURE);
1441 goto err;
1442 }
1443
1444 /*
1445 * we treat NULL scalars as 0, and NULL points as points at infinity,
1446 * i.e., they contribute nothing to the linear combination
1447 */
0f113f3e
MC
1448 for (i = 0; i < num_points; ++i) {
1449 if (i == num)
1450 /* the generator */
1451 {
1452 p = EC_GROUP_get0_generator(group);
1453 p_scalar = scalar;
1454 } else
1455 /* the i^th point */
1456 {
1457 p = points[i];
1458 p_scalar = scalars[i];
1459 }
1460 if ((p_scalar != NULL) && (p != NULL)) {
1461 /* reduce scalar to 0 <= scalar < 2^224 */
1462 if ((BN_num_bits(p_scalar) > 224)
1463 || (BN_is_negative(p_scalar))) {
1464 /*
1465 * this is an unusual input, and we don't guarantee
1466 * constant-timeness
1467 */
ace8f546 1468 if (!BN_nnmod(tmp_scalar, p_scalar, group->order, ctx)) {
0f113f3e
MC
1469 ECerr(EC_F_EC_GFP_NISTP224_POINTS_MUL, ERR_R_BN_LIB);
1470 goto err;
1471 }
1472 num_bytes = BN_bn2bin(tmp_scalar, tmp);
1473 } else
1474 num_bytes = BN_bn2bin(p_scalar, tmp);
1475 flip_endian(secrets[i], tmp, num_bytes);
1476 /* precompute multiples */
ace8f546
AP
1477 if ((!BN_to_felem(x_out, p->X)) ||
1478 (!BN_to_felem(y_out, p->Y)) ||
1479 (!BN_to_felem(z_out, p->Z)))
0f113f3e
MC
1480 goto err;
1481 felem_assign(pre_comp[i][1][0], x_out);
1482 felem_assign(pre_comp[i][1][1], y_out);
1483 felem_assign(pre_comp[i][1][2], z_out);
1484 for (j = 2; j <= 16; ++j) {
1485 if (j & 1) {
1486 point_add(pre_comp[i][j][0], pre_comp[i][j][1],
1487 pre_comp[i][j][2], pre_comp[i][1][0],
1488 pre_comp[i][1][1], pre_comp[i][1][2], 0,
1489 pre_comp[i][j - 1][0],
1490 pre_comp[i][j - 1][1],
1491 pre_comp[i][j - 1][2]);
1492 } else {
1493 point_double(pre_comp[i][j][0], pre_comp[i][j][1],
1494 pre_comp[i][j][2], pre_comp[i][j / 2][0],
1495 pre_comp[i][j / 2][1],
1496 pre_comp[i][j / 2][2]);
1497 }
1498 }
1499 }
1500 }
1501 if (mixed)
1502 make_points_affine(num_points * 17, pre_comp[0], tmp_felems);
1503 }
1504
1505 /* the scalar for the generator */
1506 if ((scalar != NULL) && (have_pre_comp)) {
16f8d4eb 1507 memset(g_secret, 0, sizeof(g_secret));
0f113f3e
MC
1508 /* reduce scalar to 0 <= scalar < 2^224 */
1509 if ((BN_num_bits(scalar) > 224) || (BN_is_negative(scalar))) {
1510 /*
1511 * this is an unusual input, and we don't guarantee
1512 * constant-timeness
1513 */
ace8f546 1514 if (!BN_nnmod(tmp_scalar, scalar, group->order, ctx)) {
0f113f3e
MC
1515 ECerr(EC_F_EC_GFP_NISTP224_POINTS_MUL, ERR_R_BN_LIB);
1516 goto err;
1517 }
1518 num_bytes = BN_bn2bin(tmp_scalar, tmp);
1519 } else
1520 num_bytes = BN_bn2bin(scalar, tmp);
1521 flip_endian(g_secret, tmp, num_bytes);
1522 /* do the multiplication with generator precomputation */
1523 batch_mul(x_out, y_out, z_out,
1524 (const felem_bytearray(*))secrets, num_points,
1525 g_secret,
1526 mixed, (const felem(*)[17][3])pre_comp, g_pre_comp);
1527 } else
1528 /* do the multiplication without generator precomputation */
1529 batch_mul(x_out, y_out, z_out,
1530 (const felem_bytearray(*))secrets, num_points,
1531 NULL, mixed, (const felem(*)[17][3])pre_comp, NULL);
1532 /* reduce the output to its unique minimal representation */
1533 felem_contract(x_in, x_out);
1534 felem_contract(y_in, y_out);
1535 felem_contract(z_in, z_out);
1536 if ((!felem_to_BN(x, x_in)) || (!felem_to_BN(y, y_in)) ||
1537 (!felem_to_BN(z, z_in))) {
1538 ECerr(EC_F_EC_GFP_NISTP224_POINTS_MUL, ERR_R_BN_LIB);
1539 goto err;
1540 }
1541 ret = EC_POINT_set_Jprojective_coordinates_GFp(group, r, x, y, z, ctx);
1542
1543 err:
1544 BN_CTX_end(ctx);
8fdc3734 1545 EC_POINT_free(generator);
23a1d5e9 1546 BN_CTX_free(new_ctx);
b548a1f1
RS
1547 OPENSSL_free(secrets);
1548 OPENSSL_free(pre_comp);
1549 OPENSSL_free(tmp_felems);
0f113f3e
MC
1550 return ret;
1551}
04daec86
BM
1552
1553int ec_GFp_nistp224_precompute_mult(EC_GROUP *group, BN_CTX *ctx)
0f113f3e
MC
1554{
1555 int ret = 0;
1556 NISTP224_PRE_COMP *pre = NULL;
1557 int i, j;
1558 BN_CTX *new_ctx = NULL;
1559 BIGNUM *x, *y;
1560 EC_POINT *generator = NULL;
1561 felem tmp_felems[32];
1562
1563 /* throw away old precomputation */
2c52ac9b 1564 EC_pre_comp_free(group);
0f113f3e
MC
1565 if (ctx == NULL)
1566 if ((ctx = new_ctx = BN_CTX_new()) == NULL)
1567 return 0;
1568 BN_CTX_start(ctx);
1569 if (((x = BN_CTX_get(ctx)) == NULL) || ((y = BN_CTX_get(ctx)) == NULL))
1570 goto err;
1571 /* get the generator */
1572 if (group->generator == NULL)
1573 goto err;
1574 generator = EC_POINT_new(group);
1575 if (generator == NULL)
1576 goto err;
1577 BN_bin2bn(nistp224_curve_params[3], sizeof(felem_bytearray), x);
1578 BN_bin2bn(nistp224_curve_params[4], sizeof(felem_bytearray), y);
1579 if (!EC_POINT_set_affine_coordinates_GFp(group, generator, x, y, ctx))
1580 goto err;
1581 if ((pre = nistp224_pre_comp_new()) == NULL)
1582 goto err;
1583 /*
1584 * if the generator is the standard one, use built-in precomputation
1585 */
1586 if (0 == EC_POINT_cmp(group, generator, group->generator, ctx)) {
1587 memcpy(pre->g_pre_comp, gmul, sizeof(pre->g_pre_comp));
615614c8 1588 goto done;
0f113f3e 1589 }
ace8f546
AP
1590 if ((!BN_to_felem(pre->g_pre_comp[0][1][0], group->generator->X)) ||
1591 (!BN_to_felem(pre->g_pre_comp[0][1][1], group->generator->Y)) ||
1592 (!BN_to_felem(pre->g_pre_comp[0][1][2], group->generator->Z)))
0f113f3e
MC
1593 goto err;
1594 /*
1595 * compute 2^56*G, 2^112*G, 2^168*G for the first table, 2^28*G, 2^84*G,
1596 * 2^140*G, 2^196*G for the second one
1597 */
1598 for (i = 1; i <= 8; i <<= 1) {
1599 point_double(pre->g_pre_comp[1][i][0], pre->g_pre_comp[1][i][1],
1600 pre->g_pre_comp[1][i][2], pre->g_pre_comp[0][i][0],
1601 pre->g_pre_comp[0][i][1], pre->g_pre_comp[0][i][2]);
1602 for (j = 0; j < 27; ++j) {
1603 point_double(pre->g_pre_comp[1][i][0], pre->g_pre_comp[1][i][1],
1604 pre->g_pre_comp[1][i][2], pre->g_pre_comp[1][i][0],
1605 pre->g_pre_comp[1][i][1], pre->g_pre_comp[1][i][2]);
1606 }
1607 if (i == 8)
1608 break;
1609 point_double(pre->g_pre_comp[0][2 * i][0],
1610 pre->g_pre_comp[0][2 * i][1],
1611 pre->g_pre_comp[0][2 * i][2], pre->g_pre_comp[1][i][0],
1612 pre->g_pre_comp[1][i][1], pre->g_pre_comp[1][i][2]);
1613 for (j = 0; j < 27; ++j) {
1614 point_double(pre->g_pre_comp[0][2 * i][0],
1615 pre->g_pre_comp[0][2 * i][1],
1616 pre->g_pre_comp[0][2 * i][2],
1617 pre->g_pre_comp[0][2 * i][0],
1618 pre->g_pre_comp[0][2 * i][1],
1619 pre->g_pre_comp[0][2 * i][2]);
1620 }
1621 }
1622 for (i = 0; i < 2; i++) {
1623 /* g_pre_comp[i][0] is the point at infinity */
1624 memset(pre->g_pre_comp[i][0], 0, sizeof(pre->g_pre_comp[i][0]));
1625 /* the remaining multiples */
1626 /* 2^56*G + 2^112*G resp. 2^84*G + 2^140*G */
1627 point_add(pre->g_pre_comp[i][6][0], pre->g_pre_comp[i][6][1],
1628 pre->g_pre_comp[i][6][2], pre->g_pre_comp[i][4][0],
1629 pre->g_pre_comp[i][4][1], pre->g_pre_comp[i][4][2],
1630 0, pre->g_pre_comp[i][2][0], pre->g_pre_comp[i][2][1],
1631 pre->g_pre_comp[i][2][2]);
1632 /* 2^56*G + 2^168*G resp. 2^84*G + 2^196*G */
1633 point_add(pre->g_pre_comp[i][10][0], pre->g_pre_comp[i][10][1],
1634 pre->g_pre_comp[i][10][2], pre->g_pre_comp[i][8][0],
1635 pre->g_pre_comp[i][8][1], pre->g_pre_comp[i][8][2],
1636 0, pre->g_pre_comp[i][2][0], pre->g_pre_comp[i][2][1],
1637 pre->g_pre_comp[i][2][2]);
1638 /* 2^112*G + 2^168*G resp. 2^140*G + 2^196*G */
1639 point_add(pre->g_pre_comp[i][12][0], pre->g_pre_comp[i][12][1],
1640 pre->g_pre_comp[i][12][2], pre->g_pre_comp[i][8][0],
1641 pre->g_pre_comp[i][8][1], pre->g_pre_comp[i][8][2],
1642 0, pre->g_pre_comp[i][4][0], pre->g_pre_comp[i][4][1],
1643 pre->g_pre_comp[i][4][2]);
1644 /*
1645 * 2^56*G + 2^112*G + 2^168*G resp. 2^84*G + 2^140*G + 2^196*G
1646 */
1647 point_add(pre->g_pre_comp[i][14][0], pre->g_pre_comp[i][14][1],
1648 pre->g_pre_comp[i][14][2], pre->g_pre_comp[i][12][0],
1649 pre->g_pre_comp[i][12][1], pre->g_pre_comp[i][12][2],
1650 0, pre->g_pre_comp[i][2][0], pre->g_pre_comp[i][2][1],
1651 pre->g_pre_comp[i][2][2]);
1652 for (j = 1; j < 8; ++j) {
1653 /* odd multiples: add G resp. 2^28*G */
1654 point_add(pre->g_pre_comp[i][2 * j + 1][0],
1655 pre->g_pre_comp[i][2 * j + 1][1],
1656 pre->g_pre_comp[i][2 * j + 1][2],
1657 pre->g_pre_comp[i][2 * j][0],
1658 pre->g_pre_comp[i][2 * j][1],
1659 pre->g_pre_comp[i][2 * j][2], 0,
1660 pre->g_pre_comp[i][1][0], pre->g_pre_comp[i][1][1],
1661 pre->g_pre_comp[i][1][2]);
1662 }
1663 }
1664 make_points_affine(31, &(pre->g_pre_comp[0][1]), tmp_felems);
1665
615614c8 1666 done:
3aef36ff 1667 SETPRECOMP(group, nistp224, pre);
0f113f3e 1668 pre = NULL;
3aef36ff 1669 ret = 1;
04daec86 1670 err:
0f113f3e 1671 BN_CTX_end(ctx);
8fdc3734 1672 EC_POINT_free(generator);
23a1d5e9 1673 BN_CTX_free(new_ctx);
3aef36ff 1674 EC_nistp224_pre_comp_free(pre);
0f113f3e
MC
1675 return ret;
1676}
04daec86
BM
1677
1678int ec_GFp_nistp224_have_precompute_mult(const EC_GROUP *group)
0f113f3e 1679{
3aef36ff 1680 return HAVEPRECOMP(group, nistp224);
0f113f3e 1681}
396cb565 1682
04daec86 1683#endif