]> git.ipfire.org Git - thirdparty/openssl.git/blame - crypto/ec/ecp_nistp224.c
threads_pthread.c: change inline to ossl_inline
[thirdparty/openssl.git] / crypto / ec / ecp_nistp224.c
CommitLineData
04daec86 1/*
da1c088f 2 * Copyright 2010-2023 The OpenSSL Project Authors. All Rights Reserved.
4f22f405 3 *
a7f182b7 4 * Licensed under the Apache License 2.0 (the "License"). You may not use
4f22f405
RS
5 * this file except in compliance with the License. You can obtain a copy
6 * in the file LICENSE in the source distribution or at
7 * https://www.openssl.org/source/license.html
04daec86 8 */
4f22f405 9
3e00b4c9 10/* Copyright 2011 Google Inc.
04daec86 11 *
3e00b4c9 12 * Licensed under the Apache License, Version 2.0 (the "License");
04daec86 13 *
3e00b4c9
BM
14 * you may not use this file except in compliance with the License.
15 * You may obtain a copy of the License at
04daec86 16 *
3e00b4c9 17 * http://www.apache.org/licenses/LICENSE-2.0
04daec86 18 *
3e00b4c9
BM
19 * Unless required by applicable law or agreed to in writing, software
20 * distributed under the License is distributed on an "AS IS" BASIS,
21 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
22 * See the License for the specific language governing permissions and
23 * limitations under the License.
04daec86
BM
24 */
25
579422c8
P
26/*
27 * ECDSA low level APIs are deprecated for public use, but still ok for
28 * internal use.
29 */
30#include "internal/deprecated.h"
31
04daec86
BM
32/*
33 * A 64-bit implementation of the NIST P-224 elliptic curve point multiplication
34 *
35 * Inspired by Daniel J. Bernstein's public domain nistp224 implementation
36 * and Adam Langley's public domain 64-bit C implementation of curve25519
37 */
e0d6132b
BM
38
39#include <openssl/opensslconf.h>
e0d6132b 40
705536e2
RS
41#include <stdint.h>
42#include <string.h>
43#include <openssl/err.h>
44#include "ec_local.h"
04daec86 45
5de32f22
AI
46#include "internal/numbers.h"
47
48#ifndef INT128_MAX
705536e2
RS
49# error "Your compiler doesn't appear to support 128-bit integer types"
50#endif
04daec86
BM
51
52typedef uint8_t u8;
3e00b4c9 53typedef uint64_t u64;
04daec86 54
04daec86 55/******************************************************************************/
1d97c843
TH
56/*-
57 * INTERNAL REPRESENTATION OF FIELD ELEMENTS
04daec86
BM
58 *
59 * Field elements are represented as a_0 + 2^56*a_1 + 2^112*a_2 + 2^168*a_3
3e00b4c9
BM
60 * using 64-bit coefficients called 'limbs',
61 * and sometimes (for multiplication results) as
04daec86 62 * b_0 + 2^56*b_1 + 2^112*b_2 + 2^168*b_3 + 2^224*b_4 + 2^280*b_5 + 2^336*b_6
3e00b4c9
BM
63 * using 128-bit coefficients called 'widelimbs'.
64 * A 4-limb representation is an 'felem';
65 * a 7-widelimb representation is a 'widefelem'.
66 * Even within felems, bits of adjacent limbs overlap, and we don't always
67 * reduce the representations: we ensure that inputs to each felem
04daec86
BM
68 * multiplication satisfy a_i < 2^60, so outputs satisfy b_i < 4*2^60*2^60,
69 * and fit into a 128-bit word without overflow. The coefficients are then
3e00b4c9
BM
70 * again partially reduced to obtain an felem satisfying a_i < 2^57.
71 * We only reduce to the unique minimal representation at the end of the
72 * computation.
04daec86
BM
73 */
74
3e00b4c9 75typedef uint64_t limb;
77286fe3 76typedef uint64_t limb_aX __attribute((__aligned__(1)));
3e00b4c9
BM
77typedef uint128_t widelimb;
78
79typedef limb felem[4];
80typedef widelimb widefelem[7];
04daec86 81
0f113f3e 82/*
46f4e1be 83 * Field element represented as a byte array. 28*8 = 224 bits is also the
0f113f3e
MC
84 * group order size for the elliptic curve, and we also use this type for
85 * scalars for point multiplication.
86 */
396cb565
BM
87typedef u8 felem_bytearray[28];
88
89static const felem_bytearray nistp224_curve_params[5] = {
0f113f3e
MC
90 {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, /* p */
91 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00,
92 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01},
93 {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, /* a */
94 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFE, 0xFF, 0xFF, 0xFF, 0xFF,
95 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFE},
96 {0xB4, 0x05, 0x0A, 0x85, 0x0C, 0x04, 0xB3, 0xAB, 0xF5, 0x41, /* b */
97 0x32, 0x56, 0x50, 0x44, 0xB0, 0xB7, 0xD7, 0xBF, 0xD8, 0xBA,
98 0x27, 0x0B, 0x39, 0x43, 0x23, 0x55, 0xFF, 0xB4},
99 {0xB7, 0x0E, 0x0C, 0xBD, 0x6B, 0xB4, 0xBF, 0x7F, 0x32, 0x13, /* x */
100 0x90, 0xB9, 0x4A, 0x03, 0xC1, 0xD3, 0x56, 0xC2, 0x11, 0x22,
101 0x34, 0x32, 0x80, 0xD6, 0x11, 0x5C, 0x1D, 0x21},
102 {0xbd, 0x37, 0x63, 0x88, 0xb5, 0xf7, 0x23, 0xfb, 0x4c, 0x22, /* y */
103 0xdf, 0xe6, 0xcd, 0x43, 0x75, 0xa0, 0x5a, 0x07, 0x47, 0x64,
104 0x44, 0xd5, 0x81, 0x99, 0x85, 0x00, 0x7e, 0x34}
396cb565 105};
04daec86 106
1d97c843
TH
107/*-
108 * Precomputed multiples of the standard generator
3e00b4c9
BM
109 * Points are given in coordinates (X, Y, Z) where Z normally is 1
110 * (0 for the point at infinity).
111 * For each field element, slice a_0 is word 0, etc.
112 *
113 * The table has 2 * 16 elements, starting with the following:
114 * index | bits | point
115 * ------+---------+------------------------------
116 * 0 | 0 0 0 0 | 0G
117 * 1 | 0 0 0 1 | 1G
118 * 2 | 0 0 1 0 | 2^56G
119 * 3 | 0 0 1 1 | (2^56 + 1)G
120 * 4 | 0 1 0 0 | 2^112G
121 * 5 | 0 1 0 1 | (2^112 + 1)G
122 * 6 | 0 1 1 0 | (2^112 + 2^56)G
123 * 7 | 0 1 1 1 | (2^112 + 2^56 + 1)G
124 * 8 | 1 0 0 0 | 2^168G
125 * 9 | 1 0 0 1 | (2^168 + 1)G
126 * 10 | 1 0 1 0 | (2^168 + 2^56)G
127 * 11 | 1 0 1 1 | (2^168 + 2^56 + 1)G
128 * 12 | 1 1 0 0 | (2^168 + 2^112)G
129 * 13 | 1 1 0 1 | (2^168 + 2^112 + 1)G
130 * 14 | 1 1 1 0 | (2^168 + 2^112 + 2^56)G
131 * 15 | 1 1 1 1 | (2^168 + 2^112 + 2^56 + 1)G
132 * followed by a copy of this with each element multiplied by 2^28.
133 *
134 * The reason for this is so that we can clock bits into four different
135 * locations when doing simple scalar multiplies against the base point,
136 * and then another four locations using the second 16 elements.
137 */
4eb504ae
AP
138static const felem gmul[2][16][3] = {
139{{{0, 0, 0, 0},
140 {0, 0, 0, 0},
141 {0, 0, 0, 0}},
142 {{0x3280d6115c1d21, 0xc1d356c2112234, 0x7f321390b94a03, 0xb70e0cbd6bb4bf},
143 {0xd5819985007e34, 0x75a05a07476444, 0xfb4c22dfe6cd43, 0xbd376388b5f723},
144 {1, 0, 0, 0}},
145 {{0xfd9675666ebbe9, 0xbca7664d40ce5e, 0x2242df8d8a2a43, 0x1f49bbb0f99bc5},
146 {0x29e0b892dc9c43, 0xece8608436e662, 0xdc858f185310d0, 0x9812dd4eb8d321},
147 {1, 0, 0, 0}},
148 {{0x6d3e678d5d8eb8, 0x559eed1cb362f1, 0x16e9a3bbce8a3f, 0xeedcccd8c2a748},
149 {0xf19f90ed50266d, 0xabf2b4bf65f9df, 0x313865468fafec, 0x5cb379ba910a17},
150 {1, 0, 0, 0}},
151 {{0x0641966cab26e3, 0x91fb2991fab0a0, 0xefec27a4e13a0b, 0x0499aa8a5f8ebe},
152 {0x7510407766af5d, 0x84d929610d5450, 0x81d77aae82f706, 0x6916f6d4338c5b},
153 {1, 0, 0, 0}},
154 {{0xea95ac3b1f15c6, 0x086000905e82d4, 0xdd323ae4d1c8b1, 0x932b56be7685a3},
155 {0x9ef93dea25dbbf, 0x41665960f390f0, 0xfdec76dbe2a8a7, 0x523e80f019062a},
156 {1, 0, 0, 0}},
157 {{0x822fdd26732c73, 0xa01c83531b5d0f, 0x363f37347c1ba4, 0xc391b45c84725c},
158 {0xbbd5e1b2d6ad24, 0xddfbcde19dfaec, 0xc393da7e222a7f, 0x1efb7890ede244},
159 {1, 0, 0, 0}},
160 {{0x4c9e90ca217da1, 0xd11beca79159bb, 0xff8d33c2c98b7c, 0x2610b39409f849},
161 {0x44d1352ac64da0, 0xcdbb7b2c46b4fb, 0x966c079b753c89, 0xfe67e4e820b112},
162 {1, 0, 0, 0}},
163 {{0xe28cae2df5312d, 0xc71b61d16f5c6e, 0x79b7619a3e7c4c, 0x05c73240899b47},
164 {0x9f7f6382c73e3a, 0x18615165c56bda, 0x641fab2116fd56, 0x72855882b08394},
165 {1, 0, 0, 0}},
166 {{0x0469182f161c09, 0x74a98ca8d00fb5, 0xb89da93489a3e0, 0x41c98768fb0c1d},
167 {0xe5ea05fb32da81, 0x3dce9ffbca6855, 0x1cfe2d3fbf59e6, 0x0e5e03408738a7},
168 {1, 0, 0, 0}},
169 {{0xdab22b2333e87f, 0x4430137a5dd2f6, 0xe03ab9f738beb8, 0xcb0c5d0dc34f24},
170 {0x764a7df0c8fda5, 0x185ba5c3fa2044, 0x9281d688bcbe50, 0xc40331df893881},
171 {1, 0, 0, 0}},
172 {{0xb89530796f0f60, 0xade92bd26909a3, 0x1a0c83fb4884da, 0x1765bf22a5a984},
173 {0x772a9ee75db09e, 0x23bc6c67cec16f, 0x4c1edba8b14e2f, 0xe2a215d9611369},
174 {1, 0, 0, 0}},
175 {{0x571e509fb5efb3, 0xade88696410552, 0xc8ae85fada74fe, 0x6c7e4be83bbde3},
176 {0xff9f51160f4652, 0xb47ce2495a6539, 0xa2946c53b582f4, 0x286d2db3ee9a60},
177 {1, 0, 0, 0}},
178 {{0x40bbd5081a44af, 0x0995183b13926c, 0xbcefba6f47f6d0, 0x215619e9cc0057},
179 {0x8bc94d3b0df45e, 0xf11c54a3694f6f, 0x8631b93cdfe8b5, 0xe7e3f4b0982db9},
180 {1, 0, 0, 0}},
181 {{0xb17048ab3e1c7b, 0xac38f36ff8a1d8, 0x1c29819435d2c6, 0xc813132f4c07e9},
182 {0x2891425503b11f, 0x08781030579fea, 0xf5426ba5cc9674, 0x1e28ebf18562bc},
183 {1, 0, 0, 0}},
184 {{0x9f31997cc864eb, 0x06cd91d28b5e4c, 0xff17036691a973, 0xf1aef351497c58},
185 {0xdd1f2d600564ff, 0xdead073b1402db, 0x74a684435bd693, 0xeea7471f962558},
186 {1, 0, 0, 0}}},
0f113f3e
MC
187{{{0, 0, 0, 0},
188 {0, 0, 0, 0},
189 {0, 0, 0, 0}},
190 {{0x9665266dddf554, 0x9613d78b60ef2d, 0xce27a34cdba417, 0xd35ab74d6afc31},
191 {0x85ccdd22deb15e, 0x2137e5783a6aab, 0xa141cffd8c93c6, 0x355a1830e90f2d},
192 {1, 0, 0, 0}},
193 {{0x1a494eadaade65, 0xd6da4da77fe53c, 0xe7992996abec86, 0x65c3553c6090e3},
194 {0xfa610b1fb09346, 0xf1c6540b8a4aaf, 0xc51a13ccd3cbab, 0x02995b1b18c28a},
195 {1, 0, 0, 0}},
196 {{0x7874568e7295ef, 0x86b419fbe38d04, 0xdc0690a7550d9a, 0xd3966a44beac33},
197 {0x2b7280ec29132f, 0xbeaa3b6a032df3, 0xdc7dd88ae41200, 0xd25e2513e3a100},
198 {1, 0, 0, 0}},
199 {{0x924857eb2efafd, 0xac2bce41223190, 0x8edaa1445553fc, 0x825800fd3562d5},
200 {0x8d79148ea96621, 0x23a01c3dd9ed8d, 0xaf8b219f9416b5, 0xd8db0cc277daea},
201 {1, 0, 0, 0}},
202 {{0x76a9c3b1a700f0, 0xe9acd29bc7e691, 0x69212d1a6b0327, 0x6322e97fe154be},
203 {0x469fc5465d62aa, 0x8d41ed18883b05, 0x1f8eae66c52b88, 0xe4fcbe9325be51},
204 {1, 0, 0, 0}},
205 {{0x825fdf583cac16, 0x020b857c7b023a, 0x683c17744b0165, 0x14ffd0a2daf2f1},
206 {0x323b36184218f9, 0x4944ec4e3b47d4, 0xc15b3080841acf, 0x0bced4b01a28bb},
207 {1, 0, 0, 0}},
208 {{0x92ac22230df5c4, 0x52f33b4063eda8, 0xcb3f19870c0c93, 0x40064f2ba65233},
209 {0xfe16f0924f8992, 0x012da25af5b517, 0x1a57bb24f723a6, 0x06f8bc76760def},
210 {1, 0, 0, 0}},
211 {{0x4a7084f7817cb9, 0xbcab0738ee9a78, 0x3ec11e11d9c326, 0xdc0fe90e0f1aae},
212 {0xcf639ea5f98390, 0x5c350aa22ffb74, 0x9afae98a4047b7, 0x956ec2d617fc45},
213 {1, 0, 0, 0}},
214 {{0x4306d648c1be6a, 0x9247cd8bc9a462, 0xf5595e377d2f2e, 0xbd1c3caff1a52e},
215 {0x045e14472409d0, 0x29f3e17078f773, 0x745a602b2d4f7d, 0x191837685cdfbb},
216 {1, 0, 0, 0}},
217 {{0x5b6ee254a8cb79, 0x4953433f5e7026, 0xe21faeb1d1def4, 0xc4c225785c09de},
218 {0x307ce7bba1e518, 0x31b125b1036db8, 0x47e91868839e8f, 0xc765866e33b9f3},
219 {1, 0, 0, 0}},
220 {{0x3bfece24f96906, 0x4794da641e5093, 0xde5df64f95db26, 0x297ecd89714b05},
221 {0x701bd3ebb2c3aa, 0x7073b4f53cb1d5, 0x13c5665658af16, 0x9895089d66fe58},
222 {1, 0, 0, 0}},
223 {{0x0fef05f78c4790, 0x2d773633b05d2e, 0x94229c3a951c94, 0xbbbd70df4911bb},
224 {0xb2c6963d2c1168, 0x105f47a72b0d73, 0x9fdf6111614080, 0x7b7e94b39e67b0},
225 {1, 0, 0, 0}},
226 {{0xad1a7d6efbe2b3, 0xf012482c0da69d, 0x6b3bdf12438345, 0x40d7558d7aa4d9},
227 {0x8a09fffb5c6d3d, 0x9a356e5d9ffd38, 0x5973f15f4f9b1c, 0xdcd5f59f63c3ea},
228 {1, 0, 0, 0}},
229 {{0xacf39f4c5ca7ab, 0x4c8071cc5fd737, 0xc64e3602cd1184, 0x0acd4644c9abba},
230 {0x6c011a36d8bf6e, 0xfecd87ba24e32a, 0x19f6f56574fad8, 0x050b204ced9405},
231 {1, 0, 0, 0}},
232 {{0xed4f1cae7d9a96, 0x5ceef7ad94c40a, 0x778e4a3bf3ef9b, 0x7405783dc3b55e},
233 {0x32477c61b6e8c6, 0xb46a97570f018b, 0x91176d0a7e95d1, 0x3df90fbc4c7d0e},
234 {1, 0, 0, 0}}}
235};
04daec86
BM
236
237/* Precomputation for the group generator. */
3aef36ff 238struct nistp224_pre_comp_st {
0f113f3e 239 felem g_pre_comp[2][16][3];
2f545ae4 240 CRYPTO_REF_COUNT references;
3aef36ff 241};
04daec86
BM
242
243const EC_METHOD *EC_GFp_nistp224_method(void)
0f113f3e
MC
244{
245 static const EC_METHOD ret = {
246 EC_FLAGS_DEFAULT_OCT,
247 NID_X9_62_prime_field,
32ab57cb
SL
248 ossl_ec_GFp_nistp224_group_init,
249 ossl_ec_GFp_simple_group_finish,
250 ossl_ec_GFp_simple_group_clear_finish,
251 ossl_ec_GFp_nist_group_copy,
252 ossl_ec_GFp_nistp224_group_set_curve,
253 ossl_ec_GFp_simple_group_get_curve,
254 ossl_ec_GFp_simple_group_get_degree,
255 ossl_ec_group_simple_order_bits,
256 ossl_ec_GFp_simple_group_check_discriminant,
257 ossl_ec_GFp_simple_point_init,
258 ossl_ec_GFp_simple_point_finish,
259 ossl_ec_GFp_simple_point_clear_finish,
260 ossl_ec_GFp_simple_point_copy,
261 ossl_ec_GFp_simple_point_set_to_infinity,
262 ossl_ec_GFp_simple_point_set_affine_coordinates,
263 ossl_ec_GFp_nistp224_point_get_affine_coordinates,
0f113f3e
MC
264 0 /* point_set_compressed_coordinates */ ,
265 0 /* point2oct */ ,
266 0 /* oct2point */ ,
32ab57cb
SL
267 ossl_ec_GFp_simple_add,
268 ossl_ec_GFp_simple_dbl,
269 ossl_ec_GFp_simple_invert,
270 ossl_ec_GFp_simple_is_at_infinity,
271 ossl_ec_GFp_simple_is_on_curve,
272 ossl_ec_GFp_simple_cmp,
273 ossl_ec_GFp_simple_make_affine,
274 ossl_ec_GFp_simple_points_make_affine,
275 ossl_ec_GFp_nistp224_points_mul,
276 ossl_ec_GFp_nistp224_precompute_mult,
277 ossl_ec_GFp_nistp224_have_precompute_mult,
278 ossl_ec_GFp_nist_field_mul,
279 ossl_ec_GFp_nist_field_sqr,
0f113f3e 280 0 /* field_div */ ,
32ab57cb 281 ossl_ec_GFp_simple_field_inv,
0f113f3e
MC
282 0 /* field_encode */ ,
283 0 /* field_decode */ ,
9ff9bccc 284 0, /* field_set_to_one */
32ab57cb
SL
285 ossl_ec_key_simple_priv2oct,
286 ossl_ec_key_simple_oct2priv,
9ff9bccc 287 0, /* set private */
32ab57cb
SL
288 ossl_ec_key_simple_generate_key,
289 ossl_ec_key_simple_check_key,
290 ossl_ec_key_simple_generate_public_key,
9ff9bccc
DSH
291 0, /* keycopy */
292 0, /* keyfinish */
32ab57cb
SL
293 ossl_ecdh_simple_compute_key,
294 ossl_ecdsa_simple_sign_setup,
295 ossl_ecdsa_simple_sign_sig,
296 ossl_ecdsa_simple_verify_sig,
f667820c 297 0, /* field_inverse_mod_ord */
37124360
NT
298 0, /* blind_coordinates */
299 0, /* ladder_pre */
300 0, /* ladder_step */
301 0 /* ladder_post */
0f113f3e
MC
302 };
303
304 return &ret;
305}
306
307/*
308 * Helper functions to convert field elements to/from internal representation
309 */
3e00b4c9 310static void bin28_to_felem(felem out, const u8 in[28])
0f113f3e 311{
77286fe3
BE
312 out[0] = *((const limb *)(in)) & 0x00ffffffffffffff;
313 out[1] = (*((const limb_aX *)(in + 7))) & 0x00ffffffffffffff;
314 out[2] = (*((const limb_aX *)(in + 14))) & 0x00ffffffffffffff;
315 out[3] = (*((const limb_aX *)(in + 20))) >> 8;
0f113f3e 316}
04daec86 317
3e00b4c9 318static void felem_to_bin28(u8 out[28], const felem in)
0f113f3e
MC
319{
320 unsigned i;
321 for (i = 0; i < 7; ++i) {
322 out[i] = in[0] >> (8 * i);
323 out[i + 7] = in[1] >> (8 * i);
324 out[i + 14] = in[2] >> (8 * i);
325 out[i + 21] = in[3] >> (8 * i);
326 }
327}
04daec86 328
04daec86 329/* From OpenSSL BIGNUM to internal representation */
3e00b4c9 330static int BN_to_felem(felem out, const BIGNUM *bn)
0f113f3e 331{
0f113f3e 332 felem_bytearray b_out;
e0b660c2 333 int num_bytes;
0f113f3e 334
e0b660c2 335 if (BN_is_negative(bn)) {
9311d0c4 336 ERR_raise(ERR_LIB_EC, EC_R_BIGNUM_OUT_OF_RANGE);
0f113f3e
MC
337 return 0;
338 }
e0b660c2
NT
339 num_bytes = BN_bn2lebinpad(bn, b_out, sizeof(b_out));
340 if (num_bytes < 0) {
9311d0c4 341 ERR_raise(ERR_LIB_EC, EC_R_BIGNUM_OUT_OF_RANGE);
0f113f3e
MC
342 return 0;
343 }
0f113f3e
MC
344 bin28_to_felem(out, b_out);
345 return 1;
346}
04daec86
BM
347
348/* From internal representation to OpenSSL BIGNUM */
3e00b4c9 349static BIGNUM *felem_to_BN(BIGNUM *out, const felem in)
0f113f3e 350{
e0b660c2
NT
351 felem_bytearray b_out;
352 felem_to_bin28(b_out, in);
353 return BN_lebin2bn(b_out, sizeof(b_out), out);
0f113f3e 354}
04daec86
BM
355
356/******************************************************************************/
3a83462d 357/*-
0f113f3e 358 * FIELD OPERATIONS
04daec86
BM
359 *
360 * Field operations, using the internal representation of field elements.
361 * NB! These operations are specific to our point multiplication and cannot be
362 * expected to be correct in general - e.g., multiplication with a large scalar
363 * will cause an overflow.
364 *
365 */
366
3e00b4c9 367static void felem_one(felem out)
0f113f3e
MC
368{
369 out[0] = 1;
370 out[1] = 0;
371 out[2] = 0;
372 out[3] = 0;
373}
3e00b4c9
BM
374
375static void felem_assign(felem out, const felem in)
0f113f3e
MC
376{
377 out[0] = in[0];
378 out[1] = in[1];
379 out[2] = in[2];
380 out[3] = in[3];
381}
3e00b4c9 382
04daec86 383/* Sum two field elements: out += in */
3e00b4c9 384static void felem_sum(felem out, const felem in)
0f113f3e
MC
385{
386 out[0] += in[0];
387 out[1] += in[1];
388 out[2] += in[2];
389 out[3] += in[3];
390}
04daec86
BM
391
392/* Subtract field elements: out -= in */
393/* Assumes in[i] < 2^57 */
3e00b4c9 394static void felem_diff(felem out, const felem in)
0f113f3e
MC
395{
396 static const limb two58p2 = (((limb) 1) << 58) + (((limb) 1) << 2);
397 static const limb two58m2 = (((limb) 1) << 58) - (((limb) 1) << 2);
398 static const limb two58m42m2 = (((limb) 1) << 58) -
399 (((limb) 1) << 42) - (((limb) 1) << 2);
400
401 /* Add 0 mod 2^224-2^96+1 to ensure out > in */
402 out[0] += two58p2;
403 out[1] += two58m42m2;
404 out[2] += two58m2;
405 out[3] += two58m2;
406
407 out[0] -= in[0];
408 out[1] -= in[1];
409 out[2] -= in[2];
410 out[3] -= in[3];
411}
04daec86 412
3e00b4c9 413/* Subtract in unreduced 128-bit mode: out -= in */
04daec86 414/* Assumes in[i] < 2^119 */
3e00b4c9 415static void widefelem_diff(widefelem out, const widefelem in)
0f113f3e
MC
416{
417 static const widelimb two120 = ((widelimb) 1) << 120;
418 static const widelimb two120m64 = (((widelimb) 1) << 120) -
419 (((widelimb) 1) << 64);
420 static const widelimb two120m104m64 = (((widelimb) 1) << 120) -
421 (((widelimb) 1) << 104) - (((widelimb) 1) << 64);
422
423 /* Add 0 mod 2^224-2^96+1 to ensure out > in */
424 out[0] += two120;
425 out[1] += two120m64;
426 out[2] += two120m64;
427 out[3] += two120;
428 out[4] += two120m104m64;
429 out[5] += two120m64;
430 out[6] += two120m64;
431
432 out[0] -= in[0];
433 out[1] -= in[1];
434 out[2] -= in[2];
435 out[3] -= in[3];
436 out[4] -= in[4];
437 out[5] -= in[5];
438 out[6] -= in[6];
439}
04daec86
BM
440
441/* Subtract in mixed mode: out128 -= in64 */
442/* in[i] < 2^63 */
3e00b4c9 443static void felem_diff_128_64(widefelem out, const felem in)
0f113f3e
MC
444{
445 static const widelimb two64p8 = (((widelimb) 1) << 64) +
446 (((widelimb) 1) << 8);
447 static const widelimb two64m8 = (((widelimb) 1) << 64) -
448 (((widelimb) 1) << 8);
449 static const widelimb two64m48m8 = (((widelimb) 1) << 64) -
450 (((widelimb) 1) << 48) - (((widelimb) 1) << 8);
451
452 /* Add 0 mod 2^224-2^96+1 to ensure out > in */
453 out[0] += two64p8;
454 out[1] += two64m48m8;
455 out[2] += two64m8;
456 out[3] += two64m8;
457
458 out[0] -= in[0];
459 out[1] -= in[1];
460 out[2] -= in[2];
461 out[3] -= in[3];
462}
463
464/*
465 * Multiply a field element by a scalar: out = out * scalar The scalars we
466 * actually use are small, so results fit without overflow
467 */
3e00b4c9 468static void felem_scalar(felem out, const limb scalar)
0f113f3e
MC
469{
470 out[0] *= scalar;
471 out[1] *= scalar;
472 out[2] *= scalar;
473 out[3] *= scalar;
474}
475
476/*
477 * Multiply an unreduced field element by a scalar: out = out * scalar The
478 * scalars we actually use are small, so results fit without overflow
479 */
3e00b4c9 480static void widefelem_scalar(widefelem out, const widelimb scalar)
0f113f3e
MC
481{
482 out[0] *= scalar;
483 out[1] *= scalar;
484 out[2] *= scalar;
485 out[3] *= scalar;
486 out[4] *= scalar;
487 out[5] *= scalar;
488 out[6] *= scalar;
489}
04daec86
BM
490
491/* Square a field element: out = in^2 */
3e00b4c9 492static void felem_square(widefelem out, const felem in)
0f113f3e
MC
493{
494 limb tmp0, tmp1, tmp2;
495 tmp0 = 2 * in[0];
496 tmp1 = 2 * in[1];
497 tmp2 = 2 * in[2];
498 out[0] = ((widelimb) in[0]) * in[0];
499 out[1] = ((widelimb) in[0]) * tmp1;
500 out[2] = ((widelimb) in[0]) * tmp2 + ((widelimb) in[1]) * in[1];
501 out[3] = ((widelimb) in[3]) * tmp0 + ((widelimb) in[1]) * tmp2;
502 out[4] = ((widelimb) in[3]) * tmp1 + ((widelimb) in[2]) * in[2];
503 out[5] = ((widelimb) in[3]) * tmp2;
504 out[6] = ((widelimb) in[3]) * in[3];
505}
04daec86
BM
506
507/* Multiply two field elements: out = in1 * in2 */
3e00b4c9 508static void felem_mul(widefelem out, const felem in1, const felem in2)
0f113f3e
MC
509{
510 out[0] = ((widelimb) in1[0]) * in2[0];
511 out[1] = ((widelimb) in1[0]) * in2[1] + ((widelimb) in1[1]) * in2[0];
512 out[2] = ((widelimb) in1[0]) * in2[2] + ((widelimb) in1[1]) * in2[1] +
4eb504ae 513 ((widelimb) in1[2]) * in2[0];
0f113f3e 514 out[3] = ((widelimb) in1[0]) * in2[3] + ((widelimb) in1[1]) * in2[2] +
4eb504ae 515 ((widelimb) in1[2]) * in2[1] + ((widelimb) in1[3]) * in2[0];
0f113f3e 516 out[4] = ((widelimb) in1[1]) * in2[3] + ((widelimb) in1[2]) * in2[2] +
4eb504ae 517 ((widelimb) in1[3]) * in2[1];
0f113f3e
MC
518 out[5] = ((widelimb) in1[2]) * in2[3] + ((widelimb) in1[3]) * in2[2];
519 out[6] = ((widelimb) in1[3]) * in2[3];
520}
04daec86 521
3a83462d
MC
522/*-
523 * Reduce seven 128-bit coefficients to four 64-bit coefficients.
3e00b4c9
BM
524 * Requires in[i] < 2^126,
525 * ensures out[0] < 2^56, out[1] < 2^56, out[2] < 2^56, out[3] <= 2^56 + 2^16 */
526static void felem_reduce(felem out, const widefelem in)
0f113f3e
MC
527{
528 static const widelimb two127p15 = (((widelimb) 1) << 127) +
529 (((widelimb) 1) << 15);
530 static const widelimb two127m71 = (((widelimb) 1) << 127) -
531 (((widelimb) 1) << 71);
532 static const widelimb two127m71m55 = (((widelimb) 1) << 127) -
533 (((widelimb) 1) << 71) - (((widelimb) 1) << 55);
534 widelimb output[5];
535
536 /* Add 0 mod 2^224-2^96+1 to ensure all differences are positive */
537 output[0] = in[0] + two127p15;
538 output[1] = in[1] + two127m71m55;
539 output[2] = in[2] + two127m71;
540 output[3] = in[3];
541 output[4] = in[4];
542
543 /* Eliminate in[4], in[5], in[6] */
544 output[4] += in[6] >> 16;
545 output[3] += (in[6] & 0xffff) << 40;
546 output[2] -= in[6];
547
548 output[3] += in[5] >> 16;
549 output[2] += (in[5] & 0xffff) << 40;
550 output[1] -= in[5];
551
552 output[2] += output[4] >> 16;
553 output[1] += (output[4] & 0xffff) << 40;
554 output[0] -= output[4];
555
556 /* Carry 2 -> 3 -> 4 */
557 output[3] += output[2] >> 56;
558 output[2] &= 0x00ffffffffffffff;
559
560 output[4] = output[3] >> 56;
561 output[3] &= 0x00ffffffffffffff;
562
563 /* Now output[2] < 2^56, output[3] < 2^56, output[4] < 2^72 */
564
565 /* Eliminate output[4] */
566 output[2] += output[4] >> 16;
567 /* output[2] < 2^56 + 2^56 = 2^57 */
568 output[1] += (output[4] & 0xffff) << 40;
569 output[0] -= output[4];
570
571 /* Carry 0 -> 1 -> 2 -> 3 */
572 output[1] += output[0] >> 56;
573 out[0] = output[0] & 0x00ffffffffffffff;
574
575 output[2] += output[1] >> 56;
576 /* output[2] < 2^57 + 2^72 */
577 out[1] = output[1] & 0x00ffffffffffffff;
578 output[3] += output[2] >> 56;
579 /* output[3] <= 2^56 + 2^16 */
580 out[2] = output[2] & 0x00ffffffffffffff;
581
50e735f9
MC
582 /*-
583 * out[0] < 2^56, out[1] < 2^56, out[2] < 2^56,
584 * out[3] <= 2^56 + 2^16 (due to final carry),
585 * so out < 2*p
586 */
0f113f3e
MC
587 out[3] = output[3];
588}
04daec86 589
3e00b4c9 590static void felem_square_reduce(felem out, const felem in)
0f113f3e
MC
591{
592 widefelem tmp;
593 felem_square(tmp, in);
594 felem_reduce(out, tmp);
595}
04daec86 596
3e00b4c9 597static void felem_mul_reduce(felem out, const felem in1, const felem in2)
0f113f3e
MC
598{
599 widefelem tmp;
600 felem_mul(tmp, in1, in2);
601 felem_reduce(out, tmp);
602}
603
604/*
605 * Reduce to unique minimal representation. Requires 0 <= in < 2*p (always
606 * call felem_reduce first)
607 */
3e00b4c9 608static void felem_contract(felem out, const felem in)
0f113f3e
MC
609{
610 static const int64_t two56 = ((limb) 1) << 56;
611 /* 0 <= in < 2*p, p = 2^224 - 2^96 + 1 */
612 /* if in > p , reduce in = in - 2^224 + 2^96 - 1 */
613 int64_t tmp[4], a;
614 tmp[0] = in[0];
615 tmp[1] = in[1];
616 tmp[2] = in[2];
617 tmp[3] = in[3];
618 /* Case 1: a = 1 iff in >= 2^224 */
619 a = (in[3] >> 56);
620 tmp[0] -= a;
621 tmp[1] += a << 40;
622 tmp[3] &= 0x00ffffffffffffff;
623 /*
624 * Case 2: a = 0 iff p <= in < 2^224, i.e., the high 128 bits are all 1
625 * and the lower part is non-zero
626 */
627 a = ((in[3] & in[2] & (in[1] | 0x000000ffffffffff)) + 1) |
628 (((int64_t) (in[0] + (in[1] & 0x000000ffffffffff)) - 1) >> 63);
629 a &= 0x00ffffffffffffff;
630 /* turn a into an all-one mask (if a = 0) or an all-zero mask */
631 a = (a - 1) >> 63;
632 /* subtract 2^224 - 2^96 + 1 if a is all-one */
633 tmp[3] &= a ^ 0xffffffffffffffff;
634 tmp[2] &= a ^ 0xffffffffffffffff;
635 tmp[1] &= (a ^ 0xffffffffffffffff) | 0x000000ffffffffff;
636 tmp[0] -= 1 & a;
637
638 /*
639 * eliminate negative coefficients: if tmp[0] is negative, tmp[1] must be
640 * non-zero, so we only need one step
641 */
642 a = tmp[0] >> 63;
643 tmp[0] += two56 & a;
644 tmp[1] -= 1 & a;
645
646 /* carry 1 -> 2 -> 3 */
647 tmp[2] += tmp[1] >> 56;
648 tmp[1] &= 0x00ffffffffffffff;
649
650 tmp[3] += tmp[2] >> 56;
651 tmp[2] &= 0x00ffffffffffffff;
652
653 /* Now 0 <= out < p */
654 out[0] = tmp[0];
655 out[1] = tmp[1];
656 out[2] = tmp[2];
657 out[3] = tmp[3];
658}
659
dc55e4f7
DB
660/*
661 * Get negative value: out = -in
662 * Requires in[i] < 2^63,
663 * ensures out[0] < 2^56, out[1] < 2^56, out[2] < 2^56, out[3] <= 2^56 + 2^16
664 */
665static void felem_neg(felem out, const felem in)
666{
80de1742
P
667 widefelem tmp;
668
669 memset(tmp, 0, sizeof(tmp));
dc55e4f7
DB
670 felem_diff_128_64(tmp, in);
671 felem_reduce(out, tmp);
672}
673
0f113f3e
MC
674/*
675 * Zero-check: returns 1 if input is 0, and 0 otherwise. We know that field
676 * elements are reduced to in < 2^225, so we only need to check three cases:
677 * 0, 2^224 - 2^96 + 1, and 2^225 - 2^97 + 2
678 */
3e00b4c9 679static limb felem_is_zero(const felem in)
0f113f3e
MC
680{
681 limb zero, two224m96p1, two225m97p2;
682
683 zero = in[0] | in[1] | in[2] | in[3];
684 zero = (((int64_t) (zero) - 1) >> 63) & 1;
685 two224m96p1 = (in[0] ^ 1) | (in[1] ^ 0x00ffff0000000000)
686 | (in[2] ^ 0x00ffffffffffffff) | (in[3] ^ 0x00ffffffffffffff);
687 two224m96p1 = (((int64_t) (two224m96p1) - 1) >> 63) & 1;
688 two225m97p2 = (in[0] ^ 2) | (in[1] ^ 0x00fffe0000000000)
689 | (in[2] ^ 0x00ffffffffffffff) | (in[3] ^ 0x01ffffffffffffff);
690 two225m97p2 = (((int64_t) (two225m97p2) - 1) >> 63) & 1;
691 return (zero | two224m96p1 | two225m97p2);
692}
04daec86 693
c55b786a 694static int felem_is_zero_int(const void *in)
0f113f3e
MC
695{
696 return (int)(felem_is_zero(in) & ((limb) 1));
697}
3e00b4c9 698
04daec86
BM
699/* Invert a field element */
700/* Computation chain copied from djb's code */
3e00b4c9 701static void felem_inv(felem out, const felem in)
0f113f3e
MC
702{
703 felem ftmp, ftmp2, ftmp3, ftmp4;
704 widefelem tmp;
705 unsigned i;
706
707 felem_square(tmp, in);
708 felem_reduce(ftmp, tmp); /* 2 */
709 felem_mul(tmp, in, ftmp);
710 felem_reduce(ftmp, tmp); /* 2^2 - 1 */
711 felem_square(tmp, ftmp);
712 felem_reduce(ftmp, tmp); /* 2^3 - 2 */
713 felem_mul(tmp, in, ftmp);
714 felem_reduce(ftmp, tmp); /* 2^3 - 1 */
715 felem_square(tmp, ftmp);
716 felem_reduce(ftmp2, tmp); /* 2^4 - 2 */
717 felem_square(tmp, ftmp2);
718 felem_reduce(ftmp2, tmp); /* 2^5 - 4 */
719 felem_square(tmp, ftmp2);
720 felem_reduce(ftmp2, tmp); /* 2^6 - 8 */
721 felem_mul(tmp, ftmp2, ftmp);
722 felem_reduce(ftmp, tmp); /* 2^6 - 1 */
723 felem_square(tmp, ftmp);
724 felem_reduce(ftmp2, tmp); /* 2^7 - 2 */
725 for (i = 0; i < 5; ++i) { /* 2^12 - 2^6 */
726 felem_square(tmp, ftmp2);
727 felem_reduce(ftmp2, tmp);
728 }
729 felem_mul(tmp, ftmp2, ftmp);
730 felem_reduce(ftmp2, tmp); /* 2^12 - 1 */
731 felem_square(tmp, ftmp2);
732 felem_reduce(ftmp3, tmp); /* 2^13 - 2 */
733 for (i = 0; i < 11; ++i) { /* 2^24 - 2^12 */
734 felem_square(tmp, ftmp3);
735 felem_reduce(ftmp3, tmp);
736 }
737 felem_mul(tmp, ftmp3, ftmp2);
738 felem_reduce(ftmp2, tmp); /* 2^24 - 1 */
739 felem_square(tmp, ftmp2);
740 felem_reduce(ftmp3, tmp); /* 2^25 - 2 */
741 for (i = 0; i < 23; ++i) { /* 2^48 - 2^24 */
742 felem_square(tmp, ftmp3);
743 felem_reduce(ftmp3, tmp);
744 }
745 felem_mul(tmp, ftmp3, ftmp2);
746 felem_reduce(ftmp3, tmp); /* 2^48 - 1 */
747 felem_square(tmp, ftmp3);
748 felem_reduce(ftmp4, tmp); /* 2^49 - 2 */
749 for (i = 0; i < 47; ++i) { /* 2^96 - 2^48 */
750 felem_square(tmp, ftmp4);
751 felem_reduce(ftmp4, tmp);
752 }
753 felem_mul(tmp, ftmp3, ftmp4);
754 felem_reduce(ftmp3, tmp); /* 2^96 - 1 */
755 felem_square(tmp, ftmp3);
756 felem_reduce(ftmp4, tmp); /* 2^97 - 2 */
757 for (i = 0; i < 23; ++i) { /* 2^120 - 2^24 */
758 felem_square(tmp, ftmp4);
759 felem_reduce(ftmp4, tmp);
760 }
761 felem_mul(tmp, ftmp2, ftmp4);
762 felem_reduce(ftmp2, tmp); /* 2^120 - 1 */
763 for (i = 0; i < 6; ++i) { /* 2^126 - 2^6 */
764 felem_square(tmp, ftmp2);
765 felem_reduce(ftmp2, tmp);
766 }
767 felem_mul(tmp, ftmp2, ftmp);
768 felem_reduce(ftmp, tmp); /* 2^126 - 1 */
769 felem_square(tmp, ftmp);
770 felem_reduce(ftmp, tmp); /* 2^127 - 2 */
771 felem_mul(tmp, ftmp, in);
772 felem_reduce(ftmp, tmp); /* 2^127 - 1 */
773 for (i = 0; i < 97; ++i) { /* 2^224 - 2^97 */
774 felem_square(tmp, ftmp);
775 felem_reduce(ftmp, tmp);
776 }
777 felem_mul(tmp, ftmp, ftmp3);
778 felem_reduce(out, tmp); /* 2^224 - 2^96 - 1 */
779}
780
781/*
782 * Copy in constant time: if icopy == 1, copy in to out, if icopy == 0, copy
783 * out to itself.
784 */
785static void copy_conditional(felem out, const felem in, limb icopy)
786{
787 unsigned i;
788 /*
789 * icopy is a (64-bit) 0 or 1, so copy is either all-zero or all-one
790 */
791 const limb copy = -icopy;
792 for (i = 0; i < 4; ++i) {
793 const limb tmp = copy & (in[i] ^ out[i]);
794 out[i] ^= tmp;
795 }
796}
04daec86 797
04daec86 798/******************************************************************************/
3a83462d 799/*-
0f113f3e 800 * ELLIPTIC CURVE POINT OPERATIONS
04daec86
BM
801 *
802 * Points are represented in Jacobian projective coordinates:
803 * (X, Y, Z) corresponds to the affine point (X/Z^2, Y/Z^3),
804 * or to the point at infinity if Z == 0.
805 *
806 */
807
1d97c843
TH
808/*-
809 * Double an elliptic curve point:
04daec86
BM
810 * (X', Y', Z') = 2 * (X, Y, Z), where
811 * X' = (3 * (X - Z^2) * (X + Z^2))^2 - 8 * X * Y^2
eb8e052c 812 * Y' = 3 * (X - Z^2) * (X + Z^2) * (4 * X * Y^2 - X') - 8 * Y^4
04daec86
BM
813 * Z' = (Y + Z)^2 - Y^2 - Z^2 = 2 * Y * Z
814 * Outputs can equal corresponding inputs, i.e., x_out == x_in is allowed,
0f113f3e 815 * while x_out == y_in is not (maybe this works, but it's not tested).
1d97c843 816 */
04daec86 817static void
3e00b4c9
BM
818point_double(felem x_out, felem y_out, felem z_out,
819 const felem x_in, const felem y_in, const felem z_in)
0f113f3e
MC
820{
821 widefelem tmp, tmp2;
822 felem delta, gamma, beta, alpha, ftmp, ftmp2;
823
824 felem_assign(ftmp, x_in);
825 felem_assign(ftmp2, x_in);
826
827 /* delta = z^2 */
828 felem_square(tmp, z_in);
829 felem_reduce(delta, tmp);
830
831 /* gamma = y^2 */
832 felem_square(tmp, y_in);
833 felem_reduce(gamma, tmp);
834
835 /* beta = x*gamma */
836 felem_mul(tmp, x_in, gamma);
837 felem_reduce(beta, tmp);
838
839 /* alpha = 3*(x-delta)*(x+delta) */
840 felem_diff(ftmp, delta);
841 /* ftmp[i] < 2^57 + 2^58 + 2 < 2^59 */
842 felem_sum(ftmp2, delta);
843 /* ftmp2[i] < 2^57 + 2^57 = 2^58 */
844 felem_scalar(ftmp2, 3);
845 /* ftmp2[i] < 3 * 2^58 < 2^60 */
846 felem_mul(tmp, ftmp, ftmp2);
847 /* tmp[i] < 2^60 * 2^59 * 4 = 2^121 */
848 felem_reduce(alpha, tmp);
849
850 /* x' = alpha^2 - 8*beta */
851 felem_square(tmp, alpha);
852 /* tmp[i] < 4 * 2^57 * 2^57 = 2^116 */
853 felem_assign(ftmp, beta);
854 felem_scalar(ftmp, 8);
855 /* ftmp[i] < 8 * 2^57 = 2^60 */
856 felem_diff_128_64(tmp, ftmp);
857 /* tmp[i] < 2^116 + 2^64 + 8 < 2^117 */
858 felem_reduce(x_out, tmp);
859
860 /* z' = (y + z)^2 - gamma - delta */
861 felem_sum(delta, gamma);
862 /* delta[i] < 2^57 + 2^57 = 2^58 */
863 felem_assign(ftmp, y_in);
864 felem_sum(ftmp, z_in);
865 /* ftmp[i] < 2^57 + 2^57 = 2^58 */
866 felem_square(tmp, ftmp);
867 /* tmp[i] < 4 * 2^58 * 2^58 = 2^118 */
868 felem_diff_128_64(tmp, delta);
869 /* tmp[i] < 2^118 + 2^64 + 8 < 2^119 */
870 felem_reduce(z_out, tmp);
871
872 /* y' = alpha*(4*beta - x') - 8*gamma^2 */
873 felem_scalar(beta, 4);
874 /* beta[i] < 4 * 2^57 = 2^59 */
875 felem_diff(beta, x_out);
876 /* beta[i] < 2^59 + 2^58 + 2 < 2^60 */
877 felem_mul(tmp, alpha, beta);
878 /* tmp[i] < 4 * 2^57 * 2^60 = 2^119 */
879 felem_square(tmp2, gamma);
880 /* tmp2[i] < 4 * 2^57 * 2^57 = 2^116 */
881 widefelem_scalar(tmp2, 8);
882 /* tmp2[i] < 8 * 2^116 = 2^119 */
883 widefelem_diff(tmp, tmp2);
884 /* tmp[i] < 2^119 + 2^120 < 2^121 */
885 felem_reduce(y_out, tmp);
886}
04daec86 887
1d97c843
TH
888/*-
889 * Add two elliptic curve points:
04daec86
BM
890 * (X_1, Y_1, Z_1) + (X_2, Y_2, Z_2) = (X_3, Y_3, Z_3), where
891 * X_3 = (Z_1^3 * Y_2 - Z_2^3 * Y_1)^2 - (Z_1^2 * X_2 - Z_2^2 * X_1)^3 -
892 * 2 * Z_2^2 * X_1 * (Z_1^2 * X_2 - Z_2^2 * X_1)^2
893 * Y_3 = (Z_1^3 * Y_2 - Z_2^3 * Y_1) * (Z_2^2 * X_1 * (Z_1^2 * X_2 - Z_2^2 * X_1)^2 - X_3) -
894 * Z_2^3 * Y_1 * (Z_1^2 * X_2 - Z_2^2 * X_1)^3
3e00b4c9
BM
895 * Z_3 = (Z_1^2 * X_2 - Z_2^2 * X_1) * (Z_1 * Z_2)
896 *
897 * This runs faster if 'mixed' is set, which requires Z_2 = 1 or Z_2 = 0.
898 */
04daec86 899
0f113f3e
MC
900/*
901 * This function is not entirely constant-time: it includes a branch for
902 * checking whether the two input points are equal, (while not equal to the
903 * point at infinity). This case never happens during single point
904 * multiplication, so there is no timing leak for ECDH or ECDSA signing.
905 */
3e00b4c9 906static void point_add(felem x3, felem y3, felem z3,
0f113f3e
MC
907 const felem x1, const felem y1, const felem z1,
908 const int mixed, const felem x2, const felem y2,
909 const felem z2)
910{
911 felem ftmp, ftmp2, ftmp3, ftmp4, ftmp5, x_out, y_out, z_out;
912 widefelem tmp, tmp2;
913 limb z1_is_zero, z2_is_zero, x_equal, y_equal;
0164bf81 914 limb points_equal;
0f113f3e
MC
915
916 if (!mixed) {
917 /* ftmp2 = z2^2 */
918 felem_square(tmp, z2);
919 felem_reduce(ftmp2, tmp);
920
921 /* ftmp4 = z2^3 */
922 felem_mul(tmp, ftmp2, z2);
923 felem_reduce(ftmp4, tmp);
924
925 /* ftmp4 = z2^3*y1 */
926 felem_mul(tmp2, ftmp4, y1);
927 felem_reduce(ftmp4, tmp2);
928
929 /* ftmp2 = z2^2*x1 */
930 felem_mul(tmp2, ftmp2, x1);
931 felem_reduce(ftmp2, tmp2);
932 } else {
933 /*
934 * We'll assume z2 = 1 (special case z2 = 0 is handled later)
935 */
936
937 /* ftmp4 = z2^3*y1 */
938 felem_assign(ftmp4, y1);
939
940 /* ftmp2 = z2^2*x1 */
941 felem_assign(ftmp2, x1);
942 }
943
944 /* ftmp = z1^2 */
945 felem_square(tmp, z1);
946 felem_reduce(ftmp, tmp);
947
948 /* ftmp3 = z1^3 */
949 felem_mul(tmp, ftmp, z1);
950 felem_reduce(ftmp3, tmp);
951
952 /* tmp = z1^3*y2 */
953 felem_mul(tmp, ftmp3, y2);
954 /* tmp[i] < 4 * 2^57 * 2^57 = 2^116 */
955
956 /* ftmp3 = z1^3*y2 - z2^3*y1 */
957 felem_diff_128_64(tmp, ftmp4);
958 /* tmp[i] < 2^116 + 2^64 + 8 < 2^117 */
959 felem_reduce(ftmp3, tmp);
960
961 /* tmp = z1^2*x2 */
962 felem_mul(tmp, ftmp, x2);
963 /* tmp[i] < 4 * 2^57 * 2^57 = 2^116 */
964
965 /* ftmp = z1^2*x2 - z2^2*x1 */
966 felem_diff_128_64(tmp, ftmp2);
967 /* tmp[i] < 2^116 + 2^64 + 8 < 2^117 */
968 felem_reduce(ftmp, tmp);
969
970 /*
0164bf81
NT
971 * The formulae are incorrect if the points are equal, in affine coordinates
972 * (X_1, Y_1) == (X_2, Y_2), so we check for this and do doubling if this
973 * happens.
974 *
975 * We use bitwise operations to avoid potential side-channels introduced by
976 * the short-circuiting behaviour of boolean operators.
0f113f3e
MC
977 */
978 x_equal = felem_is_zero(ftmp);
979 y_equal = felem_is_zero(ftmp3);
0164bf81
NT
980 /*
981 * The special case of either point being the point at infinity (z1 and/or
982 * z2 are zero), is handled separately later on in this function, so we
983 * avoid jumping to point_double here in those special cases.
984 */
0f113f3e
MC
985 z1_is_zero = felem_is_zero(z1);
986 z2_is_zero = felem_is_zero(z2);
0164bf81
NT
987
988 /*
989 * Compared to `ecp_nistp256.c` and `ecp_nistp521.c`, in this
990 * specific implementation `felem_is_zero()` returns truth as `0x1`
991 * (rather than `0xff..ff`).
992 *
993 * This implies that `~true` in this implementation becomes
994 * `0xff..fe` (rather than `0x0`): for this reason, to be used in
995 * the if expression, we mask out only the last bit in the next
996 * line.
997 */
998 points_equal = (x_equal & y_equal & (~z1_is_zero) & (~z2_is_zero)) & 1;
999
1000 if (points_equal) {
1001 /*
1002 * This is obviously not constant-time but, as mentioned before, this
1003 * case never happens during single point multiplication, so there is no
1004 * timing leak for ECDH or ECDSA signing.
1005 */
0f113f3e
MC
1006 point_double(x3, y3, z3, x1, y1, z1);
1007 return;
1008 }
1009
1010 /* ftmp5 = z1*z2 */
1011 if (!mixed) {
1012 felem_mul(tmp, z1, z2);
1013 felem_reduce(ftmp5, tmp);
1014 } else {
1015 /* special case z2 = 0 is handled later */
1016 felem_assign(ftmp5, z1);
1017 }
1018
1019 /* z_out = (z1^2*x2 - z2^2*x1)*(z1*z2) */
1020 felem_mul(tmp, ftmp, ftmp5);
1021 felem_reduce(z_out, tmp);
1022
1023 /* ftmp = (z1^2*x2 - z2^2*x1)^2 */
1024 felem_assign(ftmp5, ftmp);
1025 felem_square(tmp, ftmp);
1026 felem_reduce(ftmp, tmp);
1027
1028 /* ftmp5 = (z1^2*x2 - z2^2*x1)^3 */
1029 felem_mul(tmp, ftmp, ftmp5);
1030 felem_reduce(ftmp5, tmp);
1031
1032 /* ftmp2 = z2^2*x1*(z1^2*x2 - z2^2*x1)^2 */
1033 felem_mul(tmp, ftmp2, ftmp);
1034 felem_reduce(ftmp2, tmp);
1035
1036 /* tmp = z2^3*y1*(z1^2*x2 - z2^2*x1)^3 */
1037 felem_mul(tmp, ftmp4, ftmp5);
1038 /* tmp[i] < 4 * 2^57 * 2^57 = 2^116 */
1039
1040 /* tmp2 = (z1^3*y2 - z2^3*y1)^2 */
1041 felem_square(tmp2, ftmp3);
1042 /* tmp2[i] < 4 * 2^57 * 2^57 < 2^116 */
1043
1044 /* tmp2 = (z1^3*y2 - z2^3*y1)^2 - (z1^2*x2 - z2^2*x1)^3 */
1045 felem_diff_128_64(tmp2, ftmp5);
1046 /* tmp2[i] < 2^116 + 2^64 + 8 < 2^117 */
1047
1048 /* ftmp5 = 2*z2^2*x1*(z1^2*x2 - z2^2*x1)^2 */
1049 felem_assign(ftmp5, ftmp2);
1050 felem_scalar(ftmp5, 2);
1051 /* ftmp5[i] < 2 * 2^57 = 2^58 */
1052
50e735f9
MC
1053 /*-
1054 * x_out = (z1^3*y2 - z2^3*y1)^2 - (z1^2*x2 - z2^2*x1)^3 -
1055 * 2*z2^2*x1*(z1^2*x2 - z2^2*x1)^2
1056 */
0f113f3e
MC
1057 felem_diff_128_64(tmp2, ftmp5);
1058 /* tmp2[i] < 2^117 + 2^64 + 8 < 2^118 */
1059 felem_reduce(x_out, tmp2);
1060
1061 /* ftmp2 = z2^2*x1*(z1^2*x2 - z2^2*x1)^2 - x_out */
1062 felem_diff(ftmp2, x_out);
1063 /* ftmp2[i] < 2^57 + 2^58 + 2 < 2^59 */
1064
1065 /*
1066 * tmp2 = (z1^3*y2 - z2^3*y1)*(z2^2*x1*(z1^2*x2 - z2^2*x1)^2 - x_out)
1067 */
1068 felem_mul(tmp2, ftmp3, ftmp2);
1069 /* tmp2[i] < 4 * 2^57 * 2^59 = 2^118 */
1070
50e735f9
MC
1071 /*-
1072 * y_out = (z1^3*y2 - z2^3*y1)*(z2^2*x1*(z1^2*x2 - z2^2*x1)^2 - x_out) -
1073 * z2^3*y1*(z1^2*x2 - z2^2*x1)^3
1074 */
0f113f3e
MC
1075 widefelem_diff(tmp2, tmp);
1076 /* tmp2[i] < 2^118 + 2^120 < 2^121 */
1077 felem_reduce(y_out, tmp2);
1078
1079 /*
1080 * the result (x_out, y_out, z_out) is incorrect if one of the inputs is
1081 * the point at infinity, so we need to check for this separately
1082 */
1083
1084 /*
1085 * if point 1 is at infinity, copy point 2 to output, and vice versa
1086 */
1087 copy_conditional(x_out, x2, z1_is_zero);
1088 copy_conditional(x_out, x1, z2_is_zero);
1089 copy_conditional(y_out, y2, z1_is_zero);
1090 copy_conditional(y_out, y1, z2_is_zero);
1091 copy_conditional(z_out, z2, z1_is_zero);
1092 copy_conditional(z_out, z1, z2_is_zero);
1093 felem_assign(x3, x_out);
1094 felem_assign(y3, y_out);
1095 felem_assign(z3, z_out);
1096}
04daec86 1097
dbd87ffc
MC
1098/*
1099 * select_point selects the |idx|th point from a precomputation table and
1100 * copies it to out.
1101 * The pre_comp array argument should be size of |size| argument
1102 */
0f113f3e
MC
1103static void select_point(const u64 idx, unsigned int size,
1104 const felem pre_comp[][3], felem out[3])
1105{
1106 unsigned i, j;
1107 limb *outlimbs = &out[0][0];
0f113f3e 1108
88f4c6f3 1109 memset(out, 0, sizeof(*out) * 3);
0f113f3e
MC
1110 for (i = 0; i < size; i++) {
1111 const limb *inlimbs = &pre_comp[i][0][0];
1112 u64 mask = i ^ idx;
1113 mask |= mask >> 4;
1114 mask |= mask >> 2;
1115 mask |= mask >> 1;
1116 mask &= 1;
1117 mask--;
1118 for (j = 0; j < 4 * 3; j++)
1119 outlimbs[j] |= inlimbs[j] & mask;
1120 }
1121}
3e00b4c9
BM
1122
1123/* get_bit returns the |i|th bit in |in| */
1124static char get_bit(const felem_bytearray in, unsigned i)
0f113f3e
MC
1125{
1126 if (i >= 224)
1127 return 0;
1128 return (in[i >> 3] >> (i & 7)) & 1;
1129}
1130
1131/*
1132 * Interleaved point multiplication using precomputed point multiples: The
1133 * small point multiples 0*P, 1*P, ..., 16*P are in pre_comp[], the scalars
1134 * in scalars[]. If g_scalar is non-NULL, we also add this multiple of the
1135 * generator, using certain (large) precomputed multiples in g_pre_comp.
1136 * Output point (X, Y, Z) is stored in x_out, y_out, z_out
1137 */
3e00b4c9 1138static void batch_mul(felem x_out, felem y_out, felem z_out,
0f113f3e
MC
1139 const felem_bytearray scalars[],
1140 const unsigned num_points, const u8 *g_scalar,
1141 const int mixed, const felem pre_comp[][17][3],
1142 const felem g_pre_comp[2][16][3])
1143{
1144 int i, skip;
1145 unsigned num;
1146 unsigned gen_mul = (g_scalar != NULL);
1147 felem nq[3], tmp[4];
1148 u64 bits;
1149 u8 sign, digit;
1150
1151 /* set nq to the point at infinity */
16f8d4eb 1152 memset(nq, 0, sizeof(nq));
0f113f3e
MC
1153
1154 /*
1155 * Loop over all scalars msb-to-lsb, interleaving additions of multiples
1156 * of the generator (two in each of the last 28 rounds) and additions of
1157 * other points multiples (every 5th round).
1158 */
1159 skip = 1; /* save two point operations in the first
1160 * round */
1161 for (i = (num_points ? 220 : 27); i >= 0; --i) {
1162 /* double */
1163 if (!skip)
1164 point_double(nq[0], nq[1], nq[2], nq[0], nq[1], nq[2]);
1165
1166 /* add multiples of the generator */
1167 if (gen_mul && (i <= 27)) {
1168 /* first, look 28 bits upwards */
1169 bits = get_bit(g_scalar, i + 196) << 3;
1170 bits |= get_bit(g_scalar, i + 140) << 2;
1171 bits |= get_bit(g_scalar, i + 84) << 1;
1172 bits |= get_bit(g_scalar, i + 28);
1173 /* select the point to add, in constant time */
1174 select_point(bits, 16, g_pre_comp[1], tmp);
1175
1176 if (!skip) {
1177 /* value 1 below is argument for "mixed" */
1178 point_add(nq[0], nq[1], nq[2],
1179 nq[0], nq[1], nq[2], 1, tmp[0], tmp[1], tmp[2]);
1180 } else {
1181 memcpy(nq, tmp, 3 * sizeof(felem));
1182 skip = 0;
1183 }
1184
1185 /* second, look at the current position */
1186 bits = get_bit(g_scalar, i + 168) << 3;
1187 bits |= get_bit(g_scalar, i + 112) << 2;
1188 bits |= get_bit(g_scalar, i + 56) << 1;
1189 bits |= get_bit(g_scalar, i);
1190 /* select the point to add, in constant time */
1191 select_point(bits, 16, g_pre_comp[0], tmp);
1192 point_add(nq[0], nq[1], nq[2],
1193 nq[0], nq[1], nq[2],
1194 1 /* mixed */ , tmp[0], tmp[1], tmp[2]);
1195 }
1196
1197 /* do other additions every 5 doublings */
1198 if (num_points && (i % 5 == 0)) {
1199 /* loop over all scalars */
1200 for (num = 0; num < num_points; ++num) {
1201 bits = get_bit(scalars[num], i + 4) << 5;
1202 bits |= get_bit(scalars[num], i + 3) << 4;
1203 bits |= get_bit(scalars[num], i + 2) << 3;
1204 bits |= get_bit(scalars[num], i + 1) << 2;
1205 bits |= get_bit(scalars[num], i) << 1;
1206 bits |= get_bit(scalars[num], i - 1);
32ab57cb 1207 ossl_ec_GFp_nistp_recode_scalar_bits(&sign, &digit, bits);
0f113f3e
MC
1208
1209 /* select the point to add or subtract */
1210 select_point(digit, 17, pre_comp[num], tmp);
1211 felem_neg(tmp[3], tmp[1]); /* (X, -Y, Z) is the negative
1212 * point */
1213 copy_conditional(tmp[1], tmp[3], sign);
1214
1215 if (!skip) {
1216 point_add(nq[0], nq[1], nq[2],
1217 nq[0], nq[1], nq[2],
1218 mixed, tmp[0], tmp[1], tmp[2]);
1219 } else {
1220 memcpy(nq, tmp, 3 * sizeof(felem));
1221 skip = 0;
1222 }
1223 }
1224 }
1225 }
1226 felem_assign(x_out, nq[0]);
1227 felem_assign(y_out, nq[1]);
1228 felem_assign(z_out, nq[2]);
1229}
04daec86
BM
1230
1231/******************************************************************************/
0f113f3e
MC
1232/*
1233 * FUNCTIONS TO MANAGE PRECOMPUTATION
04daec86
BM
1234 */
1235
3f5abab9 1236static NISTP224_PRE_COMP *nistp224_pre_comp_new(void)
0f113f3e 1237{
b51bce94
RS
1238 NISTP224_PRE_COMP *ret = OPENSSL_zalloc(sizeof(*ret));
1239
e077455e 1240 if (ret == NULL)
0f113f3e 1241 return ret;
9b398ef2 1242
9b398ef2 1243
1353736b 1244 if (!CRYPTO_NEW_REF(&ret->references, 1)) {
9b398ef2
AG
1245 OPENSSL_free(ret);
1246 return NULL;
1247 }
0f113f3e
MC
1248 return ret;
1249}
04daec86 1250
3aef36ff 1251NISTP224_PRE_COMP *EC_nistp224_pre_comp_dup(NISTP224_PRE_COMP *p)
0f113f3e 1252{
9b398ef2 1253 int i;
3aef36ff 1254 if (p != NULL)
1353736b 1255 CRYPTO_UP_REF(&p->references, &i);
3aef36ff 1256 return p;
0f113f3e 1257}
04daec86 1258
3aef36ff 1259void EC_nistp224_pre_comp_free(NISTP224_PRE_COMP *p)
0f113f3e 1260{
9b398ef2
AG
1261 int i;
1262
1263 if (p == NULL)
1264 return;
1265
1353736b 1266 CRYPTO_DOWN_REF(&p->references, &i);
a935791d 1267 REF_PRINT_COUNT("EC_nistp224", p);
9b398ef2 1268 if (i > 0)
0f113f3e 1269 return;
9b398ef2
AG
1270 REF_ASSERT_ISNT(i < 0);
1271
1353736b 1272 CRYPTO_FREE_REF(&p->references);
3aef36ff 1273 OPENSSL_free(p);
0f113f3e 1274}
04daec86
BM
1275
1276/******************************************************************************/
0f113f3e
MC
1277/*
1278 * OPENSSL EC_METHOD FUNCTIONS
04daec86
BM
1279 */
1280
32ab57cb 1281int ossl_ec_GFp_nistp224_group_init(EC_GROUP *group)
0f113f3e
MC
1282{
1283 int ret;
32ab57cb 1284 ret = ossl_ec_GFp_simple_group_init(group);
0f113f3e
MC
1285 group->a_is_minus3 = 1;
1286 return ret;
1287}
04daec86 1288
32ab57cb
SL
1289int ossl_ec_GFp_nistp224_group_set_curve(EC_GROUP *group, const BIGNUM *p,
1290 const BIGNUM *a, const BIGNUM *b,
1291 BN_CTX *ctx)
0f113f3e
MC
1292{
1293 int ret = 0;
0f113f3e 1294 BIGNUM *curve_p, *curve_a, *curve_b;
f844f9eb 1295#ifndef FIPS_MODULE
a9612d6c 1296 BN_CTX *new_ctx = NULL;
0f113f3e
MC
1297
1298 if (ctx == NULL)
a6482df0 1299 ctx = new_ctx = BN_CTX_new();
a9612d6c
MC
1300#endif
1301 if (ctx == NULL)
1302 return 0;
1303
0f113f3e 1304 BN_CTX_start(ctx);
edea42c6
PY
1305 curve_p = BN_CTX_get(ctx);
1306 curve_a = BN_CTX_get(ctx);
1307 curve_b = BN_CTX_get(ctx);
1308 if (curve_b == NULL)
0f113f3e
MC
1309 goto err;
1310 BN_bin2bn(nistp224_curve_params[0], sizeof(felem_bytearray), curve_p);
1311 BN_bin2bn(nistp224_curve_params[1], sizeof(felem_bytearray), curve_a);
1312 BN_bin2bn(nistp224_curve_params[2], sizeof(felem_bytearray), curve_b);
1313 if ((BN_cmp(curve_p, p)) || (BN_cmp(curve_a, a)) || (BN_cmp(curve_b, b))) {
9311d0c4 1314 ERR_raise(ERR_LIB_EC, EC_R_WRONG_CURVE_PARAMETERS);
0f113f3e
MC
1315 goto err;
1316 }
1317 group->field_mod_func = BN_nist_mod_224;
32ab57cb 1318 ret = ossl_ec_GFp_simple_group_set_curve(group, p, a, b, ctx);
0f113f3e
MC
1319 err:
1320 BN_CTX_end(ctx);
f844f9eb 1321#ifndef FIPS_MODULE
23a1d5e9 1322 BN_CTX_free(new_ctx);
a9612d6c 1323#endif
0f113f3e
MC
1324 return ret;
1325}
1326
1327/*
1328 * Takes the Jacobian coordinates (X, Y, Z) of a point and returns (X', Y') =
1329 * (X/Z^2, Y/Z^3)
1330 */
32ab57cb
SL
1331int ossl_ec_GFp_nistp224_point_get_affine_coordinates(const EC_GROUP *group,
1332 const EC_POINT *point,
1333 BIGNUM *x, BIGNUM *y,
1334 BN_CTX *ctx)
0f113f3e
MC
1335{
1336 felem z1, z2, x_in, y_in, x_out, y_out;
1337 widefelem tmp;
1338
1339 if (EC_POINT_is_at_infinity(group, point)) {
9311d0c4 1340 ERR_raise(ERR_LIB_EC, EC_R_POINT_AT_INFINITY);
0f113f3e
MC
1341 return 0;
1342 }
ace8f546
AP
1343 if ((!BN_to_felem(x_in, point->X)) || (!BN_to_felem(y_in, point->Y)) ||
1344 (!BN_to_felem(z1, point->Z)))
0f113f3e
MC
1345 return 0;
1346 felem_inv(z2, z1);
1347 felem_square(tmp, z2);
1348 felem_reduce(z1, tmp);
1349 felem_mul(tmp, x_in, z1);
1350 felem_reduce(x_in, tmp);
1351 felem_contract(x_out, x_in);
1352 if (x != NULL) {
1353 if (!felem_to_BN(x, x_out)) {
9311d0c4 1354 ERR_raise(ERR_LIB_EC, ERR_R_BN_LIB);
0f113f3e
MC
1355 return 0;
1356 }
1357 }
1358 felem_mul(tmp, z1, z2);
1359 felem_reduce(z1, tmp);
1360 felem_mul(tmp, y_in, z1);
1361 felem_reduce(y_in, tmp);
1362 felem_contract(y_out, y_in);
1363 if (y != NULL) {
1364 if (!felem_to_BN(y, y_out)) {
9311d0c4 1365 ERR_raise(ERR_LIB_EC, ERR_R_BN_LIB);
0f113f3e
MC
1366 return 0;
1367 }
1368 }
1369 return 1;
1370}
1371
1372static void make_points_affine(size_t num, felem points[ /* num */ ][3],
1373 felem tmp_felems[ /* num+1 */ ])
1374{
1375 /*
1376 * Runs in constant time, unless an input is the point at infinity (which
1377 * normally shouldn't happen).
1378 */
32ab57cb
SL
1379 ossl_ec_GFp_nistp_points_make_affine_internal(num,
1380 points,
1381 sizeof(felem),
1382 tmp_felems,
1383 (void (*)(void *))felem_one,
1384 felem_is_zero_int,
1385 (void (*)(void *, const void *))
1386 felem_assign,
1387 (void (*)(void *, const void *))
1388 felem_square_reduce, (void (*)
1389 (void *,
1390 const void
1391 *,
1392 const void
1393 *))
1394 felem_mul_reduce,
1395 (void (*)(void *, const void *))
1396 felem_inv,
1397 (void (*)(void *, const void *))
1398 felem_contract);
0f113f3e
MC
1399}
1400
1401/*
1402 * Computes scalar*generator + \sum scalars[i]*points[i], ignoring NULL
1403 * values Result is stored in r (r can equal one of the inputs).
1404 */
32ab57cb
SL
1405int ossl_ec_GFp_nistp224_points_mul(const EC_GROUP *group, EC_POINT *r,
1406 const BIGNUM *scalar, size_t num,
1407 const EC_POINT *points[],
1408 const BIGNUM *scalars[], BN_CTX *ctx)
0f113f3e
MC
1409{
1410 int ret = 0;
1411 int j;
1412 unsigned i;
1413 int mixed = 0;
0f113f3e
MC
1414 BIGNUM *x, *y, *z, *tmp_scalar;
1415 felem_bytearray g_secret;
1416 felem_bytearray *secrets = NULL;
16f8d4eb 1417 felem (*pre_comp)[17][3] = NULL;
0f113f3e 1418 felem *tmp_felems = NULL;
e0b660c2 1419 int num_bytes;
0f113f3e
MC
1420 int have_pre_comp = 0;
1421 size_t num_points = num;
1422 felem x_in, y_in, z_in, x_out, y_out, z_out;
1423 NISTP224_PRE_COMP *pre = NULL;
1424 const felem(*g_pre_comp)[16][3] = NULL;
1425 EC_POINT *generator = NULL;
1426 const EC_POINT *p = NULL;
1427 const BIGNUM *p_scalar = NULL;
1428
0f113f3e 1429 BN_CTX_start(ctx);
edea42c6
PY
1430 x = BN_CTX_get(ctx);
1431 y = BN_CTX_get(ctx);
1432 z = BN_CTX_get(ctx);
1433 tmp_scalar = BN_CTX_get(ctx);
1434 if (tmp_scalar == NULL)
0f113f3e
MC
1435 goto err;
1436
1437 if (scalar != NULL) {
3aef36ff 1438 pre = group->pre_comp.nistp224;
0f113f3e
MC
1439 if (pre)
1440 /* we have precomputation, try to use it */
1441 g_pre_comp = (const felem(*)[16][3])pre->g_pre_comp;
1442 else
1443 /* try to use the standard precomputation */
1444 g_pre_comp = &gmul[0];
1445 generator = EC_POINT_new(group);
1446 if (generator == NULL)
1447 goto err;
1448 /* get the generator from precomputation */
1449 if (!felem_to_BN(x, g_pre_comp[0][1][0]) ||
1450 !felem_to_BN(y, g_pre_comp[0][1][1]) ||
1451 !felem_to_BN(z, g_pre_comp[0][1][2])) {
9311d0c4 1452 ERR_raise(ERR_LIB_EC, ERR_R_BN_LIB);
0f113f3e
MC
1453 goto err;
1454 }
32ab57cb
SL
1455 if (!ossl_ec_GFp_simple_set_Jprojective_coordinates_GFp(group,
1456 generator,
1457 x, y, z, ctx))
0f113f3e
MC
1458 goto err;
1459 if (0 == EC_POINT_cmp(group, generator, group->generator, ctx))
1460 /* precomputation matches generator */
1461 have_pre_comp = 1;
1462 else
1463 /*
1464 * we don't have valid precomputation: treat the generator as a
1465 * random point
1466 */
1467 num_points = num_points + 1;
1468 }
1469
1470 if (num_points > 0) {
1471 if (num_points >= 3) {
1472 /*
1473 * unless we precompute multiples for just one or two points,
1474 * converting those into affine form is time well spent
1475 */
1476 mixed = 1;
1477 }
b51bce94
RS
1478 secrets = OPENSSL_zalloc(sizeof(*secrets) * num_points);
1479 pre_comp = OPENSSL_zalloc(sizeof(*pre_comp) * num_points);
0f113f3e
MC
1480 if (mixed)
1481 tmp_felems =
16f8d4eb 1482 OPENSSL_malloc(sizeof(felem) * (num_points * 17 + 1));
0f113f3e 1483 if ((secrets == NULL) || (pre_comp == NULL)
e077455e 1484 || (mixed && (tmp_felems == NULL)))
0f113f3e 1485 goto err;
0f113f3e
MC
1486
1487 /*
1488 * we treat NULL scalars as 0, and NULL points as points at infinity,
1489 * i.e., they contribute nothing to the linear combination
1490 */
0f113f3e 1491 for (i = 0; i < num_points; ++i) {
4fe2ee3a 1492 if (i == num) {
0f113f3e 1493 /* the generator */
0f113f3e
MC
1494 p = EC_GROUP_get0_generator(group);
1495 p_scalar = scalar;
4fe2ee3a 1496 } else {
0f113f3e 1497 /* the i^th point */
0f113f3e
MC
1498 p = points[i];
1499 p_scalar = scalars[i];
1500 }
1501 if ((p_scalar != NULL) && (p != NULL)) {
1502 /* reduce scalar to 0 <= scalar < 2^224 */
1503 if ((BN_num_bits(p_scalar) > 224)
1504 || (BN_is_negative(p_scalar))) {
1505 /*
1506 * this is an unusual input, and we don't guarantee
1507 * constant-timeness
1508 */
ace8f546 1509 if (!BN_nnmod(tmp_scalar, p_scalar, group->order, ctx)) {
9311d0c4 1510 ERR_raise(ERR_LIB_EC, ERR_R_BN_LIB);
0f113f3e
MC
1511 goto err;
1512 }
e0b660c2
NT
1513 num_bytes = BN_bn2lebinpad(tmp_scalar,
1514 secrets[i], sizeof(secrets[i]));
1515 } else {
1516 num_bytes = BN_bn2lebinpad(p_scalar,
1517 secrets[i], sizeof(secrets[i]));
1518 }
1519 if (num_bytes < 0) {
9311d0c4 1520 ERR_raise(ERR_LIB_EC, ERR_R_BN_LIB);
e0b660c2
NT
1521 goto err;
1522 }
0f113f3e 1523 /* precompute multiples */
ace8f546
AP
1524 if ((!BN_to_felem(x_out, p->X)) ||
1525 (!BN_to_felem(y_out, p->Y)) ||
1526 (!BN_to_felem(z_out, p->Z)))
0f113f3e
MC
1527 goto err;
1528 felem_assign(pre_comp[i][1][0], x_out);
1529 felem_assign(pre_comp[i][1][1], y_out);
1530 felem_assign(pre_comp[i][1][2], z_out);
1531 for (j = 2; j <= 16; ++j) {
1532 if (j & 1) {
1533 point_add(pre_comp[i][j][0], pre_comp[i][j][1],
1534 pre_comp[i][j][2], pre_comp[i][1][0],
1535 pre_comp[i][1][1], pre_comp[i][1][2], 0,
1536 pre_comp[i][j - 1][0],
1537 pre_comp[i][j - 1][1],
1538 pre_comp[i][j - 1][2]);
1539 } else {
1540 point_double(pre_comp[i][j][0], pre_comp[i][j][1],
1541 pre_comp[i][j][2], pre_comp[i][j / 2][0],
1542 pre_comp[i][j / 2][1],
1543 pre_comp[i][j / 2][2]);
1544 }
1545 }
1546 }
1547 }
1548 if (mixed)
1549 make_points_affine(num_points * 17, pre_comp[0], tmp_felems);
1550 }
1551
1552 /* the scalar for the generator */
1553 if ((scalar != NULL) && (have_pre_comp)) {
16f8d4eb 1554 memset(g_secret, 0, sizeof(g_secret));
0f113f3e
MC
1555 /* reduce scalar to 0 <= scalar < 2^224 */
1556 if ((BN_num_bits(scalar) > 224) || (BN_is_negative(scalar))) {
1557 /*
1558 * this is an unusual input, and we don't guarantee
1559 * constant-timeness
1560 */
ace8f546 1561 if (!BN_nnmod(tmp_scalar, scalar, group->order, ctx)) {
9311d0c4 1562 ERR_raise(ERR_LIB_EC, ERR_R_BN_LIB);
0f113f3e
MC
1563 goto err;
1564 }
e0b660c2 1565 num_bytes = BN_bn2lebinpad(tmp_scalar, g_secret, sizeof(g_secret));
4fe2ee3a 1566 } else {
e0b660c2 1567 num_bytes = BN_bn2lebinpad(scalar, g_secret, sizeof(g_secret));
4fe2ee3a 1568 }
0f113f3e
MC
1569 /* do the multiplication with generator precomputation */
1570 batch_mul(x_out, y_out, z_out,
1571 (const felem_bytearray(*))secrets, num_points,
1572 g_secret,
1573 mixed, (const felem(*)[17][3])pre_comp, g_pre_comp);
4fe2ee3a 1574 } else {
0f113f3e
MC
1575 /* do the multiplication without generator precomputation */
1576 batch_mul(x_out, y_out, z_out,
1577 (const felem_bytearray(*))secrets, num_points,
1578 NULL, mixed, (const felem(*)[17][3])pre_comp, NULL);
4fe2ee3a 1579 }
0f113f3e
MC
1580 /* reduce the output to its unique minimal representation */
1581 felem_contract(x_in, x_out);
1582 felem_contract(y_in, y_out);
1583 felem_contract(z_in, z_out);
1584 if ((!felem_to_BN(x, x_in)) || (!felem_to_BN(y, y_in)) ||
1585 (!felem_to_BN(z, z_in))) {
9311d0c4 1586 ERR_raise(ERR_LIB_EC, ERR_R_BN_LIB);
0f113f3e
MC
1587 goto err;
1588 }
32ab57cb
SL
1589 ret = ossl_ec_GFp_simple_set_Jprojective_coordinates_GFp(group, r, x, y, z,
1590 ctx);
0f113f3e
MC
1591
1592 err:
1593 BN_CTX_end(ctx);
8fdc3734 1594 EC_POINT_free(generator);
b548a1f1
RS
1595 OPENSSL_free(secrets);
1596 OPENSSL_free(pre_comp);
1597 OPENSSL_free(tmp_felems);
0f113f3e
MC
1598 return ret;
1599}
04daec86 1600
32ab57cb 1601int ossl_ec_GFp_nistp224_precompute_mult(EC_GROUP *group, BN_CTX *ctx)
0f113f3e
MC
1602{
1603 int ret = 0;
1604 NISTP224_PRE_COMP *pre = NULL;
1605 int i, j;
0f113f3e
MC
1606 BIGNUM *x, *y;
1607 EC_POINT *generator = NULL;
1608 felem tmp_felems[32];
f844f9eb 1609#ifndef FIPS_MODULE
a9612d6c
MC
1610 BN_CTX *new_ctx = NULL;
1611#endif
0f113f3e
MC
1612
1613 /* throw away old precomputation */
2c52ac9b 1614 EC_pre_comp_free(group);
a9612d6c 1615
f844f9eb 1616#ifndef FIPS_MODULE
0f113f3e 1617 if (ctx == NULL)
a6482df0 1618 ctx = new_ctx = BN_CTX_new();
a9612d6c
MC
1619#endif
1620 if (ctx == NULL)
1621 return 0;
1622
0f113f3e 1623 BN_CTX_start(ctx);
edea42c6
PY
1624 x = BN_CTX_get(ctx);
1625 y = BN_CTX_get(ctx);
1626 if (y == NULL)
0f113f3e
MC
1627 goto err;
1628 /* get the generator */
1629 if (group->generator == NULL)
1630 goto err;
1631 generator = EC_POINT_new(group);
1632 if (generator == NULL)
1633 goto err;
1634 BN_bin2bn(nistp224_curve_params[3], sizeof(felem_bytearray), x);
1635 BN_bin2bn(nistp224_curve_params[4], sizeof(felem_bytearray), y);
9cc570d4 1636 if (!EC_POINT_set_affine_coordinates(group, generator, x, y, ctx))
0f113f3e
MC
1637 goto err;
1638 if ((pre = nistp224_pre_comp_new()) == NULL)
1639 goto err;
1640 /*
1641 * if the generator is the standard one, use built-in precomputation
1642 */
1643 if (0 == EC_POINT_cmp(group, generator, group->generator, ctx)) {
1644 memcpy(pre->g_pre_comp, gmul, sizeof(pre->g_pre_comp));
615614c8 1645 goto done;
0f113f3e 1646 }
ace8f546
AP
1647 if ((!BN_to_felem(pre->g_pre_comp[0][1][0], group->generator->X)) ||
1648 (!BN_to_felem(pre->g_pre_comp[0][1][1], group->generator->Y)) ||
1649 (!BN_to_felem(pre->g_pre_comp[0][1][2], group->generator->Z)))
0f113f3e
MC
1650 goto err;
1651 /*
1652 * compute 2^56*G, 2^112*G, 2^168*G for the first table, 2^28*G, 2^84*G,
1653 * 2^140*G, 2^196*G for the second one
1654 */
1655 for (i = 1; i <= 8; i <<= 1) {
1656 point_double(pre->g_pre_comp[1][i][0], pre->g_pre_comp[1][i][1],
1657 pre->g_pre_comp[1][i][2], pre->g_pre_comp[0][i][0],
1658 pre->g_pre_comp[0][i][1], pre->g_pre_comp[0][i][2]);
1659 for (j = 0; j < 27; ++j) {
1660 point_double(pre->g_pre_comp[1][i][0], pre->g_pre_comp[1][i][1],
1661 pre->g_pre_comp[1][i][2], pre->g_pre_comp[1][i][0],
1662 pre->g_pre_comp[1][i][1], pre->g_pre_comp[1][i][2]);
1663 }
1664 if (i == 8)
1665 break;
1666 point_double(pre->g_pre_comp[0][2 * i][0],
1667 pre->g_pre_comp[0][2 * i][1],
1668 pre->g_pre_comp[0][2 * i][2], pre->g_pre_comp[1][i][0],
1669 pre->g_pre_comp[1][i][1], pre->g_pre_comp[1][i][2]);
1670 for (j = 0; j < 27; ++j) {
1671 point_double(pre->g_pre_comp[0][2 * i][0],
1672 pre->g_pre_comp[0][2 * i][1],
1673 pre->g_pre_comp[0][2 * i][2],
1674 pre->g_pre_comp[0][2 * i][0],
1675 pre->g_pre_comp[0][2 * i][1],
1676 pre->g_pre_comp[0][2 * i][2]);
1677 }
1678 }
1679 for (i = 0; i < 2; i++) {
1680 /* g_pre_comp[i][0] is the point at infinity */
1681 memset(pre->g_pre_comp[i][0], 0, sizeof(pre->g_pre_comp[i][0]));
1682 /* the remaining multiples */
1683 /* 2^56*G + 2^112*G resp. 2^84*G + 2^140*G */
1684 point_add(pre->g_pre_comp[i][6][0], pre->g_pre_comp[i][6][1],
1685 pre->g_pre_comp[i][6][2], pre->g_pre_comp[i][4][0],
1686 pre->g_pre_comp[i][4][1], pre->g_pre_comp[i][4][2],
1687 0, pre->g_pre_comp[i][2][0], pre->g_pre_comp[i][2][1],
1688 pre->g_pre_comp[i][2][2]);
1689 /* 2^56*G + 2^168*G resp. 2^84*G + 2^196*G */
1690 point_add(pre->g_pre_comp[i][10][0], pre->g_pre_comp[i][10][1],
1691 pre->g_pre_comp[i][10][2], pre->g_pre_comp[i][8][0],
1692 pre->g_pre_comp[i][8][1], pre->g_pre_comp[i][8][2],
1693 0, pre->g_pre_comp[i][2][0], pre->g_pre_comp[i][2][1],
1694 pre->g_pre_comp[i][2][2]);
1695 /* 2^112*G + 2^168*G resp. 2^140*G + 2^196*G */
1696 point_add(pre->g_pre_comp[i][12][0], pre->g_pre_comp[i][12][1],
1697 pre->g_pre_comp[i][12][2], pre->g_pre_comp[i][8][0],
1698 pre->g_pre_comp[i][8][1], pre->g_pre_comp[i][8][2],
1699 0, pre->g_pre_comp[i][4][0], pre->g_pre_comp[i][4][1],
1700 pre->g_pre_comp[i][4][2]);
1701 /*
1702 * 2^56*G + 2^112*G + 2^168*G resp. 2^84*G + 2^140*G + 2^196*G
1703 */
1704 point_add(pre->g_pre_comp[i][14][0], pre->g_pre_comp[i][14][1],
1705 pre->g_pre_comp[i][14][2], pre->g_pre_comp[i][12][0],
1706 pre->g_pre_comp[i][12][1], pre->g_pre_comp[i][12][2],
1707 0, pre->g_pre_comp[i][2][0], pre->g_pre_comp[i][2][1],
1708 pre->g_pre_comp[i][2][2]);
1709 for (j = 1; j < 8; ++j) {
1710 /* odd multiples: add G resp. 2^28*G */
1711 point_add(pre->g_pre_comp[i][2 * j + 1][0],
1712 pre->g_pre_comp[i][2 * j + 1][1],
1713 pre->g_pre_comp[i][2 * j + 1][2],
1714 pre->g_pre_comp[i][2 * j][0],
1715 pre->g_pre_comp[i][2 * j][1],
1716 pre->g_pre_comp[i][2 * j][2], 0,
1717 pre->g_pre_comp[i][1][0], pre->g_pre_comp[i][1][1],
1718 pre->g_pre_comp[i][1][2]);
1719 }
1720 }
1721 make_points_affine(31, &(pre->g_pre_comp[0][1]), tmp_felems);
1722
615614c8 1723 done:
3aef36ff 1724 SETPRECOMP(group, nistp224, pre);
0f113f3e 1725 pre = NULL;
3aef36ff 1726 ret = 1;
04daec86 1727 err:
0f113f3e 1728 BN_CTX_end(ctx);
8fdc3734 1729 EC_POINT_free(generator);
f844f9eb 1730#ifndef FIPS_MODULE
23a1d5e9 1731 BN_CTX_free(new_ctx);
a9612d6c 1732#endif
3aef36ff 1733 EC_nistp224_pre_comp_free(pre);
0f113f3e
MC
1734 return ret;
1735}
04daec86 1736
32ab57cb 1737int ossl_ec_GFp_nistp224_have_precompute_mult(const EC_GROUP *group)
0f113f3e 1738{
3aef36ff 1739 return HAVEPRECOMP(group, nistp224);
0f113f3e 1740}