]> git.ipfire.org Git - thirdparty/openssl.git/blame - crypto/ec/ecp_nistp224.c
Fix external symbols related to ec & sm2 keys
[thirdparty/openssl.git] / crypto / ec / ecp_nistp224.c
CommitLineData
04daec86 1/*
33388b44 2 * Copyright 2010-2020 The OpenSSL Project Authors. All Rights Reserved.
4f22f405 3 *
a7f182b7 4 * Licensed under the Apache License 2.0 (the "License"). You may not use
4f22f405
RS
5 * this file except in compliance with the License. You can obtain a copy
6 * in the file LICENSE in the source distribution or at
7 * https://www.openssl.org/source/license.html
04daec86 8 */
4f22f405 9
3e00b4c9 10/* Copyright 2011 Google Inc.
04daec86 11 *
3e00b4c9 12 * Licensed under the Apache License, Version 2.0 (the "License");
04daec86 13 *
3e00b4c9
BM
14 * you may not use this file except in compliance with the License.
15 * You may obtain a copy of the License at
04daec86 16 *
3e00b4c9 17 * http://www.apache.org/licenses/LICENSE-2.0
04daec86 18 *
3e00b4c9
BM
19 * Unless required by applicable law or agreed to in writing, software
20 * distributed under the License is distributed on an "AS IS" BASIS,
21 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
22 * See the License for the specific language governing permissions and
23 * limitations under the License.
04daec86
BM
24 */
25
579422c8
P
26/*
27 * ECDSA low level APIs are deprecated for public use, but still ok for
28 * internal use.
29 */
30#include "internal/deprecated.h"
31
04daec86
BM
32/*
33 * A 64-bit implementation of the NIST P-224 elliptic curve point multiplication
34 *
35 * Inspired by Daniel J. Bernstein's public domain nistp224 implementation
36 * and Adam Langley's public domain 64-bit C implementation of curve25519
37 */
e0d6132b
BM
38
39#include <openssl/opensslconf.h>
e0d6132b 40
705536e2
RS
41#include <stdint.h>
42#include <string.h>
43#include <openssl/err.h>
44#include "ec_local.h"
04daec86 45
705536e2 46#if defined(__SIZEOF_INT128__) && __SIZEOF_INT128__==16
396cb565 47 /* even with gcc, the typedef won't work for 32-bit platforms */
0f113f3e
MC
48typedef __uint128_t uint128_t; /* nonstandard; implemented by gcc on 64-bit
49 * platforms */
705536e2
RS
50#else
51# error "Your compiler doesn't appear to support 128-bit integer types"
52#endif
04daec86
BM
53
54typedef uint8_t u8;
3e00b4c9 55typedef uint64_t u64;
04daec86 56
04daec86 57/******************************************************************************/
1d97c843
TH
58/*-
59 * INTERNAL REPRESENTATION OF FIELD ELEMENTS
04daec86
BM
60 *
61 * Field elements are represented as a_0 + 2^56*a_1 + 2^112*a_2 + 2^168*a_3
3e00b4c9
BM
62 * using 64-bit coefficients called 'limbs',
63 * and sometimes (for multiplication results) as
04daec86 64 * b_0 + 2^56*b_1 + 2^112*b_2 + 2^168*b_3 + 2^224*b_4 + 2^280*b_5 + 2^336*b_6
3e00b4c9
BM
65 * using 128-bit coefficients called 'widelimbs'.
66 * A 4-limb representation is an 'felem';
67 * a 7-widelimb representation is a 'widefelem'.
68 * Even within felems, bits of adjacent limbs overlap, and we don't always
69 * reduce the representations: we ensure that inputs to each felem
04daec86
BM
70 * multiplication satisfy a_i < 2^60, so outputs satisfy b_i < 4*2^60*2^60,
71 * and fit into a 128-bit word without overflow. The coefficients are then
3e00b4c9
BM
72 * again partially reduced to obtain an felem satisfying a_i < 2^57.
73 * We only reduce to the unique minimal representation at the end of the
74 * computation.
04daec86
BM
75 */
76
3e00b4c9 77typedef uint64_t limb;
77286fe3 78typedef uint64_t limb_aX __attribute((__aligned__(1)));
3e00b4c9
BM
79typedef uint128_t widelimb;
80
81typedef limb felem[4];
82typedef widelimb widefelem[7];
04daec86 83
0f113f3e 84/*
46f4e1be 85 * Field element represented as a byte array. 28*8 = 224 bits is also the
0f113f3e
MC
86 * group order size for the elliptic curve, and we also use this type for
87 * scalars for point multiplication.
88 */
396cb565
BM
89typedef u8 felem_bytearray[28];
90
91static const felem_bytearray nistp224_curve_params[5] = {
0f113f3e
MC
92 {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, /* p */
93 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00,
94 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01},
95 {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, /* a */
96 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFE, 0xFF, 0xFF, 0xFF, 0xFF,
97 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFE},
98 {0xB4, 0x05, 0x0A, 0x85, 0x0C, 0x04, 0xB3, 0xAB, 0xF5, 0x41, /* b */
99 0x32, 0x56, 0x50, 0x44, 0xB0, 0xB7, 0xD7, 0xBF, 0xD8, 0xBA,
100 0x27, 0x0B, 0x39, 0x43, 0x23, 0x55, 0xFF, 0xB4},
101 {0xB7, 0x0E, 0x0C, 0xBD, 0x6B, 0xB4, 0xBF, 0x7F, 0x32, 0x13, /* x */
102 0x90, 0xB9, 0x4A, 0x03, 0xC1, 0xD3, 0x56, 0xC2, 0x11, 0x22,
103 0x34, 0x32, 0x80, 0xD6, 0x11, 0x5C, 0x1D, 0x21},
104 {0xbd, 0x37, 0x63, 0x88, 0xb5, 0xf7, 0x23, 0xfb, 0x4c, 0x22, /* y */
105 0xdf, 0xe6, 0xcd, 0x43, 0x75, 0xa0, 0x5a, 0x07, 0x47, 0x64,
106 0x44, 0xd5, 0x81, 0x99, 0x85, 0x00, 0x7e, 0x34}
396cb565 107};
04daec86 108
1d97c843
TH
109/*-
110 * Precomputed multiples of the standard generator
3e00b4c9
BM
111 * Points are given in coordinates (X, Y, Z) where Z normally is 1
112 * (0 for the point at infinity).
113 * For each field element, slice a_0 is word 0, etc.
114 *
115 * The table has 2 * 16 elements, starting with the following:
116 * index | bits | point
117 * ------+---------+------------------------------
118 * 0 | 0 0 0 0 | 0G
119 * 1 | 0 0 0 1 | 1G
120 * 2 | 0 0 1 0 | 2^56G
121 * 3 | 0 0 1 1 | (2^56 + 1)G
122 * 4 | 0 1 0 0 | 2^112G
123 * 5 | 0 1 0 1 | (2^112 + 1)G
124 * 6 | 0 1 1 0 | (2^112 + 2^56)G
125 * 7 | 0 1 1 1 | (2^112 + 2^56 + 1)G
126 * 8 | 1 0 0 0 | 2^168G
127 * 9 | 1 0 0 1 | (2^168 + 1)G
128 * 10 | 1 0 1 0 | (2^168 + 2^56)G
129 * 11 | 1 0 1 1 | (2^168 + 2^56 + 1)G
130 * 12 | 1 1 0 0 | (2^168 + 2^112)G
131 * 13 | 1 1 0 1 | (2^168 + 2^112 + 1)G
132 * 14 | 1 1 1 0 | (2^168 + 2^112 + 2^56)G
133 * 15 | 1 1 1 1 | (2^168 + 2^112 + 2^56 + 1)G
134 * followed by a copy of this with each element multiplied by 2^28.
135 *
136 * The reason for this is so that we can clock bits into four different
137 * locations when doing simple scalar multiplies against the base point,
138 * and then another four locations using the second 16 elements.
139 */
4eb504ae
AP
140static const felem gmul[2][16][3] = {
141{{{0, 0, 0, 0},
142 {0, 0, 0, 0},
143 {0, 0, 0, 0}},
144 {{0x3280d6115c1d21, 0xc1d356c2112234, 0x7f321390b94a03, 0xb70e0cbd6bb4bf},
145 {0xd5819985007e34, 0x75a05a07476444, 0xfb4c22dfe6cd43, 0xbd376388b5f723},
146 {1, 0, 0, 0}},
147 {{0xfd9675666ebbe9, 0xbca7664d40ce5e, 0x2242df8d8a2a43, 0x1f49bbb0f99bc5},
148 {0x29e0b892dc9c43, 0xece8608436e662, 0xdc858f185310d0, 0x9812dd4eb8d321},
149 {1, 0, 0, 0}},
150 {{0x6d3e678d5d8eb8, 0x559eed1cb362f1, 0x16e9a3bbce8a3f, 0xeedcccd8c2a748},
151 {0xf19f90ed50266d, 0xabf2b4bf65f9df, 0x313865468fafec, 0x5cb379ba910a17},
152 {1, 0, 0, 0}},
153 {{0x0641966cab26e3, 0x91fb2991fab0a0, 0xefec27a4e13a0b, 0x0499aa8a5f8ebe},
154 {0x7510407766af5d, 0x84d929610d5450, 0x81d77aae82f706, 0x6916f6d4338c5b},
155 {1, 0, 0, 0}},
156 {{0xea95ac3b1f15c6, 0x086000905e82d4, 0xdd323ae4d1c8b1, 0x932b56be7685a3},
157 {0x9ef93dea25dbbf, 0x41665960f390f0, 0xfdec76dbe2a8a7, 0x523e80f019062a},
158 {1, 0, 0, 0}},
159 {{0x822fdd26732c73, 0xa01c83531b5d0f, 0x363f37347c1ba4, 0xc391b45c84725c},
160 {0xbbd5e1b2d6ad24, 0xddfbcde19dfaec, 0xc393da7e222a7f, 0x1efb7890ede244},
161 {1, 0, 0, 0}},
162 {{0x4c9e90ca217da1, 0xd11beca79159bb, 0xff8d33c2c98b7c, 0x2610b39409f849},
163 {0x44d1352ac64da0, 0xcdbb7b2c46b4fb, 0x966c079b753c89, 0xfe67e4e820b112},
164 {1, 0, 0, 0}},
165 {{0xe28cae2df5312d, 0xc71b61d16f5c6e, 0x79b7619a3e7c4c, 0x05c73240899b47},
166 {0x9f7f6382c73e3a, 0x18615165c56bda, 0x641fab2116fd56, 0x72855882b08394},
167 {1, 0, 0, 0}},
168 {{0x0469182f161c09, 0x74a98ca8d00fb5, 0xb89da93489a3e0, 0x41c98768fb0c1d},
169 {0xe5ea05fb32da81, 0x3dce9ffbca6855, 0x1cfe2d3fbf59e6, 0x0e5e03408738a7},
170 {1, 0, 0, 0}},
171 {{0xdab22b2333e87f, 0x4430137a5dd2f6, 0xe03ab9f738beb8, 0xcb0c5d0dc34f24},
172 {0x764a7df0c8fda5, 0x185ba5c3fa2044, 0x9281d688bcbe50, 0xc40331df893881},
173 {1, 0, 0, 0}},
174 {{0xb89530796f0f60, 0xade92bd26909a3, 0x1a0c83fb4884da, 0x1765bf22a5a984},
175 {0x772a9ee75db09e, 0x23bc6c67cec16f, 0x4c1edba8b14e2f, 0xe2a215d9611369},
176 {1, 0, 0, 0}},
177 {{0x571e509fb5efb3, 0xade88696410552, 0xc8ae85fada74fe, 0x6c7e4be83bbde3},
178 {0xff9f51160f4652, 0xb47ce2495a6539, 0xa2946c53b582f4, 0x286d2db3ee9a60},
179 {1, 0, 0, 0}},
180 {{0x40bbd5081a44af, 0x0995183b13926c, 0xbcefba6f47f6d0, 0x215619e9cc0057},
181 {0x8bc94d3b0df45e, 0xf11c54a3694f6f, 0x8631b93cdfe8b5, 0xe7e3f4b0982db9},
182 {1, 0, 0, 0}},
183 {{0xb17048ab3e1c7b, 0xac38f36ff8a1d8, 0x1c29819435d2c6, 0xc813132f4c07e9},
184 {0x2891425503b11f, 0x08781030579fea, 0xf5426ba5cc9674, 0x1e28ebf18562bc},
185 {1, 0, 0, 0}},
186 {{0x9f31997cc864eb, 0x06cd91d28b5e4c, 0xff17036691a973, 0xf1aef351497c58},
187 {0xdd1f2d600564ff, 0xdead073b1402db, 0x74a684435bd693, 0xeea7471f962558},
188 {1, 0, 0, 0}}},
0f113f3e
MC
189{{{0, 0, 0, 0},
190 {0, 0, 0, 0},
191 {0, 0, 0, 0}},
192 {{0x9665266dddf554, 0x9613d78b60ef2d, 0xce27a34cdba417, 0xd35ab74d6afc31},
193 {0x85ccdd22deb15e, 0x2137e5783a6aab, 0xa141cffd8c93c6, 0x355a1830e90f2d},
194 {1, 0, 0, 0}},
195 {{0x1a494eadaade65, 0xd6da4da77fe53c, 0xe7992996abec86, 0x65c3553c6090e3},
196 {0xfa610b1fb09346, 0xf1c6540b8a4aaf, 0xc51a13ccd3cbab, 0x02995b1b18c28a},
197 {1, 0, 0, 0}},
198 {{0x7874568e7295ef, 0x86b419fbe38d04, 0xdc0690a7550d9a, 0xd3966a44beac33},
199 {0x2b7280ec29132f, 0xbeaa3b6a032df3, 0xdc7dd88ae41200, 0xd25e2513e3a100},
200 {1, 0, 0, 0}},
201 {{0x924857eb2efafd, 0xac2bce41223190, 0x8edaa1445553fc, 0x825800fd3562d5},
202 {0x8d79148ea96621, 0x23a01c3dd9ed8d, 0xaf8b219f9416b5, 0xd8db0cc277daea},
203 {1, 0, 0, 0}},
204 {{0x76a9c3b1a700f0, 0xe9acd29bc7e691, 0x69212d1a6b0327, 0x6322e97fe154be},
205 {0x469fc5465d62aa, 0x8d41ed18883b05, 0x1f8eae66c52b88, 0xe4fcbe9325be51},
206 {1, 0, 0, 0}},
207 {{0x825fdf583cac16, 0x020b857c7b023a, 0x683c17744b0165, 0x14ffd0a2daf2f1},
208 {0x323b36184218f9, 0x4944ec4e3b47d4, 0xc15b3080841acf, 0x0bced4b01a28bb},
209 {1, 0, 0, 0}},
210 {{0x92ac22230df5c4, 0x52f33b4063eda8, 0xcb3f19870c0c93, 0x40064f2ba65233},
211 {0xfe16f0924f8992, 0x012da25af5b517, 0x1a57bb24f723a6, 0x06f8bc76760def},
212 {1, 0, 0, 0}},
213 {{0x4a7084f7817cb9, 0xbcab0738ee9a78, 0x3ec11e11d9c326, 0xdc0fe90e0f1aae},
214 {0xcf639ea5f98390, 0x5c350aa22ffb74, 0x9afae98a4047b7, 0x956ec2d617fc45},
215 {1, 0, 0, 0}},
216 {{0x4306d648c1be6a, 0x9247cd8bc9a462, 0xf5595e377d2f2e, 0xbd1c3caff1a52e},
217 {0x045e14472409d0, 0x29f3e17078f773, 0x745a602b2d4f7d, 0x191837685cdfbb},
218 {1, 0, 0, 0}},
219 {{0x5b6ee254a8cb79, 0x4953433f5e7026, 0xe21faeb1d1def4, 0xc4c225785c09de},
220 {0x307ce7bba1e518, 0x31b125b1036db8, 0x47e91868839e8f, 0xc765866e33b9f3},
221 {1, 0, 0, 0}},
222 {{0x3bfece24f96906, 0x4794da641e5093, 0xde5df64f95db26, 0x297ecd89714b05},
223 {0x701bd3ebb2c3aa, 0x7073b4f53cb1d5, 0x13c5665658af16, 0x9895089d66fe58},
224 {1, 0, 0, 0}},
225 {{0x0fef05f78c4790, 0x2d773633b05d2e, 0x94229c3a951c94, 0xbbbd70df4911bb},
226 {0xb2c6963d2c1168, 0x105f47a72b0d73, 0x9fdf6111614080, 0x7b7e94b39e67b0},
227 {1, 0, 0, 0}},
228 {{0xad1a7d6efbe2b3, 0xf012482c0da69d, 0x6b3bdf12438345, 0x40d7558d7aa4d9},
229 {0x8a09fffb5c6d3d, 0x9a356e5d9ffd38, 0x5973f15f4f9b1c, 0xdcd5f59f63c3ea},
230 {1, 0, 0, 0}},
231 {{0xacf39f4c5ca7ab, 0x4c8071cc5fd737, 0xc64e3602cd1184, 0x0acd4644c9abba},
232 {0x6c011a36d8bf6e, 0xfecd87ba24e32a, 0x19f6f56574fad8, 0x050b204ced9405},
233 {1, 0, 0, 0}},
234 {{0xed4f1cae7d9a96, 0x5ceef7ad94c40a, 0x778e4a3bf3ef9b, 0x7405783dc3b55e},
235 {0x32477c61b6e8c6, 0xb46a97570f018b, 0x91176d0a7e95d1, 0x3df90fbc4c7d0e},
236 {1, 0, 0, 0}}}
237};
04daec86
BM
238
239/* Precomputation for the group generator. */
3aef36ff 240struct nistp224_pre_comp_st {
0f113f3e 241 felem g_pre_comp[2][16][3];
2f545ae4 242 CRYPTO_REF_COUNT references;
9b398ef2 243 CRYPTO_RWLOCK *lock;
3aef36ff 244};
04daec86
BM
245
246const EC_METHOD *EC_GFp_nistp224_method(void)
0f113f3e
MC
247{
248 static const EC_METHOD ret = {
249 EC_FLAGS_DEFAULT_OCT,
250 NID_X9_62_prime_field,
32ab57cb
SL
251 ossl_ec_GFp_nistp224_group_init,
252 ossl_ec_GFp_simple_group_finish,
253 ossl_ec_GFp_simple_group_clear_finish,
254 ossl_ec_GFp_nist_group_copy,
255 ossl_ec_GFp_nistp224_group_set_curve,
256 ossl_ec_GFp_simple_group_get_curve,
257 ossl_ec_GFp_simple_group_get_degree,
258 ossl_ec_group_simple_order_bits,
259 ossl_ec_GFp_simple_group_check_discriminant,
260 ossl_ec_GFp_simple_point_init,
261 ossl_ec_GFp_simple_point_finish,
262 ossl_ec_GFp_simple_point_clear_finish,
263 ossl_ec_GFp_simple_point_copy,
264 ossl_ec_GFp_simple_point_set_to_infinity,
265 ossl_ec_GFp_simple_point_set_affine_coordinates,
266 ossl_ec_GFp_nistp224_point_get_affine_coordinates,
0f113f3e
MC
267 0 /* point_set_compressed_coordinates */ ,
268 0 /* point2oct */ ,
269 0 /* oct2point */ ,
32ab57cb
SL
270 ossl_ec_GFp_simple_add,
271 ossl_ec_GFp_simple_dbl,
272 ossl_ec_GFp_simple_invert,
273 ossl_ec_GFp_simple_is_at_infinity,
274 ossl_ec_GFp_simple_is_on_curve,
275 ossl_ec_GFp_simple_cmp,
276 ossl_ec_GFp_simple_make_affine,
277 ossl_ec_GFp_simple_points_make_affine,
278 ossl_ec_GFp_nistp224_points_mul,
279 ossl_ec_GFp_nistp224_precompute_mult,
280 ossl_ec_GFp_nistp224_have_precompute_mult,
281 ossl_ec_GFp_nist_field_mul,
282 ossl_ec_GFp_nist_field_sqr,
0f113f3e 283 0 /* field_div */ ,
32ab57cb 284 ossl_ec_GFp_simple_field_inv,
0f113f3e
MC
285 0 /* field_encode */ ,
286 0 /* field_decode */ ,
9ff9bccc 287 0, /* field_set_to_one */
32ab57cb
SL
288 ossl_ec_key_simple_priv2oct,
289 ossl_ec_key_simple_oct2priv,
9ff9bccc 290 0, /* set private */
32ab57cb
SL
291 ossl_ec_key_simple_generate_key,
292 ossl_ec_key_simple_check_key,
293 ossl_ec_key_simple_generate_public_key,
9ff9bccc
DSH
294 0, /* keycopy */
295 0, /* keyfinish */
32ab57cb
SL
296 ossl_ecdh_simple_compute_key,
297 ossl_ecdsa_simple_sign_setup,
298 ossl_ecdsa_simple_sign_sig,
299 ossl_ecdsa_simple_verify_sig,
f667820c 300 0, /* field_inverse_mod_ord */
37124360
NT
301 0, /* blind_coordinates */
302 0, /* ladder_pre */
303 0, /* ladder_step */
304 0 /* ladder_post */
0f113f3e
MC
305 };
306
307 return &ret;
308}
309
310/*
311 * Helper functions to convert field elements to/from internal representation
312 */
3e00b4c9 313static void bin28_to_felem(felem out, const u8 in[28])
0f113f3e 314{
77286fe3
BE
315 out[0] = *((const limb *)(in)) & 0x00ffffffffffffff;
316 out[1] = (*((const limb_aX *)(in + 7))) & 0x00ffffffffffffff;
317 out[2] = (*((const limb_aX *)(in + 14))) & 0x00ffffffffffffff;
318 out[3] = (*((const limb_aX *)(in + 20))) >> 8;
0f113f3e 319}
04daec86 320
3e00b4c9 321static void felem_to_bin28(u8 out[28], const felem in)
0f113f3e
MC
322{
323 unsigned i;
324 for (i = 0; i < 7; ++i) {
325 out[i] = in[0] >> (8 * i);
326 out[i + 7] = in[1] >> (8 * i);
327 out[i + 14] = in[2] >> (8 * i);
328 out[i + 21] = in[3] >> (8 * i);
329 }
330}
04daec86 331
04daec86 332/* From OpenSSL BIGNUM to internal representation */
3e00b4c9 333static int BN_to_felem(felem out, const BIGNUM *bn)
0f113f3e 334{
0f113f3e 335 felem_bytearray b_out;
e0b660c2 336 int num_bytes;
0f113f3e 337
e0b660c2 338 if (BN_is_negative(bn)) {
9311d0c4 339 ERR_raise(ERR_LIB_EC, EC_R_BIGNUM_OUT_OF_RANGE);
0f113f3e
MC
340 return 0;
341 }
e0b660c2
NT
342 num_bytes = BN_bn2lebinpad(bn, b_out, sizeof(b_out));
343 if (num_bytes < 0) {
9311d0c4 344 ERR_raise(ERR_LIB_EC, EC_R_BIGNUM_OUT_OF_RANGE);
0f113f3e
MC
345 return 0;
346 }
0f113f3e
MC
347 bin28_to_felem(out, b_out);
348 return 1;
349}
04daec86
BM
350
351/* From internal representation to OpenSSL BIGNUM */
3e00b4c9 352static BIGNUM *felem_to_BN(BIGNUM *out, const felem in)
0f113f3e 353{
e0b660c2
NT
354 felem_bytearray b_out;
355 felem_to_bin28(b_out, in);
356 return BN_lebin2bn(b_out, sizeof(b_out), out);
0f113f3e 357}
04daec86
BM
358
359/******************************************************************************/
3a83462d 360/*-
0f113f3e 361 * FIELD OPERATIONS
04daec86
BM
362 *
363 * Field operations, using the internal representation of field elements.
364 * NB! These operations are specific to our point multiplication and cannot be
365 * expected to be correct in general - e.g., multiplication with a large scalar
366 * will cause an overflow.
367 *
368 */
369
3e00b4c9 370static void felem_one(felem out)
0f113f3e
MC
371{
372 out[0] = 1;
373 out[1] = 0;
374 out[2] = 0;
375 out[3] = 0;
376}
3e00b4c9
BM
377
378static void felem_assign(felem out, const felem in)
0f113f3e
MC
379{
380 out[0] = in[0];
381 out[1] = in[1];
382 out[2] = in[2];
383 out[3] = in[3];
384}
3e00b4c9 385
04daec86 386/* Sum two field elements: out += in */
3e00b4c9 387static void felem_sum(felem out, const felem in)
0f113f3e
MC
388{
389 out[0] += in[0];
390 out[1] += in[1];
391 out[2] += in[2];
392 out[3] += in[3];
393}
04daec86
BM
394
395/* Subtract field elements: out -= in */
396/* Assumes in[i] < 2^57 */
3e00b4c9 397static void felem_diff(felem out, const felem in)
0f113f3e
MC
398{
399 static const limb two58p2 = (((limb) 1) << 58) + (((limb) 1) << 2);
400 static const limb two58m2 = (((limb) 1) << 58) - (((limb) 1) << 2);
401 static const limb two58m42m2 = (((limb) 1) << 58) -
402 (((limb) 1) << 42) - (((limb) 1) << 2);
403
404 /* Add 0 mod 2^224-2^96+1 to ensure out > in */
405 out[0] += two58p2;
406 out[1] += two58m42m2;
407 out[2] += two58m2;
408 out[3] += two58m2;
409
410 out[0] -= in[0];
411 out[1] -= in[1];
412 out[2] -= in[2];
413 out[3] -= in[3];
414}
04daec86 415
3e00b4c9 416/* Subtract in unreduced 128-bit mode: out -= in */
04daec86 417/* Assumes in[i] < 2^119 */
3e00b4c9 418static void widefelem_diff(widefelem out, const widefelem in)
0f113f3e
MC
419{
420 static const widelimb two120 = ((widelimb) 1) << 120;
421 static const widelimb two120m64 = (((widelimb) 1) << 120) -
422 (((widelimb) 1) << 64);
423 static const widelimb two120m104m64 = (((widelimb) 1) << 120) -
424 (((widelimb) 1) << 104) - (((widelimb) 1) << 64);
425
426 /* Add 0 mod 2^224-2^96+1 to ensure out > in */
427 out[0] += two120;
428 out[1] += two120m64;
429 out[2] += two120m64;
430 out[3] += two120;
431 out[4] += two120m104m64;
432 out[5] += two120m64;
433 out[6] += two120m64;
434
435 out[0] -= in[0];
436 out[1] -= in[1];
437 out[2] -= in[2];
438 out[3] -= in[3];
439 out[4] -= in[4];
440 out[5] -= in[5];
441 out[6] -= in[6];
442}
04daec86
BM
443
444/* Subtract in mixed mode: out128 -= in64 */
445/* in[i] < 2^63 */
3e00b4c9 446static void felem_diff_128_64(widefelem out, const felem in)
0f113f3e
MC
447{
448 static const widelimb two64p8 = (((widelimb) 1) << 64) +
449 (((widelimb) 1) << 8);
450 static const widelimb two64m8 = (((widelimb) 1) << 64) -
451 (((widelimb) 1) << 8);
452 static const widelimb two64m48m8 = (((widelimb) 1) << 64) -
453 (((widelimb) 1) << 48) - (((widelimb) 1) << 8);
454
455 /* Add 0 mod 2^224-2^96+1 to ensure out > in */
456 out[0] += two64p8;
457 out[1] += two64m48m8;
458 out[2] += two64m8;
459 out[3] += two64m8;
460
461 out[0] -= in[0];
462 out[1] -= in[1];
463 out[2] -= in[2];
464 out[3] -= in[3];
465}
466
467/*
468 * Multiply a field element by a scalar: out = out * scalar The scalars we
469 * actually use are small, so results fit without overflow
470 */
3e00b4c9 471static void felem_scalar(felem out, const limb scalar)
0f113f3e
MC
472{
473 out[0] *= scalar;
474 out[1] *= scalar;
475 out[2] *= scalar;
476 out[3] *= scalar;
477}
478
479/*
480 * Multiply an unreduced field element by a scalar: out = out * scalar The
481 * scalars we actually use are small, so results fit without overflow
482 */
3e00b4c9 483static void widefelem_scalar(widefelem out, const widelimb scalar)
0f113f3e
MC
484{
485 out[0] *= scalar;
486 out[1] *= scalar;
487 out[2] *= scalar;
488 out[3] *= scalar;
489 out[4] *= scalar;
490 out[5] *= scalar;
491 out[6] *= scalar;
492}
04daec86
BM
493
494/* Square a field element: out = in^2 */
3e00b4c9 495static void felem_square(widefelem out, const felem in)
0f113f3e
MC
496{
497 limb tmp0, tmp1, tmp2;
498 tmp0 = 2 * in[0];
499 tmp1 = 2 * in[1];
500 tmp2 = 2 * in[2];
501 out[0] = ((widelimb) in[0]) * in[0];
502 out[1] = ((widelimb) in[0]) * tmp1;
503 out[2] = ((widelimb) in[0]) * tmp2 + ((widelimb) in[1]) * in[1];
504 out[3] = ((widelimb) in[3]) * tmp0 + ((widelimb) in[1]) * tmp2;
505 out[4] = ((widelimb) in[3]) * tmp1 + ((widelimb) in[2]) * in[2];
506 out[5] = ((widelimb) in[3]) * tmp2;
507 out[6] = ((widelimb) in[3]) * in[3];
508}
04daec86
BM
509
510/* Multiply two field elements: out = in1 * in2 */
3e00b4c9 511static void felem_mul(widefelem out, const felem in1, const felem in2)
0f113f3e
MC
512{
513 out[0] = ((widelimb) in1[0]) * in2[0];
514 out[1] = ((widelimb) in1[0]) * in2[1] + ((widelimb) in1[1]) * in2[0];
515 out[2] = ((widelimb) in1[0]) * in2[2] + ((widelimb) in1[1]) * in2[1] +
4eb504ae 516 ((widelimb) in1[2]) * in2[0];
0f113f3e 517 out[3] = ((widelimb) in1[0]) * in2[3] + ((widelimb) in1[1]) * in2[2] +
4eb504ae 518 ((widelimb) in1[2]) * in2[1] + ((widelimb) in1[3]) * in2[0];
0f113f3e 519 out[4] = ((widelimb) in1[1]) * in2[3] + ((widelimb) in1[2]) * in2[2] +
4eb504ae 520 ((widelimb) in1[3]) * in2[1];
0f113f3e
MC
521 out[5] = ((widelimb) in1[2]) * in2[3] + ((widelimb) in1[3]) * in2[2];
522 out[6] = ((widelimb) in1[3]) * in2[3];
523}
04daec86 524
3a83462d
MC
525/*-
526 * Reduce seven 128-bit coefficients to four 64-bit coefficients.
3e00b4c9
BM
527 * Requires in[i] < 2^126,
528 * ensures out[0] < 2^56, out[1] < 2^56, out[2] < 2^56, out[3] <= 2^56 + 2^16 */
529static void felem_reduce(felem out, const widefelem in)
0f113f3e
MC
530{
531 static const widelimb two127p15 = (((widelimb) 1) << 127) +
532 (((widelimb) 1) << 15);
533 static const widelimb two127m71 = (((widelimb) 1) << 127) -
534 (((widelimb) 1) << 71);
535 static const widelimb two127m71m55 = (((widelimb) 1) << 127) -
536 (((widelimb) 1) << 71) - (((widelimb) 1) << 55);
537 widelimb output[5];
538
539 /* Add 0 mod 2^224-2^96+1 to ensure all differences are positive */
540 output[0] = in[0] + two127p15;
541 output[1] = in[1] + two127m71m55;
542 output[2] = in[2] + two127m71;
543 output[3] = in[3];
544 output[4] = in[4];
545
546 /* Eliminate in[4], in[5], in[6] */
547 output[4] += in[6] >> 16;
548 output[3] += (in[6] & 0xffff) << 40;
549 output[2] -= in[6];
550
551 output[3] += in[5] >> 16;
552 output[2] += (in[5] & 0xffff) << 40;
553 output[1] -= in[5];
554
555 output[2] += output[4] >> 16;
556 output[1] += (output[4] & 0xffff) << 40;
557 output[0] -= output[4];
558
559 /* Carry 2 -> 3 -> 4 */
560 output[3] += output[2] >> 56;
561 output[2] &= 0x00ffffffffffffff;
562
563 output[4] = output[3] >> 56;
564 output[3] &= 0x00ffffffffffffff;
565
566 /* Now output[2] < 2^56, output[3] < 2^56, output[4] < 2^72 */
567
568 /* Eliminate output[4] */
569 output[2] += output[4] >> 16;
570 /* output[2] < 2^56 + 2^56 = 2^57 */
571 output[1] += (output[4] & 0xffff) << 40;
572 output[0] -= output[4];
573
574 /* Carry 0 -> 1 -> 2 -> 3 */
575 output[1] += output[0] >> 56;
576 out[0] = output[0] & 0x00ffffffffffffff;
577
578 output[2] += output[1] >> 56;
579 /* output[2] < 2^57 + 2^72 */
580 out[1] = output[1] & 0x00ffffffffffffff;
581 output[3] += output[2] >> 56;
582 /* output[3] <= 2^56 + 2^16 */
583 out[2] = output[2] & 0x00ffffffffffffff;
584
50e735f9
MC
585 /*-
586 * out[0] < 2^56, out[1] < 2^56, out[2] < 2^56,
587 * out[3] <= 2^56 + 2^16 (due to final carry),
588 * so out < 2*p
589 */
0f113f3e
MC
590 out[3] = output[3];
591}
04daec86 592
3e00b4c9 593static void felem_square_reduce(felem out, const felem in)
0f113f3e
MC
594{
595 widefelem tmp;
596 felem_square(tmp, in);
597 felem_reduce(out, tmp);
598}
04daec86 599
3e00b4c9 600static void felem_mul_reduce(felem out, const felem in1, const felem in2)
0f113f3e
MC
601{
602 widefelem tmp;
603 felem_mul(tmp, in1, in2);
604 felem_reduce(out, tmp);
605}
606
607/*
608 * Reduce to unique minimal representation. Requires 0 <= in < 2*p (always
609 * call felem_reduce first)
610 */
3e00b4c9 611static void felem_contract(felem out, const felem in)
0f113f3e
MC
612{
613 static const int64_t two56 = ((limb) 1) << 56;
614 /* 0 <= in < 2*p, p = 2^224 - 2^96 + 1 */
615 /* if in > p , reduce in = in - 2^224 + 2^96 - 1 */
616 int64_t tmp[4], a;
617 tmp[0] = in[0];
618 tmp[1] = in[1];
619 tmp[2] = in[2];
620 tmp[3] = in[3];
621 /* Case 1: a = 1 iff in >= 2^224 */
622 a = (in[3] >> 56);
623 tmp[0] -= a;
624 tmp[1] += a << 40;
625 tmp[3] &= 0x00ffffffffffffff;
626 /*
627 * Case 2: a = 0 iff p <= in < 2^224, i.e., the high 128 bits are all 1
628 * and the lower part is non-zero
629 */
630 a = ((in[3] & in[2] & (in[1] | 0x000000ffffffffff)) + 1) |
631 (((int64_t) (in[0] + (in[1] & 0x000000ffffffffff)) - 1) >> 63);
632 a &= 0x00ffffffffffffff;
633 /* turn a into an all-one mask (if a = 0) or an all-zero mask */
634 a = (a - 1) >> 63;
635 /* subtract 2^224 - 2^96 + 1 if a is all-one */
636 tmp[3] &= a ^ 0xffffffffffffffff;
637 tmp[2] &= a ^ 0xffffffffffffffff;
638 tmp[1] &= (a ^ 0xffffffffffffffff) | 0x000000ffffffffff;
639 tmp[0] -= 1 & a;
640
641 /*
642 * eliminate negative coefficients: if tmp[0] is negative, tmp[1] must be
643 * non-zero, so we only need one step
644 */
645 a = tmp[0] >> 63;
646 tmp[0] += two56 & a;
647 tmp[1] -= 1 & a;
648
649 /* carry 1 -> 2 -> 3 */
650 tmp[2] += tmp[1] >> 56;
651 tmp[1] &= 0x00ffffffffffffff;
652
653 tmp[3] += tmp[2] >> 56;
654 tmp[2] &= 0x00ffffffffffffff;
655
656 /* Now 0 <= out < p */
657 out[0] = tmp[0];
658 out[1] = tmp[1];
659 out[2] = tmp[2];
660 out[3] = tmp[3];
661}
662
dc55e4f7
DB
663/*
664 * Get negative value: out = -in
665 * Requires in[i] < 2^63,
666 * ensures out[0] < 2^56, out[1] < 2^56, out[2] < 2^56, out[3] <= 2^56 + 2^16
667 */
668static void felem_neg(felem out, const felem in)
669{
80de1742
P
670 widefelem tmp;
671
672 memset(tmp, 0, sizeof(tmp));
dc55e4f7
DB
673 felem_diff_128_64(tmp, in);
674 felem_reduce(out, tmp);
675}
676
0f113f3e
MC
677/*
678 * Zero-check: returns 1 if input is 0, and 0 otherwise. We know that field
679 * elements are reduced to in < 2^225, so we only need to check three cases:
680 * 0, 2^224 - 2^96 + 1, and 2^225 - 2^97 + 2
681 */
3e00b4c9 682static limb felem_is_zero(const felem in)
0f113f3e
MC
683{
684 limb zero, two224m96p1, two225m97p2;
685
686 zero = in[0] | in[1] | in[2] | in[3];
687 zero = (((int64_t) (zero) - 1) >> 63) & 1;
688 two224m96p1 = (in[0] ^ 1) | (in[1] ^ 0x00ffff0000000000)
689 | (in[2] ^ 0x00ffffffffffffff) | (in[3] ^ 0x00ffffffffffffff);
690 two224m96p1 = (((int64_t) (two224m96p1) - 1) >> 63) & 1;
691 two225m97p2 = (in[0] ^ 2) | (in[1] ^ 0x00fffe0000000000)
692 | (in[2] ^ 0x00ffffffffffffff) | (in[3] ^ 0x01ffffffffffffff);
693 two225m97p2 = (((int64_t) (two225m97p2) - 1) >> 63) & 1;
694 return (zero | two224m96p1 | two225m97p2);
695}
04daec86 696
c55b786a 697static int felem_is_zero_int(const void *in)
0f113f3e
MC
698{
699 return (int)(felem_is_zero(in) & ((limb) 1));
700}
3e00b4c9 701
04daec86
BM
702/* Invert a field element */
703/* Computation chain copied from djb's code */
3e00b4c9 704static void felem_inv(felem out, const felem in)
0f113f3e
MC
705{
706 felem ftmp, ftmp2, ftmp3, ftmp4;
707 widefelem tmp;
708 unsigned i;
709
710 felem_square(tmp, in);
711 felem_reduce(ftmp, tmp); /* 2 */
712 felem_mul(tmp, in, ftmp);
713 felem_reduce(ftmp, tmp); /* 2^2 - 1 */
714 felem_square(tmp, ftmp);
715 felem_reduce(ftmp, tmp); /* 2^3 - 2 */
716 felem_mul(tmp, in, ftmp);
717 felem_reduce(ftmp, tmp); /* 2^3 - 1 */
718 felem_square(tmp, ftmp);
719 felem_reduce(ftmp2, tmp); /* 2^4 - 2 */
720 felem_square(tmp, ftmp2);
721 felem_reduce(ftmp2, tmp); /* 2^5 - 4 */
722 felem_square(tmp, ftmp2);
723 felem_reduce(ftmp2, tmp); /* 2^6 - 8 */
724 felem_mul(tmp, ftmp2, ftmp);
725 felem_reduce(ftmp, tmp); /* 2^6 - 1 */
726 felem_square(tmp, ftmp);
727 felem_reduce(ftmp2, tmp); /* 2^7 - 2 */
728 for (i = 0; i < 5; ++i) { /* 2^12 - 2^6 */
729 felem_square(tmp, ftmp2);
730 felem_reduce(ftmp2, tmp);
731 }
732 felem_mul(tmp, ftmp2, ftmp);
733 felem_reduce(ftmp2, tmp); /* 2^12 - 1 */
734 felem_square(tmp, ftmp2);
735 felem_reduce(ftmp3, tmp); /* 2^13 - 2 */
736 for (i = 0; i < 11; ++i) { /* 2^24 - 2^12 */
737 felem_square(tmp, ftmp3);
738 felem_reduce(ftmp3, tmp);
739 }
740 felem_mul(tmp, ftmp3, ftmp2);
741 felem_reduce(ftmp2, tmp); /* 2^24 - 1 */
742 felem_square(tmp, ftmp2);
743 felem_reduce(ftmp3, tmp); /* 2^25 - 2 */
744 for (i = 0; i < 23; ++i) { /* 2^48 - 2^24 */
745 felem_square(tmp, ftmp3);
746 felem_reduce(ftmp3, tmp);
747 }
748 felem_mul(tmp, ftmp3, ftmp2);
749 felem_reduce(ftmp3, tmp); /* 2^48 - 1 */
750 felem_square(tmp, ftmp3);
751 felem_reduce(ftmp4, tmp); /* 2^49 - 2 */
752 for (i = 0; i < 47; ++i) { /* 2^96 - 2^48 */
753 felem_square(tmp, ftmp4);
754 felem_reduce(ftmp4, tmp);
755 }
756 felem_mul(tmp, ftmp3, ftmp4);
757 felem_reduce(ftmp3, tmp); /* 2^96 - 1 */
758 felem_square(tmp, ftmp3);
759 felem_reduce(ftmp4, tmp); /* 2^97 - 2 */
760 for (i = 0; i < 23; ++i) { /* 2^120 - 2^24 */
761 felem_square(tmp, ftmp4);
762 felem_reduce(ftmp4, tmp);
763 }
764 felem_mul(tmp, ftmp2, ftmp4);
765 felem_reduce(ftmp2, tmp); /* 2^120 - 1 */
766 for (i = 0; i < 6; ++i) { /* 2^126 - 2^6 */
767 felem_square(tmp, ftmp2);
768 felem_reduce(ftmp2, tmp);
769 }
770 felem_mul(tmp, ftmp2, ftmp);
771 felem_reduce(ftmp, tmp); /* 2^126 - 1 */
772 felem_square(tmp, ftmp);
773 felem_reduce(ftmp, tmp); /* 2^127 - 2 */
774 felem_mul(tmp, ftmp, in);
775 felem_reduce(ftmp, tmp); /* 2^127 - 1 */
776 for (i = 0; i < 97; ++i) { /* 2^224 - 2^97 */
777 felem_square(tmp, ftmp);
778 felem_reduce(ftmp, tmp);
779 }
780 felem_mul(tmp, ftmp, ftmp3);
781 felem_reduce(out, tmp); /* 2^224 - 2^96 - 1 */
782}
783
784/*
785 * Copy in constant time: if icopy == 1, copy in to out, if icopy == 0, copy
786 * out to itself.
787 */
788static void copy_conditional(felem out, const felem in, limb icopy)
789{
790 unsigned i;
791 /*
792 * icopy is a (64-bit) 0 or 1, so copy is either all-zero or all-one
793 */
794 const limb copy = -icopy;
795 for (i = 0; i < 4; ++i) {
796 const limb tmp = copy & (in[i] ^ out[i]);
797 out[i] ^= tmp;
798 }
799}
04daec86 800
04daec86 801/******************************************************************************/
3a83462d 802/*-
0f113f3e 803 * ELLIPTIC CURVE POINT OPERATIONS
04daec86
BM
804 *
805 * Points are represented in Jacobian projective coordinates:
806 * (X, Y, Z) corresponds to the affine point (X/Z^2, Y/Z^3),
807 * or to the point at infinity if Z == 0.
808 *
809 */
810
1d97c843
TH
811/*-
812 * Double an elliptic curve point:
04daec86
BM
813 * (X', Y', Z') = 2 * (X, Y, Z), where
814 * X' = (3 * (X - Z^2) * (X + Z^2))^2 - 8 * X * Y^2
eb8e052c 815 * Y' = 3 * (X - Z^2) * (X + Z^2) * (4 * X * Y^2 - X') - 8 * Y^4
04daec86
BM
816 * Z' = (Y + Z)^2 - Y^2 - Z^2 = 2 * Y * Z
817 * Outputs can equal corresponding inputs, i.e., x_out == x_in is allowed,
0f113f3e 818 * while x_out == y_in is not (maybe this works, but it's not tested).
1d97c843 819 */
04daec86 820static void
3e00b4c9
BM
821point_double(felem x_out, felem y_out, felem z_out,
822 const felem x_in, const felem y_in, const felem z_in)
0f113f3e
MC
823{
824 widefelem tmp, tmp2;
825 felem delta, gamma, beta, alpha, ftmp, ftmp2;
826
827 felem_assign(ftmp, x_in);
828 felem_assign(ftmp2, x_in);
829
830 /* delta = z^2 */
831 felem_square(tmp, z_in);
832 felem_reduce(delta, tmp);
833
834 /* gamma = y^2 */
835 felem_square(tmp, y_in);
836 felem_reduce(gamma, tmp);
837
838 /* beta = x*gamma */
839 felem_mul(tmp, x_in, gamma);
840 felem_reduce(beta, tmp);
841
842 /* alpha = 3*(x-delta)*(x+delta) */
843 felem_diff(ftmp, delta);
844 /* ftmp[i] < 2^57 + 2^58 + 2 < 2^59 */
845 felem_sum(ftmp2, delta);
846 /* ftmp2[i] < 2^57 + 2^57 = 2^58 */
847 felem_scalar(ftmp2, 3);
848 /* ftmp2[i] < 3 * 2^58 < 2^60 */
849 felem_mul(tmp, ftmp, ftmp2);
850 /* tmp[i] < 2^60 * 2^59 * 4 = 2^121 */
851 felem_reduce(alpha, tmp);
852
853 /* x' = alpha^2 - 8*beta */
854 felem_square(tmp, alpha);
855 /* tmp[i] < 4 * 2^57 * 2^57 = 2^116 */
856 felem_assign(ftmp, beta);
857 felem_scalar(ftmp, 8);
858 /* ftmp[i] < 8 * 2^57 = 2^60 */
859 felem_diff_128_64(tmp, ftmp);
860 /* tmp[i] < 2^116 + 2^64 + 8 < 2^117 */
861 felem_reduce(x_out, tmp);
862
863 /* z' = (y + z)^2 - gamma - delta */
864 felem_sum(delta, gamma);
865 /* delta[i] < 2^57 + 2^57 = 2^58 */
866 felem_assign(ftmp, y_in);
867 felem_sum(ftmp, z_in);
868 /* ftmp[i] < 2^57 + 2^57 = 2^58 */
869 felem_square(tmp, ftmp);
870 /* tmp[i] < 4 * 2^58 * 2^58 = 2^118 */
871 felem_diff_128_64(tmp, delta);
872 /* tmp[i] < 2^118 + 2^64 + 8 < 2^119 */
873 felem_reduce(z_out, tmp);
874
875 /* y' = alpha*(4*beta - x') - 8*gamma^2 */
876 felem_scalar(beta, 4);
877 /* beta[i] < 4 * 2^57 = 2^59 */
878 felem_diff(beta, x_out);
879 /* beta[i] < 2^59 + 2^58 + 2 < 2^60 */
880 felem_mul(tmp, alpha, beta);
881 /* tmp[i] < 4 * 2^57 * 2^60 = 2^119 */
882 felem_square(tmp2, gamma);
883 /* tmp2[i] < 4 * 2^57 * 2^57 = 2^116 */
884 widefelem_scalar(tmp2, 8);
885 /* tmp2[i] < 8 * 2^116 = 2^119 */
886 widefelem_diff(tmp, tmp2);
887 /* tmp[i] < 2^119 + 2^120 < 2^121 */
888 felem_reduce(y_out, tmp);
889}
04daec86 890
1d97c843
TH
891/*-
892 * Add two elliptic curve points:
04daec86
BM
893 * (X_1, Y_1, Z_1) + (X_2, Y_2, Z_2) = (X_3, Y_3, Z_3), where
894 * X_3 = (Z_1^3 * Y_2 - Z_2^3 * Y_1)^2 - (Z_1^2 * X_2 - Z_2^2 * X_1)^3 -
895 * 2 * Z_2^2 * X_1 * (Z_1^2 * X_2 - Z_2^2 * X_1)^2
896 * Y_3 = (Z_1^3 * Y_2 - Z_2^3 * Y_1) * (Z_2^2 * X_1 * (Z_1^2 * X_2 - Z_2^2 * X_1)^2 - X_3) -
897 * Z_2^3 * Y_1 * (Z_1^2 * X_2 - Z_2^2 * X_1)^3
3e00b4c9
BM
898 * Z_3 = (Z_1^2 * X_2 - Z_2^2 * X_1) * (Z_1 * Z_2)
899 *
900 * This runs faster if 'mixed' is set, which requires Z_2 = 1 or Z_2 = 0.
901 */
04daec86 902
0f113f3e
MC
903/*
904 * This function is not entirely constant-time: it includes a branch for
905 * checking whether the two input points are equal, (while not equal to the
906 * point at infinity). This case never happens during single point
907 * multiplication, so there is no timing leak for ECDH or ECDSA signing.
908 */
3e00b4c9 909static void point_add(felem x3, felem y3, felem z3,
0f113f3e
MC
910 const felem x1, const felem y1, const felem z1,
911 const int mixed, const felem x2, const felem y2,
912 const felem z2)
913{
914 felem ftmp, ftmp2, ftmp3, ftmp4, ftmp5, x_out, y_out, z_out;
915 widefelem tmp, tmp2;
916 limb z1_is_zero, z2_is_zero, x_equal, y_equal;
0164bf81 917 limb points_equal;
0f113f3e
MC
918
919 if (!mixed) {
920 /* ftmp2 = z2^2 */
921 felem_square(tmp, z2);
922 felem_reduce(ftmp2, tmp);
923
924 /* ftmp4 = z2^3 */
925 felem_mul(tmp, ftmp2, z2);
926 felem_reduce(ftmp4, tmp);
927
928 /* ftmp4 = z2^3*y1 */
929 felem_mul(tmp2, ftmp4, y1);
930 felem_reduce(ftmp4, tmp2);
931
932 /* ftmp2 = z2^2*x1 */
933 felem_mul(tmp2, ftmp2, x1);
934 felem_reduce(ftmp2, tmp2);
935 } else {
936 /*
937 * We'll assume z2 = 1 (special case z2 = 0 is handled later)
938 */
939
940 /* ftmp4 = z2^3*y1 */
941 felem_assign(ftmp4, y1);
942
943 /* ftmp2 = z2^2*x1 */
944 felem_assign(ftmp2, x1);
945 }
946
947 /* ftmp = z1^2 */
948 felem_square(tmp, z1);
949 felem_reduce(ftmp, tmp);
950
951 /* ftmp3 = z1^3 */
952 felem_mul(tmp, ftmp, z1);
953 felem_reduce(ftmp3, tmp);
954
955 /* tmp = z1^3*y2 */
956 felem_mul(tmp, ftmp3, y2);
957 /* tmp[i] < 4 * 2^57 * 2^57 = 2^116 */
958
959 /* ftmp3 = z1^3*y2 - z2^3*y1 */
960 felem_diff_128_64(tmp, ftmp4);
961 /* tmp[i] < 2^116 + 2^64 + 8 < 2^117 */
962 felem_reduce(ftmp3, tmp);
963
964 /* tmp = z1^2*x2 */
965 felem_mul(tmp, ftmp, x2);
966 /* tmp[i] < 4 * 2^57 * 2^57 = 2^116 */
967
968 /* ftmp = z1^2*x2 - z2^2*x1 */
969 felem_diff_128_64(tmp, ftmp2);
970 /* tmp[i] < 2^116 + 2^64 + 8 < 2^117 */
971 felem_reduce(ftmp, tmp);
972
973 /*
0164bf81
NT
974 * The formulae are incorrect if the points are equal, in affine coordinates
975 * (X_1, Y_1) == (X_2, Y_2), so we check for this and do doubling if this
976 * happens.
977 *
978 * We use bitwise operations to avoid potential side-channels introduced by
979 * the short-circuiting behaviour of boolean operators.
0f113f3e
MC
980 */
981 x_equal = felem_is_zero(ftmp);
982 y_equal = felem_is_zero(ftmp3);
0164bf81
NT
983 /*
984 * The special case of either point being the point at infinity (z1 and/or
985 * z2 are zero), is handled separately later on in this function, so we
986 * avoid jumping to point_double here in those special cases.
987 */
0f113f3e
MC
988 z1_is_zero = felem_is_zero(z1);
989 z2_is_zero = felem_is_zero(z2);
0164bf81
NT
990
991 /*
992 * Compared to `ecp_nistp256.c` and `ecp_nistp521.c`, in this
993 * specific implementation `felem_is_zero()` returns truth as `0x1`
994 * (rather than `0xff..ff`).
995 *
996 * This implies that `~true` in this implementation becomes
997 * `0xff..fe` (rather than `0x0`): for this reason, to be used in
998 * the if expression, we mask out only the last bit in the next
999 * line.
1000 */
1001 points_equal = (x_equal & y_equal & (~z1_is_zero) & (~z2_is_zero)) & 1;
1002
1003 if (points_equal) {
1004 /*
1005 * This is obviously not constant-time but, as mentioned before, this
1006 * case never happens during single point multiplication, so there is no
1007 * timing leak for ECDH or ECDSA signing.
1008 */
0f113f3e
MC
1009 point_double(x3, y3, z3, x1, y1, z1);
1010 return;
1011 }
1012
1013 /* ftmp5 = z1*z2 */
1014 if (!mixed) {
1015 felem_mul(tmp, z1, z2);
1016 felem_reduce(ftmp5, tmp);
1017 } else {
1018 /* special case z2 = 0 is handled later */
1019 felem_assign(ftmp5, z1);
1020 }
1021
1022 /* z_out = (z1^2*x2 - z2^2*x1)*(z1*z2) */
1023 felem_mul(tmp, ftmp, ftmp5);
1024 felem_reduce(z_out, tmp);
1025
1026 /* ftmp = (z1^2*x2 - z2^2*x1)^2 */
1027 felem_assign(ftmp5, ftmp);
1028 felem_square(tmp, ftmp);
1029 felem_reduce(ftmp, tmp);
1030
1031 /* ftmp5 = (z1^2*x2 - z2^2*x1)^3 */
1032 felem_mul(tmp, ftmp, ftmp5);
1033 felem_reduce(ftmp5, tmp);
1034
1035 /* ftmp2 = z2^2*x1*(z1^2*x2 - z2^2*x1)^2 */
1036 felem_mul(tmp, ftmp2, ftmp);
1037 felem_reduce(ftmp2, tmp);
1038
1039 /* tmp = z2^3*y1*(z1^2*x2 - z2^2*x1)^3 */
1040 felem_mul(tmp, ftmp4, ftmp5);
1041 /* tmp[i] < 4 * 2^57 * 2^57 = 2^116 */
1042
1043 /* tmp2 = (z1^3*y2 - z2^3*y1)^2 */
1044 felem_square(tmp2, ftmp3);
1045 /* tmp2[i] < 4 * 2^57 * 2^57 < 2^116 */
1046
1047 /* tmp2 = (z1^3*y2 - z2^3*y1)^2 - (z1^2*x2 - z2^2*x1)^3 */
1048 felem_diff_128_64(tmp2, ftmp5);
1049 /* tmp2[i] < 2^116 + 2^64 + 8 < 2^117 */
1050
1051 /* ftmp5 = 2*z2^2*x1*(z1^2*x2 - z2^2*x1)^2 */
1052 felem_assign(ftmp5, ftmp2);
1053 felem_scalar(ftmp5, 2);
1054 /* ftmp5[i] < 2 * 2^57 = 2^58 */
1055
50e735f9
MC
1056 /*-
1057 * x_out = (z1^3*y2 - z2^3*y1)^2 - (z1^2*x2 - z2^2*x1)^3 -
1058 * 2*z2^2*x1*(z1^2*x2 - z2^2*x1)^2
1059 */
0f113f3e
MC
1060 felem_diff_128_64(tmp2, ftmp5);
1061 /* tmp2[i] < 2^117 + 2^64 + 8 < 2^118 */
1062 felem_reduce(x_out, tmp2);
1063
1064 /* ftmp2 = z2^2*x1*(z1^2*x2 - z2^2*x1)^2 - x_out */
1065 felem_diff(ftmp2, x_out);
1066 /* ftmp2[i] < 2^57 + 2^58 + 2 < 2^59 */
1067
1068 /*
1069 * tmp2 = (z1^3*y2 - z2^3*y1)*(z2^2*x1*(z1^2*x2 - z2^2*x1)^2 - x_out)
1070 */
1071 felem_mul(tmp2, ftmp3, ftmp2);
1072 /* tmp2[i] < 4 * 2^57 * 2^59 = 2^118 */
1073
50e735f9
MC
1074 /*-
1075 * y_out = (z1^3*y2 - z2^3*y1)*(z2^2*x1*(z1^2*x2 - z2^2*x1)^2 - x_out) -
1076 * z2^3*y1*(z1^2*x2 - z2^2*x1)^3
1077 */
0f113f3e
MC
1078 widefelem_diff(tmp2, tmp);
1079 /* tmp2[i] < 2^118 + 2^120 < 2^121 */
1080 felem_reduce(y_out, tmp2);
1081
1082 /*
1083 * the result (x_out, y_out, z_out) is incorrect if one of the inputs is
1084 * the point at infinity, so we need to check for this separately
1085 */
1086
1087 /*
1088 * if point 1 is at infinity, copy point 2 to output, and vice versa
1089 */
1090 copy_conditional(x_out, x2, z1_is_zero);
1091 copy_conditional(x_out, x1, z2_is_zero);
1092 copy_conditional(y_out, y2, z1_is_zero);
1093 copy_conditional(y_out, y1, z2_is_zero);
1094 copy_conditional(z_out, z2, z1_is_zero);
1095 copy_conditional(z_out, z1, z2_is_zero);
1096 felem_assign(x3, x_out);
1097 felem_assign(y3, y_out);
1098 felem_assign(z3, z_out);
1099}
04daec86 1100
dbd87ffc
MC
1101/*
1102 * select_point selects the |idx|th point from a precomputation table and
1103 * copies it to out.
1104 * The pre_comp array argument should be size of |size| argument
1105 */
0f113f3e
MC
1106static void select_point(const u64 idx, unsigned int size,
1107 const felem pre_comp[][3], felem out[3])
1108{
1109 unsigned i, j;
1110 limb *outlimbs = &out[0][0];
0f113f3e 1111
88f4c6f3 1112 memset(out, 0, sizeof(*out) * 3);
0f113f3e
MC
1113 for (i = 0; i < size; i++) {
1114 const limb *inlimbs = &pre_comp[i][0][0];
1115 u64 mask = i ^ idx;
1116 mask |= mask >> 4;
1117 mask |= mask >> 2;
1118 mask |= mask >> 1;
1119 mask &= 1;
1120 mask--;
1121 for (j = 0; j < 4 * 3; j++)
1122 outlimbs[j] |= inlimbs[j] & mask;
1123 }
1124}
3e00b4c9
BM
1125
1126/* get_bit returns the |i|th bit in |in| */
1127static char get_bit(const felem_bytearray in, unsigned i)
0f113f3e
MC
1128{
1129 if (i >= 224)
1130 return 0;
1131 return (in[i >> 3] >> (i & 7)) & 1;
1132}
1133
1134/*
1135 * Interleaved point multiplication using precomputed point multiples: The
1136 * small point multiples 0*P, 1*P, ..., 16*P are in pre_comp[], the scalars
1137 * in scalars[]. If g_scalar is non-NULL, we also add this multiple of the
1138 * generator, using certain (large) precomputed multiples in g_pre_comp.
1139 * Output point (X, Y, Z) is stored in x_out, y_out, z_out
1140 */
3e00b4c9 1141static void batch_mul(felem x_out, felem y_out, felem z_out,
0f113f3e
MC
1142 const felem_bytearray scalars[],
1143 const unsigned num_points, const u8 *g_scalar,
1144 const int mixed, const felem pre_comp[][17][3],
1145 const felem g_pre_comp[2][16][3])
1146{
1147 int i, skip;
1148 unsigned num;
1149 unsigned gen_mul = (g_scalar != NULL);
1150 felem nq[3], tmp[4];
1151 u64 bits;
1152 u8 sign, digit;
1153
1154 /* set nq to the point at infinity */
16f8d4eb 1155 memset(nq, 0, sizeof(nq));
0f113f3e
MC
1156
1157 /*
1158 * Loop over all scalars msb-to-lsb, interleaving additions of multiples
1159 * of the generator (two in each of the last 28 rounds) and additions of
1160 * other points multiples (every 5th round).
1161 */
1162 skip = 1; /* save two point operations in the first
1163 * round */
1164 for (i = (num_points ? 220 : 27); i >= 0; --i) {
1165 /* double */
1166 if (!skip)
1167 point_double(nq[0], nq[1], nq[2], nq[0], nq[1], nq[2]);
1168
1169 /* add multiples of the generator */
1170 if (gen_mul && (i <= 27)) {
1171 /* first, look 28 bits upwards */
1172 bits = get_bit(g_scalar, i + 196) << 3;
1173 bits |= get_bit(g_scalar, i + 140) << 2;
1174 bits |= get_bit(g_scalar, i + 84) << 1;
1175 bits |= get_bit(g_scalar, i + 28);
1176 /* select the point to add, in constant time */
1177 select_point(bits, 16, g_pre_comp[1], tmp);
1178
1179 if (!skip) {
1180 /* value 1 below is argument for "mixed" */
1181 point_add(nq[0], nq[1], nq[2],
1182 nq[0], nq[1], nq[2], 1, tmp[0], tmp[1], tmp[2]);
1183 } else {
1184 memcpy(nq, tmp, 3 * sizeof(felem));
1185 skip = 0;
1186 }
1187
1188 /* second, look at the current position */
1189 bits = get_bit(g_scalar, i + 168) << 3;
1190 bits |= get_bit(g_scalar, i + 112) << 2;
1191 bits |= get_bit(g_scalar, i + 56) << 1;
1192 bits |= get_bit(g_scalar, i);
1193 /* select the point to add, in constant time */
1194 select_point(bits, 16, g_pre_comp[0], tmp);
1195 point_add(nq[0], nq[1], nq[2],
1196 nq[0], nq[1], nq[2],
1197 1 /* mixed */ , tmp[0], tmp[1], tmp[2]);
1198 }
1199
1200 /* do other additions every 5 doublings */
1201 if (num_points && (i % 5 == 0)) {
1202 /* loop over all scalars */
1203 for (num = 0; num < num_points; ++num) {
1204 bits = get_bit(scalars[num], i + 4) << 5;
1205 bits |= get_bit(scalars[num], i + 3) << 4;
1206 bits |= get_bit(scalars[num], i + 2) << 3;
1207 bits |= get_bit(scalars[num], i + 1) << 2;
1208 bits |= get_bit(scalars[num], i) << 1;
1209 bits |= get_bit(scalars[num], i - 1);
32ab57cb 1210 ossl_ec_GFp_nistp_recode_scalar_bits(&sign, &digit, bits);
0f113f3e
MC
1211
1212 /* select the point to add or subtract */
1213 select_point(digit, 17, pre_comp[num], tmp);
1214 felem_neg(tmp[3], tmp[1]); /* (X, -Y, Z) is the negative
1215 * point */
1216 copy_conditional(tmp[1], tmp[3], sign);
1217
1218 if (!skip) {
1219 point_add(nq[0], nq[1], nq[2],
1220 nq[0], nq[1], nq[2],
1221 mixed, tmp[0], tmp[1], tmp[2]);
1222 } else {
1223 memcpy(nq, tmp, 3 * sizeof(felem));
1224 skip = 0;
1225 }
1226 }
1227 }
1228 }
1229 felem_assign(x_out, nq[0]);
1230 felem_assign(y_out, nq[1]);
1231 felem_assign(z_out, nq[2]);
1232}
04daec86
BM
1233
1234/******************************************************************************/
0f113f3e
MC
1235/*
1236 * FUNCTIONS TO MANAGE PRECOMPUTATION
04daec86
BM
1237 */
1238
3f5abab9 1239static NISTP224_PRE_COMP *nistp224_pre_comp_new(void)
0f113f3e 1240{
b51bce94
RS
1241 NISTP224_PRE_COMP *ret = OPENSSL_zalloc(sizeof(*ret));
1242
0f113f3e 1243 if (!ret) {
9311d0c4 1244 ERR_raise(ERR_LIB_EC, ERR_R_MALLOC_FAILURE);
0f113f3e
MC
1245 return ret;
1246 }
9b398ef2 1247
0f113f3e 1248 ret->references = 1;
9b398ef2
AG
1249
1250 ret->lock = CRYPTO_THREAD_lock_new();
1251 if (ret->lock == NULL) {
9311d0c4 1252 ERR_raise(ERR_LIB_EC, ERR_R_MALLOC_FAILURE);
9b398ef2
AG
1253 OPENSSL_free(ret);
1254 return NULL;
1255 }
0f113f3e
MC
1256 return ret;
1257}
04daec86 1258
3aef36ff 1259NISTP224_PRE_COMP *EC_nistp224_pre_comp_dup(NISTP224_PRE_COMP *p)
0f113f3e 1260{
9b398ef2 1261 int i;
3aef36ff 1262 if (p != NULL)
2f545ae4 1263 CRYPTO_UP_REF(&p->references, &i, p->lock);
3aef36ff 1264 return p;
0f113f3e 1265}
04daec86 1266
3aef36ff 1267void EC_nistp224_pre_comp_free(NISTP224_PRE_COMP *p)
0f113f3e 1268{
9b398ef2
AG
1269 int i;
1270
1271 if (p == NULL)
1272 return;
1273
2f545ae4 1274 CRYPTO_DOWN_REF(&p->references, &i, p->lock);
9b398ef2
AG
1275 REF_PRINT_COUNT("EC_nistp224", x);
1276 if (i > 0)
0f113f3e 1277 return;
9b398ef2
AG
1278 REF_ASSERT_ISNT(i < 0);
1279
1280 CRYPTO_THREAD_lock_free(p->lock);
3aef36ff 1281 OPENSSL_free(p);
0f113f3e 1282}
04daec86
BM
1283
1284/******************************************************************************/
0f113f3e
MC
1285/*
1286 * OPENSSL EC_METHOD FUNCTIONS
04daec86
BM
1287 */
1288
32ab57cb 1289int ossl_ec_GFp_nistp224_group_init(EC_GROUP *group)
0f113f3e
MC
1290{
1291 int ret;
32ab57cb 1292 ret = ossl_ec_GFp_simple_group_init(group);
0f113f3e
MC
1293 group->a_is_minus3 = 1;
1294 return ret;
1295}
04daec86 1296
32ab57cb
SL
1297int ossl_ec_GFp_nistp224_group_set_curve(EC_GROUP *group, const BIGNUM *p,
1298 const BIGNUM *a, const BIGNUM *b,
1299 BN_CTX *ctx)
0f113f3e
MC
1300{
1301 int ret = 0;
0f113f3e 1302 BIGNUM *curve_p, *curve_a, *curve_b;
f844f9eb 1303#ifndef FIPS_MODULE
a9612d6c 1304 BN_CTX *new_ctx = NULL;
0f113f3e
MC
1305
1306 if (ctx == NULL)
a6482df0 1307 ctx = new_ctx = BN_CTX_new();
a9612d6c
MC
1308#endif
1309 if (ctx == NULL)
1310 return 0;
1311
0f113f3e 1312 BN_CTX_start(ctx);
edea42c6
PY
1313 curve_p = BN_CTX_get(ctx);
1314 curve_a = BN_CTX_get(ctx);
1315 curve_b = BN_CTX_get(ctx);
1316 if (curve_b == NULL)
0f113f3e
MC
1317 goto err;
1318 BN_bin2bn(nistp224_curve_params[0], sizeof(felem_bytearray), curve_p);
1319 BN_bin2bn(nistp224_curve_params[1], sizeof(felem_bytearray), curve_a);
1320 BN_bin2bn(nistp224_curve_params[2], sizeof(felem_bytearray), curve_b);
1321 if ((BN_cmp(curve_p, p)) || (BN_cmp(curve_a, a)) || (BN_cmp(curve_b, b))) {
9311d0c4 1322 ERR_raise(ERR_LIB_EC, EC_R_WRONG_CURVE_PARAMETERS);
0f113f3e
MC
1323 goto err;
1324 }
1325 group->field_mod_func = BN_nist_mod_224;
32ab57cb 1326 ret = ossl_ec_GFp_simple_group_set_curve(group, p, a, b, ctx);
0f113f3e
MC
1327 err:
1328 BN_CTX_end(ctx);
f844f9eb 1329#ifndef FIPS_MODULE
23a1d5e9 1330 BN_CTX_free(new_ctx);
a9612d6c 1331#endif
0f113f3e
MC
1332 return ret;
1333}
1334
1335/*
1336 * Takes the Jacobian coordinates (X, Y, Z) of a point and returns (X', Y') =
1337 * (X/Z^2, Y/Z^3)
1338 */
32ab57cb
SL
1339int ossl_ec_GFp_nistp224_point_get_affine_coordinates(const EC_GROUP *group,
1340 const EC_POINT *point,
1341 BIGNUM *x, BIGNUM *y,
1342 BN_CTX *ctx)
0f113f3e
MC
1343{
1344 felem z1, z2, x_in, y_in, x_out, y_out;
1345 widefelem tmp;
1346
1347 if (EC_POINT_is_at_infinity(group, point)) {
9311d0c4 1348 ERR_raise(ERR_LIB_EC, EC_R_POINT_AT_INFINITY);
0f113f3e
MC
1349 return 0;
1350 }
ace8f546
AP
1351 if ((!BN_to_felem(x_in, point->X)) || (!BN_to_felem(y_in, point->Y)) ||
1352 (!BN_to_felem(z1, point->Z)))
0f113f3e
MC
1353 return 0;
1354 felem_inv(z2, z1);
1355 felem_square(tmp, z2);
1356 felem_reduce(z1, tmp);
1357 felem_mul(tmp, x_in, z1);
1358 felem_reduce(x_in, tmp);
1359 felem_contract(x_out, x_in);
1360 if (x != NULL) {
1361 if (!felem_to_BN(x, x_out)) {
9311d0c4 1362 ERR_raise(ERR_LIB_EC, ERR_R_BN_LIB);
0f113f3e
MC
1363 return 0;
1364 }
1365 }
1366 felem_mul(tmp, z1, z2);
1367 felem_reduce(z1, tmp);
1368 felem_mul(tmp, y_in, z1);
1369 felem_reduce(y_in, tmp);
1370 felem_contract(y_out, y_in);
1371 if (y != NULL) {
1372 if (!felem_to_BN(y, y_out)) {
9311d0c4 1373 ERR_raise(ERR_LIB_EC, ERR_R_BN_LIB);
0f113f3e
MC
1374 return 0;
1375 }
1376 }
1377 return 1;
1378}
1379
1380static void make_points_affine(size_t num, felem points[ /* num */ ][3],
1381 felem tmp_felems[ /* num+1 */ ])
1382{
1383 /*
1384 * Runs in constant time, unless an input is the point at infinity (which
1385 * normally shouldn't happen).
1386 */
32ab57cb
SL
1387 ossl_ec_GFp_nistp_points_make_affine_internal(num,
1388 points,
1389 sizeof(felem),
1390 tmp_felems,
1391 (void (*)(void *))felem_one,
1392 felem_is_zero_int,
1393 (void (*)(void *, const void *))
1394 felem_assign,
1395 (void (*)(void *, const void *))
1396 felem_square_reduce, (void (*)
1397 (void *,
1398 const void
1399 *,
1400 const void
1401 *))
1402 felem_mul_reduce,
1403 (void (*)(void *, const void *))
1404 felem_inv,
1405 (void (*)(void *, const void *))
1406 felem_contract);
0f113f3e
MC
1407}
1408
1409/*
1410 * Computes scalar*generator + \sum scalars[i]*points[i], ignoring NULL
1411 * values Result is stored in r (r can equal one of the inputs).
1412 */
32ab57cb
SL
1413int ossl_ec_GFp_nistp224_points_mul(const EC_GROUP *group, EC_POINT *r,
1414 const BIGNUM *scalar, size_t num,
1415 const EC_POINT *points[],
1416 const BIGNUM *scalars[], BN_CTX *ctx)
0f113f3e
MC
1417{
1418 int ret = 0;
1419 int j;
1420 unsigned i;
1421 int mixed = 0;
0f113f3e
MC
1422 BIGNUM *x, *y, *z, *tmp_scalar;
1423 felem_bytearray g_secret;
1424 felem_bytearray *secrets = NULL;
16f8d4eb 1425 felem (*pre_comp)[17][3] = NULL;
0f113f3e 1426 felem *tmp_felems = NULL;
e0b660c2 1427 int num_bytes;
0f113f3e
MC
1428 int have_pre_comp = 0;
1429 size_t num_points = num;
1430 felem x_in, y_in, z_in, x_out, y_out, z_out;
1431 NISTP224_PRE_COMP *pre = NULL;
1432 const felem(*g_pre_comp)[16][3] = NULL;
1433 EC_POINT *generator = NULL;
1434 const EC_POINT *p = NULL;
1435 const BIGNUM *p_scalar = NULL;
1436
0f113f3e 1437 BN_CTX_start(ctx);
edea42c6
PY
1438 x = BN_CTX_get(ctx);
1439 y = BN_CTX_get(ctx);
1440 z = BN_CTX_get(ctx);
1441 tmp_scalar = BN_CTX_get(ctx);
1442 if (tmp_scalar == NULL)
0f113f3e
MC
1443 goto err;
1444
1445 if (scalar != NULL) {
3aef36ff 1446 pre = group->pre_comp.nistp224;
0f113f3e
MC
1447 if (pre)
1448 /* we have precomputation, try to use it */
1449 g_pre_comp = (const felem(*)[16][3])pre->g_pre_comp;
1450 else
1451 /* try to use the standard precomputation */
1452 g_pre_comp = &gmul[0];
1453 generator = EC_POINT_new(group);
1454 if (generator == NULL)
1455 goto err;
1456 /* get the generator from precomputation */
1457 if (!felem_to_BN(x, g_pre_comp[0][1][0]) ||
1458 !felem_to_BN(y, g_pre_comp[0][1][1]) ||
1459 !felem_to_BN(z, g_pre_comp[0][1][2])) {
9311d0c4 1460 ERR_raise(ERR_LIB_EC, ERR_R_BN_LIB);
0f113f3e
MC
1461 goto err;
1462 }
32ab57cb
SL
1463 if (!ossl_ec_GFp_simple_set_Jprojective_coordinates_GFp(group,
1464 generator,
1465 x, y, z, ctx))
0f113f3e
MC
1466 goto err;
1467 if (0 == EC_POINT_cmp(group, generator, group->generator, ctx))
1468 /* precomputation matches generator */
1469 have_pre_comp = 1;
1470 else
1471 /*
1472 * we don't have valid precomputation: treat the generator as a
1473 * random point
1474 */
1475 num_points = num_points + 1;
1476 }
1477
1478 if (num_points > 0) {
1479 if (num_points >= 3) {
1480 /*
1481 * unless we precompute multiples for just one or two points,
1482 * converting those into affine form is time well spent
1483 */
1484 mixed = 1;
1485 }
b51bce94
RS
1486 secrets = OPENSSL_zalloc(sizeof(*secrets) * num_points);
1487 pre_comp = OPENSSL_zalloc(sizeof(*pre_comp) * num_points);
0f113f3e
MC
1488 if (mixed)
1489 tmp_felems =
16f8d4eb 1490 OPENSSL_malloc(sizeof(felem) * (num_points * 17 + 1));
0f113f3e
MC
1491 if ((secrets == NULL) || (pre_comp == NULL)
1492 || (mixed && (tmp_felems == NULL))) {
9311d0c4 1493 ERR_raise(ERR_LIB_EC, ERR_R_MALLOC_FAILURE);
0f113f3e
MC
1494 goto err;
1495 }
1496
1497 /*
1498 * we treat NULL scalars as 0, and NULL points as points at infinity,
1499 * i.e., they contribute nothing to the linear combination
1500 */
0f113f3e 1501 for (i = 0; i < num_points; ++i) {
4fe2ee3a 1502 if (i == num) {
0f113f3e 1503 /* the generator */
0f113f3e
MC
1504 p = EC_GROUP_get0_generator(group);
1505 p_scalar = scalar;
4fe2ee3a 1506 } else {
0f113f3e 1507 /* the i^th point */
0f113f3e
MC
1508 p = points[i];
1509 p_scalar = scalars[i];
1510 }
1511 if ((p_scalar != NULL) && (p != NULL)) {
1512 /* reduce scalar to 0 <= scalar < 2^224 */
1513 if ((BN_num_bits(p_scalar) > 224)
1514 || (BN_is_negative(p_scalar))) {
1515 /*
1516 * this is an unusual input, and we don't guarantee
1517 * constant-timeness
1518 */
ace8f546 1519 if (!BN_nnmod(tmp_scalar, p_scalar, group->order, ctx)) {
9311d0c4 1520 ERR_raise(ERR_LIB_EC, ERR_R_BN_LIB);
0f113f3e
MC
1521 goto err;
1522 }
e0b660c2
NT
1523 num_bytes = BN_bn2lebinpad(tmp_scalar,
1524 secrets[i], sizeof(secrets[i]));
1525 } else {
1526 num_bytes = BN_bn2lebinpad(p_scalar,
1527 secrets[i], sizeof(secrets[i]));
1528 }
1529 if (num_bytes < 0) {
9311d0c4 1530 ERR_raise(ERR_LIB_EC, ERR_R_BN_LIB);
e0b660c2
NT
1531 goto err;
1532 }
0f113f3e 1533 /* precompute multiples */
ace8f546
AP
1534 if ((!BN_to_felem(x_out, p->X)) ||
1535 (!BN_to_felem(y_out, p->Y)) ||
1536 (!BN_to_felem(z_out, p->Z)))
0f113f3e
MC
1537 goto err;
1538 felem_assign(pre_comp[i][1][0], x_out);
1539 felem_assign(pre_comp[i][1][1], y_out);
1540 felem_assign(pre_comp[i][1][2], z_out);
1541 for (j = 2; j <= 16; ++j) {
1542 if (j & 1) {
1543 point_add(pre_comp[i][j][0], pre_comp[i][j][1],
1544 pre_comp[i][j][2], pre_comp[i][1][0],
1545 pre_comp[i][1][1], pre_comp[i][1][2], 0,
1546 pre_comp[i][j - 1][0],
1547 pre_comp[i][j - 1][1],
1548 pre_comp[i][j - 1][2]);
1549 } else {
1550 point_double(pre_comp[i][j][0], pre_comp[i][j][1],
1551 pre_comp[i][j][2], pre_comp[i][j / 2][0],
1552 pre_comp[i][j / 2][1],
1553 pre_comp[i][j / 2][2]);
1554 }
1555 }
1556 }
1557 }
1558 if (mixed)
1559 make_points_affine(num_points * 17, pre_comp[0], tmp_felems);
1560 }
1561
1562 /* the scalar for the generator */
1563 if ((scalar != NULL) && (have_pre_comp)) {
16f8d4eb 1564 memset(g_secret, 0, sizeof(g_secret));
0f113f3e
MC
1565 /* reduce scalar to 0 <= scalar < 2^224 */
1566 if ((BN_num_bits(scalar) > 224) || (BN_is_negative(scalar))) {
1567 /*
1568 * this is an unusual input, and we don't guarantee
1569 * constant-timeness
1570 */
ace8f546 1571 if (!BN_nnmod(tmp_scalar, scalar, group->order, ctx)) {
9311d0c4 1572 ERR_raise(ERR_LIB_EC, ERR_R_BN_LIB);
0f113f3e
MC
1573 goto err;
1574 }
e0b660c2 1575 num_bytes = BN_bn2lebinpad(tmp_scalar, g_secret, sizeof(g_secret));
4fe2ee3a 1576 } else {
e0b660c2 1577 num_bytes = BN_bn2lebinpad(scalar, g_secret, sizeof(g_secret));
4fe2ee3a 1578 }
0f113f3e
MC
1579 /* do the multiplication with generator precomputation */
1580 batch_mul(x_out, y_out, z_out,
1581 (const felem_bytearray(*))secrets, num_points,
1582 g_secret,
1583 mixed, (const felem(*)[17][3])pre_comp, g_pre_comp);
4fe2ee3a 1584 } else {
0f113f3e
MC
1585 /* do the multiplication without generator precomputation */
1586 batch_mul(x_out, y_out, z_out,
1587 (const felem_bytearray(*))secrets, num_points,
1588 NULL, mixed, (const felem(*)[17][3])pre_comp, NULL);
4fe2ee3a 1589 }
0f113f3e
MC
1590 /* reduce the output to its unique minimal representation */
1591 felem_contract(x_in, x_out);
1592 felem_contract(y_in, y_out);
1593 felem_contract(z_in, z_out);
1594 if ((!felem_to_BN(x, x_in)) || (!felem_to_BN(y, y_in)) ||
1595 (!felem_to_BN(z, z_in))) {
9311d0c4 1596 ERR_raise(ERR_LIB_EC, ERR_R_BN_LIB);
0f113f3e
MC
1597 goto err;
1598 }
32ab57cb
SL
1599 ret = ossl_ec_GFp_simple_set_Jprojective_coordinates_GFp(group, r, x, y, z,
1600 ctx);
0f113f3e
MC
1601
1602 err:
1603 BN_CTX_end(ctx);
8fdc3734 1604 EC_POINT_free(generator);
b548a1f1
RS
1605 OPENSSL_free(secrets);
1606 OPENSSL_free(pre_comp);
1607 OPENSSL_free(tmp_felems);
0f113f3e
MC
1608 return ret;
1609}
04daec86 1610
32ab57cb 1611int ossl_ec_GFp_nistp224_precompute_mult(EC_GROUP *group, BN_CTX *ctx)
0f113f3e
MC
1612{
1613 int ret = 0;
1614 NISTP224_PRE_COMP *pre = NULL;
1615 int i, j;
0f113f3e
MC
1616 BIGNUM *x, *y;
1617 EC_POINT *generator = NULL;
1618 felem tmp_felems[32];
f844f9eb 1619#ifndef FIPS_MODULE
a9612d6c
MC
1620 BN_CTX *new_ctx = NULL;
1621#endif
0f113f3e
MC
1622
1623 /* throw away old precomputation */
2c52ac9b 1624 EC_pre_comp_free(group);
a9612d6c 1625
f844f9eb 1626#ifndef FIPS_MODULE
0f113f3e 1627 if (ctx == NULL)
a6482df0 1628 ctx = new_ctx = BN_CTX_new();
a9612d6c
MC
1629#endif
1630 if (ctx == NULL)
1631 return 0;
1632
0f113f3e 1633 BN_CTX_start(ctx);
edea42c6
PY
1634 x = BN_CTX_get(ctx);
1635 y = BN_CTX_get(ctx);
1636 if (y == NULL)
0f113f3e
MC
1637 goto err;
1638 /* get the generator */
1639 if (group->generator == NULL)
1640 goto err;
1641 generator = EC_POINT_new(group);
1642 if (generator == NULL)
1643 goto err;
1644 BN_bin2bn(nistp224_curve_params[3], sizeof(felem_bytearray), x);
1645 BN_bin2bn(nistp224_curve_params[4], sizeof(felem_bytearray), y);
9cc570d4 1646 if (!EC_POINT_set_affine_coordinates(group, generator, x, y, ctx))
0f113f3e
MC
1647 goto err;
1648 if ((pre = nistp224_pre_comp_new()) == NULL)
1649 goto err;
1650 /*
1651 * if the generator is the standard one, use built-in precomputation
1652 */
1653 if (0 == EC_POINT_cmp(group, generator, group->generator, ctx)) {
1654 memcpy(pre->g_pre_comp, gmul, sizeof(pre->g_pre_comp));
615614c8 1655 goto done;
0f113f3e 1656 }
ace8f546
AP
1657 if ((!BN_to_felem(pre->g_pre_comp[0][1][0], group->generator->X)) ||
1658 (!BN_to_felem(pre->g_pre_comp[0][1][1], group->generator->Y)) ||
1659 (!BN_to_felem(pre->g_pre_comp[0][1][2], group->generator->Z)))
0f113f3e
MC
1660 goto err;
1661 /*
1662 * compute 2^56*G, 2^112*G, 2^168*G for the first table, 2^28*G, 2^84*G,
1663 * 2^140*G, 2^196*G for the second one
1664 */
1665 for (i = 1; i <= 8; i <<= 1) {
1666 point_double(pre->g_pre_comp[1][i][0], pre->g_pre_comp[1][i][1],
1667 pre->g_pre_comp[1][i][2], pre->g_pre_comp[0][i][0],
1668 pre->g_pre_comp[0][i][1], pre->g_pre_comp[0][i][2]);
1669 for (j = 0; j < 27; ++j) {
1670 point_double(pre->g_pre_comp[1][i][0], pre->g_pre_comp[1][i][1],
1671 pre->g_pre_comp[1][i][2], pre->g_pre_comp[1][i][0],
1672 pre->g_pre_comp[1][i][1], pre->g_pre_comp[1][i][2]);
1673 }
1674 if (i == 8)
1675 break;
1676 point_double(pre->g_pre_comp[0][2 * i][0],
1677 pre->g_pre_comp[0][2 * i][1],
1678 pre->g_pre_comp[0][2 * i][2], pre->g_pre_comp[1][i][0],
1679 pre->g_pre_comp[1][i][1], pre->g_pre_comp[1][i][2]);
1680 for (j = 0; j < 27; ++j) {
1681 point_double(pre->g_pre_comp[0][2 * i][0],
1682 pre->g_pre_comp[0][2 * i][1],
1683 pre->g_pre_comp[0][2 * i][2],
1684 pre->g_pre_comp[0][2 * i][0],
1685 pre->g_pre_comp[0][2 * i][1],
1686 pre->g_pre_comp[0][2 * i][2]);
1687 }
1688 }
1689 for (i = 0; i < 2; i++) {
1690 /* g_pre_comp[i][0] is the point at infinity */
1691 memset(pre->g_pre_comp[i][0], 0, sizeof(pre->g_pre_comp[i][0]));
1692 /* the remaining multiples */
1693 /* 2^56*G + 2^112*G resp. 2^84*G + 2^140*G */
1694 point_add(pre->g_pre_comp[i][6][0], pre->g_pre_comp[i][6][1],
1695 pre->g_pre_comp[i][6][2], pre->g_pre_comp[i][4][0],
1696 pre->g_pre_comp[i][4][1], pre->g_pre_comp[i][4][2],
1697 0, pre->g_pre_comp[i][2][0], pre->g_pre_comp[i][2][1],
1698 pre->g_pre_comp[i][2][2]);
1699 /* 2^56*G + 2^168*G resp. 2^84*G + 2^196*G */
1700 point_add(pre->g_pre_comp[i][10][0], pre->g_pre_comp[i][10][1],
1701 pre->g_pre_comp[i][10][2], pre->g_pre_comp[i][8][0],
1702 pre->g_pre_comp[i][8][1], pre->g_pre_comp[i][8][2],
1703 0, pre->g_pre_comp[i][2][0], pre->g_pre_comp[i][2][1],
1704 pre->g_pre_comp[i][2][2]);
1705 /* 2^112*G + 2^168*G resp. 2^140*G + 2^196*G */
1706 point_add(pre->g_pre_comp[i][12][0], pre->g_pre_comp[i][12][1],
1707 pre->g_pre_comp[i][12][2], pre->g_pre_comp[i][8][0],
1708 pre->g_pre_comp[i][8][1], pre->g_pre_comp[i][8][2],
1709 0, pre->g_pre_comp[i][4][0], pre->g_pre_comp[i][4][1],
1710 pre->g_pre_comp[i][4][2]);
1711 /*
1712 * 2^56*G + 2^112*G + 2^168*G resp. 2^84*G + 2^140*G + 2^196*G
1713 */
1714 point_add(pre->g_pre_comp[i][14][0], pre->g_pre_comp[i][14][1],
1715 pre->g_pre_comp[i][14][2], pre->g_pre_comp[i][12][0],
1716 pre->g_pre_comp[i][12][1], pre->g_pre_comp[i][12][2],
1717 0, pre->g_pre_comp[i][2][0], pre->g_pre_comp[i][2][1],
1718 pre->g_pre_comp[i][2][2]);
1719 for (j = 1; j < 8; ++j) {
1720 /* odd multiples: add G resp. 2^28*G */
1721 point_add(pre->g_pre_comp[i][2 * j + 1][0],
1722 pre->g_pre_comp[i][2 * j + 1][1],
1723 pre->g_pre_comp[i][2 * j + 1][2],
1724 pre->g_pre_comp[i][2 * j][0],
1725 pre->g_pre_comp[i][2 * j][1],
1726 pre->g_pre_comp[i][2 * j][2], 0,
1727 pre->g_pre_comp[i][1][0], pre->g_pre_comp[i][1][1],
1728 pre->g_pre_comp[i][1][2]);
1729 }
1730 }
1731 make_points_affine(31, &(pre->g_pre_comp[0][1]), tmp_felems);
1732
615614c8 1733 done:
3aef36ff 1734 SETPRECOMP(group, nistp224, pre);
0f113f3e 1735 pre = NULL;
3aef36ff 1736 ret = 1;
04daec86 1737 err:
0f113f3e 1738 BN_CTX_end(ctx);
8fdc3734 1739 EC_POINT_free(generator);
f844f9eb 1740#ifndef FIPS_MODULE
23a1d5e9 1741 BN_CTX_free(new_ctx);
a9612d6c 1742#endif
3aef36ff 1743 EC_nistp224_pre_comp_free(pre);
0f113f3e
MC
1744 return ret;
1745}
04daec86 1746
32ab57cb 1747int ossl_ec_GFp_nistp224_have_precompute_mult(const EC_GROUP *group)
0f113f3e 1748{
3aef36ff 1749 return HAVEPRECOMP(group, nistp224);
0f113f3e 1750}