1 /*
2 * Copyright 2010-2021 The OpenSSL Project Authors. All Rights Reserved.
3 *
4 * Licensed under the Apache License 2.0 (the "License"). You may not use
5 * this file except in compliance with the License. You can obtain a copy
6 * in the file LICENSE in the source distribution or at
7 * https://www.openssl.org/source/license.html
8 */
9
10 /* Copyright 2011 Google Inc.
11 *
12 * Licensed under the Apache License, Version 2.0 (the "License");
13 *
14 * you may not use this file except in compliance with the License.
15 * You may obtain a copy of the License at
16 *
17 * http://www.apache.org/licenses/LICENSE-2.0
18 *
19 * Unless required by applicable law or agreed to in writing, software
20 * distributed under the License is distributed on an "AS IS" BASIS,
21 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
22 * See the License for the specific language governing permissions and
23 * limitations under the License.
24 */
25
26 /*
27 * ECDSA low level APIs are deprecated for public use, but still ok for
28 * internal use.
29 */
30 #include "internal/deprecated.h"
31
32 /*
33 * A 64-bit implementation of the NIST P-224 elliptic curve point multiplication
34 *
35 * Inspired by Daniel J. Bernstein's public domain nistp224 implementation
36 * and Adam Langley's public domain 64-bit C implementation of curve25519
37 */
38
39 #include <openssl/opensslconf.h>
40
41 #include <stdint.h>
42 #include <string.h>
43 #include <openssl/err.h>
44 #include "ec_local.h"
45
46 #if defined(__SIZEOF_INT128__) && __SIZEOF_INT128__==16
47 /* even with gcc, the typedef won't work for 32-bit platforms */
48 typedef __uint128_t uint128_t; /* nonstandard; implemented by gcc on 64-bit
49 * platforms */
50 #else
51 # error "Your compiler doesn't appear to support 128-bit integer types"
52 #endif
53
54 typedef uint8_t u8;
55 typedef uint64_t u64;
56
57 /******************************************************************************/
58 /*-
59 * INTERNAL REPRESENTATION OF FIELD ELEMENTS
60 *
61 * Field elements are represented as a_0 + 2^56*a_1 + 2^112*a_2 + 2^168*a_3
62 * using 64-bit coefficients called 'limbs',
63 * and sometimes (for multiplication results) as
64 * b_0 + 2^56*b_1 + 2^112*b_2 + 2^168*b_3 + 2^224*b_4 + 2^280*b_5 + 2^336*b_6
65 * using 128-bit coefficients called 'widelimbs'.
66 * A 4-limb representation is an 'felem';
67 * a 7-widelimb representation is a 'widefelem'.
68 * Even within felems, bits of adjacent limbs overlap, and we don't always
69 * reduce the representations: we ensure that inputs to each felem
70 * multiplication satisfy a_i < 2^60, so outputs satisfy b_i < 4*2^60*2^60,
71 * and fit into a 128-bit word without overflow. The coefficients are then
72 * again partially reduced to obtain an felem satisfying a_i < 2^57.
73 * We only reduce to the unique minimal representation at the end of the
74 * computation.
75 */
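
/*-
 * For illustration: a fully reduced felem a stores the integer
 *     a[0] + 2^56*a[1] + 2^112*a[2] + 2^168*a[3],   with each a[i] < 2^56,
 * so the value 1 is {1, 0, 0, 0} and 2^56 is {0, 1, 0, 0}.  The widelimb
 * bound quoted above holds because each product coefficient is a sum of at
 * most four terms a_i*b_j with a_i, b_j < 2^60, hence < 4*2^60*2^60 = 2^122,
 * which leaves six bits of headroom below the 2^128 widelimb limit.
 */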
76
77 typedef uint64_t limb;
78 typedef uint64_t limb_aX __attribute((__aligned__(1)));
79 typedef uint128_t widelimb;
80
81 typedef limb felem[4];
82 typedef widelimb widefelem[7];
83
84 /*
85 * A field element represented as a byte array. 28*8 = 224 bits is also the
86 * size of the group order for this curve, so the same type is used for
87 * scalars in point multiplication.
88 */
89 typedef u8 felem_bytearray[28];
90
91 static const felem_bytearray nistp224_curve_params[5] = {
92 {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, /* p */
93 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00,
94 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01},
95 {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, /* a */
96 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFE, 0xFF, 0xFF, 0xFF, 0xFF,
97 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFE},
98 {0xB4, 0x05, 0x0A, 0x85, 0x0C, 0x04, 0xB3, 0xAB, 0xF5, 0x41, /* b */
99 0x32, 0x56, 0x50, 0x44, 0xB0, 0xB7, 0xD7, 0xBF, 0xD8, 0xBA,
100 0x27, 0x0B, 0x39, 0x43, 0x23, 0x55, 0xFF, 0xB4},
101 {0xB7, 0x0E, 0x0C, 0xBD, 0x6B, 0xB4, 0xBF, 0x7F, 0x32, 0x13, /* x */
102 0x90, 0xB9, 0x4A, 0x03, 0xC1, 0xD3, 0x56, 0xC2, 0x11, 0x22,
103 0x34, 0x32, 0x80, 0xD6, 0x11, 0x5C, 0x1D, 0x21},
104 {0xbd, 0x37, 0x63, 0x88, 0xb5, 0xf7, 0x23, 0xfb, 0x4c, 0x22, /* y */
105 0xdf, 0xe6, 0xcd, 0x43, 0x75, 0xa0, 0x5a, 0x07, 0x47, 0x64,
106 0x44, 0xd5, 0x81, 0x99, 0x85, 0x00, 0x7e, 0x34}
107 };
108
109 /*-
110 * Precomputed multiples of the standard generator
111 * Points are given in coordinates (X, Y, Z) where Z normally is 1
112 * (0 for the point at infinity).
113 * For each field element, slice a_0 is word 0, etc.
114 *
115 * The table has 2 * 16 elements, starting with the following:
116 * index | bits | point
117 * ------+---------+------------------------------
118 * 0 | 0 0 0 0 | 0G
119 * 1 | 0 0 0 1 | 1G
120 * 2 | 0 0 1 0 | 2^56G
121 * 3 | 0 0 1 1 | (2^56 + 1)G
122 * 4 | 0 1 0 0 | 2^112G
123 * 5 | 0 1 0 1 | (2^112 + 1)G
124 * 6 | 0 1 1 0 | (2^112 + 2^56)G
125 * 7 | 0 1 1 1 | (2^112 + 2^56 + 1)G
126 * 8 | 1 0 0 0 | 2^168G
127 * 9 | 1 0 0 1 | (2^168 + 1)G
128 * 10 | 1 0 1 0 | (2^168 + 2^56)G
129 * 11 | 1 0 1 1 | (2^168 + 2^56 + 1)G
130 * 12 | 1 1 0 0 | (2^168 + 2^112)G
131 * 13 | 1 1 0 1 | (2^168 + 2^112 + 1)G
132 * 14 | 1 1 1 0 | (2^168 + 2^112 + 2^56)G
133 * 15 | 1 1 1 1 | (2^168 + 2^112 + 2^56 + 1)G
134 * followed by a copy of this with each element multiplied by 2^28.
135 *
136 * The reason for this is so that we can clock bits into four different
137 * locations when doing simple scalar multiplies against the base point,
138 * and then another four locations using the second 16 elements.
139 */
140 static const felem gmul[2][16][3] = {
141 {{{0, 0, 0, 0},
142 {0, 0, 0, 0},
143 {0, 0, 0, 0}},
144 {{0x3280d6115c1d21, 0xc1d356c2112234, 0x7f321390b94a03, 0xb70e0cbd6bb4bf},
145 {0xd5819985007e34, 0x75a05a07476444, 0xfb4c22dfe6cd43, 0xbd376388b5f723},
146 {1, 0, 0, 0}},
147 {{0xfd9675666ebbe9, 0xbca7664d40ce5e, 0x2242df8d8a2a43, 0x1f49bbb0f99bc5},
148 {0x29e0b892dc9c43, 0xece8608436e662, 0xdc858f185310d0, 0x9812dd4eb8d321},
149 {1, 0, 0, 0}},
150 {{0x6d3e678d5d8eb8, 0x559eed1cb362f1, 0x16e9a3bbce8a3f, 0xeedcccd8c2a748},
151 {0xf19f90ed50266d, 0xabf2b4bf65f9df, 0x313865468fafec, 0x5cb379ba910a17},
152 {1, 0, 0, 0}},
153 {{0x0641966cab26e3, 0x91fb2991fab0a0, 0xefec27a4e13a0b, 0x0499aa8a5f8ebe},
154 {0x7510407766af5d, 0x84d929610d5450, 0x81d77aae82f706, 0x6916f6d4338c5b},
155 {1, 0, 0, 0}},
156 {{0xea95ac3b1f15c6, 0x086000905e82d4, 0xdd323ae4d1c8b1, 0x932b56be7685a3},
157 {0x9ef93dea25dbbf, 0x41665960f390f0, 0xfdec76dbe2a8a7, 0x523e80f019062a},
158 {1, 0, 0, 0}},
159 {{0x822fdd26732c73, 0xa01c83531b5d0f, 0x363f37347c1ba4, 0xc391b45c84725c},
160 {0xbbd5e1b2d6ad24, 0xddfbcde19dfaec, 0xc393da7e222a7f, 0x1efb7890ede244},
161 {1, 0, 0, 0}},
162 {{0x4c9e90ca217da1, 0xd11beca79159bb, 0xff8d33c2c98b7c, 0x2610b39409f849},
163 {0x44d1352ac64da0, 0xcdbb7b2c46b4fb, 0x966c079b753c89, 0xfe67e4e820b112},
164 {1, 0, 0, 0}},
165 {{0xe28cae2df5312d, 0xc71b61d16f5c6e, 0x79b7619a3e7c4c, 0x05c73240899b47},
166 {0x9f7f6382c73e3a, 0x18615165c56bda, 0x641fab2116fd56, 0x72855882b08394},
167 {1, 0, 0, 0}},
168 {{0x0469182f161c09, 0x74a98ca8d00fb5, 0xb89da93489a3e0, 0x41c98768fb0c1d},
169 {0xe5ea05fb32da81, 0x3dce9ffbca6855, 0x1cfe2d3fbf59e6, 0x0e5e03408738a7},
170 {1, 0, 0, 0}},
171 {{0xdab22b2333e87f, 0x4430137a5dd2f6, 0xe03ab9f738beb8, 0xcb0c5d0dc34f24},
172 {0x764a7df0c8fda5, 0x185ba5c3fa2044, 0x9281d688bcbe50, 0xc40331df893881},
173 {1, 0, 0, 0}},
174 {{0xb89530796f0f60, 0xade92bd26909a3, 0x1a0c83fb4884da, 0x1765bf22a5a984},
175 {0x772a9ee75db09e, 0x23bc6c67cec16f, 0x4c1edba8b14e2f, 0xe2a215d9611369},
176 {1, 0, 0, 0}},
177 {{0x571e509fb5efb3, 0xade88696410552, 0xc8ae85fada74fe, 0x6c7e4be83bbde3},
178 {0xff9f51160f4652, 0xb47ce2495a6539, 0xa2946c53b582f4, 0x286d2db3ee9a60},
179 {1, 0, 0, 0}},
180 {{0x40bbd5081a44af, 0x0995183b13926c, 0xbcefba6f47f6d0, 0x215619e9cc0057},
181 {0x8bc94d3b0df45e, 0xf11c54a3694f6f, 0x8631b93cdfe8b5, 0xe7e3f4b0982db9},
182 {1, 0, 0, 0}},
183 {{0xb17048ab3e1c7b, 0xac38f36ff8a1d8, 0x1c29819435d2c6, 0xc813132f4c07e9},
184 {0x2891425503b11f, 0x08781030579fea, 0xf5426ba5cc9674, 0x1e28ebf18562bc},
185 {1, 0, 0, 0}},
186 {{0x9f31997cc864eb, 0x06cd91d28b5e4c, 0xff17036691a973, 0xf1aef351497c58},
187 {0xdd1f2d600564ff, 0xdead073b1402db, 0x74a684435bd693, 0xeea7471f962558},
188 {1, 0, 0, 0}}},
189 {{{0, 0, 0, 0},
190 {0, 0, 0, 0},
191 {0, 0, 0, 0}},
192 {{0x9665266dddf554, 0x9613d78b60ef2d, 0xce27a34cdba417, 0xd35ab74d6afc31},
193 {0x85ccdd22deb15e, 0x2137e5783a6aab, 0xa141cffd8c93c6, 0x355a1830e90f2d},
194 {1, 0, 0, 0}},
195 {{0x1a494eadaade65, 0xd6da4da77fe53c, 0xe7992996abec86, 0x65c3553c6090e3},
196 {0xfa610b1fb09346, 0xf1c6540b8a4aaf, 0xc51a13ccd3cbab, 0x02995b1b18c28a},
197 {1, 0, 0, 0}},
198 {{0x7874568e7295ef, 0x86b419fbe38d04, 0xdc0690a7550d9a, 0xd3966a44beac33},
199 {0x2b7280ec29132f, 0xbeaa3b6a032df3, 0xdc7dd88ae41200, 0xd25e2513e3a100},
200 {1, 0, 0, 0}},
201 {{0x924857eb2efafd, 0xac2bce41223190, 0x8edaa1445553fc, 0x825800fd3562d5},
202 {0x8d79148ea96621, 0x23a01c3dd9ed8d, 0xaf8b219f9416b5, 0xd8db0cc277daea},
203 {1, 0, 0, 0}},
204 {{0x76a9c3b1a700f0, 0xe9acd29bc7e691, 0x69212d1a6b0327, 0x6322e97fe154be},
205 {0x469fc5465d62aa, 0x8d41ed18883b05, 0x1f8eae66c52b88, 0xe4fcbe9325be51},
206 {1, 0, 0, 0}},
207 {{0x825fdf583cac16, 0x020b857c7b023a, 0x683c17744b0165, 0x14ffd0a2daf2f1},
208 {0x323b36184218f9, 0x4944ec4e3b47d4, 0xc15b3080841acf, 0x0bced4b01a28bb},
209 {1, 0, 0, 0}},
210 {{0x92ac22230df5c4, 0x52f33b4063eda8, 0xcb3f19870c0c93, 0x40064f2ba65233},
211 {0xfe16f0924f8992, 0x012da25af5b517, 0x1a57bb24f723a6, 0x06f8bc76760def},
212 {1, 0, 0, 0}},
213 {{0x4a7084f7817cb9, 0xbcab0738ee9a78, 0x3ec11e11d9c326, 0xdc0fe90e0f1aae},
214 {0xcf639ea5f98390, 0x5c350aa22ffb74, 0x9afae98a4047b7, 0x956ec2d617fc45},
215 {1, 0, 0, 0}},
216 {{0x4306d648c1be6a, 0x9247cd8bc9a462, 0xf5595e377d2f2e, 0xbd1c3caff1a52e},
217 {0x045e14472409d0, 0x29f3e17078f773, 0x745a602b2d4f7d, 0x191837685cdfbb},
218 {1, 0, 0, 0}},
219 {{0x5b6ee254a8cb79, 0x4953433f5e7026, 0xe21faeb1d1def4, 0xc4c225785c09de},
220 {0x307ce7bba1e518, 0x31b125b1036db8, 0x47e91868839e8f, 0xc765866e33b9f3},
221 {1, 0, 0, 0}},
222 {{0x3bfece24f96906, 0x4794da641e5093, 0xde5df64f95db26, 0x297ecd89714b05},
223 {0x701bd3ebb2c3aa, 0x7073b4f53cb1d5, 0x13c5665658af16, 0x9895089d66fe58},
224 {1, 0, 0, 0}},
225 {{0x0fef05f78c4790, 0x2d773633b05d2e, 0x94229c3a951c94, 0xbbbd70df4911bb},
226 {0xb2c6963d2c1168, 0x105f47a72b0d73, 0x9fdf6111614080, 0x7b7e94b39e67b0},
227 {1, 0, 0, 0}},
228 {{0xad1a7d6efbe2b3, 0xf012482c0da69d, 0x6b3bdf12438345, 0x40d7558d7aa4d9},
229 {0x8a09fffb5c6d3d, 0x9a356e5d9ffd38, 0x5973f15f4f9b1c, 0xdcd5f59f63c3ea},
230 {1, 0, 0, 0}},
231 {{0xacf39f4c5ca7ab, 0x4c8071cc5fd737, 0xc64e3602cd1184, 0x0acd4644c9abba},
232 {0x6c011a36d8bf6e, 0xfecd87ba24e32a, 0x19f6f56574fad8, 0x050b204ced9405},
233 {1, 0, 0, 0}},
234 {{0xed4f1cae7d9a96, 0x5ceef7ad94c40a, 0x778e4a3bf3ef9b, 0x7405783dc3b55e},
235 {0x32477c61b6e8c6, 0xb46a97570f018b, 0x91176d0a7e95d1, 0x3df90fbc4c7d0e},
236 {1, 0, 0, 0}}}
237 };
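
/*-
 * Reading the table: gmul[0][1] is the generator G itself in (X, Y, 1) form;
 * its limbs are the little-endian 56-bit slices of the big-endian byte
 * arrays nistp224_curve_params[3] and [4] above (e.g. 0x3280d6115c1d21 is
 * the low 56 bits of Gx).  Likewise gmul[0][5] holds (2^112 + 1)*G and
 * gmul[1][5] holds 2^28*(2^112 + 1)*G, following the index/bits mapping
 * described above.  Every finite entry has Z = 1, which is what allows the
 * 'mixed' addition path in batch_mul() below.
 */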
238
239 /* Precomputation for the group generator. */
240 struct nistp224_pre_comp_st {
241 felem g_pre_comp[2][16][3];
242 CRYPTO_REF_COUNT references;
243 CRYPTO_RWLOCK *lock;
244 };
245
246 const EC_METHOD *EC_GFp_nistp224_method(void)
247 {
248 static const EC_METHOD ret = {
249 EC_FLAGS_DEFAULT_OCT,
250 NID_X9_62_prime_field,
251 ossl_ec_GFp_nistp224_group_init,
252 ossl_ec_GFp_simple_group_finish,
253 ossl_ec_GFp_simple_group_clear_finish,
254 ossl_ec_GFp_nist_group_copy,
255 ossl_ec_GFp_nistp224_group_set_curve,
256 ossl_ec_GFp_simple_group_get_curve,
257 ossl_ec_GFp_simple_group_get_degree,
258 ossl_ec_group_simple_order_bits,
259 ossl_ec_GFp_simple_group_check_discriminant,
260 ossl_ec_GFp_simple_point_init,
261 ossl_ec_GFp_simple_point_finish,
262 ossl_ec_GFp_simple_point_clear_finish,
263 ossl_ec_GFp_simple_point_copy,
264 ossl_ec_GFp_simple_point_set_to_infinity,
265 ossl_ec_GFp_simple_point_set_affine_coordinates,
266 ossl_ec_GFp_nistp224_point_get_affine_coordinates,
267 0 /* point_set_compressed_coordinates */ ,
268 0 /* point2oct */ ,
269 0 /* oct2point */ ,
270 ossl_ec_GFp_simple_add,
271 ossl_ec_GFp_simple_dbl,
272 ossl_ec_GFp_simple_invert,
273 ossl_ec_GFp_simple_is_at_infinity,
274 ossl_ec_GFp_simple_is_on_curve,
275 ossl_ec_GFp_simple_cmp,
276 ossl_ec_GFp_simple_make_affine,
277 ossl_ec_GFp_simple_points_make_affine,
278 ossl_ec_GFp_nistp224_points_mul,
279 ossl_ec_GFp_nistp224_precompute_mult,
280 ossl_ec_GFp_nistp224_have_precompute_mult,
281 ossl_ec_GFp_nist_field_mul,
282 ossl_ec_GFp_nist_field_sqr,
283 0 /* field_div */ ,
284 ossl_ec_GFp_simple_field_inv,
285 0 /* field_encode */ ,
286 0 /* field_decode */ ,
287 0, /* field_set_to_one */
288 ossl_ec_key_simple_priv2oct,
289 ossl_ec_key_simple_oct2priv,
290 0, /* set private */
291 ossl_ec_key_simple_generate_key,
292 ossl_ec_key_simple_check_key,
293 ossl_ec_key_simple_generate_public_key,
294 0, /* keycopy */
295 0, /* keyfinish */
296 ossl_ecdh_simple_compute_key,
297 ossl_ecdsa_simple_sign_setup,
298 ossl_ecdsa_simple_sign_sig,
299 ossl_ecdsa_simple_verify_sig,
300 0, /* field_inverse_mod_ord */
301 0, /* blind_coordinates */
302 0, /* ladder_pre */
303 0, /* ladder_step */
304 0 /* ladder_post */
305 };
306
307 return &ret;
308 }
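
/*-
 * Usage sketch (illustrative): in builds configured with
 * enable-ec_nistp_64_gcc_128, this method is normally selected automatically
 * for NID_secp224r1, so application code typically just does e.g.
 *
 *     EC_GROUP *grp = EC_GROUP_new_by_curve_name(NID_secp224r1);
 *     ...
 *     EC_GROUP_free(grp);
 *
 * Creating a group directly from EC_GFp_nistp224_method() with
 * EC_GROUP_new() also works, but that is deprecated API usage in OpenSSL 3.0.
 */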
309
310 /*
311 * Helper functions to convert field elements to/from internal representation
312 */
313 static void bin28_to_felem(felem out, const u8 in[28])
314 {
315 out[0] = *((const limb *)(in)) & 0x00ffffffffffffff;
316 out[1] = (*((const limb_aX *)(in + 7))) & 0x00ffffffffffffff;
317 out[2] = (*((const limb_aX *)(in + 14))) & 0x00ffffffffffffff;
318 out[3] = (*((const limb_aX *)(in + 20))) >> 8;
319 }
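
/*-
 * Byte layout used above (input is little-endian):
 *   out[0] = bytes  0..6   (8-byte read at offset 0,  mask drops byte 7)
 *   out[1] = bytes  7..13  (8-byte read at offset 7,  mask drops byte 14)
 *   out[2] = bytes 14..20  (8-byte read at offset 14, mask drops byte 21)
 *   out[3] = bytes 21..27  (8-byte read at offset 20, >> 8 drops byte 20)
 * The limb_aX typedef declares alignment 1 so that the unaligned 8-byte
 * reads at offsets 7, 14 and 20 are well-defined for the compiler.
 */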
320
321 static void felem_to_bin28(u8 out[28], const felem in)
322 {
323 unsigned i;
324 for (i = 0; i < 7; ++i) {
325 out[i] = in[0] >> (8 * i);
326 out[i + 7] = in[1] >> (8 * i);
327 out[i + 14] = in[2] >> (8 * i);
328 out[i + 21] = in[3] >> (8 * i);
329 }
330 }
331
332 /* From OpenSSL BIGNUM to internal representation */
333 static int BN_to_felem(felem out, const BIGNUM *bn)
334 {
335 felem_bytearray b_out;
336 int num_bytes;
337
338 if (BN_is_negative(bn)) {
339 ERR_raise(ERR_LIB_EC, EC_R_BIGNUM_OUT_OF_RANGE);
340 return 0;
341 }
342 num_bytes = BN_bn2lebinpad(bn, b_out, sizeof(b_out));
343 if (num_bytes < 0) {
344 ERR_raise(ERR_LIB_EC, EC_R_BIGNUM_OUT_OF_RANGE);
345 return 0;
346 }
347 bin28_to_felem(out, b_out);
348 return 1;
349 }
350
351 /* From internal representation to OpenSSL BIGNUM */
352 static BIGNUM *felem_to_BN(BIGNUM *out, const felem in)
353 {
354 felem_bytearray b_out;
355 felem_to_bin28(b_out, in);
356 return BN_lebin2bn(b_out, sizeof(b_out), out);
357 }
358
359 /******************************************************************************/
360 /*-
361 * FIELD OPERATIONS
362 *
363 * Field operations, using the internal representation of field elements.
364 * NB! These operations are specific to our point multiplication and cannot be
365 * expected to be correct in general - e.g., multiplication with a large scalar
366 * will cause an overflow.
367 *
368 */
369
370 static void felem_one(felem out)
371 {
372 out[0] = 1;
373 out[1] = 0;
374 out[2] = 0;
375 out[3] = 0;
376 }
377
378 static void felem_assign(felem out, const felem in)
379 {
380 out[0] = in[0];
381 out[1] = in[1];
382 out[2] = in[2];
383 out[3] = in[3];
384 }
385
386 /* Sum two field elements: out += in */
387 static void felem_sum(felem out, const felem in)
388 {
389 out[0] += in[0];
390 out[1] += in[1];
391 out[2] += in[2];
392 out[3] += in[3];
393 }
394
395 /* Subtract field elements: out -= in */
396 /* Assumes in[i] < 2^57 */
397 static void felem_diff(felem out, const felem in)
398 {
399 static const limb two58p2 = (((limb) 1) << 58) + (((limb) 1) << 2);
400 static const limb two58m2 = (((limb) 1) << 58) - (((limb) 1) << 2);
401 static const limb two58m42m2 = (((limb) 1) << 58) -
402 (((limb) 1) << 42) - (((limb) 1) << 2);
403
404 /* Add 0 mod 2^224-2^96+1 to ensure out > in */
405 out[0] += two58p2;
406 out[1] += two58m42m2;
407 out[2] += two58m2;
408 out[3] += two58m2;
409
410 out[0] -= in[0];
411 out[1] -= in[1];
412 out[2] -= in[2];
413 out[3] -= in[3];
414 }
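
/*-
 * Sanity check on the constant added above: interpreted as a field element,
 *   (2^58+2^2) + (2^58-2^42-2^2)*2^56 + (2^58-2^2)*2^112 + (2^58-2^2)*2^168
 *     = 2^226 - 2^98 + 2^2 = 4*(2^224 - 2^96 + 1) = 4*p,
 * so adding it really is adding 0 mod p, while making every limb of out
 * large enough that the subtraction cannot go negative.
 */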
415
416 /* Subtract in unreduced 128-bit mode: out -= in */
417 /* Assumes in[i] < 2^119 */
418 static void widefelem_diff(widefelem out, const widefelem in)
419 {
420 static const widelimb two120 = ((widelimb) 1) << 120;
421 static const widelimb two120m64 = (((widelimb) 1) << 120) -
422 (((widelimb) 1) << 64);
423 static const widelimb two120m104m64 = (((widelimb) 1) << 120) -
424 (((widelimb) 1) << 104) - (((widelimb) 1) << 64);
425
426 /* Add 0 mod 2^224-2^96+1 to ensure out > in */
427 out[0] += two120;
428 out[1] += two120m64;
429 out[2] += two120m64;
430 out[3] += two120;
431 out[4] += two120m104m64;
432 out[5] += two120m64;
433 out[6] += two120m64;
434
435 out[0] -= in[0];
436 out[1] -= in[1];
437 out[2] -= in[2];
438 out[3] -= in[3];
439 out[4] -= in[4];
440 out[5] -= in[5];
441 out[6] -= in[6];
442 }
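
/*-
 * As above, the added constant telescopes to 2^456 - 2^328 + 2^232
 * = 2^232 * (2^224 - 2^96 + 1) = 2^232 * p, i.e. 0 mod p.
 */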
443
444 /* Subtract in mixed mode: out128 -= in64 */
445 /* in[i] < 2^63 */
446 static void felem_diff_128_64(widefelem out, const felem in)
447 {
448 static const widelimb two64p8 = (((widelimb) 1) << 64) +
449 (((widelimb) 1) << 8);
450 static const widelimb two64m8 = (((widelimb) 1) << 64) -
451 (((widelimb) 1) << 8);
452 static const widelimb two64m48m8 = (((widelimb) 1) << 64) -
453 (((widelimb) 1) << 48) - (((widelimb) 1) << 8);
454
455 /* Add 0 mod 2^224-2^96+1 to ensure out > in */
456 out[0] += two64p8;
457 out[1] += two64m48m8;
458 out[2] += two64m8;
459 out[3] += two64m8;
460
461 out[0] -= in[0];
462 out[1] -= in[1];
463 out[2] -= in[2];
464 out[3] -= in[3];
465 }
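
/*-
 * Here the added constant is (2^64+2^8) + (2^64-2^48-2^8)*2^56 +
 * (2^64-2^8)*2^112 + (2^64-2^8)*2^168 = 2^232 - 2^104 + 2^8 = 2^8 * p,
 * again a multiple of p.
 */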
466
467 /*
468 * Multiply a field element by a scalar: out = out * scalar. The scalars we
469 * actually use are small, so the results fit without overflow.
470 */
471 static void felem_scalar(felem out, const limb scalar)
472 {
473 out[0] *= scalar;
474 out[1] *= scalar;
475 out[2] *= scalar;
476 out[3] *= scalar;
477 }
478
479 /*
480 * Multiply an unreduced field element by a scalar: out = out * scalar.
481 * The scalars we actually use are small, so the results fit without overflow.
482 */
483 static void widefelem_scalar(widefelem out, const widelimb scalar)
484 {
485 out[0] *= scalar;
486 out[1] *= scalar;
487 out[2] *= scalar;
488 out[3] *= scalar;
489 out[4] *= scalar;
490 out[5] *= scalar;
491 out[6] *= scalar;
492 }
493
494 /* Square a field element: out = in^2 */
495 static void felem_square(widefelem out, const felem in)
496 {
497 limb tmp0, tmp1, tmp2;
498 tmp0 = 2 * in[0];
499 tmp1 = 2 * in[1];
500 tmp2 = 2 * in[2];
501 out[0] = ((widelimb) in[0]) * in[0];
502 out[1] = ((widelimb) in[0]) * tmp1;
503 out[2] = ((widelimb) in[0]) * tmp2 + ((widelimb) in[1]) * in[1];
504 out[3] = ((widelimb) in[3]) * tmp0 + ((widelimb) in[1]) * tmp2;
505 out[4] = ((widelimb) in[3]) * tmp1 + ((widelimb) in[2]) * in[2];
506 out[5] = ((widelimb) in[3]) * tmp2;
507 out[6] = ((widelimb) in[3]) * in[3];
508 }
509
510 /* Multiply two field elements: out = in1 * in2 */
511 static void felem_mul(widefelem out, const felem in1, const felem in2)
512 {
513 out[0] = ((widelimb) in1[0]) * in2[0];
514 out[1] = ((widelimb) in1[0]) * in2[1] + ((widelimb) in1[1]) * in2[0];
515 out[2] = ((widelimb) in1[0]) * in2[2] + ((widelimb) in1[1]) * in2[1] +
516 ((widelimb) in1[2]) * in2[0];
517 out[3] = ((widelimb) in1[0]) * in2[3] + ((widelimb) in1[1]) * in2[2] +
518 ((widelimb) in1[2]) * in2[1] + ((widelimb) in1[3]) * in2[0];
519 out[4] = ((widelimb) in1[1]) * in2[3] + ((widelimb) in1[2]) * in2[2] +
520 ((widelimb) in1[3]) * in2[1];
521 out[5] = ((widelimb) in1[2]) * in2[3] + ((widelimb) in1[3]) * in2[2];
522 out[6] = ((widelimb) in1[3]) * in2[3];
523 }
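
/*-
 * Both felem_square and felem_mul are plain schoolbook products of the
 * polynomials a(t) and b(t) with t = 2^56:
 *     out[k] = \sum_{i+j=k} in1[i] * in2[j],   0 <= k <= 6.
 * Evaluated at t = 2^56 this equals the exact integer product; carries
 * between limbs are deferred entirely to felem_reduce().
 */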
524
525 /*-
526 * Reduce seven 128-bit coefficients to four 64-bit coefficients.
527 * Requires in[i] < 2^126,
528 * ensures out[0] < 2^56, out[1] < 2^56, out[2] < 2^56, out[3] <= 2^56 + 2^16 */
529 static void felem_reduce(felem out, const widefelem in)
530 {
531 static const widelimb two127p15 = (((widelimb) 1) << 127) +
532 (((widelimb) 1) << 15);
533 static const widelimb two127m71 = (((widelimb) 1) << 127) -
534 (((widelimb) 1) << 71);
535 static const widelimb two127m71m55 = (((widelimb) 1) << 127) -
536 (((widelimb) 1) << 71) - (((widelimb) 1) << 55);
537 widelimb output[5];
538
539 /* Add 0 mod 2^224-2^96+1 to ensure all differences are positive */
540 output[0] = in[0] + two127p15;
541 output[1] = in[1] + two127m71m55;
542 output[2] = in[2] + two127m71;
543 output[3] = in[3];
544 output[4] = in[4];
545
546 /* Eliminate in[4], in[5], in[6] */
547 output[4] += in[6] >> 16;
548 output[3] += (in[6] & 0xffff) << 40;
549 output[2] -= in[6];
550
551 output[3] += in[5] >> 16;
552 output[2] += (in[5] & 0xffff) << 40;
553 output[1] -= in[5];
554
555 output[2] += output[4] >> 16;
556 output[1] += (output[4] & 0xffff) << 40;
557 output[0] -= output[4];
558
559 /* Carry 2 -> 3 -> 4 */
560 output[3] += output[2] >> 56;
561 output[2] &= 0x00ffffffffffffff;
562
563 output[4] = output[3] >> 56;
564 output[3] &= 0x00ffffffffffffff;
565
566 /* Now output[2] < 2^56, output[3] < 2^56, output[4] < 2^72 */
567
568 /* Eliminate output[4] */
569 output[2] += output[4] >> 16;
570 /* output[2] < 2^56 + 2^56 = 2^57 */
571 output[1] += (output[4] & 0xffff) << 40;
572 output[0] -= output[4];
573
574 /* Carry 0 -> 1 -> 2 -> 3 */
575 output[1] += output[0] >> 56;
576 out[0] = output[0] & 0x00ffffffffffffff;
577
578 output[2] += output[1] >> 56;
579 /* output[2] < 2^57 + 2^72 */
580 out[1] = output[1] & 0x00ffffffffffffff;
581 output[3] += output[2] >> 56;
582 /* output[3] <= 2^56 + 2^16 */
583 out[2] = output[2] & 0x00ffffffffffffff;
584
585 /*-
586 * out[0] < 2^56, out[1] < 2^56, out[2] < 2^56,
587 * out[3] <= 2^56 + 2^16 (due to final carry),
588 * so out < 2*p
589 */
590 out[3] = output[3];
591 }
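
/*-
 * The folding above uses 2^224 == 2^96 - 1 (mod p).  For example the in[6]
 * term sits at weight 2^336 = 2^112 * 2^224, so
 *     in[6]*2^336 == in[6]*2^208 - in[6]*2^112 (mod p),
 * and since 2^208 = 2^168 * 2^40 the positive part is split as
 * (in[6] >> 16) into limb 4 plus ((in[6] & 0xffff) << 40) into limb 3,
 * while the negative part is subtracted from limb 2.
 */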
592
593 static void felem_square_reduce(felem out, const felem in)
594 {
595 widefelem tmp;
596 felem_square(tmp, in);
597 felem_reduce(out, tmp);
598 }
599
600 static void felem_mul_reduce(felem out, const felem in1, const felem in2)
601 {
602 widefelem tmp;
603 felem_mul(tmp, in1, in2);
604 felem_reduce(out, tmp);
605 }
606
607 /*
608 * Reduce to unique minimal representation. Requires 0 <= in < 2*p (always
609 * call felem_reduce first)
610 */
611 static void felem_contract(felem out, const felem in)
612 {
613 static const int64_t two56 = ((limb) 1) << 56;
614 /* 0 <= in < 2*p, p = 2^224 - 2^96 + 1 */
615 /* if in > p , reduce in = in - 2^224 + 2^96 - 1 */
616 int64_t tmp[4], a;
617 tmp[0] = in[0];
618 tmp[1] = in[1];
619 tmp[2] = in[2];
620 tmp[3] = in[3];
621 /* Case 1: a = 1 iff in >= 2^224 */
622 a = (in[3] >> 56);
623 tmp[0] -= a;
624 tmp[1] += a << 40;
625 tmp[3] &= 0x00ffffffffffffff;
626 /*
627 * Case 2: a = 0 iff p <= in < 2^224, i.e., the high 128 bits are all 1
628 * and the lower part is non-zero
629 */
630 a = ((in[3] & in[2] & (in[1] | 0x000000ffffffffff)) + 1) |
631 (((int64_t) (in[0] + (in[1] & 0x000000ffffffffff)) - 1) >> 63);
632 a &= 0x00ffffffffffffff;
633 /* turn a into an all-one mask (if a = 0) or an all-zero mask */
634 a = (a - 1) >> 63;
635 /* subtract 2^224 - 2^96 + 1 if a is all-one */
636 tmp[3] &= a ^ 0xffffffffffffffff;
637 tmp[2] &= a ^ 0xffffffffffffffff;
638 tmp[1] &= (a ^ 0xffffffffffffffff) | 0x000000ffffffffff;
639 tmp[0] -= 1 & a;
640
641 /*
642 * eliminate negative coefficients: if tmp[0] is negative, tmp[1] must be
643 * non-zero, so we only need one step
644 */
645 a = tmp[0] >> 63;
646 tmp[0] += two56 & a;
647 tmp[1] -= 1 & a;
648
649 /* carry 1 -> 2 -> 3 */
650 tmp[2] += tmp[1] >> 56;
651 tmp[1] &= 0x00ffffffffffffff;
652
653 tmp[3] += tmp[2] >> 56;
654 tmp[2] &= 0x00ffffffffffffff;
655
656 /* Now 0 <= out < p */
657 out[0] = tmp[0];
658 out[1] = tmp[1];
659 out[2] = tmp[2];
660 out[3] = tmp[3];
661 }
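
/*-
 * For reference, p itself in this limb representation is
 *     {1, 0x00ffff0000000000, 0x00ffffffffffffff, 0x00ffffffffffffff},
 * which is what the Case 2 test above is matching: limbs 3 and 2 all ones,
 * the top 16 bits of limb 1 all ones, and the remaining low 96 bits not all
 * zero.
 */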
662
663 /*
664 * Get negative value: out = -in
665 * Requires in[i] < 2^63,
666 * ensures out[0] < 2^56, out[1] < 2^56, out[2] < 2^56, out[3] <= 2^56 + 2^16
667 */
668 static void felem_neg(felem out, const felem in)
669 {
670 widefelem tmp;
671
672 memset(tmp, 0, sizeof(tmp));
673 felem_diff_128_64(tmp, in);
674 felem_reduce(out, tmp);
675 }
676
677 /*
678 * Zero-check: returns 1 if input is 0, and 0 otherwise. We know that field
679 * elements are reduced to in < 2^225, so we only need to check three cases:
680 * 0, 2^224 - 2^96 + 1, and 2^225 - 2^97 + 2
681 */
682 static limb felem_is_zero(const felem in)
683 {
684 limb zero, two224m96p1, two225m97p2;
685
686 zero = in[0] | in[1] | in[2] | in[3];
687 zero = (((int64_t) (zero) - 1) >> 63) & 1;
688 two224m96p1 = (in[0] ^ 1) | (in[1] ^ 0x00ffff0000000000)
689 | (in[2] ^ 0x00ffffffffffffff) | (in[3] ^ 0x00ffffffffffffff);
690 two224m96p1 = (((int64_t) (two224m96p1) - 1) >> 63) & 1;
691 two225m97p2 = (in[0] ^ 2) | (in[1] ^ 0x00fffe0000000000)
692 | (in[2] ^ 0x00ffffffffffffff) | (in[3] ^ 0x01ffffffffffffff);
693 two225m97p2 = (((int64_t) (two225m97p2) - 1) >> 63) & 1;
694 return (zero | two224m96p1 | two225m97p2);
695 }
696
697 static int felem_is_zero_int(const void *in)
698 {
699 return (int)(felem_is_zero(in) & ((limb) 1));
700 }
701
702 /* Invert a field element */
703 /* Computation chain copied from djb's code */
704 static void felem_inv(felem out, const felem in)
705 {
706 felem ftmp, ftmp2, ftmp3, ftmp4;
707 widefelem tmp;
708 unsigned i;
709
710 felem_square(tmp, in);
711 felem_reduce(ftmp, tmp); /* 2 */
712 felem_mul(tmp, in, ftmp);
713 felem_reduce(ftmp, tmp); /* 2^2 - 1 */
714 felem_square(tmp, ftmp);
715 felem_reduce(ftmp, tmp); /* 2^3 - 2 */
716 felem_mul(tmp, in, ftmp);
717 felem_reduce(ftmp, tmp); /* 2^3 - 1 */
718 felem_square(tmp, ftmp);
719 felem_reduce(ftmp2, tmp); /* 2^4 - 2 */
720 felem_square(tmp, ftmp2);
721 felem_reduce(ftmp2, tmp); /* 2^5 - 4 */
722 felem_square(tmp, ftmp2);
723 felem_reduce(ftmp2, tmp); /* 2^6 - 8 */
724 felem_mul(tmp, ftmp2, ftmp);
725 felem_reduce(ftmp, tmp); /* 2^6 - 1 */
726 felem_square(tmp, ftmp);
727 felem_reduce(ftmp2, tmp); /* 2^7 - 2 */
728 for (i = 0; i < 5; ++i) { /* 2^12 - 2^6 */
729 felem_square(tmp, ftmp2);
730 felem_reduce(ftmp2, tmp);
731 }
732 felem_mul(tmp, ftmp2, ftmp);
733 felem_reduce(ftmp2, tmp); /* 2^12 - 1 */
734 felem_square(tmp, ftmp2);
735 felem_reduce(ftmp3, tmp); /* 2^13 - 2 */
736 for (i = 0; i < 11; ++i) { /* 2^24 - 2^12 */
737 felem_square(tmp, ftmp3);
738 felem_reduce(ftmp3, tmp);
739 }
740 felem_mul(tmp, ftmp3, ftmp2);
741 felem_reduce(ftmp2, tmp); /* 2^24 - 1 */
742 felem_square(tmp, ftmp2);
743 felem_reduce(ftmp3, tmp); /* 2^25 - 2 */
744 for (i = 0; i < 23; ++i) { /* 2^48 - 2^24 */
745 felem_square(tmp, ftmp3);
746 felem_reduce(ftmp3, tmp);
747 }
748 felem_mul(tmp, ftmp3, ftmp2);
749 felem_reduce(ftmp3, tmp); /* 2^48 - 1 */
750 felem_square(tmp, ftmp3);
751 felem_reduce(ftmp4, tmp); /* 2^49 - 2 */
752 for (i = 0; i < 47; ++i) { /* 2^96 - 2^48 */
753 felem_square(tmp, ftmp4);
754 felem_reduce(ftmp4, tmp);
755 }
756 felem_mul(tmp, ftmp3, ftmp4);
757 felem_reduce(ftmp3, tmp); /* 2^96 - 1 */
758 felem_square(tmp, ftmp3);
759 felem_reduce(ftmp4, tmp); /* 2^97 - 2 */
760 for (i = 0; i < 23; ++i) { /* 2^120 - 2^24 */
761 felem_square(tmp, ftmp4);
762 felem_reduce(ftmp4, tmp);
763 }
764 felem_mul(tmp, ftmp2, ftmp4);
765 felem_reduce(ftmp2, tmp); /* 2^120 - 1 */
766 for (i = 0; i < 6; ++i) { /* 2^126 - 2^6 */
767 felem_square(tmp, ftmp2);
768 felem_reduce(ftmp2, tmp);
769 }
770 felem_mul(tmp, ftmp2, ftmp);
771 felem_reduce(ftmp, tmp); /* 2^126 - 1 */
772 felem_square(tmp, ftmp);
773 felem_reduce(ftmp, tmp); /* 2^127 - 2 */
774 felem_mul(tmp, ftmp, in);
775 felem_reduce(ftmp, tmp); /* 2^127 - 1 */
776 for (i = 0; i < 97; ++i) { /* 2^224 - 2^97 */
777 felem_square(tmp, ftmp);
778 felem_reduce(ftmp, tmp);
779 }
780 felem_mul(tmp, ftmp, ftmp3);
781 felem_reduce(out, tmp); /* 2^224 - 2^96 - 1 */
782 }
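
/*-
 * The chain above computes in^(2^224 - 2^96 - 1) = in^(p-2), i.e. the
 * inverse by Fermat's little theorem, using about 223 squarings and a
 * dozen multiplications.
 */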
783
784 /*
785 * Copy in constant time: if icopy == 1, copy in to out; if icopy == 0,
786 * leave out unchanged.
787 */
788 static void copy_conditional(felem out, const felem in, limb icopy)
789 {
790 unsigned i;
791 /*
792 * icopy is a (64-bit) 0 or 1, so copy is either all-zero or all-one
793 */
794 const limb copy = -icopy;
795 for (i = 0; i < 4; ++i) {
796 const limb tmp = copy & (in[i] ^ out[i]);
797 out[i] ^= tmp;
798 }
799 }
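
/*-
 * Worked through: icopy == 1 gives copy == ~0, so out[i] ^= (in[i] ^ out[i])
 * leaves out[i] == in[i]; icopy == 0 gives copy == 0 and out is untouched.
 * Either way the memory access pattern is identical, hence constant time.
 */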
800
801 /******************************************************************************/
802 /*-
803 * ELLIPTIC CURVE POINT OPERATIONS
804 *
805 * Points are represented in Jacobian projective coordinates:
806 * (X, Y, Z) corresponds to the affine point (X/Z^2, Y/Z^3),
807 * or to the point at infinity if Z == 0.
808 *
809 */
810
811 /*-
812 * Double an elliptic curve point:
813 * (X', Y', Z') = 2 * (X, Y, Z), where
814 * X' = (3 * (X - Z^2) * (X + Z^2))^2 - 8 * X * Y^2
815 * Y' = 3 * (X - Z^2) * (X + Z^2) * (4 * X * Y^2 - X') - 8 * Y^4
816 * Z' = (Y + Z)^2 - Y^2 - Z^2 = 2 * Y * Z
817 * Outputs can equal corresponding inputs, i.e., x_out == x_in is allowed,
818 * while x_out == y_in is not (maybe this works, but it's not tested).
819 */
820 static void
821 point_double(felem x_out, felem y_out, felem z_out,
822 const felem x_in, const felem y_in, const felem z_in)
823 {
824 widefelem tmp, tmp2;
825 felem delta, gamma, beta, alpha, ftmp, ftmp2;
826
827 felem_assign(ftmp, x_in);
828 felem_assign(ftmp2, x_in);
829
830 /* delta = z^2 */
831 felem_square(tmp, z_in);
832 felem_reduce(delta, tmp);
833
834 /* gamma = y^2 */
835 felem_square(tmp, y_in);
836 felem_reduce(gamma, tmp);
837
838 /* beta = x*gamma */
839 felem_mul(tmp, x_in, gamma);
840 felem_reduce(beta, tmp);
841
842 /* alpha = 3*(x-delta)*(x+delta) */
843 felem_diff(ftmp, delta);
844 /* ftmp[i] < 2^57 + 2^58 + 2 < 2^59 */
845 felem_sum(ftmp2, delta);
846 /* ftmp2[i] < 2^57 + 2^57 = 2^58 */
847 felem_scalar(ftmp2, 3);
848 /* ftmp2[i] < 3 * 2^58 < 2^60 */
849 felem_mul(tmp, ftmp, ftmp2);
850 /* tmp[i] < 2^60 * 2^59 * 4 = 2^121 */
851 felem_reduce(alpha, tmp);
852
853 /* x' = alpha^2 - 8*beta */
854 felem_square(tmp, alpha);
855 /* tmp[i] < 4 * 2^57 * 2^57 = 2^116 */
856 felem_assign(ftmp, beta);
857 felem_scalar(ftmp, 8);
858 /* ftmp[i] < 8 * 2^57 = 2^60 */
859 felem_diff_128_64(tmp, ftmp);
860 /* tmp[i] < 2^116 + 2^64 + 8 < 2^117 */
861 felem_reduce(x_out, tmp);
862
863 /* z' = (y + z)^2 - gamma - delta */
864 felem_sum(delta, gamma);
865 /* delta[i] < 2^57 + 2^57 = 2^58 */
866 felem_assign(ftmp, y_in);
867 felem_sum(ftmp, z_in);
868 /* ftmp[i] < 2^57 + 2^57 = 2^58 */
869 felem_square(tmp, ftmp);
870 /* tmp[i] < 4 * 2^58 * 2^58 = 2^118 */
871 felem_diff_128_64(tmp, delta);
872 /* tmp[i] < 2^118 + 2^64 + 8 < 2^119 */
873 felem_reduce(z_out, tmp);
874
875 /* y' = alpha*(4*beta - x') - 8*gamma^2 */
876 felem_scalar(beta, 4);
877 /* beta[i] < 4 * 2^57 = 2^59 */
878 felem_diff(beta, x_out);
879 /* beta[i] < 2^59 + 2^58 + 2 < 2^60 */
880 felem_mul(tmp, alpha, beta);
881 /* tmp[i] < 4 * 2^57 * 2^60 = 2^119 */
882 felem_square(tmp2, gamma);
883 /* tmp2[i] < 4 * 2^57 * 2^57 = 2^116 */
884 widefelem_scalar(tmp2, 8);
885 /* tmp2[i] < 8 * 2^116 = 2^119 */
886 widefelem_diff(tmp, tmp2);
887 /* tmp[i] < 2^119 + 2^120 < 2^121 */
888 felem_reduce(y_out, tmp);
889 }
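
/*-
 * Note on the formula: alpha = 3*(X - Z^2)*(X + Z^2) = 3*X^2 - 3*Z^4, which
 * equals the generic 3*X^2 + a*Z^4 only because a = -3 for P-224; this is
 * why ossl_ec_GFp_nistp224_group_init() sets a_is_minus3 and why
 * ossl_ec_GFp_nistp224_group_set_curve() rejects any other curve parameters.
 */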
890
891 /*-
892 * Add two elliptic curve points:
893 * (X_1, Y_1, Z_1) + (X_2, Y_2, Z_2) = (X_3, Y_3, Z_3), where
894 * X_3 = (Z_1^3 * Y_2 - Z_2^3 * Y_1)^2 - (Z_1^2 * X_2 - Z_2^2 * X_1)^3 -
895 * 2 * Z_2^2 * X_1 * (Z_1^2 * X_2 - Z_2^2 * X_1)^2
896 * Y_3 = (Z_1^3 * Y_2 - Z_2^3 * Y_1) * (Z_2^2 * X_1 * (Z_1^2 * X_2 - Z_2^2 * X_1)^2 - X_3) -
897 * Z_2^3 * Y_1 * (Z_1^2 * X_2 - Z_2^2 * X_1)^3
898 * Z_3 = (Z_1^2 * X_2 - Z_2^2 * X_1) * (Z_1 * Z_2)
899 *
900 * This runs faster if 'mixed' is set, which requires Z_2 = 1 or Z_2 = 0.
901 */
902
903 /*
904 * This function is not entirely constant-time: it includes a branch for
905 * checking whether the two input points are equal (and neither is the
906 * point at infinity). This case never happens during single point
907 * multiplication, so there is no timing leak for ECDH or ECDSA signing.
908 */
909 static void point_add(felem x3, felem y3, felem z3,
910 const felem x1, const felem y1, const felem z1,
911 const int mixed, const felem x2, const felem y2,
912 const felem z2)
913 {
914 felem ftmp, ftmp2, ftmp3, ftmp4, ftmp5, x_out, y_out, z_out;
915 widefelem tmp, tmp2;
916 limb z1_is_zero, z2_is_zero, x_equal, y_equal;
917 limb points_equal;
918
919 if (!mixed) {
920 /* ftmp2 = z2^2 */
921 felem_square(tmp, z2);
922 felem_reduce(ftmp2, tmp);
923
924 /* ftmp4 = z2^3 */
925 felem_mul(tmp, ftmp2, z2);
926 felem_reduce(ftmp4, tmp);
927
928 /* ftmp4 = z2^3*y1 */
929 felem_mul(tmp2, ftmp4, y1);
930 felem_reduce(ftmp4, tmp2);
931
932 /* ftmp2 = z2^2*x1 */
933 felem_mul(tmp2, ftmp2, x1);
934 felem_reduce(ftmp2, tmp2);
935 } else {
936 /*
937 * We'll assume z2 = 1 (special case z2 = 0 is handled later)
938 */
939
940 /* ftmp4 = z2^3*y1 */
941 felem_assign(ftmp4, y1);
942
943 /* ftmp2 = z2^2*x1 */
944 felem_assign(ftmp2, x1);
945 }
946
947 /* ftmp = z1^2 */
948 felem_square(tmp, z1);
949 felem_reduce(ftmp, tmp);
950
951 /* ftmp3 = z1^3 */
952 felem_mul(tmp, ftmp, z1);
953 felem_reduce(ftmp3, tmp);
954
955 /* tmp = z1^3*y2 */
956 felem_mul(tmp, ftmp3, y2);
957 /* tmp[i] < 4 * 2^57 * 2^57 = 2^116 */
958
959 /* ftmp3 = z1^3*y2 - z2^3*y1 */
960 felem_diff_128_64(tmp, ftmp4);
961 /* tmp[i] < 2^116 + 2^64 + 8 < 2^117 */
962 felem_reduce(ftmp3, tmp);
963
964 /* tmp = z1^2*x2 */
965 felem_mul(tmp, ftmp, x2);
966 /* tmp[i] < 4 * 2^57 * 2^57 = 2^116 */
967
968 /* ftmp = z1^2*x2 - z2^2*x1 */
969 felem_diff_128_64(tmp, ftmp2);
970 /* tmp[i] < 2^116 + 2^64 + 8 < 2^117 */
971 felem_reduce(ftmp, tmp);
972
973 /*
974 * The formulae are incorrect if the points are equal in affine coordinates,
975 * i.e. (X_1, Y_1) == (X_2, Y_2), so we check for this and fall back to
976 * doubling when it happens.
977 *
978 * We use bitwise operations to avoid potential side-channels introduced by
979 * the short-circuiting behaviour of boolean operators.
980 */
981 x_equal = felem_is_zero(ftmp);
982 y_equal = felem_is_zero(ftmp3);
983 /*
984 * The special case of either point being the point at infinity (z1 and/or
985 * z2 are zero), is handled separately later on in this function, so we
986 * avoid jumping to point_double here in those special cases.
987 */
988 z1_is_zero = felem_is_zero(z1);
989 z2_is_zero = felem_is_zero(z2);
990
991 /*
992 * Compared to `ecp_nistp256.c` and `ecp_nistp521.c`, in this
993 * specific implementation `felem_is_zero()` returns truth as `0x1`
994 * (rather than `0xff..ff`).
995 *
996 * This implies that `~true` in this implementation becomes
997 * `0xff..fe` (rather than `0x0`): for this reason, to be used in
998 * the if expression, we mask out only the last bit in the next
999 * line.
1000 */
1001 points_equal = (x_equal & y_equal & (~z1_is_zero) & (~z2_is_zero)) & 1;
1002
1003 if (points_equal) {
1004 /*
1005 * This is obviously not constant-time but, as mentioned before, this
1006 * case never happens during single point multiplication, so there is no
1007 * timing leak for ECDH or ECDSA signing.
1008 */
1009 point_double(x3, y3, z3, x1, y1, z1);
1010 return;
1011 }
1012
1013 /* ftmp5 = z1*z2 */
1014 if (!mixed) {
1015 felem_mul(tmp, z1, z2);
1016 felem_reduce(ftmp5, tmp);
1017 } else {
1018 /* special case z2 = 0 is handled later */
1019 felem_assign(ftmp5, z1);
1020 }
1021
1022 /* z_out = (z1^2*x2 - z2^2*x1)*(z1*z2) */
1023 felem_mul(tmp, ftmp, ftmp5);
1024 felem_reduce(z_out, tmp);
1025
1026 /* ftmp = (z1^2*x2 - z2^2*x1)^2 */
1027 felem_assign(ftmp5, ftmp);
1028 felem_square(tmp, ftmp);
1029 felem_reduce(ftmp, tmp);
1030
1031 /* ftmp5 = (z1^2*x2 - z2^2*x1)^3 */
1032 felem_mul(tmp, ftmp, ftmp5);
1033 felem_reduce(ftmp5, tmp);
1034
1035 /* ftmp2 = z2^2*x1*(z1^2*x2 - z2^2*x1)^2 */
1036 felem_mul(tmp, ftmp2, ftmp);
1037 felem_reduce(ftmp2, tmp);
1038
1039 /* tmp = z2^3*y1*(z1^2*x2 - z2^2*x1)^3 */
1040 felem_mul(tmp, ftmp4, ftmp5);
1041 /* tmp[i] < 4 * 2^57 * 2^57 = 2^116 */
1042
1043 /* tmp2 = (z1^3*y2 - z2^3*y1)^2 */
1044 felem_square(tmp2, ftmp3);
1045 /* tmp2[i] < 4 * 2^57 * 2^57 < 2^116 */
1046
1047 /* tmp2 = (z1^3*y2 - z2^3*y1)^2 - (z1^2*x2 - z2^2*x1)^3 */
1048 felem_diff_128_64(tmp2, ftmp5);
1049 /* tmp2[i] < 2^116 + 2^64 + 8 < 2^117 */
1050
1051 /* ftmp5 = 2*z2^2*x1*(z1^2*x2 - z2^2*x1)^2 */
1052 felem_assign(ftmp5, ftmp2);
1053 felem_scalar(ftmp5, 2);
1054 /* ftmp5[i] < 2 * 2^57 = 2^58 */
1055
1056 /*-
1057 * x_out = (z1^3*y2 - z2^3*y1)^2 - (z1^2*x2 - z2^2*x1)^3 -
1058 * 2*z2^2*x1*(z1^2*x2 - z2^2*x1)^2
1059 */
1060 felem_diff_128_64(tmp2, ftmp5);
1061 /* tmp2[i] < 2^117 + 2^64 + 8 < 2^118 */
1062 felem_reduce(x_out, tmp2);
1063
1064 /* ftmp2 = z2^2*x1*(z1^2*x2 - z2^2*x1)^2 - x_out */
1065 felem_diff(ftmp2, x_out);
1066 /* ftmp2[i] < 2^57 + 2^58 + 2 < 2^59 */
1067
1068 /*
1069 * tmp2 = (z1^3*y2 - z2^3*y1)*(z2^2*x1*(z1^2*x2 - z2^2*x1)^2 - x_out)
1070 */
1071 felem_mul(tmp2, ftmp3, ftmp2);
1072 /* tmp2[i] < 4 * 2^57 * 2^59 = 2^118 */
1073
1074 /*-
1075 * y_out = (z1^3*y2 - z2^3*y1)*(z2^2*x1*(z1^2*x2 - z2^2*x1)^2 - x_out) -
1076 * z2^3*y1*(z1^2*x2 - z2^2*x1)^3
1077 */
1078 widefelem_diff(tmp2, tmp);
1079 /* tmp2[i] < 2^118 + 2^120 < 2^121 */
1080 felem_reduce(y_out, tmp2);
1081
1082 /*
1083 * the result (x_out, y_out, z_out) is incorrect if one of the inputs is
1084 * the point at infinity, so we need to check for this separately
1085 */
1086
1087 /*
1088 * if point 1 is at infinity, copy point 2 to output, and vice versa
1089 */
1090 copy_conditional(x_out, x2, z1_is_zero);
1091 copy_conditional(x_out, x1, z2_is_zero);
1092 copy_conditional(y_out, y2, z1_is_zero);
1093 copy_conditional(y_out, y1, z2_is_zero);
1094 copy_conditional(z_out, z2, z1_is_zero);
1095 copy_conditional(z_out, z1, z2_is_zero);
1096 felem_assign(x3, x_out);
1097 felem_assign(y3, y_out);
1098 felem_assign(z3, z_out);
1099 }
1100
1101 /*
1102 * select_point selects the |idx|th point from a precomputation table and
1103 * copies it to out.
1104 * The pre_comp array argument must contain |size| point entries.
1105 */
1106 static void select_point(const u64 idx, unsigned int size,
1107 const felem pre_comp[][3], felem out[3])
1108 {
1109 unsigned i, j;
1110 limb *outlimbs = &out[0][0];
1111
1112 memset(out, 0, sizeof(*out) * 3);
1113 for (i = 0; i < size; i++) {
1114 const limb *inlimbs = &pre_comp[i][0][0];
1115 u64 mask = i ^ idx;
1116 mask |= mask >> 4;
1117 mask |= mask >> 2;
1118 mask |= mask >> 1;
1119 mask &= 1;
1120 mask--;
1121 for (j = 0; j < 4 * 3; j++)
1122 outlimbs[j] |= inlimbs[j] & mask;
1123 }
1124 }
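
/*-
 * How the mask works: idx and i both fit in 5 bits here (size <= 17), so
 * OR-folding i ^ idx down by 4, 2 and 1 collects any difference into bit 0.
 * For i == idx that bit is 0 and 'mask--' wraps to all ones, so exactly one
 * entry's limbs are OR-ed into out; for every other i the mask becomes 0.
 */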
1125
1126 /* get_bit returns the |i|th bit in |in| */
1127 static char get_bit(const felem_bytearray in, unsigned i)
1128 {
1129 if (i >= 224)
1130 return 0;
1131 return (in[i >> 3] >> (i & 7)) & 1;
1132 }
1133
1134 /*
1135 * Interleaved point multiplication using precomputed point multiples: The
1136 * small point multiples 0*P, 1*P, ..., 16*P are in pre_comp[], the scalars
1137 * in scalars[]. If g_scalar is non-NULL, we also add this multiple of the
1138 * generator, using certain (large) precomputed multiples in g_pre_comp.
1139 * Output point (X, Y, Z) is stored in x_out, y_out, z_out
1140 */
1141 static void batch_mul(felem x_out, felem y_out, felem z_out,
1142 const felem_bytearray scalars[],
1143 const unsigned num_points, const u8 *g_scalar,
1144 const int mixed, const felem pre_comp[][17][3],
1145 const felem g_pre_comp[2][16][3])
1146 {
1147 int i, skip;
1148 unsigned num;
1149 unsigned gen_mul = (g_scalar != NULL);
1150 felem nq[3], tmp[4];
1151 u64 bits;
1152 u8 sign, digit;
1153
1154 /* set nq to the point at infinity */
1155 memset(nq, 0, sizeof(nq));
1156
1157 /*
1158 * Loop over all scalars msb-to-lsb, interleaving additions of multiples
1159 * of the generator (two in each of the last 28 rounds) and additions of
1160 * the other points' multiples (every 5th round).
1161 */
1162 skip = 1; /* save two point operations in the first
1163 * round */
1164 for (i = (num_points ? 220 : 27); i >= 0; --i) {
1165 /* double */
1166 if (!skip)
1167 point_double(nq[0], nq[1], nq[2], nq[0], nq[1], nq[2]);
1168
1169 /* add multiples of the generator */
1170 if (gen_mul && (i <= 27)) {
1171 /* first, look 28 bits upwards */
1172 bits = get_bit(g_scalar, i + 196) << 3;
1173 bits |= get_bit(g_scalar, i + 140) << 2;
1174 bits |= get_bit(g_scalar, i + 84) << 1;
1175 bits |= get_bit(g_scalar, i + 28);
1176 /* select the point to add, in constant time */
1177 select_point(bits, 16, g_pre_comp[1], tmp);
1178
1179 if (!skip) {
1180 /* value 1 below is argument for "mixed" */
1181 point_add(nq[0], nq[1], nq[2],
1182 nq[0], nq[1], nq[2], 1, tmp[0], tmp[1], tmp[2]);
1183 } else {
1184 memcpy(nq, tmp, 3 * sizeof(felem));
1185 skip = 0;
1186 }
1187
1188 /* second, look at the current position */
1189 bits = get_bit(g_scalar, i + 168) << 3;
1190 bits |= get_bit(g_scalar, i + 112) << 2;
1191 bits |= get_bit(g_scalar, i + 56) << 1;
1192 bits |= get_bit(g_scalar, i);
1193 /* select the point to add, in constant time */
1194 select_point(bits, 16, g_pre_comp[0], tmp);
1195 point_add(nq[0], nq[1], nq[2],
1196 nq[0], nq[1], nq[2],
1197 1 /* mixed */ , tmp[0], tmp[1], tmp[2]);
1198 }
1199
1200 /* do other additions every 5 doublings */
1201 if (num_points && (i % 5 == 0)) {
1202 /* loop over all scalars */
1203 for (num = 0; num < num_points; ++num) {
1204 bits = get_bit(scalars[num], i + 4) << 5;
1205 bits |= get_bit(scalars[num], i + 3) << 4;
1206 bits |= get_bit(scalars[num], i + 2) << 3;
1207 bits |= get_bit(scalars[num], i + 1) << 2;
1208 bits |= get_bit(scalars[num], i) << 1;
1209 bits |= get_bit(scalars[num], i - 1);
1210 ossl_ec_GFp_nistp_recode_scalar_bits(&sign, &digit, bits);
1211
1212 /* select the point to add or subtract */
1213 select_point(digit, 17, pre_comp[num], tmp);
1214 felem_neg(tmp[3], tmp[1]); /* (X, -Y, Z) is the negative
1215 * point */
1216 copy_conditional(tmp[1], tmp[3], sign);
1217
1218 if (!skip) {
1219 point_add(nq[0], nq[1], nq[2],
1220 nq[0], nq[1], nq[2],
1221 mixed, tmp[0], tmp[1], tmp[2]);
1222 } else {
1223 memcpy(nq, tmp, 3 * sizeof(felem));
1224 skip = 0;
1225 }
1226 }
1227 }
1228 }
1229 felem_assign(x_out, nq[0]);
1230 felem_assign(y_out, nq[1]);
1231 felem_assign(z_out, nq[2]);
1232 }
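
/*-
 * A quick sanity check of the generator schedule: in round i (27 >= i >= 0)
 * bit i+56 of g_scalar selects table entries containing 2^56*G (bit 1 of the
 * index), and the remaining i doubling rounds scale that contribution by
 * 2^i, giving 2^(i+56)*G as required; the other seven bit positions work the
 * same way with the weights listed in the g_pre_comp table comment.
 */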
1233
1234 /******************************************************************************/
1235 /*
1236 * FUNCTIONS TO MANAGE PRECOMPUTATION
1237 */
1238
1239 static NISTP224_PRE_COMP *nistp224_pre_comp_new(void)
1240 {
1241 NISTP224_PRE_COMP *ret = OPENSSL_zalloc(sizeof(*ret));
1242
1243 if (!ret) {
1244 ERR_raise(ERR_LIB_EC, ERR_R_MALLOC_FAILURE);
1245 return ret;
1246 }
1247
1248 ret->references = 1;
1249
1250 ret->lock = CRYPTO_THREAD_lock_new();
1251 if (ret->lock == NULL) {
1252 ERR_raise(ERR_LIB_EC, ERR_R_MALLOC_FAILURE);
1253 OPENSSL_free(ret);
1254 return NULL;
1255 }
1256 return ret;
1257 }
1258
1259 NISTP224_PRE_COMP *EC_nistp224_pre_comp_dup(NISTP224_PRE_COMP *p)
1260 {
1261 int i;
1262 if (p != NULL)
1263 CRYPTO_UP_REF(&p->references, &i, p->lock);
1264 return p;
1265 }
1266
1267 void EC_nistp224_pre_comp_free(NISTP224_PRE_COMP *p)
1268 {
1269 int i;
1270
1271 if (p == NULL)
1272 return;
1273
1274 CRYPTO_DOWN_REF(&p->references, &i, p->lock);
1275 REF_PRINT_COUNT("EC_nistp224", p);
1276 if (i > 0)
1277 return;
1278 REF_ASSERT_ISNT(i < 0);
1279
1280 CRYPTO_THREAD_lock_free(p->lock);
1281 OPENSSL_free(p);
1282 }
1283
1284 /******************************************************************************/
1285 /*
1286 * OPENSSL EC_METHOD FUNCTIONS
1287 */
1288
1289 int ossl_ec_GFp_nistp224_group_init(EC_GROUP *group)
1290 {
1291 int ret;
1292 ret = ossl_ec_GFp_simple_group_init(group);
1293 group->a_is_minus3 = 1;
1294 return ret;
1295 }
1296
1297 int ossl_ec_GFp_nistp224_group_set_curve(EC_GROUP *group, const BIGNUM *p,
1298 const BIGNUM *a, const BIGNUM *b,
1299 BN_CTX *ctx)
1300 {
1301 int ret = 0;
1302 BIGNUM *curve_p, *curve_a, *curve_b;
1303 #ifndef FIPS_MODULE
1304 BN_CTX *new_ctx = NULL;
1305
1306 if (ctx == NULL)
1307 ctx = new_ctx = BN_CTX_new();
1308 #endif
1309 if (ctx == NULL)
1310 return 0;
1311
1312 BN_CTX_start(ctx);
1313 curve_p = BN_CTX_get(ctx);
1314 curve_a = BN_CTX_get(ctx);
1315 curve_b = BN_CTX_get(ctx);
1316 if (curve_b == NULL)
1317 goto err;
1318 BN_bin2bn(nistp224_curve_params[0], sizeof(felem_bytearray), curve_p);
1319 BN_bin2bn(nistp224_curve_params[1], sizeof(felem_bytearray), curve_a);
1320 BN_bin2bn(nistp224_curve_params[2], sizeof(felem_bytearray), curve_b);
1321 if ((BN_cmp(curve_p, p)) || (BN_cmp(curve_a, a)) || (BN_cmp(curve_b, b))) {
1322 ERR_raise(ERR_LIB_EC, EC_R_WRONG_CURVE_PARAMETERS);
1323 goto err;
1324 }
1325 group->field_mod_func = BN_nist_mod_224;
1326 ret = ossl_ec_GFp_simple_group_set_curve(group, p, a, b, ctx);
1327 err:
1328 BN_CTX_end(ctx);
1329 #ifndef FIPS_MODULE
1330 BN_CTX_free(new_ctx);
1331 #endif
1332 return ret;
1333 }
1334
1335 /*
1336 * Takes the Jacobian coordinates (X, Y, Z) of a point and returns (X', Y') =
1337 * (X/Z^2, Y/Z^3)
1338 */
1339 int ossl_ec_GFp_nistp224_point_get_affine_coordinates(const EC_GROUP *group,
1340 const EC_POINT *point,
1341 BIGNUM *x, BIGNUM *y,
1342 BN_CTX *ctx)
1343 {
1344 felem z1, z2, x_in, y_in, x_out, y_out;
1345 widefelem tmp;
1346
1347 if (EC_POINT_is_at_infinity(group, point)) {
1348 ERR_raise(ERR_LIB_EC, EC_R_POINT_AT_INFINITY);
1349 return 0;
1350 }
1351 if ((!BN_to_felem(x_in, point->X)) || (!BN_to_felem(y_in, point->Y)) ||
1352 (!BN_to_felem(z1, point->Z)))
1353 return 0;
1354 felem_inv(z2, z1);
1355 felem_square(tmp, z2);
1356 felem_reduce(z1, tmp);
1357 felem_mul(tmp, x_in, z1);
1358 felem_reduce(x_in, tmp);
1359 felem_contract(x_out, x_in);
1360 if (x != NULL) {
1361 if (!felem_to_BN(x, x_out)) {
1362 ERR_raise(ERR_LIB_EC, ERR_R_BN_LIB);
1363 return 0;
1364 }
1365 }
1366 felem_mul(tmp, z1, z2);
1367 felem_reduce(z1, tmp);
1368 felem_mul(tmp, y_in, z1);
1369 felem_reduce(y_in, tmp);
1370 felem_contract(y_out, y_in);
1371 if (y != NULL) {
1372 if (!felem_to_BN(y, y_out)) {
1373 ERR_raise(ERR_LIB_EC, ERR_R_BN_LIB);
1374 return 0;
1375 }
1376 }
1377 return 1;
1378 }
1379
1380 static void make_points_affine(size_t num, felem points[ /* num */ ][3],
1381 felem tmp_felems[ /* num+1 */ ])
1382 {
1383 /*
1384 * Runs in constant time, unless an input is the point at infinity (which
1385 * normally shouldn't happen).
1386 */
1387 ossl_ec_GFp_nistp_points_make_affine_internal(num,
1388 points,
1389 sizeof(felem),
1390 tmp_felems,
1391 (void (*)(void *))felem_one,
1392 felem_is_zero_int,
1393 (void (*)(void *, const void *))
1394 felem_assign,
1395 (void (*)(void *, const void *))
1396 felem_square_reduce, (void (*)
1397 (void *,
1398 const void
1399 *,
1400 const void
1401 *))
1402 felem_mul_reduce,
1403 (void (*)(void *, const void *))
1404 felem_inv,
1405 (void (*)(void *, const void *))
1406 felem_contract);
1407 }
1408
1409 /*
1410 * Computes scalar*generator + \sum scalars[i]*points[i], ignoring NULL
1411 * values. The result is stored in r (r can equal one of the inputs).
1412 */
1413 int ossl_ec_GFp_nistp224_points_mul(const EC_GROUP *group, EC_POINT *r,
1414 const BIGNUM *scalar, size_t num,
1415 const EC_POINT *points[],
1416 const BIGNUM *scalars[], BN_CTX *ctx)
1417 {
1418 int ret = 0;
1419 int j;
1420 unsigned i;
1421 int mixed = 0;
1422 BIGNUM *x, *y, *z, *tmp_scalar;
1423 felem_bytearray g_secret;
1424 felem_bytearray *secrets = NULL;
1425 felem (*pre_comp)[17][3] = NULL;
1426 felem *tmp_felems = NULL;
1427 int num_bytes;
1428 int have_pre_comp = 0;
1429 size_t num_points = num;
1430 felem x_in, y_in, z_in, x_out, y_out, z_out;
1431 NISTP224_PRE_COMP *pre = NULL;
1432 const felem(*g_pre_comp)[16][3] = NULL;
1433 EC_POINT *generator = NULL;
1434 const EC_POINT *p = NULL;
1435 const BIGNUM *p_scalar = NULL;
1436
1437 BN_CTX_start(ctx);
1438 x = BN_CTX_get(ctx);
1439 y = BN_CTX_get(ctx);
1440 z = BN_CTX_get(ctx);
1441 tmp_scalar = BN_CTX_get(ctx);
1442 if (tmp_scalar == NULL)
1443 goto err;
1444
1445 if (scalar != NULL) {
1446 pre = group->pre_comp.nistp224;
1447 if (pre)
1448 /* we have precomputation, try to use it */
1449 g_pre_comp = (const felem(*)[16][3])pre->g_pre_comp;
1450 else
1451 /* try to use the standard precomputation */
1452 g_pre_comp = &gmul[0];
1453 generator = EC_POINT_new(group);
1454 if (generator == NULL)
1455 goto err;
1456 /* get the generator from precomputation */
1457 if (!felem_to_BN(x, g_pre_comp[0][1][0]) ||
1458 !felem_to_BN(y, g_pre_comp[0][1][1]) ||
1459 !felem_to_BN(z, g_pre_comp[0][1][2])) {
1460 ERR_raise(ERR_LIB_EC, ERR_R_BN_LIB);
1461 goto err;
1462 }
1463 if (!ossl_ec_GFp_simple_set_Jprojective_coordinates_GFp(group,
1464 generator,
1465 x, y, z, ctx))
1466 goto err;
1467 if (0 == EC_POINT_cmp(group, generator, group->generator, ctx))
1468 /* precomputation matches generator */
1469 have_pre_comp = 1;
1470 else
1471 /*
1472 * we don't have valid precomputation: treat the generator as a
1473 * random point
1474 */
1475 num_points = num_points + 1;
1476 }
1477
1478 if (num_points > 0) {
1479 if (num_points >= 3) {
1480 /*
1481 * unless we precompute multiples for just one or two points,
1482 * converting those into affine form is time well spent
1483 */
1484 mixed = 1;
1485 }
1486 secrets = OPENSSL_zalloc(sizeof(*secrets) * num_points);
1487 pre_comp = OPENSSL_zalloc(sizeof(*pre_comp) * num_points);
1488 if (mixed)
1489 tmp_felems =
1490 OPENSSL_malloc(sizeof(felem) * (num_points * 17 + 1));
1491 if ((secrets == NULL) || (pre_comp == NULL)
1492 || (mixed && (tmp_felems == NULL))) {
1493 ERR_raise(ERR_LIB_EC, ERR_R_MALLOC_FAILURE);
1494 goto err;
1495 }
1496
1497 /*
1498 * we treat NULL scalars as 0, and NULL points as points at infinity,
1499 * i.e., they contribute nothing to the linear combination
1500 */
1501 for (i = 0; i < num_points; ++i) {
1502 if (i == num) {
1503 /* the generator */
1504 p = EC_GROUP_get0_generator(group);
1505 p_scalar = scalar;
1506 } else {
1507 /* the i^th point */
1508 p = points[i];
1509 p_scalar = scalars[i];
1510 }
1511 if ((p_scalar != NULL) && (p != NULL)) {
1512 /* reduce scalar to 0 <= scalar < 2^224 */
1513 if ((BN_num_bits(p_scalar) > 224)
1514 || (BN_is_negative(p_scalar))) {
1515 /*
1516 * this is an unusual input, and we don't guarantee
1517 * constant-timeness
1518 */
1519 if (!BN_nnmod(tmp_scalar, p_scalar, group->order, ctx)) {
1520 ERR_raise(ERR_LIB_EC, ERR_R_BN_LIB);
1521 goto err;
1522 }
1523 num_bytes = BN_bn2lebinpad(tmp_scalar,
1524 secrets[i], sizeof(secrets[i]));
1525 } else {
1526 num_bytes = BN_bn2lebinpad(p_scalar,
1527 secrets[i], sizeof(secrets[i]));
1528 }
1529 if (num_bytes < 0) {
1530 ERR_raise(ERR_LIB_EC, ERR_R_BN_LIB);
1531 goto err;
1532 }
1533 /* precompute multiples */
1534 if ((!BN_to_felem(x_out, p->X)) ||
1535 (!BN_to_felem(y_out, p->Y)) ||
1536 (!BN_to_felem(z_out, p->Z)))
1537 goto err;
1538 felem_assign(pre_comp[i][1][0], x_out);
1539 felem_assign(pre_comp[i][1][1], y_out);
1540 felem_assign(pre_comp[i][1][2], z_out);
1541 for (j = 2; j <= 16; ++j) {
1542 if (j & 1) {
1543 point_add(pre_comp[i][j][0], pre_comp[i][j][1],
1544 pre_comp[i][j][2], pre_comp[i][1][0],
1545 pre_comp[i][1][1], pre_comp[i][1][2], 0,
1546 pre_comp[i][j - 1][0],
1547 pre_comp[i][j - 1][1],
1548 pre_comp[i][j - 1][2]);
1549 } else {
1550 point_double(pre_comp[i][j][0], pre_comp[i][j][1],
1551 pre_comp[i][j][2], pre_comp[i][j / 2][0],
1552 pre_comp[i][j / 2][1],
1553 pre_comp[i][j / 2][2]);
1554 }
1555 }
1556 }
1557 }
1558 if (mixed)
1559 make_points_affine(num_points * 17, pre_comp[0], tmp_felems);
1560 }
1561
1562 /* the scalar for the generator */
1563 if ((scalar != NULL) && (have_pre_comp)) {
1564 memset(g_secret, 0, sizeof(g_secret));
1565 /* reduce scalar to 0 <= scalar < 2^224 */
1566 if ((BN_num_bits(scalar) > 224) || (BN_is_negative(scalar))) {
1567 /*
1568 * this is an unusual input, and we don't guarantee
1569 * constant-timeness
1570 */
1571 if (!BN_nnmod(tmp_scalar, scalar, group->order, ctx)) {
1572 ERR_raise(ERR_LIB_EC, ERR_R_BN_LIB);
1573 goto err;
1574 }
1575 num_bytes = BN_bn2lebinpad(tmp_scalar, g_secret, sizeof(g_secret));
1576 } else {
1577 num_bytes = BN_bn2lebinpad(scalar, g_secret, sizeof(g_secret));
1578 }
1579 /* do the multiplication with generator precomputation */
1580 batch_mul(x_out, y_out, z_out,
1581 (const felem_bytearray(*))secrets, num_points,
1582 g_secret,
1583 mixed, (const felem(*)[17][3])pre_comp, g_pre_comp);
1584 } else {
1585 /* do the multiplication without generator precomputation */
1586 batch_mul(x_out, y_out, z_out,
1587 (const felem_bytearray(*))secrets, num_points,
1588 NULL, mixed, (const felem(*)[17][3])pre_comp, NULL);
1589 }
1590 /* reduce the output to its unique minimal representation */
1591 felem_contract(x_in, x_out);
1592 felem_contract(y_in, y_out);
1593 felem_contract(z_in, z_out);
1594 if ((!felem_to_BN(x, x_in)) || (!felem_to_BN(y, y_in)) ||
1595 (!felem_to_BN(z, z_in))) {
1596 ERR_raise(ERR_LIB_EC, ERR_R_BN_LIB);
1597 goto err;
1598 }
1599 ret = ossl_ec_GFp_simple_set_Jprojective_coordinates_GFp(group, r, x, y, z,
1600 ctx);
1601
1602 err:
1603 BN_CTX_end(ctx);
1604 EC_POINT_free(generator);
1605 OPENSSL_free(secrets);
1606 OPENSSL_free(pre_comp);
1607 OPENSSL_free(tmp_felems);
1608 return ret;
1609 }
1610
1611 int ossl_ec_GFp_nistp224_precompute_mult(EC_GROUP *group, BN_CTX *ctx)
1612 {
1613 int ret = 0;
1614 NISTP224_PRE_COMP *pre = NULL;
1615 int i, j;
1616 BIGNUM *x, *y;
1617 EC_POINT *generator = NULL;
1618 felem tmp_felems[32];
1619 #ifndef FIPS_MODULE
1620 BN_CTX *new_ctx = NULL;
1621 #endif
1622
1623 /* throw away old precomputation */
1624 EC_pre_comp_free(group);
1625
1626 #ifndef FIPS_MODULE
1627 if (ctx == NULL)
1628 ctx = new_ctx = BN_CTX_new();
1629 #endif
1630 if (ctx == NULL)
1631 return 0;
1632
1633 BN_CTX_start(ctx);
1634 x = BN_CTX_get(ctx);
1635 y = BN_CTX_get(ctx);
1636 if (y == NULL)
1637 goto err;
1638 /* get the generator */
1639 if (group->generator == NULL)
1640 goto err;
1641 generator = EC_POINT_new(group);
1642 if (generator == NULL)
1643 goto err;
1644 BN_bin2bn(nistp224_curve_params[3], sizeof(felem_bytearray), x);
1645 BN_bin2bn(nistp224_curve_params[4], sizeof(felem_bytearray), y);
1646 if (!EC_POINT_set_affine_coordinates(group, generator, x, y, ctx))
1647 goto err;
1648 if ((pre = nistp224_pre_comp_new()) == NULL)
1649 goto err;
1650 /*
1651 * if the generator is the standard one, use built-in precomputation
1652 */
1653 if (0 == EC_POINT_cmp(group, generator, group->generator, ctx)) {
1654 memcpy(pre->g_pre_comp, gmul, sizeof(pre->g_pre_comp));
1655 goto done;
1656 }
1657 if ((!BN_to_felem(pre->g_pre_comp[0][1][0], group->generator->X)) ||
1658 (!BN_to_felem(pre->g_pre_comp[0][1][1], group->generator->Y)) ||
1659 (!BN_to_felem(pre->g_pre_comp[0][1][2], group->generator->Z)))
1660 goto err;
1661 /*
1662 * compute 2^56*G, 2^112*G, 2^168*G for the first table, 2^28*G, 2^84*G,
1663 * 2^140*G, 2^196*G for the second one
1664 */
1665 for (i = 1; i <= 8; i <<= 1) {
1666 point_double(pre->g_pre_comp[1][i][0], pre->g_pre_comp[1][i][1],
1667 pre->g_pre_comp[1][i][2], pre->g_pre_comp[0][i][0],
1668 pre->g_pre_comp[0][i][1], pre->g_pre_comp[0][i][2]);
1669 for (j = 0; j < 27; ++j) {
1670 point_double(pre->g_pre_comp[1][i][0], pre->g_pre_comp[1][i][1],
1671 pre->g_pre_comp[1][i][2], pre->g_pre_comp[1][i][0],
1672 pre->g_pre_comp[1][i][1], pre->g_pre_comp[1][i][2]);
1673 }
1674 if (i == 8)
1675 break;
1676 point_double(pre->g_pre_comp[0][2 * i][0],
1677 pre->g_pre_comp[0][2 * i][1],
1678 pre->g_pre_comp[0][2 * i][2], pre->g_pre_comp[1][i][0],
1679 pre->g_pre_comp[1][i][1], pre->g_pre_comp[1][i][2]);
1680 for (j = 0; j < 27; ++j) {
1681 point_double(pre->g_pre_comp[0][2 * i][0],
1682 pre->g_pre_comp[0][2 * i][1],
1683 pre->g_pre_comp[0][2 * i][2],
1684 pre->g_pre_comp[0][2 * i][0],
1685 pre->g_pre_comp[0][2 * i][1],
1686 pre->g_pre_comp[0][2 * i][2]);
1687 }
1688 }
1689 for (i = 0; i < 2; i++) {
1690 /* g_pre_comp[i][0] is the point at infinity */
1691 memset(pre->g_pre_comp[i][0], 0, sizeof(pre->g_pre_comp[i][0]));
1692 /* the remaining multiples */
1693 /* 2^56*G + 2^112*G resp. 2^84*G + 2^140*G */
1694 point_add(pre->g_pre_comp[i][6][0], pre->g_pre_comp[i][6][1],
1695 pre->g_pre_comp[i][6][2], pre->g_pre_comp[i][4][0],
1696 pre->g_pre_comp[i][4][1], pre->g_pre_comp[i][4][2],
1697 0, pre->g_pre_comp[i][2][0], pre->g_pre_comp[i][2][1],
1698 pre->g_pre_comp[i][2][2]);
1699 /* 2^56*G + 2^168*G resp. 2^84*G + 2^196*G */
1700 point_add(pre->g_pre_comp[i][10][0], pre->g_pre_comp[i][10][1],
1701 pre->g_pre_comp[i][10][2], pre->g_pre_comp[i][8][0],
1702 pre->g_pre_comp[i][8][1], pre->g_pre_comp[i][8][2],
1703 0, pre->g_pre_comp[i][2][0], pre->g_pre_comp[i][2][1],
1704 pre->g_pre_comp[i][2][2]);
1705 /* 2^112*G + 2^168*G resp. 2^140*G + 2^196*G */
1706 point_add(pre->g_pre_comp[i][12][0], pre->g_pre_comp[i][12][1],
1707 pre->g_pre_comp[i][12][2], pre->g_pre_comp[i][8][0],
1708 pre->g_pre_comp[i][8][1], pre->g_pre_comp[i][8][2],
1709 0, pre->g_pre_comp[i][4][0], pre->g_pre_comp[i][4][1],
1710 pre->g_pre_comp[i][4][2]);
1711 /*
1712 * 2^56*G + 2^112*G + 2^168*G resp. 2^84*G + 2^140*G + 2^196*G
1713 */
1714 point_add(pre->g_pre_comp[i][14][0], pre->g_pre_comp[i][14][1],
1715 pre->g_pre_comp[i][14][2], pre->g_pre_comp[i][12][0],
1716 pre->g_pre_comp[i][12][1], pre->g_pre_comp[i][12][2],
1717 0, pre->g_pre_comp[i][2][0], pre->g_pre_comp[i][2][1],
1718 pre->g_pre_comp[i][2][2]);
1719 for (j = 1; j < 8; ++j) {
1720 /* odd multiples: add G resp. 2^28*G */
1721 point_add(pre->g_pre_comp[i][2 * j + 1][0],
1722 pre->g_pre_comp[i][2 * j + 1][1],
1723 pre->g_pre_comp[i][2 * j + 1][2],
1724 pre->g_pre_comp[i][2 * j][0],
1725 pre->g_pre_comp[i][2 * j][1],
1726 pre->g_pre_comp[i][2 * j][2], 0,
1727 pre->g_pre_comp[i][1][0], pre->g_pre_comp[i][1][1],
1728 pre->g_pre_comp[i][1][2]);
1729 }
1730 }
1731 make_points_affine(31, &(pre->g_pre_comp[0][1]), tmp_felems);
1732
1733 done:
1734 SETPRECOMP(group, nistp224, pre);
1735 pre = NULL;
1736 ret = 1;
1737 err:
1738 BN_CTX_end(ctx);
1739 EC_POINT_free(generator);
1740 #ifndef FIPS_MODULE
1741 BN_CTX_free(new_ctx);
1742 #endif
1743 EC_nistp224_pre_comp_free(pre);
1744 return ret;
1745 }
1746
1747 int ossl_ec_GFp_nistp224_have_precompute_mult(const EC_GROUP *group)
1748 {
1749 return HAVEPRECOMP(group, nistp224);
1750 }