crypto/ec/ecp_nistp224.c

   1 /*
   2  * Copyright 2010-2018 The OpenSSL Project Authors. All Rights Reserved.
   3  *
   4  * Licensed under the Apache License 2.0 (the "License").  You may not use
   5  * this file except in compliance with the License.  You can obtain a copy
   6  * in the file LICENSE in the source distribution or at
   7  * https://www.openssl.org/source/license.html
   8  */
   9
  10 /* Copyright 2011 Google Inc.
  11  *
  12  * Licensed under the Apache License, Version 2.0 (the "License");
  13  *
  14  * you may not use this file except in compliance with the License.
  15  * You may obtain a copy of the License at
  16  *
  17  *     http://www.apache.org/licenses/LICENSE-2.0
  18  *
  19  *  Unless required by applicable law or agreed to in writing, software
  20  *  distributed under the License is distributed on an "AS IS" BASIS,
  21  *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  22  *  See the License for the specific language governing permissions and
  23  *  limitations under the License.
  24  */
  25
  26 /*
  27  * ECDSA low level APIs are deprecated for public use, but still ok for
  28  * internal use.
  29  */
  30 #include "internal/deprecated.h"
  31
  32 /*
  33  * A 64-bit implementation of the NIST P-224 elliptic curve point multiplication
  34  *
  35  * Inspired by Daniel J. Bernstein's public domain nistp224 implementation
  36  * and Adam Langley's public domain 64-bit C implementation of curve25519
  37  */
  38
  39 #include <openssl/opensslconf.h>
  40
  41 #include <stdint.h>
  42 #include <string.h>
  43 #include <openssl/err.h>
  44 #include "ec_local.h"
  45
  46 #if defined(__SIZEOF_INT128__) && __SIZEOF_INT128__==16
  47   /* even with gcc, the typedef won't work for 32-bit platforms */
  48 typedef __uint128_t uint128_t;  /* nonstandard; implemented by gcc on 64-bit
  49                                  * platforms */
  50 #else
  51 # error "Your compiler doesn't appear to support 128-bit integer types"
  52 #endif
  53
  54 typedef uint8_t u8;
  55 typedef uint64_t u64;
  56
  57 /******************************************************************************/
  58 /*-
  59  * INTERNAL REPRESENTATION OF FIELD ELEMENTS
  60  *
  61  * Field elements are represented as a_0 + 2^56*a_1 + 2^112*a_2 + 2^168*a_3
  62  * using 64-bit coefficients called 'limbs',
  63  * and sometimes (for multiplication results) as
  64  * b_0 + 2^56*b_1 + 2^112*b_2 + 2^168*b_3 + 2^224*b_4 + 2^280*b_5 + 2^336*b_6
  65  * using 128-bit coefficients called 'widelimbs'.
  66  * A 4-limb representation is an 'felem';
  67  * a 7-widelimb representation is a 'widefelem'.
  68  * Even within felems, bits of adjacent limbs overlap, and we don't always
  69  * reduce the representations: we ensure that inputs to each felem
  70  * multiplication satisfy a_i < 2^60, so outputs satisfy b_i < 4*2^60*2^60,
  71  * and fit into a 128-bit word without overflow. The coefficients are then
  72  * again partially reduced to obtain an felem satisfying a_i < 2^57.
  73  * We only reduce to the unique minimal representation at the end of the
  74  * computation.
  75  */
  76
  77 typedef uint64_t limb;
  78 typedef uint128_t widelimb;
  79
  80 typedef limb felem[4];
  81 typedef widelimb widefelem[7];
  82
  83 /*
  84  * Field element represented as a byte array. 28*8 = 224 bits is also the
  85  * group order size for the elliptic curve, and we also use this type for
  86  * scalars for point multiplication.
  87  */
  88 typedef u8 felem_bytearray[28];
  89
  90 static const felem_bytearray nistp224_curve_params[5] = {
  91     {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, /* p */
  92      0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00,
  93      0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01},
  94     {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, /* a */
  95      0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFE, 0xFF, 0xFF, 0xFF, 0xFF,
  96      0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFE},
  97     {0xB4, 0x05, 0x0A, 0x85, 0x0C, 0x04, 0xB3, 0xAB, 0xF5, 0x41, /* b */
  98      0x32, 0x56, 0x50, 0x44, 0xB0, 0xB7, 0xD7, 0xBF, 0xD8, 0xBA,
  99      0x27, 0x0B, 0x39, 0x43, 0x23, 0x55, 0xFF, 0xB4},
 100     {0xB7, 0x0E, 0x0C, 0xBD, 0x6B, 0xB4, 0xBF, 0x7F, 0x32, 0x13, /* x */
 101      0x90, 0xB9, 0x4A, 0x03, 0xC1, 0xD3, 0x56, 0xC2, 0x11, 0x22,
 102      0x34, 0x32, 0x80, 0xD6, 0x11, 0x5C, 0x1D, 0x21},
 103     {0xbd, 0x37, 0x63, 0x88, 0xb5, 0xf7, 0x23, 0xfb, 0x4c, 0x22, /* y */
 104      0xdf, 0xe6, 0xcd, 0x43, 0x75, 0xa0, 0x5a, 0x07, 0x47, 0x64,
 105      0x44, 0xd5, 0x81, 0x99, 0x85, 0x00, 0x7e, 0x34}
 106 };
 107
 108 /*-
 109  * Precomputed multiples of the standard generator
 110  * Points are given in coordinates (X, Y, Z) where Z normally is 1
 111  * (0 for the point at infinity).
 112  * For each field element, slice a_0 is word 0, etc.
 113  *
 114  * The table has 2 * 16 elements, starting with the following:
 115  * index | bits    | point
 116  * ------+---------+------------------------------
 117  *     0 | 0 0 0 0 | 0G
 118  *     1 | 0 0 0 1 | 1G
 119  *     2 | 0 0 1 0 | 2^56G
 120  *     3 | 0 0 1 1 | (2^56 + 1)G
 121  *     4 | 0 1 0 0 | 2^112G
 122  *     5 | 0 1 0 1 | (2^112 + 1)G
 123  *     6 | 0 1 1 0 | (2^112 + 2^56)G
 124  *     7 | 0 1 1 1 | (2^112 + 2^56 + 1)G
 125  *     8 | 1 0 0 0 | 2^168G
 126  *     9 | 1 0 0 1 | (2^168 + 1)G
 127  *    10 | 1 0 1 0 | (2^168 + 2^56)G
 128  *    11 | 1 0 1 1 | (2^168 + 2^56 + 1)G
 129  *    12 | 1 1 0 0 | (2^168 + 2^112)G
 130  *    13 | 1 1 0 1 | (2^168 + 2^112 + 1)G
 131  *    14 | 1 1 1 0 | (2^168 + 2^112 + 2^56)G
 132  *    15 | 1 1 1 1 | (2^168 + 2^112 + 2^56 + 1)G
 133  * followed by a copy of this with each element multiplied by 2^28.
 134  *
 135  * The reason for this is so that we can clock bits into four different
 136  * locations when doing simple scalar multiplies against the base point,
 137  * and then another four locations using the second 16 elements.
 138  */
 139 static const felem gmul[2][16][3] = {
 140 {{{0, 0, 0, 0},
 141   {0, 0, 0, 0},
 142   {0, 0, 0, 0}},
 143  {{0x3280d6115c1d21, 0xc1d356c2112234, 0x7f321390b94a03, 0xb70e0cbd6bb4bf},
 144   {0xd5819985007e34, 0x75a05a07476444, 0xfb4c22dfe6cd43, 0xbd376388b5f723},
 145   {1, 0, 0, 0}},
 146  {{0xfd9675666ebbe9, 0xbca7664d40ce5e, 0x2242df8d8a2a43, 0x1f49bbb0f99bc5},
 147   {0x29e0b892dc9c43, 0xece8608436e662, 0xdc858f185310d0, 0x9812dd4eb8d321},
 148   {1, 0, 0, 0}},
 149  {{0x6d3e678d5d8eb8, 0x559eed1cb362f1, 0x16e9a3bbce8a3f, 0xeedcccd8c2a748},
 150   {0xf19f90ed50266d, 0xabf2b4bf65f9df, 0x313865468fafec, 0x5cb379ba910a17},
 151   {1, 0, 0, 0}},
 152  {{0x0641966cab26e3, 0x91fb2991fab0a0, 0xefec27a4e13a0b, 0x0499aa8a5f8ebe},
 153   {0x7510407766af5d, 0x84d929610d5450, 0x81d77aae82f706, 0x6916f6d4338c5b},
 154   {1, 0, 0, 0}},
 155  {{0xea95ac3b1f15c6, 0x086000905e82d4, 0xdd323ae4d1c8b1, 0x932b56be7685a3},
 156   {0x9ef93dea25dbbf, 0x41665960f390f0, 0xfdec76dbe2a8a7, 0x523e80f019062a},
 157   {1, 0, 0, 0}},
 158  {{0x822fdd26732c73, 0xa01c83531b5d0f, 0x363f37347c1ba4, 0xc391b45c84725c},
 159   {0xbbd5e1b2d6ad24, 0xddfbcde19dfaec, 0xc393da7e222a7f, 0x1efb7890ede244},
 160   {1, 0, 0, 0}},
 161  {{0x4c9e90ca217da1, 0xd11beca79159bb, 0xff8d33c2c98b7c, 0x2610b39409f849},
 162   {0x44d1352ac64da0, 0xcdbb7b2c46b4fb, 0x966c079b753c89, 0xfe67e4e820b112},
 163   {1, 0, 0, 0}},
 164  {{0xe28cae2df5312d, 0xc71b61d16f5c6e, 0x79b7619a3e7c4c, 0x05c73240899b47},
 165   {0x9f7f6382c73e3a, 0x18615165c56bda, 0x641fab2116fd56, 0x72855882b08394},
 166   {1, 0, 0, 0}},
 167  {{0x0469182f161c09, 0x74a98ca8d00fb5, 0xb89da93489a3e0, 0x41c98768fb0c1d},
 168   {0xe5ea05fb32da81, 0x3dce9ffbca6855, 0x1cfe2d3fbf59e6, 0x0e5e03408738a7},
 169   {1, 0, 0, 0}},
 170  {{0xdab22b2333e87f, 0x4430137a5dd2f6, 0xe03ab9f738beb8, 0xcb0c5d0dc34f24},
 171   {0x764a7df0c8fda5, 0x185ba5c3fa2044, 0x9281d688bcbe50, 0xc40331df893881},
 172   {1, 0, 0, 0}},
 173  {{0xb89530796f0f60, 0xade92bd26909a3, 0x1a0c83fb4884da, 0x1765bf22a5a984},
 174   {0x772a9ee75db09e, 0x23bc6c67cec16f, 0x4c1edba8b14e2f, 0xe2a215d9611369},
 175   {1, 0, 0, 0}},
 176  {{0x571e509fb5efb3, 0xade88696410552, 0xc8ae85fada74fe, 0x6c7e4be83bbde3},
 177   {0xff9f51160f4652, 0xb47ce2495a6539, 0xa2946c53b582f4, 0x286d2db3ee9a60},
 178   {1, 0, 0, 0}},
 179  {{0x40bbd5081a44af, 0x0995183b13926c, 0xbcefba6f47f6d0, 0x215619e9cc0057},
 180   {0x8bc94d3b0df45e, 0xf11c54a3694f6f, 0x8631b93cdfe8b5, 0xe7e3f4b0982db9},
 181   {1, 0, 0, 0}},
 182  {{0xb17048ab3e1c7b, 0xac38f36ff8a1d8, 0x1c29819435d2c6, 0xc813132f4c07e9},
 183   {0x2891425503b11f, 0x08781030579fea, 0xf5426ba5cc9674, 0x1e28ebf18562bc},
 184   {1, 0, 0, 0}},
 185  {{0x9f31997cc864eb, 0x06cd91d28b5e4c, 0xff17036691a973, 0xf1aef351497c58},
 186   {0xdd1f2d600564ff, 0xdead073b1402db, 0x74a684435bd693, 0xeea7471f962558},
 187   {1, 0, 0, 0}}},
 188 {{{0, 0, 0, 0},
 189   {0, 0, 0, 0},
 190   {0, 0, 0, 0}},
 191  {{0x9665266dddf554, 0x9613d78b60ef2d, 0xce27a34cdba417, 0xd35ab74d6afc31},
 192   {0x85ccdd22deb15e, 0x2137e5783a6aab, 0xa141cffd8c93c6, 0x355a1830e90f2d},
 193   {1, 0, 0, 0}},
 194  {{0x1a494eadaade65, 0xd6da4da77fe53c, 0xe7992996abec86, 0x65c3553c6090e3},
 195   {0xfa610b1fb09346, 0xf1c6540b8a4aaf, 0xc51a13ccd3cbab, 0x02995b1b18c28a},
 196   {1, 0, 0, 0}},
 197  {{0x7874568e7295ef, 0x86b419fbe38d04, 0xdc0690a7550d9a, 0xd3966a44beac33},
 198   {0x2b7280ec29132f, 0xbeaa3b6a032df3, 0xdc7dd88ae41200, 0xd25e2513e3a100},
 199   {1, 0, 0, 0}},
 200  {{0x924857eb2efafd, 0xac2bce41223190, 0x8edaa1445553fc, 0x825800fd3562d5},
 201   {0x8d79148ea96621, 0x23a01c3dd9ed8d, 0xaf8b219f9416b5, 0xd8db0cc277daea},
 202   {1, 0, 0, 0}},
 203  {{0x76a9c3b1a700f0, 0xe9acd29bc7e691, 0x69212d1a6b0327, 0x6322e97fe154be},
 204   {0x469fc5465d62aa, 0x8d41ed18883b05, 0x1f8eae66c52b88, 0xe4fcbe9325be51},
 205   {1, 0, 0, 0}},
 206  {{0x825fdf583cac16, 0x020b857c7b023a, 0x683c17744b0165, 0x14ffd0a2daf2f1},
 207   {0x323b36184218f9, 0x4944ec4e3b47d4, 0xc15b3080841acf, 0x0bced4b01a28bb},
 208   {1, 0, 0, 0}},
 209  {{0x92ac22230df5c4, 0x52f33b4063eda8, 0xcb3f19870c0c93, 0x40064f2ba65233},
 210   {0xfe16f0924f8992, 0x012da25af5b517, 0x1a57bb24f723a6, 0x06f8bc76760def},
 211   {1, 0, 0, 0}},
 212  {{0x4a7084f7817cb9, 0xbcab0738ee9a78, 0x3ec11e11d9c326, 0xdc0fe90e0f1aae},
 213   {0xcf639ea5f98390, 0x5c350aa22ffb74, 0x9afae98a4047b7, 0x956ec2d617fc45},
 214   {1, 0, 0, 0}},
 215  {{0x4306d648c1be6a, 0x9247cd8bc9a462, 0xf5595e377d2f2e, 0xbd1c3caff1a52e},
 216   {0x045e14472409d0, 0x29f3e17078f773, 0x745a602b2d4f7d, 0x191837685cdfbb},
 217   {1, 0, 0, 0}},
 218  {{0x5b6ee254a8cb79, 0x4953433f5e7026, 0xe21faeb1d1def4, 0xc4c225785c09de},
 219   {0x307ce7bba1e518, 0x31b125b1036db8, 0x47e91868839e8f, 0xc765866e33b9f3},
 220   {1, 0, 0, 0}},
 221  {{0x3bfece24f96906, 0x4794da641e5093, 0xde5df64f95db26, 0x297ecd89714b05},
 222   {0x701bd3ebb2c3aa, 0x7073b4f53cb1d5, 0x13c5665658af16, 0x9895089d66fe58},
 223   {1, 0, 0, 0}},
 224  {{0x0fef05f78c4790, 0x2d773633b05d2e, 0x94229c3a951c94, 0xbbbd70df4911bb},
 225   {0xb2c6963d2c1168, 0x105f47a72b0d73, 0x9fdf6111614080, 0x7b7e94b39e67b0},
 226   {1, 0, 0, 0}},
 227  {{0xad1a7d6efbe2b3, 0xf012482c0da69d, 0x6b3bdf12438345, 0x40d7558d7aa4d9},
 228   {0x8a09fffb5c6d3d, 0x9a356e5d9ffd38, 0x5973f15f4f9b1c, 0xdcd5f59f63c3ea},
 229   {1, 0, 0, 0}},
 230  {{0xacf39f4c5ca7ab, 0x4c8071cc5fd737, 0xc64e3602cd1184, 0x0acd4644c9abba},
 231   {0x6c011a36d8bf6e, 0xfecd87ba24e32a, 0x19f6f56574fad8, 0x050b204ced9405},
 232   {1, 0, 0, 0}},
 233  {{0xed4f1cae7d9a96, 0x5ceef7ad94c40a, 0x778e4a3bf3ef9b, 0x7405783dc3b55e},
 234   {0x32477c61b6e8c6, 0xb46a97570f018b, 0x91176d0a7e95d1, 0x3df90fbc4c7d0e},
 235   {1, 0, 0, 0}}}
 236 };
 237
 238 /* Precomputation for the group generator. */
 239 struct nistp224_pre_comp_st {
 240     felem g_pre_comp[2][16][3];
 241     CRYPTO_REF_COUNT references;
 242     CRYPTO_RWLOCK *lock;
 243 };
 244
 245 const EC_METHOD *EC_GFp_nistp224_method(void)
 246 {
 247     static const EC_METHOD ret = {
 248         EC_FLAGS_DEFAULT_OCT,
 249         NID_X9_62_prime_field,
 250         ec_GFp_nistp224_group_init,
 251         ec_GFp_simple_group_finish,
 252         ec_GFp_simple_group_clear_finish,
 253         ec_GFp_nist_group_copy,
 254         ec_GFp_nistp224_group_set_curve,
 255         ec_GFp_simple_group_get_curve,
 256         ec_GFp_simple_group_get_degree,
 257         ec_group_simple_order_bits,
 258         ec_GFp_simple_group_check_discriminant,
 259         ec_GFp_simple_point_init,
 260         ec_GFp_simple_point_finish,
 261         ec_GFp_simple_point_clear_finish,
 262         ec_GFp_simple_point_copy,
 263         ec_GFp_simple_point_set_to_infinity,
 264         ec_GFp_simple_point_set_affine_coordinates,
 265         ec_GFp_nistp224_point_get_affine_coordinates,
 266         0 /* point_set_compressed_coordinates */ ,
 267         0 /* point2oct */ ,
 268         0 /* oct2point */ ,
 269         ec_GFp_simple_add,
 270         ec_GFp_simple_dbl,
 271         ec_GFp_simple_invert,
 272         ec_GFp_simple_is_at_infinity,
 273         ec_GFp_simple_is_on_curve,
 274         ec_GFp_simple_cmp,
 275         ec_GFp_simple_make_affine,
 276         ec_GFp_simple_points_make_affine,
 277         ec_GFp_nistp224_points_mul,
 278         ec_GFp_nistp224_precompute_mult,
 279         ec_GFp_nistp224_have_precompute_mult,
 280         ec_GFp_nist_field_mul,
 281         ec_GFp_nist_field_sqr,
 282         0 /* field_div */ ,
 283         ec_GFp_simple_field_inv,
 284         0 /* field_encode */ ,
 285         0 /* field_decode */ ,
 286         0,                      /* field_set_to_one */
 287         ec_key_simple_priv2oct,
 288         ec_key_simple_oct2priv,
 289         0, /* set private */
 290         ec_key_simple_generate_key,
 291         ec_key_simple_check_key,
 292         ec_key_simple_generate_public_key,
 293         0, /* keycopy */
 294         0, /* keyfinish */
 295         ecdh_simple_compute_key,
 296         ecdsa_simple_sign_setup,
 297         ecdsa_simple_sign_sig,
 298         ecdsa_simple_verify_sig,
 299         0, /* field_inverse_mod_ord */
 300         0, /* blind_coordinates */
 301         0, /* ladder_pre */
 302         0, /* ladder_step */
 303         0  /* ladder_post */
 304     };
 305
 306     return &ret;
 307 }
 308
 309 /*
 310  * Helper functions to convert field elements to/from internal representation
 311  */
 312 static void bin28_to_felem(felem out, const u8 in[28])
 313 {
 314     out[0] = *((const uint64_t *)(in)) & 0x00ffffffffffffff;
 315     out[1] = (*((const uint64_t *)(in + 7))) & 0x00ffffffffffffff;
 316     out[2] = (*((const uint64_t *)(in + 14))) & 0x00ffffffffffffff;
 317     out[3] = (*((const uint64_t *)(in+20))) >> 8;
 318 }
 319
 320 static void felem_to_bin28(u8 out[28], const felem in)
 321 {
 322     unsigned i;
 323     for (i = 0; i < 7; ++i) {
 324         out[i] = in[0] >> (8 * i);
 325         out[i + 7] = in[1] >> (8 * i);
 326         out[i + 14] = in[2] >> (8 * i);
 327         out[i + 21] = in[3] >> (8 * i);
 328     }
 329 }
 330
 331 /* From OpenSSL BIGNUM to internal representation */
 332 static int BN_to_felem(felem out, const BIGNUM *bn)
 333 {
 334     felem_bytearray b_out;
 335     int num_bytes;
 336
 337     if (BN_is_negative(bn)) {
 338         ECerr(EC_F_BN_TO_FELEM, EC_R_BIGNUM_OUT_OF_RANGE);
 339         return 0;
 340     }
 341     num_bytes = BN_bn2lebinpad(bn, b_out, sizeof(b_out));
 342     if (num_bytes < 0) {
 343         ECerr(EC_F_BN_TO_FELEM, EC_R_BIGNUM_OUT_OF_RANGE);
 344         return 0;
 345     }
 346     bin28_to_felem(out, b_out);
 347     return 1;
 348 }
 349
 350 /* From internal representation to OpenSSL BIGNUM */
 351 static BIGNUM *felem_to_BN(BIGNUM *out, const felem in)
 352 {
 353     felem_bytearray b_out;
 354     felem_to_bin28(b_out, in);
 355     return BN_lebin2bn(b_out, sizeof(b_out), out);
 356 }
 357
 358 /******************************************************************************/
 359 /*-
 360  *                              FIELD OPERATIONS
 361  *
 362  * Field operations, using the internal representation of field elements.
 363  * NB! These operations are specific to our point multiplication and cannot be
 364  * expected to be correct in general - e.g., multiplication with a large scalar
 365  * will cause an overflow.
 366  *
 367  */
 368
 369 static void felem_one(felem out)
 370 {
 371     out[0] = 1;
 372     out[1] = 0;
 373     out[2] = 0;
 374     out[3] = 0;
 375 }
 376
 377 static void felem_assign(felem out, const felem in)
 378 {
 379     out[0] = in[0];
 380     out[1] = in[1];
 381     out[2] = in[2];
 382     out[3] = in[3];
 383 }
 384
 385 /* Sum two field elements: out += in */
 386 static void felem_sum(felem out, const felem in)
 387 {
 388     out[0] += in[0];
 389     out[1] += in[1];
 390     out[2] += in[2];
 391     out[3] += in[3];
 392 }
 393
 394 /* Subtract field elements: out -= in */
 395 /* Assumes in[i] < 2^57 */
 396 static void felem_diff(felem out, const felem in)
 397 {
 398     static const limb two58p2 = (((limb) 1) << 58) + (((limb) 1) << 2);
 399     static const limb two58m2 = (((limb) 1) << 58) - (((limb) 1) << 2);
 400     static const limb two58m42m2 = (((limb) 1) << 58) -
 401         (((limb) 1) << 42) - (((limb) 1) << 2);
 402
 403     /* Add 0 mod 2^224-2^96+1 to ensure out > in */
 404     out[0] += two58p2;
 405     out[1] += two58m42m2;
 406     out[2] += two58m2;
 407     out[3] += two58m2;
 408
 409     out[0] -= in[0];
 410     out[1] -= in[1];
 411     out[2] -= in[2];
 412     out[3] -= in[3];
 413 }
 414
 415 /* Subtract in unreduced 128-bit mode: out -= in */
 416 /* Assumes in[i] < 2^119 */
 417 static void widefelem_diff(widefelem out, const widefelem in)
 418 {
 419     static const widelimb two120 = ((widelimb) 1) << 120;
 420     static const widelimb two120m64 = (((widelimb) 1) << 120) -
 421         (((widelimb) 1) << 64);
 422     static const widelimb two120m104m64 = (((widelimb) 1) << 120) -
 423         (((widelimb) 1) << 104) - (((widelimb) 1) << 64);
 424
 425     /* Add 0 mod 2^224-2^96+1 to ensure out > in */
 426     out[0] += two120;
 427     out[1] += two120m64;
 428     out[2] += two120m64;
 429     out[3] += two120;
 430     out[4] += two120m104m64;
 431     out[5] += two120m64;
 432     out[6] += two120m64;
 433
 434     out[0] -= in[0];
 435     out[1] -= in[1];
 436     out[2] -= in[2];
 437     out[3] -= in[3];
 438     out[4] -= in[4];
 439     out[5] -= in[5];
 440     out[6] -= in[6];
 441 }
 442
 443 /* Subtract in mixed mode: out128 -= in64 */
 444 /* in[i] < 2^63 */
 445 static void felem_diff_128_64(widefelem out, const felem in)
 446 {
 447     static const widelimb two64p8 = (((widelimb) 1) << 64) +
 448         (((widelimb) 1) << 8);
 449     static const widelimb two64m8 = (((widelimb) 1) << 64) -
 450         (((widelimb) 1) << 8);
 451     static const widelimb two64m48m8 = (((widelimb) 1) << 64) -
 452         (((widelimb) 1) << 48) - (((widelimb) 1) << 8);
 453
 454     /* Add 0 mod 2^224-2^96+1 to ensure out > in */
 455     out[0] += two64p8;
 456     out[1] += two64m48m8;
 457     out[2] += two64m8;
 458     out[3] += two64m8;
 459
 460     out[0] -= in[0];
 461     out[1] -= in[1];
 462     out[2] -= in[2];
 463     out[3] -= in[3];
 464 }
 465
 466 /*
 467  * Multiply a field element by a scalar: out = out * scalar The scalars we
 468  * actually use are small, so results fit without overflow
 469  */
 470 static void felem_scalar(felem out, const limb scalar)
 471 {
 472     out[0] *= scalar;
 473     out[1] *= scalar;
 474     out[2] *= scalar;
 475     out[3] *= scalar;
 476 }
 477
 478 /*
 479  * Multiply an unreduced field element by a scalar: out = out * scalar The
 480  * scalars we actually use are small, so results fit without overflow
 481  */
 482 static void widefelem_scalar(widefelem out, const widelimb scalar)
 483 {
 484     out[0] *= scalar;
 485     out[1] *= scalar;
 486     out[2] *= scalar;
 487     out[3] *= scalar;
 488     out[4] *= scalar;
 489     out[5] *= scalar;
 490     out[6] *= scalar;
 491 }
 492
 493 /* Square a field element: out = in^2 */
 494 static void felem_square(widefelem out, const felem in)
 495 {
 496     limb tmp0, tmp1, tmp2;
 497     tmp0 = 2 * in[0];
 498     tmp1 = 2 * in[1];
 499     tmp2 = 2 * in[2];
 500     out[0] = ((widelimb) in[0]) * in[0];
 501     out[1] = ((widelimb) in[0]) * tmp1;
 502     out[2] = ((widelimb) in[0]) * tmp2 + ((widelimb) in[1]) * in[1];
 503     out[3] = ((widelimb) in[3]) * tmp0 + ((widelimb) in[1]) * tmp2;
 504     out[4] = ((widelimb) in[3]) * tmp1 + ((widelimb) in[2]) * in[2];
 505     out[5] = ((widelimb) in[3]) * tmp2;
 506     out[6] = ((widelimb) in[3]) * in[3];
 507 }
 508
 509 /* Multiply two field elements: out = in1 * in2 */
 510 static void felem_mul(widefelem out, const felem in1, const felem in2)
 511 {
 512     out[0] = ((widelimb) in1[0]) * in2[0];
 513     out[1] = ((widelimb) in1[0]) * in2[1] + ((widelimb) in1[1]) * in2[0];
 514     out[2] = ((widelimb) in1[0]) * in2[2] + ((widelimb) in1[1]) * in2[1] +
 515              ((widelimb) in1[2]) * in2[0];
 516     out[3] = ((widelimb) in1[0]) * in2[3] + ((widelimb) in1[1]) * in2[2] +
 517              ((widelimb) in1[2]) * in2[1] + ((widelimb) in1[3]) * in2[0];
 518     out[4] = ((widelimb) in1[1]) * in2[3] + ((widelimb) in1[2]) * in2[2] +
 519              ((widelimb) in1[3]) * in2[1];
 520     out[5] = ((widelimb) in1[2]) * in2[3] + ((widelimb) in1[3]) * in2[2];
 521     out[6] = ((widelimb) in1[3]) * in2[3];
 522 }
 523
 524 /*-
 525  * Reduce seven 128-bit coefficients to four 64-bit coefficients.
 526  * Requires in[i] < 2^126,
 527  * ensures out[0] < 2^56, out[1] < 2^56, out[2] < 2^56, out[3] <= 2^56 + 2^16 */
 528 static void felem_reduce(felem out, const widefelem in)
 529 {
 530     static const widelimb two127p15 = (((widelimb) 1) << 127) +
 531         (((widelimb) 1) << 15);
 532     static const widelimb two127m71 = (((widelimb) 1) << 127) -
 533         (((widelimb) 1) << 71);
 534     static const widelimb two127m71m55 = (((widelimb) 1) << 127) -
 535         (((widelimb) 1) << 71) - (((widelimb) 1) << 55);
 536     widelimb output[5];
 537
 538     /* Add 0 mod 2^224-2^96+1 to ensure all differences are positive */
 539     output[0] = in[0] + two127p15;
 540     output[1] = in[1] + two127m71m55;
 541     output[2] = in[2] + two127m71;
 542     output[3] = in[3];
 543     output[4] = in[4];
 544
 545     /* Eliminate in[4], in[5], in[6] */
 546     output[4] += in[6] >> 16;
 547     output[3] += (in[6] & 0xffff) << 40;
 548     output[2] -= in[6];
 549
 550     output[3] += in[5] >> 16;
 551     output[2] += (in[5] & 0xffff) << 40;
 552     output[1] -= in[5];
 553
 554     output[2] += output[4] >> 16;
 555     output[1] += (output[4] & 0xffff) << 40;
 556     output[0] -= output[4];
 557
 558     /* Carry 2 -> 3 -> 4 */
 559     output[3] += output[2] >> 56;
 560     output[2] &= 0x00ffffffffffffff;
 561
 562     output[4] = output[3] >> 56;
 563     output[3] &= 0x00ffffffffffffff;
 564
 565     /* Now output[2] < 2^56, output[3] < 2^56, output[4] < 2^72 */
 566
 567     /* Eliminate output[4] */
 568     output[2] += output[4] >> 16;
 569     /* output[2] < 2^56 + 2^56 = 2^57 */
 570     output[1] += (output[4] & 0xffff) << 40;
 571     output[0] -= output[4];
 572
 573     /* Carry 0 -> 1 -> 2 -> 3 */
 574     output[1] += output[0] >> 56;
 575     out[0] = output[0] & 0x00ffffffffffffff;
 576
 577     output[2] += output[1] >> 56;
 578     /* output[2] < 2^57 + 2^72 */
 579     out[1] = output[1] & 0x00ffffffffffffff;
 580     output[3] += output[2] >> 56;
 581     /* output[3] <= 2^56 + 2^16 */
 582     out[2] = output[2] & 0x00ffffffffffffff;
 583
 584     /*-
 585      * out[0] < 2^56, out[1] < 2^56, out[2] < 2^56,
 586      * out[3] <= 2^56 + 2^16 (due to final carry),
 587      * so out < 2*p
 588      */
 589     out[3] = output[3];
 590 }
 591
 592 static void felem_square_reduce(felem out, const felem in)
 593 {
 594     widefelem tmp;
 595     felem_square(tmp, in);
 596     felem_reduce(out, tmp);
 597 }
 598
 599 static void felem_mul_reduce(felem out, const felem in1, const felem in2)
 600 {
 601     widefelem tmp;
 602     felem_mul(tmp, in1, in2);
 603     felem_reduce(out, tmp);
 604 }
 605
 606 /*
 607  * Reduce to unique minimal representation. Requires 0 <= in < 2*p (always
 608  * call felem_reduce first)
 609  */
 610 static void felem_contract(felem out, const felem in)
 611 {
 612     static const int64_t two56 = ((limb) 1) << 56;
 613     /* 0 <= in < 2*p, p = 2^224 - 2^96 + 1 */
 614     /* if in > p , reduce in = in - 2^224 + 2^96 - 1 */
 615     int64_t tmp[4], a;
 616     tmp[0] = in[0];
 617     tmp[1] = in[1];
 618     tmp[2] = in[2];
 619     tmp[3] = in[3];
 620     /* Case 1: a = 1 iff in >= 2^224 */
 621     a = (in[3] >> 56);
 622     tmp[0] -= a;
 623     tmp[1] += a << 40;
 624     tmp[3] &= 0x00ffffffffffffff;
 625     /*
 626      * Case 2: a = 0 iff p <= in < 2^224, i.e., the high 128 bits are all 1
 627      * and the lower part is non-zero
 628      */
 629     a = ((in[3] & in[2] & (in[1] | 0x000000ffffffffff)) + 1) |
 630         (((int64_t) (in[0] + (in[1] & 0x000000ffffffffff)) - 1) >> 63);
 631     a &= 0x00ffffffffffffff;
 632     /* turn a into an all-one mask (if a = 0) or an all-zero mask */
 633     a = (a - 1) >> 63;
 634     /* subtract 2^224 - 2^96 + 1 if a is all-one */
 635     tmp[3] &= a ^ 0xffffffffffffffff;
 636     tmp[2] &= a ^ 0xffffffffffffffff;
 637     tmp[1] &= (a ^ 0xffffffffffffffff) | 0x000000ffffffffff;
 638     tmp[0] -= 1 & a;
 639
 640     /*
 641      * eliminate negative coefficients: if tmp[0] is negative, tmp[1] must be
 642      * non-zero, so we only need one step
 643      */
 644     a = tmp[0] >> 63;
 645     tmp[0] += two56 & a;
 646     tmp[1] -= 1 & a;
 647
 648     /* carry 1 -> 2 -> 3 */
 649     tmp[2] += tmp[1] >> 56;
 650     tmp[1] &= 0x00ffffffffffffff;
 651
 652     tmp[3] += tmp[2] >> 56;
 653     tmp[2] &= 0x00ffffffffffffff;
 654
 655     /* Now 0 <= out < p */
 656     out[0] = tmp[0];
 657     out[1] = tmp[1];
 658     out[2] = tmp[2];
 659     out[3] = tmp[3];
 660 }
 661
 662 /*
 663  * Get negative value: out = -in
 664  * Requires in[i] < 2^63,
 665  * ensures out[0] < 2^56, out[1] < 2^56, out[2] < 2^56, out[3] <= 2^56 + 2^16
 666  */
 667 static void felem_neg(felem out, const felem in)
 668 {
 669     widefelem tmp;
 670
 671     memset(tmp, 0, sizeof(tmp));
 672     felem_diff_128_64(tmp, in);
 673     felem_reduce(out, tmp);
 674 }
 675
 676 /*
 677  * Zero-check: returns 1 if input is 0, and 0 otherwise. We know that field
 678  * elements are reduced to in < 2^225, so we only need to check three cases:
 679  * 0, 2^224 - 2^96 + 1, and 2^225 - 2^97 + 2
 680  */
 681 static limb felem_is_zero(const felem in)
 682 {
 683     limb zero, two224m96p1, two225m97p2;
 684
 685     zero = in[0] | in[1] | in[2] | in[3];
 686     zero = (((int64_t) (zero) - 1) >> 63) & 1;
 687     two224m96p1 = (in[0] ^ 1) | (in[1] ^ 0x00ffff0000000000)
 688         | (in[2] ^ 0x00ffffffffffffff) | (in[3] ^ 0x00ffffffffffffff);
 689     two224m96p1 = (((int64_t) (two224m96p1) - 1) >> 63) & 1;
 690     two225m97p2 = (in[0] ^ 2) | (in[1] ^ 0x00fffe0000000000)
 691         | (in[2] ^ 0x00ffffffffffffff) | (in[3] ^ 0x01ffffffffffffff);
 692     two225m97p2 = (((int64_t) (two225m97p2) - 1) >> 63) & 1;
 693     return (zero | two224m96p1 | two225m97p2);
 694 }
 695
 696 static int felem_is_zero_int(const void *in)
 697 {
 698     return (int)(felem_is_zero(in) & ((limb) 1));
 699 }
 700
 701 /* Invert a field element */
 702 /* Computation chain copied from djb's code */
 703 static void felem_inv(felem out, const felem in)
 704 {
 705     felem ftmp, ftmp2, ftmp3, ftmp4;
 706     widefelem tmp;
 707     unsigned i;
 708
 709     felem_square(tmp, in);
 710     felem_reduce(ftmp, tmp);    /* 2 */
 711     felem_mul(tmp, in, ftmp);
 712     felem_reduce(ftmp, tmp);    /* 2^2 - 1 */
 713     felem_square(tmp, ftmp);
 714     felem_reduce(ftmp, tmp);    /* 2^3 - 2 */
 715     felem_mul(tmp, in, ftmp);
 716     felem_reduce(ftmp, tmp);    /* 2^3 - 1 */
 717     felem_square(tmp, ftmp);
 718     felem_reduce(ftmp2, tmp);   /* 2^4 - 2 */
 719     felem_square(tmp, ftmp2);
 720     felem_reduce(ftmp2, tmp);   /* 2^5 - 4 */
 721     felem_square(tmp, ftmp2);
 722     felem_reduce(ftmp2, tmp);   /* 2^6 - 8 */
 723     felem_mul(tmp, ftmp2, ftmp);
 724     felem_reduce(ftmp, tmp);    /* 2^6 - 1 */
 725     felem_square(tmp, ftmp);
 726     felem_reduce(ftmp2, tmp);   /* 2^7 - 2 */
 727     for (i = 0; i < 5; ++i) {   /* 2^12 - 2^6 */
 728         felem_square(tmp, ftmp2);
 729         felem_reduce(ftmp2, tmp);
 730     }
 731     felem_mul(tmp, ftmp2, ftmp);
 732     felem_reduce(ftmp2, tmp);   /* 2^12 - 1 */
 733     felem_square(tmp, ftmp2);
 734     felem_reduce(ftmp3, tmp);   /* 2^13 - 2 */
 735     for (i = 0; i < 11; ++i) {  /* 2^24 - 2^12 */
 736         felem_square(tmp, ftmp3);
 737         felem_reduce(ftmp3, tmp);
 738     }
 739     felem_mul(tmp, ftmp3, ftmp2);
 740     felem_reduce(ftmp2, tmp);   /* 2^24 - 1 */
 741     felem_square(tmp, ftmp2);
 742     felem_reduce(ftmp3, tmp);   /* 2^25 - 2 */
 743     for (i = 0; i < 23; ++i) {  /* 2^48 - 2^24 */
 744         felem_square(tmp, ftmp3);
 745         felem_reduce(ftmp3, tmp);
 746     }
 747     felem_mul(tmp, ftmp3, ftmp2);
 748     felem_reduce(ftmp3, tmp);   /* 2^48 - 1 */
 749     felem_square(tmp, ftmp3);
 750     felem_reduce(ftmp4, tmp);   /* 2^49 - 2 */
 751     for (i = 0; i < 47; ++i) {  /* 2^96 - 2^48 */
 752         felem_square(tmp, ftmp4);
 753         felem_reduce(ftmp4, tmp);
 754     }
 755     felem_mul(tmp, ftmp3, ftmp4);
 756     felem_reduce(ftmp3, tmp);   /* 2^96 - 1 */
 757     felem_square(tmp, ftmp3);
 758     felem_reduce(ftmp4, tmp);   /* 2^97 - 2 */
 759     for (i = 0; i < 23; ++i) {  /* 2^120 - 2^24 */
 760         felem_square(tmp, ftmp4);
 761         felem_reduce(ftmp4, tmp);
 762     }
 763     felem_mul(tmp, ftmp2, ftmp4);
 764     felem_reduce(ftmp2, tmp);   /* 2^120 - 1 */
 765     for (i = 0; i < 6; ++i) {   /* 2^126 - 2^6 */
 766         felem_square(tmp, ftmp2);
 767         felem_reduce(ftmp2, tmp);
 768     }
 769     felem_mul(tmp, ftmp2, ftmp);
 770     felem_reduce(ftmp, tmp);    /* 2^126 - 1 */
 771     felem_square(tmp, ftmp);
 772     felem_reduce(ftmp, tmp);    /* 2^127 - 2 */
 773     felem_mul(tmp, ftmp, in);
 774     felem_reduce(ftmp, tmp);    /* 2^127 - 1 */
 775     for (i = 0; i < 97; ++i) {  /* 2^224 - 2^97 */
 776         felem_square(tmp, ftmp);
 777         felem_reduce(ftmp, tmp);
 778     }
 779     felem_mul(tmp, ftmp, ftmp3);
 780     felem_reduce(out, tmp);     /* 2^224 - 2^96 - 1 */
 781 }
 782
 783 /*
 784  * Copy in constant time: if icopy == 1, copy in to out, if icopy == 0, copy
 785  * out to itself.
 786  */
 787 static void copy_conditional(felem out, const felem in, limb icopy)
 788 {
 789     unsigned i;
 790     /*
 791      * icopy is a (64-bit) 0 or 1, so copy is either all-zero or all-one
 792      */
 793     const limb copy = -icopy;
 794     for (i = 0; i < 4; ++i) {
 795         const limb tmp = copy & (in[i] ^ out[i]);
 796         out[i] ^= tmp;
 797     }
 798 }
 799
 800 /******************************************************************************/
 801 /*-
 802  *                       ELLIPTIC CURVE POINT OPERATIONS
 803  *
 804  * Points are represented in Jacobian projective coordinates:
 805  * (X, Y, Z) corresponds to the affine point (X/Z^2, Y/Z^3),
 806  * or to the point at infinity if Z == 0.
 807  *
 808  */
 809
 810 /*-
 811  * Double an elliptic curve point:
 812  * (X', Y', Z') = 2 * (X, Y, Z), where
 813  * X' = (3 * (X - Z^2) * (X + Z^2))^2 - 8 * X * Y^2
 814  * Y' = 3 * (X - Z^2) * (X + Z^2) * (4 * X * Y^2 - X') - 8 * Y^4
 815  * Z' = (Y + Z)^2 - Y^2 - Z^2 = 2 * Y * Z
 816  * Outputs can equal corresponding inputs, i.e., x_out == x_in is allowed,
 817  * while x_out == y_in is not (maybe this works, but it's not tested).
 818  */
 819 static void
 820 point_double(felem x_out, felem y_out, felem z_out,
 821              const felem x_in, const felem y_in, const felem z_in)
 822 {
 823     widefelem tmp, tmp2;
 824     felem delta, gamma, beta, alpha, ftmp, ftmp2;
 825
 826     felem_assign(ftmp, x_in);
 827     felem_assign(ftmp2, x_in);
 828
 829     /* delta = z^2 */
 830     felem_square(tmp, z_in);
 831     felem_reduce(delta, tmp);
 832
 833     /* gamma = y^2 */
 834     felem_square(tmp, y_in);
 835     felem_reduce(gamma, tmp);
 836
 837     /* beta = x*gamma */
 838     felem_mul(tmp, x_in, gamma);
 839     felem_reduce(beta, tmp);
 840
 841     /* alpha = 3*(x-delta)*(x+delta) */
 842     felem_diff(ftmp, delta);
 843     /* ftmp[i] < 2^57 + 2^58 + 2 < 2^59 */
 844     felem_sum(ftmp2, delta);
 845     /* ftmp2[i] < 2^57 + 2^57 = 2^58 */
 846     felem_scalar(ftmp2, 3);
 847     /* ftmp2[i] < 3 * 2^58 < 2^60 */
 848     felem_mul(tmp, ftmp, ftmp2);
 849     /* tmp[i] < 2^60 * 2^59 * 4 = 2^121 */
 850     felem_reduce(alpha, tmp);
 851
 852     /* x' = alpha^2 - 8*beta */
 853     felem_square(tmp, alpha);
 854     /* tmp[i] < 4 * 2^57 * 2^57 = 2^116 */
 855     felem_assign(ftmp, beta);
 856     felem_scalar(ftmp, 8);
 857     /* ftmp[i] < 8 * 2^57 = 2^60 */
 858     felem_diff_128_64(tmp, ftmp);
 859     /* tmp[i] < 2^116 + 2^64 + 8 < 2^117 */
 860     felem_reduce(x_out, tmp);
 861
 862     /* z' = (y + z)^2 - gamma - delta */
 863     felem_sum(delta, gamma);
 864     /* delta[i] < 2^57 + 2^57 = 2^58 */
 865     felem_assign(ftmp, y_in);
 866     felem_sum(ftmp, z_in);
 867     /* ftmp[i] < 2^57 + 2^57 = 2^58 */
 868     felem_square(tmp, ftmp);
 869     /* tmp[i] < 4 * 2^58 * 2^58 = 2^118 */
 870     felem_diff_128_64(tmp, delta);
 871     /* tmp[i] < 2^118 + 2^64 + 8 < 2^119 */
 872     felem_reduce(z_out, tmp);
 873
 874     /* y' = alpha*(4*beta - x') - 8*gamma^2 */
 875     felem_scalar(beta, 4);
 876     /* beta[i] < 4 * 2^57 = 2^59 */
 877     felem_diff(beta, x_out);
 878     /* beta[i] < 2^59 + 2^58 + 2 < 2^60 */
 879     felem_mul(tmp, alpha, beta);
 880     /* tmp[i] < 4 * 2^57 * 2^60 = 2^119 */
 881     felem_square(tmp2, gamma);
 882     /* tmp2[i] < 4 * 2^57 * 2^57 = 2^116 */
 883     widefelem_scalar(tmp2, 8);
 884     /* tmp2[i] < 8 * 2^116 = 2^119 */
 885     widefelem_diff(tmp, tmp2);
 886     /* tmp[i] < 2^119 + 2^120 < 2^121 */
 887     felem_reduce(y_out, tmp);
 888 }
 889
 890 /*-
 891  * Add two elliptic curve points:
 892  * (X_1, Y_1, Z_1) + (X_2, Y_2, Z_2) = (X_3, Y_3, Z_3), where
 893  * X_3 = (Z_1^3 * Y_2 - Z_2^3 * Y_1)^2 - (Z_1^2 * X_2 - Z_2^2 * X_1)^3 -
 894  * 2 * Z_2^2 * X_1 * (Z_1^2 * X_2 - Z_2^2 * X_1)^2
 895  * Y_3 = (Z_1^3 * Y_2 - Z_2^3 * Y_1) * (Z_2^2 * X_1 * (Z_1^2 * X_2 - Z_2^2 * X_1)^2 - X_3) -
 896  *        Z_2^3 * Y_1 * (Z_1^2 * X_2 - Z_2^2 * X_1)^3
 897  * Z_3 = (Z_1^2 * X_2 - Z_2^2 * X_1) * (Z_1 * Z_2)
 898  *
 899  * This runs faster if 'mixed' is set, which requires Z_2 = 1 or Z_2 = 0.
 900  */
 901
 902 /*
 903  * This function is not entirely constant-time: it includes a branch for
 904  * checking whether the two input points are equal, (while not equal to the
 905  * point at infinity). This case never happens during single point
 906  * multiplication, so there is no timing leak for ECDH or ECDSA signing.
 907  */
 908 static void point_add(felem x3, felem y3, felem z3,
 909                       const felem x1, const felem y1, const felem z1,
 910                       const int mixed, const felem x2, const felem y2,
 911                       const felem z2)
 912 {
 913     felem ftmp, ftmp2, ftmp3, ftmp4, ftmp5, x_out, y_out, z_out;
 914     widefelem tmp, tmp2;
 915     limb z1_is_zero, z2_is_zero, x_equal, y_equal;
 916     limb points_equal;
 917
 918     if (!mixed) {
 919         /* ftmp2 = z2^2 */
 920         felem_square(tmp, z2);
 921         felem_reduce(ftmp2, tmp);
 922
 923         /* ftmp4 = z2^3 */
 924         felem_mul(tmp, ftmp2, z2);
 925         felem_reduce(ftmp4, tmp);
 926
 927         /* ftmp4 = z2^3*y1 */
 928         felem_mul(tmp2, ftmp4, y1);
 929         felem_reduce(ftmp4, tmp2);
 930
 931         /* ftmp2 = z2^2*x1 */
 932         felem_mul(tmp2, ftmp2, x1);
 933         felem_reduce(ftmp2, tmp2);
 934     } else {
 935         /*
 936          * We'll assume z2 = 1 (special case z2 = 0 is handled later)
 937          */
 938
 939         /* ftmp4 = z2^3*y1 */
 940         felem_assign(ftmp4, y1);
 941
 942         /* ftmp2 = z2^2*x1 */
 943         felem_assign(ftmp2, x1);
 944     }
 945
 946     /* ftmp = z1^2 */
 947     felem_square(tmp, z1);
 948     felem_reduce(ftmp, tmp);
 949
 950     /* ftmp3 = z1^3 */
 951     felem_mul(tmp, ftmp, z1);
 952     felem_reduce(ftmp3, tmp);
 953
 954     /* tmp = z1^3*y2 */
 955     felem_mul(tmp, ftmp3, y2);
 956     /* tmp[i] < 4 * 2^57 * 2^57 = 2^116 */
 957
 958     /* ftmp3 = z1^3*y2 - z2^3*y1 */
 959     felem_diff_128_64(tmp, ftmp4);
 960     /* tmp[i] < 2^116 + 2^64 + 8 < 2^117 */
 961     felem_reduce(ftmp3, tmp);
 962
 963     /* tmp = z1^2*x2 */
 964     felem_mul(tmp, ftmp, x2);
 965     /* tmp[i] < 4 * 2^57 * 2^57 = 2^116 */
 966
 967     /* ftmp = z1^2*x2 - z2^2*x1 */
 968     felem_diff_128_64(tmp, ftmp2);
 969     /* tmp[i] < 2^116 + 2^64 + 8 < 2^117 */
 970     felem_reduce(ftmp, tmp);
 971
 972     /*
 973      * The formulae are incorrect if the points are equal, in affine coordinates
 974      * (X_1, Y_1) == (X_2, Y_2), so we check for this and do doubling if this
 975      * happens.
 976      *
 977      * We use bitwise operations to avoid potential side-channels introduced by
 978      * the short-circuiting behaviour of boolean operators.
 979      */
 980     x_equal = felem_is_zero(ftmp);
 981     y_equal = felem_is_zero(ftmp3);
 982     /*
 983      * The special case of either point being the point at infinity (z1 and/or
 984      * z2 are zero), is handled separately later on in this function, so we
 985      * avoid jumping to point_double here in those special cases.
 986      */
 987     z1_is_zero = felem_is_zero(z1);
 988     z2_is_zero = felem_is_zero(z2);
 989
 990     /*
 991      * Compared to `ecp_nistp256.c` and `ecp_nistp521.c`, in this
 992      * specific implementation `felem_is_zero()` returns truth as `0x1`
 993      * (rather than `0xff..ff`).
 994      *
 995      * This implies that `~true` in this implementation becomes
 996      * `0xff..fe` (rather than `0x0`): for this reason, to be used in
 997      * the if expression, we mask out only the last bit in the next
 998      * line.
 999      */
1000     points_equal = (x_equal & y_equal & (~z1_is_zero) & (~z2_is_zero)) & 1;
1001
1002     if (points_equal) {
1003         /*
1004          * This is obviously not constant-time but, as mentioned before, this
1005          * case never happens during single point multiplication, so there is no
1006          * timing leak for ECDH or ECDSA signing.
1007          */
1008         point_double(x3, y3, z3, x1, y1, z1);
1009         return;
1010     }
1011
1012     /* ftmp5 = z1*z2 */
1013     if (!mixed) {
1014         felem_mul(tmp, z1, z2);
1015         felem_reduce(ftmp5, tmp);
1016     } else {
1017         /* special case z2 = 0 is handled later */
1018         felem_assign(ftmp5, z1);
1019     }
1020
1021     /* z_out = (z1^2*x2 - z2^2*x1)*(z1*z2) */
1022     felem_mul(tmp, ftmp, ftmp5);
1023     felem_reduce(z_out, tmp);
1024
1025     /* ftmp = (z1^2*x2 - z2^2*x1)^2 */
1026     felem_assign(ftmp5, ftmp);
1027     felem_square(tmp, ftmp);
1028     felem_reduce(ftmp, tmp);
1029
1030     /* ftmp5 = (z1^2*x2 - z2^2*x1)^3 */
1031     felem_mul(tmp, ftmp, ftmp5);
1032     felem_reduce(ftmp5, tmp);
1033
1034     /* ftmp2 = z2^2*x1*(z1^2*x2 - z2^2*x1)^2 */
1035     felem_mul(tmp, ftmp2, ftmp);
1036     felem_reduce(ftmp2, tmp);
1037
1038     /* tmp = z2^3*y1*(z1^2*x2 - z2^2*x1)^3 */
1039     felem_mul(tmp, ftmp4, ftmp5);
1040     /* tmp[i] < 4 * 2^57 * 2^57 = 2^116 */
1041
1042     /* tmp2 = (z1^3*y2 - z2^3*y1)^2 */
1043     felem_square(tmp2, ftmp3);
1044     /* tmp2[i] < 4 * 2^57 * 2^57 < 2^116 */
1045
1046     /* tmp2 = (z1^3*y2 - z2^3*y1)^2 - (z1^2*x2 - z2^2*x1)^3 */
1047     felem_diff_128_64(tmp2, ftmp5);
1048     /* tmp2[i] < 2^116 + 2^64 + 8 < 2^117 */
1049
1050     /* ftmp5 = 2*z2^2*x1*(z1^2*x2 - z2^2*x1)^2 */
1051     felem_assign(ftmp5, ftmp2);
1052     felem_scalar(ftmp5, 2);
1053     /* ftmp5[i] < 2 * 2^57 = 2^58 */
1054
1055     /*-
1056      * x_out = (z1^3*y2 - z2^3*y1)^2 - (z1^2*x2 - z2^2*x1)^3 -
1057      *  2*z2^2*x1*(z1^2*x2 - z2^2*x1)^2
1058      */
1059     felem_diff_128_64(tmp2, ftmp5);
1060     /* tmp2[i] < 2^117 + 2^64 + 8 < 2^118 */
1061     felem_reduce(x_out, tmp2);
1062
1063     /* ftmp2 = z2^2*x1*(z1^2*x2 - z2^2*x1)^2 - x_out */
1064     felem_diff(ftmp2, x_out);
1065     /* ftmp2[i] < 2^57 + 2^58 + 2 < 2^59 */
1066
1067     /*
1068      * tmp2 = (z1^3*y2 - z2^3*y1)*(z2^2*x1*(z1^2*x2 - z2^2*x1)^2 - x_out)
1069      */
1070     felem_mul(tmp2, ftmp3, ftmp2);
1071     /* tmp2[i] < 4 * 2^57 * 2^59 = 2^118 */
1072
1073     /*-
1074      * y_out = (z1^3*y2 - z2^3*y1)*(z2^2*x1*(z1^2*x2 - z2^2*x1)^2 - x_out) -
1075      *  z2^3*y1*(z1^2*x2 - z2^2*x1)^3
1076      */
1077     widefelem_diff(tmp2, tmp);
1078     /* tmp2[i] < 2^118 + 2^120 < 2^121 */
1079     felem_reduce(y_out, tmp2);
1080
1081     /*
1082      * the result (x_out, y_out, z_out) is incorrect if one of the inputs is
1083      * the point at infinity, so we need to check for this separately
1084      */
1085
1086     /*
1087      * if point 1 is at infinity, copy point 2 to output, and vice versa
1088      */
1089     copy_conditional(x_out, x2, z1_is_zero);
1090     copy_conditional(x_out, x1, z2_is_zero);
1091     copy_conditional(y_out, y2, z1_is_zero);
1092     copy_conditional(y_out, y1, z2_is_zero);
1093     copy_conditional(z_out, z2, z1_is_zero);
1094     copy_conditional(z_out, z1, z2_is_zero);
1095     felem_assign(x3, x_out);
1096     felem_assign(y3, y_out);
1097     felem_assign(z3, z_out);
1098 }
1099
1100 /*
1101  * select_point selects the |idx|th point from a precomputation table and
1102  * copies it to out.
1103  * The pre_comp array argument should be size of |size| argument
1104  */
1105 static void select_point(const u64 idx, unsigned int size,
1106                          const felem pre_comp[][3], felem out[3])
1107 {
1108     unsigned i, j;
1109     limb *outlimbs = &out[0][0];
1110
1111     memset(out, 0, sizeof(*out) * 3);
1112     for (i = 0; i < size; i++) {
1113         const limb *inlimbs = &pre_comp[i][0][0];
1114         u64 mask = i ^ idx;
1115         mask |= mask >> 4;
1116         mask |= mask >> 2;
1117         mask |= mask >> 1;
1118         mask &= 1;
1119         mask--;
1120         for (j = 0; j < 4 * 3; j++)
1121             outlimbs[j] |= inlimbs[j] & mask;
1122     }
1123 }
1124
1125 /* get_bit returns the |i|th bit in |in| */
1126 static char get_bit(const felem_bytearray in, unsigned i)
1127 {
1128     if (i >= 224)
1129         return 0;
1130     return (in[i >> 3] >> (i & 7)) & 1;
1131 }
1132
1133 /*
1134  * Interleaved point multiplication using precomputed point multiples: The
1135  * small point multiples 0*P, 1*P, ..., 16*P are in pre_comp[], the scalars
1136  * in scalars[]. If g_scalar is non-NULL, we also add this multiple of the
1137  * generator, using certain (large) precomputed multiples in g_pre_comp.
1138  * Output point (X, Y, Z) is stored in x_out, y_out, z_out
1139  */
1140 static void batch_mul(felem x_out, felem y_out, felem z_out,
1141                       const felem_bytearray scalars[],
1142                       const unsigned num_points, const u8 *g_scalar,
1143                       const int mixed, const felem pre_comp[][17][3],
1144                       const felem g_pre_comp[2][16][3])
1145 {
1146     int i, skip;
1147     unsigned num;
1148     unsigned gen_mul = (g_scalar != NULL);
1149     felem nq[3], tmp[4];
1150     u64 bits;
1151     u8 sign, digit;
1152
1153     /* set nq to the point at infinity */
1154     memset(nq, 0, sizeof(nq));
1155
1156     /*
1157      * Loop over all scalars msb-to-lsb, interleaving additions of multiples
1158      * of the generator (two in each of the last 28 rounds) and additions of
1159      * other points multiples (every 5th round).
1160      */
1161     skip = 1;                   /* save two point operations in the first
1162                                  * round */
1163     for (i = (num_points ? 220 : 27); i >= 0; --i) {
1164         /* double */
1165         if (!skip)
1166             point_double(nq[0], nq[1], nq[2], nq[0], nq[1], nq[2]);
1167
1168         /* add multiples of the generator */
1169         if (gen_mul && (i <= 27)) {
1170             /* first, look 28 bits upwards */
1171             bits = get_bit(g_scalar, i + 196) << 3;
1172             bits |= get_bit(g_scalar, i + 140) << 2;
1173             bits |= get_bit(g_scalar, i + 84) << 1;
1174             bits |= get_bit(g_scalar, i + 28);
1175             /* select the point to add, in constant time */
1176             select_point(bits, 16, g_pre_comp[1], tmp);
1177
1178             if (!skip) {
1179                 /* value 1 below is argument for "mixed" */
1180                 point_add(nq[0], nq[1], nq[2],
1181                           nq[0], nq[1], nq[2], 1, tmp[0], tmp[1], tmp[2]);
1182             } else {
1183                 memcpy(nq, tmp, 3 * sizeof(felem));
1184                 skip = 0;
1185             }
1186
1187             /* second, look at the current position */
1188             bits = get_bit(g_scalar, i + 168) << 3;
1189             bits |= get_bit(g_scalar, i + 112) << 2;
1190             bits |= get_bit(g_scalar, i + 56) << 1;
1191             bits |= get_bit(g_scalar, i);
1192             /* select the point to add, in constant time */
1193             select_point(bits, 16, g_pre_comp[0], tmp);
1194             point_add(nq[0], nq[1], nq[2],
1195                       nq[0], nq[1], nq[2],
1196                       1 /* mixed */ , tmp[0], tmp[1], tmp[2]);
1197         }
1198
1199         /* do other additions every 5 doublings */
1200         if (num_points && (i % 5 == 0)) {
1201             /* loop over all scalars */
1202             for (num = 0; num < num_points; ++num) {
1203                 bits = get_bit(scalars[num], i + 4) << 5;
1204                 bits |= get_bit(scalars[num], i + 3) << 4;
1205                 bits |= get_bit(scalars[num], i + 2) << 3;
1206                 bits |= get_bit(scalars[num], i + 1) << 2;
1207                 bits |= get_bit(scalars[num], i) << 1;
1208                 bits |= get_bit(scalars[num], i - 1);
1209                 ec_GFp_nistp_recode_scalar_bits(&sign, &digit, bits);
1210
1211                 /* select the point to add or subtract */
1212                 select_point(digit, 17, pre_comp[num], tmp);
1213                 felem_neg(tmp[3], tmp[1]); /* (X, -Y, Z) is the negative
1214                                             * point */
1215                 copy_conditional(tmp[1], tmp[3], sign);
1216
1217                 if (!skip) {
1218                     point_add(nq[0], nq[1], nq[2],
1219                               nq[0], nq[1], nq[2],
1220                               mixed, tmp[0], tmp[1], tmp[2]);
1221                 } else {
1222                     memcpy(nq, tmp, 3 * sizeof(felem));
1223                     skip = 0;
1224                 }
1225             }
1226         }
1227     }
1228     felem_assign(x_out, nq[0]);
1229     felem_assign(y_out, nq[1]);
1230     felem_assign(z_out, nq[2]);
1231 }
1232
1233 /******************************************************************************/
1234 /*
1235  * FUNCTIONS TO MANAGE PRECOMPUTATION
1236  */
1237
1238 static NISTP224_PRE_COMP *nistp224_pre_comp_new(void)
1239 {
1240     NISTP224_PRE_COMP *ret = OPENSSL_zalloc(sizeof(*ret));
1241
1242     if (!ret) {
1243         ECerr(EC_F_NISTP224_PRE_COMP_NEW, ERR_R_MALLOC_FAILURE);
1244         return ret;
1245     }
1246
1247     ret->references = 1;
1248
1249     ret->lock = CRYPTO_THREAD_lock_new();
1250     if (ret->lock == NULL) {
1251         ECerr(EC_F_NISTP224_PRE_COMP_NEW, ERR_R_MALLOC_FAILURE);
1252         OPENSSL_free(ret);
1253         return NULL;
1254     }
1255     return ret;
1256 }
1257
1258 NISTP224_PRE_COMP *EC_nistp224_pre_comp_dup(NISTP224_PRE_COMP *p)
1259 {
1260     int i;
1261     if (p != NULL)
1262         CRYPTO_UP_REF(&p->references, &i, p->lock);
1263     return p;
1264 }
1265
1266 void EC_nistp224_pre_comp_free(NISTP224_PRE_COMP *p)
1267 {
1268     int i;
1269
1270     if (p == NULL)
1271         return;
1272
1273     CRYPTO_DOWN_REF(&p->references, &i, p->lock);
1274     REF_PRINT_COUNT("EC_nistp224", x);
1275     if (i > 0)
1276         return;
1277     REF_ASSERT_ISNT(i < 0);
1278
1279     CRYPTO_THREAD_lock_free(p->lock);
1280     OPENSSL_free(p);
1281 }
1282
1283 /******************************************************************************/
1284 /*
1285  * OPENSSL EC_METHOD FUNCTIONS
1286  */
1287
1288 int ec_GFp_nistp224_group_init(EC_GROUP *group)
1289 {
1290     int ret;
1291     ret = ec_GFp_simple_group_init(group);
1292     group->a_is_minus3 = 1;
1293     return ret;
1294 }
1295
1296 int ec_GFp_nistp224_group_set_curve(EC_GROUP *group, const BIGNUM *p,
1297                                     const BIGNUM *a, const BIGNUM *b,
1298                                     BN_CTX *ctx)
1299 {
1300     int ret = 0;
1301     BIGNUM *curve_p, *curve_a, *curve_b;
1302 #ifndef FIPS_MODE
1303     BN_CTX *new_ctx = NULL;
1304
1305     if (ctx == NULL)
1306         ctx = new_ctx = BN_CTX_new();
1307 #endif
1308     if (ctx == NULL)
1309         return 0;
1310
1311     BN_CTX_start(ctx);
1312     curve_p = BN_CTX_get(ctx);
1313     curve_a = BN_CTX_get(ctx);
1314     curve_b = BN_CTX_get(ctx);
1315     if (curve_b == NULL)
1316         goto err;
1317     BN_bin2bn(nistp224_curve_params[0], sizeof(felem_bytearray), curve_p);
1318     BN_bin2bn(nistp224_curve_params[1], sizeof(felem_bytearray), curve_a);
1319     BN_bin2bn(nistp224_curve_params[2], sizeof(felem_bytearray), curve_b);
1320     if ((BN_cmp(curve_p, p)) || (BN_cmp(curve_a, a)) || (BN_cmp(curve_b, b))) {
1321         ECerr(EC_F_EC_GFP_NISTP224_GROUP_SET_CURVE,
1322               EC_R_WRONG_CURVE_PARAMETERS);
1323         goto err;
1324     }
1325     group->field_mod_func = BN_nist_mod_224;
1326     ret = ec_GFp_simple_group_set_curve(group, p, a, b, ctx);
1327  err:
1328     BN_CTX_end(ctx);
1329 #ifndef FIPS_MODE
1330     BN_CTX_free(new_ctx);
1331 #endif
1332     return ret;
1333 }
1334
1335 /*
1336  * Takes the Jacobian coordinates (X, Y, Z) of a point and returns (X', Y') =
1337  * (X/Z^2, Y/Z^3)
1338  */
1339 int ec_GFp_nistp224_point_get_affine_coordinates(const EC_GROUP *group,
1340                                                  const EC_POINT *point,
1341                                                  BIGNUM *x, BIGNUM *y,
1342                                                  BN_CTX *ctx)
1343 {
1344     felem z1, z2, x_in, y_in, x_out, y_out;
1345     widefelem tmp;
1346
1347     if (EC_POINT_is_at_infinity(group, point)) {
1348         ECerr(EC_F_EC_GFP_NISTP224_POINT_GET_AFFINE_COORDINATES,
1349               EC_R_POINT_AT_INFINITY);
1350         return 0;
1351     }
1352     if ((!BN_to_felem(x_in, point->X)) || (!BN_to_felem(y_in, point->Y)) ||
1353         (!BN_to_felem(z1, point->Z)))
1354         return 0;
1355     felem_inv(z2, z1);
1356     felem_square(tmp, z2);
1357     felem_reduce(z1, tmp);
1358     felem_mul(tmp, x_in, z1);
1359     felem_reduce(x_in, tmp);
1360     felem_contract(x_out, x_in);
1361     if (x != NULL) {
1362         if (!felem_to_BN(x, x_out)) {
1363             ECerr(EC_F_EC_GFP_NISTP224_POINT_GET_AFFINE_COORDINATES,
1364                   ERR_R_BN_LIB);
1365             return 0;
1366         }
1367     }
1368     felem_mul(tmp, z1, z2);
1369     felem_reduce(z1, tmp);
1370     felem_mul(tmp, y_in, z1);
1371     felem_reduce(y_in, tmp);
1372     felem_contract(y_out, y_in);
1373     if (y != NULL) {
1374         if (!felem_to_BN(y, y_out)) {
1375             ECerr(EC_F_EC_GFP_NISTP224_POINT_GET_AFFINE_COORDINATES,
1376                   ERR_R_BN_LIB);
1377             return 0;
1378         }
1379     }
1380     return 1;
1381 }
1382
1383 static void make_points_affine(size_t num, felem points[ /* num */ ][3],
1384                                felem tmp_felems[ /* num+1 */ ])
1385 {
1386     /*
1387      * Runs in constant time, unless an input is the point at infinity (which
1388      * normally shouldn't happen).
1389      */
1390     ec_GFp_nistp_points_make_affine_internal(num,
1391                                              points,
1392                                              sizeof(felem),
1393                                              tmp_felems,
1394                                              (void (*)(void *))felem_one,
1395                                              felem_is_zero_int,
1396                                              (void (*)(void *, const void *))
1397                                              felem_assign,
1398                                              (void (*)(void *, const void *))
1399                                              felem_square_reduce, (void (*)
1400                                                                    (void *,
1401                                                                     const void
1402                                                                     *,
1403                                                                     const void
1404                                                                     *))
1405                                              felem_mul_reduce,
1406                                              (void (*)(void *, const void *))
1407                                              felem_inv,
1408                                              (void (*)(void *, const void *))
1409                                              felem_contract);
1410 }
1411
1412 /*
1413  * Computes scalar*generator + \sum scalars[i]*points[i], ignoring NULL
1414  * values Result is stored in r (r can equal one of the inputs).
1415  */
1416 int ec_GFp_nistp224_points_mul(const EC_GROUP *group, EC_POINT *r,
1417                                const BIGNUM *scalar, size_t num,
1418                                const EC_POINT *points[],
1419                                const BIGNUM *scalars[], BN_CTX *ctx)
1420 {
1421     int ret = 0;
1422     int j;
1423     unsigned i;
1424     int mixed = 0;
1425     BIGNUM *x, *y, *z, *tmp_scalar;
1426     felem_bytearray g_secret;
1427     felem_bytearray *secrets = NULL;
1428     felem (*pre_comp)[17][3] = NULL;
1429     felem *tmp_felems = NULL;
1430     int num_bytes;
1431     int have_pre_comp = 0;
1432     size_t num_points = num;
1433     felem x_in, y_in, z_in, x_out, y_out, z_out;
1434     NISTP224_PRE_COMP *pre = NULL;
1435     const felem(*g_pre_comp)[16][3] = NULL;
1436     EC_POINT *generator = NULL;
1437     const EC_POINT *p = NULL;
1438     const BIGNUM *p_scalar = NULL;
1439
1440     BN_CTX_start(ctx);
1441     x = BN_CTX_get(ctx);
1442     y = BN_CTX_get(ctx);
1443     z = BN_CTX_get(ctx);
1444     tmp_scalar = BN_CTX_get(ctx);
1445     if (tmp_scalar == NULL)
1446         goto err;
1447
1448     if (scalar != NULL) {
1449         pre = group->pre_comp.nistp224;
1450         if (pre)
1451             /* we have precomputation, try to use it */
1452             g_pre_comp = (const felem(*)[16][3])pre->g_pre_comp;
1453         else
1454             /* try to use the standard precomputation */
1455             g_pre_comp = &gmul[0];
1456         generator = EC_POINT_new(group);
1457         if (generator == NULL)
1458             goto err;
1459         /* get the generator from precomputation */
1460         if (!felem_to_BN(x, g_pre_comp[0][1][0]) ||
1461             !felem_to_BN(y, g_pre_comp[0][1][1]) ||
1462             !felem_to_BN(z, g_pre_comp[0][1][2])) {
1463             ECerr(EC_F_EC_GFP_NISTP224_POINTS_MUL, ERR_R_BN_LIB);
1464             goto err;
1465         }
1466         if (!ec_GFp_simple_set_Jprojective_coordinates_GFp(group, generator, x,
1467                                                            y, z, ctx))
1468             goto err;
1469         if (0 == EC_POINT_cmp(group, generator, group->generator, ctx))
1470             /* precomputation matches generator */
1471             have_pre_comp = 1;
1472         else
1473             /*
1474              * we don't have valid precomputation: treat the generator as a
1475              * random point
1476              */
1477             num_points = num_points + 1;
1478     }
1479
1480     if (num_points > 0) {
1481         if (num_points >= 3) {
1482             /*
1483              * unless we precompute multiples for just one or two points,
1484              * converting those into affine form is time well spent
1485              */
1486             mixed = 1;
1487         }
1488         secrets = OPENSSL_zalloc(sizeof(*secrets) * num_points);
1489         pre_comp = OPENSSL_zalloc(sizeof(*pre_comp) * num_points);
1490         if (mixed)
1491             tmp_felems =
1492                 OPENSSL_malloc(sizeof(felem) * (num_points * 17 + 1));
1493         if ((secrets == NULL) || (pre_comp == NULL)
1494             || (mixed && (tmp_felems == NULL))) {
1495             ECerr(EC_F_EC_GFP_NISTP224_POINTS_MUL, ERR_R_MALLOC_FAILURE);
1496             goto err;
1497         }
1498
1499         /*
1500          * we treat NULL scalars as 0, and NULL points as points at infinity,
1501          * i.e., they contribute nothing to the linear combination
1502          */
1503         for (i = 0; i < num_points; ++i) {
1504             if (i == num) {
1505                 /* the generator */
1506                 p = EC_GROUP_get0_generator(group);
1507                 p_scalar = scalar;
1508             } else {
1509                 /* the i^th point */
1510                 p = points[i];
1511                 p_scalar = scalars[i];
1512             }
1513             if ((p_scalar != NULL) && (p != NULL)) {
1514                 /* reduce scalar to 0 <= scalar < 2^224 */
1515                 if ((BN_num_bits(p_scalar) > 224)
1516                     || (BN_is_negative(p_scalar))) {
1517                     /*
1518                      * this is an unusual input, and we don't guarantee
1519                      * constant-timeness
1520                      */
1521                     if (!BN_nnmod(tmp_scalar, p_scalar, group->order, ctx)) {
1522                         ECerr(EC_F_EC_GFP_NISTP224_POINTS_MUL, ERR_R_BN_LIB);
1523                         goto err;
1524                     }
1525                     num_bytes = BN_bn2lebinpad(tmp_scalar,
1526                                                secrets[i], sizeof(secrets[i]));
1527                 } else {
1528                     num_bytes = BN_bn2lebinpad(p_scalar,
1529                                                secrets[i], sizeof(secrets[i]));
1530                 }
1531                 if (num_bytes < 0) {
1532                     ECerr(EC_F_EC_GFP_NISTP224_POINTS_MUL, ERR_R_BN_LIB);
1533                     goto err;
1534                 }
1535                 /* precompute multiples */
1536                 if ((!BN_to_felem(x_out, p->X)) ||
1537                     (!BN_to_felem(y_out, p->Y)) ||
1538                     (!BN_to_felem(z_out, p->Z)))
1539                     goto err;
1540                 felem_assign(pre_comp[i][1][0], x_out);
1541                 felem_assign(pre_comp[i][1][1], y_out);
1542                 felem_assign(pre_comp[i][1][2], z_out);
1543                 for (j = 2; j <= 16; ++j) {
1544                     if (j & 1) {
1545                         point_add(pre_comp[i][j][0], pre_comp[i][j][1],
1546                                   pre_comp[i][j][2], pre_comp[i][1][0],
1547                                   pre_comp[i][1][1], pre_comp[i][1][2], 0,
1548                                   pre_comp[i][j - 1][0],
1549                                   pre_comp[i][j - 1][1],
1550                                   pre_comp[i][j - 1][2]);
1551                     } else {
1552                         point_double(pre_comp[i][j][0], pre_comp[i][j][1],
1553                                      pre_comp[i][j][2], pre_comp[i][j / 2][0],
1554                                      pre_comp[i][j / 2][1],
1555                                      pre_comp[i][j / 2][2]);
1556                     }
1557                 }
1558             }
1559         }
1560         if (mixed)
1561             make_points_affine(num_points * 17, pre_comp[0], tmp_felems);
1562     }
1563
1564     /* the scalar for the generator */
1565     if ((scalar != NULL) && (have_pre_comp)) {
1566         memset(g_secret, 0, sizeof(g_secret));
1567         /* reduce scalar to 0 <= scalar < 2^224 */
1568         if ((BN_num_bits(scalar) > 224) || (BN_is_negative(scalar))) {
1569             /*
1570              * this is an unusual input, and we don't guarantee
1571              * constant-timeness
1572              */
1573             if (!BN_nnmod(tmp_scalar, scalar, group->order, ctx)) {
1574                 ECerr(EC_F_EC_GFP_NISTP224_POINTS_MUL, ERR_R_BN_LIB);
1575                 goto err;
1576             }
1577             num_bytes = BN_bn2lebinpad(tmp_scalar, g_secret, sizeof(g_secret));
1578         } else {
1579             num_bytes = BN_bn2lebinpad(scalar, g_secret, sizeof(g_secret));
1580         }
1581         /* do the multiplication with generator precomputation */
1582         batch_mul(x_out, y_out, z_out,
1583                   (const felem_bytearray(*))secrets, num_points,
1584                   g_secret,
1585                   mixed, (const felem(*)[17][3])pre_comp, g_pre_comp);
1586     } else {
1587         /* do the multiplication without generator precomputation */
1588         batch_mul(x_out, y_out, z_out,
1589                   (const felem_bytearray(*))secrets, num_points,
1590                   NULL, mixed, (const felem(*)[17][3])pre_comp, NULL);
1591     }
1592     /* reduce the output to its unique minimal representation */
1593     felem_contract(x_in, x_out);
1594     felem_contract(y_in, y_out);
1595     felem_contract(z_in, z_out);
1596     if ((!felem_to_BN(x, x_in)) || (!felem_to_BN(y, y_in)) ||
1597         (!felem_to_BN(z, z_in))) {
1598         ECerr(EC_F_EC_GFP_NISTP224_POINTS_MUL, ERR_R_BN_LIB);
1599         goto err;
1600     }
1601     ret = ec_GFp_simple_set_Jprojective_coordinates_GFp(group, r, x, y, z, ctx);
1602
1603  err:
1604     BN_CTX_end(ctx);
1605     EC_POINT_free(generator);
1606     OPENSSL_free(secrets);
1607     OPENSSL_free(pre_comp);
1608     OPENSSL_free(tmp_felems);
1609     return ret;
1610 }
1611
1612 int ec_GFp_nistp224_precompute_mult(EC_GROUP *group, BN_CTX *ctx)
1613 {
1614     int ret = 0;
1615     NISTP224_PRE_COMP *pre = NULL;
1616     int i, j;
1617     BIGNUM *x, *y;
1618     EC_POINT *generator = NULL;
1619     felem tmp_felems[32];
1620 #ifndef FIPS_MODE
1621     BN_CTX *new_ctx = NULL;
1622 #endif
1623
1624     /* throw away old precomputation */
1625     EC_pre_comp_free(group);
1626
1627 #ifndef FIPS_MODE
1628     if (ctx == NULL)
1629         ctx = new_ctx = BN_CTX_new();
1630 #endif
1631     if (ctx == NULL)
1632         return 0;
1633
1634     BN_CTX_start(ctx);
1635     x = BN_CTX_get(ctx);
1636     y = BN_CTX_get(ctx);
1637     if (y == NULL)
1638         goto err;
1639     /* get the generator */
1640     if (group->generator == NULL)
1641         goto err;
1642     generator = EC_POINT_new(group);
1643     if (generator == NULL)
1644         goto err;
1645     BN_bin2bn(nistp224_curve_params[3], sizeof(felem_bytearray), x);
1646     BN_bin2bn(nistp224_curve_params[4], sizeof(felem_bytearray), y);
1647     if (!EC_POINT_set_affine_coordinates(group, generator, x, y, ctx))
1648         goto err;
1649     if ((pre = nistp224_pre_comp_new()) == NULL)
1650         goto err;
1651     /*
1652      * if the generator is the standard one, use built-in precomputation
1653      */
1654     if (0 == EC_POINT_cmp(group, generator, group->generator, ctx)) {
1655         memcpy(pre->g_pre_comp, gmul, sizeof(pre->g_pre_comp));
1656         goto done;
1657     }
1658     if ((!BN_to_felem(pre->g_pre_comp[0][1][0], group->generator->X)) ||
1659         (!BN_to_felem(pre->g_pre_comp[0][1][1], group->generator->Y)) ||
1660         (!BN_to_felem(pre->g_pre_comp[0][1][2], group->generator->Z)))
1661         goto err;
1662     /*
1663      * compute 2^56*G, 2^112*G, 2^168*G for the first table, 2^28*G, 2^84*G,
1664      * 2^140*G, 2^196*G for the second one
1665      */
1666     for (i = 1; i <= 8; i <<= 1) {
1667         point_double(pre->g_pre_comp[1][i][0], pre->g_pre_comp[1][i][1],
1668                      pre->g_pre_comp[1][i][2], pre->g_pre_comp[0][i][0],
1669                      pre->g_pre_comp[0][i][1], pre->g_pre_comp[0][i][2]);
1670         for (j = 0; j < 27; ++j) {
1671             point_double(pre->g_pre_comp[1][i][0], pre->g_pre_comp[1][i][1],
1672                          pre->g_pre_comp[1][i][2], pre->g_pre_comp[1][i][0],
1673                          pre->g_pre_comp[1][i][1], pre->g_pre_comp[1][i][2]);
1674         }
1675         if (i == 8)
1676             break;
1677         point_double(pre->g_pre_comp[0][2 * i][0],
1678                      pre->g_pre_comp[0][2 * i][1],
1679                      pre->g_pre_comp[0][2 * i][2], pre->g_pre_comp[1][i][0],
1680                      pre->g_pre_comp[1][i][1], pre->g_pre_comp[1][i][2]);
1681         for (j = 0; j < 27; ++j) {
1682             point_double(pre->g_pre_comp[0][2 * i][0],
1683                          pre->g_pre_comp[0][2 * i][1],
1684                          pre->g_pre_comp[0][2 * i][2],
1685                          pre->g_pre_comp[0][2 * i][0],
1686                          pre->g_pre_comp[0][2 * i][1],
1687                          pre->g_pre_comp[0][2 * i][2]);
1688         }
1689     }
1690     for (i = 0; i < 2; i++) {
1691         /* g_pre_comp[i][0] is the point at infinity */
1692         memset(pre->g_pre_comp[i][0], 0, sizeof(pre->g_pre_comp[i][0]));
1693         /* the remaining multiples */
1694         /* 2^56*G + 2^112*G resp. 2^84*G + 2^140*G */
1695         point_add(pre->g_pre_comp[i][6][0], pre->g_pre_comp[i][6][1],
1696                   pre->g_pre_comp[i][6][2], pre->g_pre_comp[i][4][0],
1697                   pre->g_pre_comp[i][4][1], pre->g_pre_comp[i][4][2],
1698                   0, pre->g_pre_comp[i][2][0], pre->g_pre_comp[i][2][1],
1699                   pre->g_pre_comp[i][2][2]);
1700         /* 2^56*G + 2^168*G resp. 2^84*G + 2^196*G */
1701         point_add(pre->g_pre_comp[i][10][0], pre->g_pre_comp[i][10][1],
1702                   pre->g_pre_comp[i][10][2], pre->g_pre_comp[i][8][0],
1703                   pre->g_pre_comp[i][8][1], pre->g_pre_comp[i][8][2],
1704                   0, pre->g_pre_comp[i][2][0], pre->g_pre_comp[i][2][1],
1705                   pre->g_pre_comp[i][2][2]);
1706         /* 2^112*G + 2^168*G resp. 2^140*G + 2^196*G */
1707         point_add(pre->g_pre_comp[i][12][0], pre->g_pre_comp[i][12][1],
1708                   pre->g_pre_comp[i][12][2], pre->g_pre_comp[i][8][0],
1709                   pre->g_pre_comp[i][8][1], pre->g_pre_comp[i][8][2],
1710                   0, pre->g_pre_comp[i][4][0], pre->g_pre_comp[i][4][1],
1711                   pre->g_pre_comp[i][4][2]);
1712         /*
1713          * 2^56*G + 2^112*G + 2^168*G resp. 2^84*G + 2^140*G + 2^196*G
1714          */
1715         point_add(pre->g_pre_comp[i][14][0], pre->g_pre_comp[i][14][1],
1716                   pre->g_pre_comp[i][14][2], pre->g_pre_comp[i][12][0],
1717                   pre->g_pre_comp[i][12][1], pre->g_pre_comp[i][12][2],
1718                   0, pre->g_pre_comp[i][2][0], pre->g_pre_comp[i][2][1],
1719                   pre->g_pre_comp[i][2][2]);
1720         for (j = 1; j < 8; ++j) {
1721             /* odd multiples: add G resp. 2^28*G */
1722             point_add(pre->g_pre_comp[i][2 * j + 1][0],
1723                       pre->g_pre_comp[i][2 * j + 1][1],
1724                       pre->g_pre_comp[i][2 * j + 1][2],
1725                       pre->g_pre_comp[i][2 * j][0],
1726                       pre->g_pre_comp[i][2 * j][1],
1727                       pre->g_pre_comp[i][2 * j][2], 0,
1728                       pre->g_pre_comp[i][1][0], pre->g_pre_comp[i][1][1],
1729                       pre->g_pre_comp[i][1][2]);
1730         }
1731     }
1732     make_points_affine(31, &(pre->g_pre_comp[0][1]), tmp_felems);
1733
1734  done:
1735     SETPRECOMP(group, nistp224, pre);
1736     pre = NULL;
1737     ret = 1;
1738  err:
1739     BN_CTX_end(ctx);
1740     EC_POINT_free(generator);
1741 #ifndef FIPS_MODE
1742     BN_CTX_free(new_ctx);
1743 #endif
1744     EC_nistp224_pre_comp_free(pre);
1745     return ret;
1746 }
1747
1748 int ec_GFp_nistp224_have_precompute_mult(const EC_GROUP *group)
1749 {
1750     return HAVEPRECOMP(group, nistp224);
1751 }