crypto/ec/ecp_nistp224.c

   1 /* crypto/ec/ecp_nistp224.c */
   2 /*
   3  * Written by Emilia Kasper (Google) for the OpenSSL project.
   4  */
   5 /* Copyright 2011 Google Inc.
   6  *
   7  * Licensed under the Apache License, Version 2.0 (the "License");
   8  *
   9  * you may not use this file except in compliance with the License.
  10  * You may obtain a copy of the License at
  11  *
  12  *     http://www.apache.org/licenses/LICENSE-2.0
  13  *
  14  *  Unless required by applicable law or agreed to in writing, software
  15  *  distributed under the License is distributed on an "AS IS" BASIS,
  16  *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  17  *  See the License for the specific language governing permissions and
  18  *  limitations under the License.
  19  */
  20
  21 /*
  22  * A 64-bit implementation of the NIST P-224 elliptic curve point multiplication
  23  *
  24  * Inspired by Daniel J. Bernstein's public domain nistp224 implementation
  25  * and Adam Langley's public domain 64-bit C implementation of curve25519
  26  */
  27
  28 #include <openssl/opensslconf.h>
  29 #ifndef OPENSSL_NO_EC_NISTP_64_GCC_128
  30
  31 # include <stdint.h>
  32 # include <string.h>
  33 # include <openssl/err.h>
  34 # include "ec_lcl.h"
  35
  36 # if defined(__GNUC__) && (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 1))
  37   /* even with gcc, the typedef won't work for 32-bit platforms */
  38 typedef __uint128_t uint128_t;  /* nonstandard; implemented by gcc on 64-bit
  39                                  * platforms */
  40 # else
  41 #  error "Need GCC 3.1 or later to define type uint128_t"
  42 # endif
  43
  44 typedef uint8_t u8;
  45 typedef uint64_t u64;
  46 typedef int64_t s64;
  47
  48 /******************************************************************************/
  49 /*-
  50  * INTERNAL REPRESENTATION OF FIELD ELEMENTS
  51  *
  52  * Field elements are represented as a_0 + 2^56*a_1 + 2^112*a_2 + 2^168*a_3
  53  * using 64-bit coefficients called 'limbs',
  54  * and sometimes (for multiplication results) as
  55  * b_0 + 2^56*b_1 + 2^112*b_2 + 2^168*b_3 + 2^224*b_4 + 2^280*b_5 + 2^336*b_6
  56  * using 128-bit coefficients called 'widelimbs'.
  57  * A 4-limb representation is an 'felem';
  58  * a 7-widelimb representation is a 'widefelem'.
  59  * Even within felems, bits of adjacent limbs overlap, and we don't always
  60  * reduce the representations: we ensure that inputs to each felem
  61  * multiplication satisfy a_i < 2^60, so outputs satisfy b_i < 4*2^60*2^60,
  62  * and fit into a 128-bit word without overflow. The coefficients are then
  63  * again partially reduced to obtain an felem satisfying a_i < 2^57.
  64  * We only reduce to the unique minimal representation at the end of the
  65  * computation.
  66  */
  67
  68 typedef uint64_t limb;
  69 typedef uint128_t widelimb;
  70
  71 typedef limb felem[4];
  72 typedef widelimb widefelem[7];
  73
  74 /*
  75  * Field element represented as a byte arrary. 28*8 = 224 bits is also the
  76  * group order size for the elliptic curve, and we also use this type for
  77  * scalars for point multiplication.
  78  */
  79 typedef u8 felem_bytearray[28];
  80
  81 static const felem_bytearray nistp224_curve_params[5] = {
  82     {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, /* p */
  83      0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00,
  84      0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01},
  85     {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, /* a */
  86      0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFE, 0xFF, 0xFF, 0xFF, 0xFF,
  87      0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFE},
  88     {0xB4, 0x05, 0x0A, 0x85, 0x0C, 0x04, 0xB3, 0xAB, 0xF5, 0x41, /* b */
  89      0x32, 0x56, 0x50, 0x44, 0xB0, 0xB7, 0xD7, 0xBF, 0xD8, 0xBA,
  90      0x27, 0x0B, 0x39, 0x43, 0x23, 0x55, 0xFF, 0xB4},
  91     {0xB7, 0x0E, 0x0C, 0xBD, 0x6B, 0xB4, 0xBF, 0x7F, 0x32, 0x13, /* x */
  92      0x90, 0xB9, 0x4A, 0x03, 0xC1, 0xD3, 0x56, 0xC2, 0x11, 0x22,
  93      0x34, 0x32, 0x80, 0xD6, 0x11, 0x5C, 0x1D, 0x21},
  94     {0xbd, 0x37, 0x63, 0x88, 0xb5, 0xf7, 0x23, 0xfb, 0x4c, 0x22, /* y */
  95      0xdf, 0xe6, 0xcd, 0x43, 0x75, 0xa0, 0x5a, 0x07, 0x47, 0x64,
  96      0x44, 0xd5, 0x81, 0x99, 0x85, 0x00, 0x7e, 0x34}
  97 };
  98
  99 /*-
 100  * Precomputed multiples of the standard generator
 101  * Points are given in coordinates (X, Y, Z) where Z normally is 1
 102  * (0 for the point at infinity).
 103  * For each field element, slice a_0 is word 0, etc.
 104  *
 105  * The table has 2 * 16 elements, starting with the following:
 106  * index | bits    | point
 107  * ------+---------+------------------------------
 108  *     0 | 0 0 0 0 | 0G
 109  *     1 | 0 0 0 1 | 1G
 110  *     2 | 0 0 1 0 | 2^56G
 111  *     3 | 0 0 1 1 | (2^56 + 1)G
 112  *     4 | 0 1 0 0 | 2^112G
 113  *     5 | 0 1 0 1 | (2^112 + 1)G
 114  *     6 | 0 1 1 0 | (2^112 + 2^56)G
 115  *     7 | 0 1 1 1 | (2^112 + 2^56 + 1)G
 116  *     8 | 1 0 0 0 | 2^168G
 117  *     9 | 1 0 0 1 | (2^168 + 1)G
 118  *    10 | 1 0 1 0 | (2^168 + 2^56)G
 119  *    11 | 1 0 1 1 | (2^168 + 2^56 + 1)G
 120  *    12 | 1 1 0 0 | (2^168 + 2^112)G
 121  *    13 | 1 1 0 1 | (2^168 + 2^112 + 1)G
 122  *    14 | 1 1 1 0 | (2^168 + 2^112 + 2^56)G
 123  *    15 | 1 1 1 1 | (2^168 + 2^112 + 2^56 + 1)G
 124  * followed by a copy of this with each element multiplied by 2^28.
 125  *
 126  * The reason for this is so that we can clock bits into four different
 127  * locations when doing simple scalar multiplies against the base point,
 128  * and then another four locations using the second 16 elements.
 129  */
 130 static const felem gmul[2][16][3] = {
 131 {{{0, 0, 0, 0},
 132   {0, 0, 0, 0},
 133   {0, 0, 0, 0}},
 134  {{0x3280d6115c1d21, 0xc1d356c2112234, 0x7f321390b94a03, 0xb70e0cbd6bb4bf},
 135   {0xd5819985007e34, 0x75a05a07476444, 0xfb4c22dfe6cd43, 0xbd376388b5f723},
 136   {1, 0, 0, 0}},
 137  {{0xfd9675666ebbe9, 0xbca7664d40ce5e, 0x2242df8d8a2a43, 0x1f49bbb0f99bc5},
 138   {0x29e0b892dc9c43, 0xece8608436e662, 0xdc858f185310d0, 0x9812dd4eb8d321},
 139   {1, 0, 0, 0}},
 140  {{0x6d3e678d5d8eb8, 0x559eed1cb362f1, 0x16e9a3bbce8a3f, 0xeedcccd8c2a748},
 141   {0xf19f90ed50266d, 0xabf2b4bf65f9df, 0x313865468fafec, 0x5cb379ba910a17},
 142   {1, 0, 0, 0}},
 143  {{0x0641966cab26e3, 0x91fb2991fab0a0, 0xefec27a4e13a0b, 0x0499aa8a5f8ebe},
 144   {0x7510407766af5d, 0x84d929610d5450, 0x81d77aae82f706, 0x6916f6d4338c5b},
 145   {1, 0, 0, 0}},
 146  {{0xea95ac3b1f15c6, 0x086000905e82d4, 0xdd323ae4d1c8b1, 0x932b56be7685a3},
 147   {0x9ef93dea25dbbf, 0x41665960f390f0, 0xfdec76dbe2a8a7, 0x523e80f019062a},
 148   {1, 0, 0, 0}},
 149  {{0x822fdd26732c73, 0xa01c83531b5d0f, 0x363f37347c1ba4, 0xc391b45c84725c},
 150   {0xbbd5e1b2d6ad24, 0xddfbcde19dfaec, 0xc393da7e222a7f, 0x1efb7890ede244},
 151   {1, 0, 0, 0}},
 152  {{0x4c9e90ca217da1, 0xd11beca79159bb, 0xff8d33c2c98b7c, 0x2610b39409f849},
 153   {0x44d1352ac64da0, 0xcdbb7b2c46b4fb, 0x966c079b753c89, 0xfe67e4e820b112},
 154   {1, 0, 0, 0}},
 155  {{0xe28cae2df5312d, 0xc71b61d16f5c6e, 0x79b7619a3e7c4c, 0x05c73240899b47},
 156   {0x9f7f6382c73e3a, 0x18615165c56bda, 0x641fab2116fd56, 0x72855882b08394},
 157   {1, 0, 0, 0}},
 158  {{0x0469182f161c09, 0x74a98ca8d00fb5, 0xb89da93489a3e0, 0x41c98768fb0c1d},
 159   {0xe5ea05fb32da81, 0x3dce9ffbca6855, 0x1cfe2d3fbf59e6, 0x0e5e03408738a7},
 160   {1, 0, 0, 0}},
 161  {{0xdab22b2333e87f, 0x4430137a5dd2f6, 0xe03ab9f738beb8, 0xcb0c5d0dc34f24},
 162   {0x764a7df0c8fda5, 0x185ba5c3fa2044, 0x9281d688bcbe50, 0xc40331df893881},
 163   {1, 0, 0, 0}},
 164  {{0xb89530796f0f60, 0xade92bd26909a3, 0x1a0c83fb4884da, 0x1765bf22a5a984},
 165   {0x772a9ee75db09e, 0x23bc6c67cec16f, 0x4c1edba8b14e2f, 0xe2a215d9611369},
 166   {1, 0, 0, 0}},
 167  {{0x571e509fb5efb3, 0xade88696410552, 0xc8ae85fada74fe, 0x6c7e4be83bbde3},
 168   {0xff9f51160f4652, 0xb47ce2495a6539, 0xa2946c53b582f4, 0x286d2db3ee9a60},
 169   {1, 0, 0, 0}},
 170  {{0x40bbd5081a44af, 0x0995183b13926c, 0xbcefba6f47f6d0, 0x215619e9cc0057},
 171   {0x8bc94d3b0df45e, 0xf11c54a3694f6f, 0x8631b93cdfe8b5, 0xe7e3f4b0982db9},
 172   {1, 0, 0, 0}},
 173  {{0xb17048ab3e1c7b, 0xac38f36ff8a1d8, 0x1c29819435d2c6, 0xc813132f4c07e9},
 174   {0x2891425503b11f, 0x08781030579fea, 0xf5426ba5cc9674, 0x1e28ebf18562bc},
 175   {1, 0, 0, 0}},
 176  {{0x9f31997cc864eb, 0x06cd91d28b5e4c, 0xff17036691a973, 0xf1aef351497c58},
 177   {0xdd1f2d600564ff, 0xdead073b1402db, 0x74a684435bd693, 0xeea7471f962558},
 178   {1, 0, 0, 0}}},
 179 {{{0, 0, 0, 0},
 180   {0, 0, 0, 0},
 181   {0, 0, 0, 0}},
 182  {{0x9665266dddf554, 0x9613d78b60ef2d, 0xce27a34cdba417, 0xd35ab74d6afc31},
 183   {0x85ccdd22deb15e, 0x2137e5783a6aab, 0xa141cffd8c93c6, 0x355a1830e90f2d},
 184   {1, 0, 0, 0}},
 185  {{0x1a494eadaade65, 0xd6da4da77fe53c, 0xe7992996abec86, 0x65c3553c6090e3},
 186   {0xfa610b1fb09346, 0xf1c6540b8a4aaf, 0xc51a13ccd3cbab, 0x02995b1b18c28a},
 187   {1, 0, 0, 0}},
 188  {{0x7874568e7295ef, 0x86b419fbe38d04, 0xdc0690a7550d9a, 0xd3966a44beac33},
 189   {0x2b7280ec29132f, 0xbeaa3b6a032df3, 0xdc7dd88ae41200, 0xd25e2513e3a100},
 190   {1, 0, 0, 0}},
 191  {{0x924857eb2efafd, 0xac2bce41223190, 0x8edaa1445553fc, 0x825800fd3562d5},
 192   {0x8d79148ea96621, 0x23a01c3dd9ed8d, 0xaf8b219f9416b5, 0xd8db0cc277daea},
 193   {1, 0, 0, 0}},
 194  {{0x76a9c3b1a700f0, 0xe9acd29bc7e691, 0x69212d1a6b0327, 0x6322e97fe154be},
 195   {0x469fc5465d62aa, 0x8d41ed18883b05, 0x1f8eae66c52b88, 0xe4fcbe9325be51},
 196   {1, 0, 0, 0}},
 197  {{0x825fdf583cac16, 0x020b857c7b023a, 0x683c17744b0165, 0x14ffd0a2daf2f1},
 198   {0x323b36184218f9, 0x4944ec4e3b47d4, 0xc15b3080841acf, 0x0bced4b01a28bb},
 199   {1, 0, 0, 0}},
 200  {{0x92ac22230df5c4, 0x52f33b4063eda8, 0xcb3f19870c0c93, 0x40064f2ba65233},
 201   {0xfe16f0924f8992, 0x012da25af5b517, 0x1a57bb24f723a6, 0x06f8bc76760def},
 202   {1, 0, 0, 0}},
 203  {{0x4a7084f7817cb9, 0xbcab0738ee9a78, 0x3ec11e11d9c326, 0xdc0fe90e0f1aae},
 204   {0xcf639ea5f98390, 0x5c350aa22ffb74, 0x9afae98a4047b7, 0x956ec2d617fc45},
 205   {1, 0, 0, 0}},
 206  {{0x4306d648c1be6a, 0x9247cd8bc9a462, 0xf5595e377d2f2e, 0xbd1c3caff1a52e},
 207   {0x045e14472409d0, 0x29f3e17078f773, 0x745a602b2d4f7d, 0x191837685cdfbb},
 208   {1, 0, 0, 0}},
 209  {{0x5b6ee254a8cb79, 0x4953433f5e7026, 0xe21faeb1d1def4, 0xc4c225785c09de},
 210   {0x307ce7bba1e518, 0x31b125b1036db8, 0x47e91868839e8f, 0xc765866e33b9f3},
 211   {1, 0, 0, 0}},
 212  {{0x3bfece24f96906, 0x4794da641e5093, 0xde5df64f95db26, 0x297ecd89714b05},
 213   {0x701bd3ebb2c3aa, 0x7073b4f53cb1d5, 0x13c5665658af16, 0x9895089d66fe58},
 214   {1, 0, 0, 0}},
 215  {{0x0fef05f78c4790, 0x2d773633b05d2e, 0x94229c3a951c94, 0xbbbd70df4911bb},
 216   {0xb2c6963d2c1168, 0x105f47a72b0d73, 0x9fdf6111614080, 0x7b7e94b39e67b0},
 217   {1, 0, 0, 0}},
 218  {{0xad1a7d6efbe2b3, 0xf012482c0da69d, 0x6b3bdf12438345, 0x40d7558d7aa4d9},
 219   {0x8a09fffb5c6d3d, 0x9a356e5d9ffd38, 0x5973f15f4f9b1c, 0xdcd5f59f63c3ea},
 220   {1, 0, 0, 0}},
 221  {{0xacf39f4c5ca7ab, 0x4c8071cc5fd737, 0xc64e3602cd1184, 0x0acd4644c9abba},
 222   {0x6c011a36d8bf6e, 0xfecd87ba24e32a, 0x19f6f56574fad8, 0x050b204ced9405},
 223   {1, 0, 0, 0}},
 224  {{0xed4f1cae7d9a96, 0x5ceef7ad94c40a, 0x778e4a3bf3ef9b, 0x7405783dc3b55e},
 225   {0x32477c61b6e8c6, 0xb46a97570f018b, 0x91176d0a7e95d1, 0x3df90fbc4c7d0e},
 226   {1, 0, 0, 0}}}
 227 };
 228
 229 /* Precomputation for the group generator. */
 230 struct nistp224_pre_comp_st {
 231     felem g_pre_comp[2][16][3];
 232     int references;
 233 };
 234
 235 const EC_METHOD *EC_GFp_nistp224_method(void)
 236 {
 237     static const EC_METHOD ret = {
 238         EC_FLAGS_DEFAULT_OCT,
 239         NID_X9_62_prime_field,
 240         ec_GFp_nistp224_group_init,
 241         ec_GFp_simple_group_finish,
 242         ec_GFp_simple_group_clear_finish,
 243         ec_GFp_nist_group_copy,
 244         ec_GFp_nistp224_group_set_curve,
 245         ec_GFp_simple_group_get_curve,
 246         ec_GFp_simple_group_get_degree,
 247         ec_GFp_simple_group_check_discriminant,
 248         ec_GFp_simple_point_init,
 249         ec_GFp_simple_point_finish,
 250         ec_GFp_simple_point_clear_finish,
 251         ec_GFp_simple_point_copy,
 252         ec_GFp_simple_point_set_to_infinity,
 253         ec_GFp_simple_set_Jprojective_coordinates_GFp,
 254         ec_GFp_simple_get_Jprojective_coordinates_GFp,
 255         ec_GFp_simple_point_set_affine_coordinates,
 256         ec_GFp_nistp224_point_get_affine_coordinates,
 257         0 /* point_set_compressed_coordinates */ ,
 258         0 /* point2oct */ ,
 259         0 /* oct2point */ ,
 260         ec_GFp_simple_add,
 261         ec_GFp_simple_dbl,
 262         ec_GFp_simple_invert,
 263         ec_GFp_simple_is_at_infinity,
 264         ec_GFp_simple_is_on_curve,
 265         ec_GFp_simple_cmp,
 266         ec_GFp_simple_make_affine,
 267         ec_GFp_simple_points_make_affine,
 268         ec_GFp_nistp224_points_mul,
 269         ec_GFp_nistp224_precompute_mult,
 270         ec_GFp_nistp224_have_precompute_mult,
 271         ec_GFp_nist_field_mul,
 272         ec_GFp_nist_field_sqr,
 273         0 /* field_div */ ,
 274         0 /* field_encode */ ,
 275         0 /* field_decode */ ,
 276         0                       /* field_set_to_one */
 277     };
 278
 279     return &ret;
 280 }
 281
 282 /*
 283  * Helper functions to convert field elements to/from internal representation
 284  */
 285 static void bin28_to_felem(felem out, const u8 in[28])
 286 {
 287     out[0] = *((const uint64_t *)(in)) & 0x00ffffffffffffff;
 288     out[1] = (*((const uint64_t *)(in + 7))) & 0x00ffffffffffffff;
 289     out[2] = (*((const uint64_t *)(in + 14))) & 0x00ffffffffffffff;
 290     out[3] = (*((const uint64_t *)(in+20))) >> 8;
 291 }
 292
 293 static void felem_to_bin28(u8 out[28], const felem in)
 294 {
 295     unsigned i;
 296     for (i = 0; i < 7; ++i) {
 297         out[i] = in[0] >> (8 * i);
 298         out[i + 7] = in[1] >> (8 * i);
 299         out[i + 14] = in[2] >> (8 * i);
 300         out[i + 21] = in[3] >> (8 * i);
 301     }
 302 }
 303
 304 /* To preserve endianness when using BN_bn2bin and BN_bin2bn */
 305 static void flip_endian(u8 *out, const u8 *in, unsigned len)
 306 {
 307     unsigned i;
 308     for (i = 0; i < len; ++i)
 309         out[i] = in[len - 1 - i];
 310 }
 311
 312 /* From OpenSSL BIGNUM to internal representation */
 313 static int BN_to_felem(felem out, const BIGNUM *bn)
 314 {
 315     felem_bytearray b_in;
 316     felem_bytearray b_out;
 317     unsigned num_bytes;
 318
 319     /* BN_bn2bin eats leading zeroes */
 320     memset(b_out, 0, sizeof(b_out));
 321     num_bytes = BN_num_bytes(bn);
 322     if (num_bytes > sizeof b_out) {
 323         ECerr(EC_F_BN_TO_FELEM, EC_R_BIGNUM_OUT_OF_RANGE);
 324         return 0;
 325     }
 326     if (BN_is_negative(bn)) {
 327         ECerr(EC_F_BN_TO_FELEM, EC_R_BIGNUM_OUT_OF_RANGE);
 328         return 0;
 329     }
 330     num_bytes = BN_bn2bin(bn, b_in);
 331     flip_endian(b_out, b_in, num_bytes);
 332     bin28_to_felem(out, b_out);
 333     return 1;
 334 }
 335
 336 /* From internal representation to OpenSSL BIGNUM */
 337 static BIGNUM *felem_to_BN(BIGNUM *out, const felem in)
 338 {
 339     felem_bytearray b_in, b_out;
 340     felem_to_bin28(b_in, in);
 341     flip_endian(b_out, b_in, sizeof b_out);
 342     return BN_bin2bn(b_out, sizeof b_out, out);
 343 }
 344
 345 /******************************************************************************/
 346 /*-
 347  *                              FIELD OPERATIONS
 348  *
 349  * Field operations, using the internal representation of field elements.
 350  * NB! These operations are specific to our point multiplication and cannot be
 351  * expected to be correct in general - e.g., multiplication with a large scalar
 352  * will cause an overflow.
 353  *
 354  */
 355
 356 static void felem_one(felem out)
 357 {
 358     out[0] = 1;
 359     out[1] = 0;
 360     out[2] = 0;
 361     out[3] = 0;
 362 }
 363
 364 static void felem_assign(felem out, const felem in)
 365 {
 366     out[0] = in[0];
 367     out[1] = in[1];
 368     out[2] = in[2];
 369     out[3] = in[3];
 370 }
 371
 372 /* Sum two field elements: out += in */
 373 static void felem_sum(felem out, const felem in)
 374 {
 375     out[0] += in[0];
 376     out[1] += in[1];
 377     out[2] += in[2];
 378     out[3] += in[3];
 379 }
 380
 381 /* Get negative value: out = -in */
 382 /* Assumes in[i] < 2^57 */
 383 static void felem_neg(felem out, const felem in)
 384 {
 385     static const limb two58p2 = (((limb) 1) << 58) + (((limb) 1) << 2);
 386     static const limb two58m2 = (((limb) 1) << 58) - (((limb) 1) << 2);
 387     static const limb two58m42m2 = (((limb) 1) << 58) -
 388         (((limb) 1) << 42) - (((limb) 1) << 2);
 389
 390     /* Set to 0 mod 2^224-2^96+1 to ensure out > in */
 391     out[0] = two58p2 - in[0];
 392     out[1] = two58m42m2 - in[1];
 393     out[2] = two58m2 - in[2];
 394     out[3] = two58m2 - in[3];
 395 }
 396
 397 /* Subtract field elements: out -= in */
 398 /* Assumes in[i] < 2^57 */
 399 static void felem_diff(felem out, const felem in)
 400 {
 401     static const limb two58p2 = (((limb) 1) << 58) + (((limb) 1) << 2);
 402     static const limb two58m2 = (((limb) 1) << 58) - (((limb) 1) << 2);
 403     static const limb two58m42m2 = (((limb) 1) << 58) -
 404         (((limb) 1) << 42) - (((limb) 1) << 2);
 405
 406     /* Add 0 mod 2^224-2^96+1 to ensure out > in */
 407     out[0] += two58p2;
 408     out[1] += two58m42m2;
 409     out[2] += two58m2;
 410     out[3] += two58m2;
 411
 412     out[0] -= in[0];
 413     out[1] -= in[1];
 414     out[2] -= in[2];
 415     out[3] -= in[3];
 416 }
 417
 418 /* Subtract in unreduced 128-bit mode: out -= in */
 419 /* Assumes in[i] < 2^119 */
 420 static void widefelem_diff(widefelem out, const widefelem in)
 421 {
 422     static const widelimb two120 = ((widelimb) 1) << 120;
 423     static const widelimb two120m64 = (((widelimb) 1) << 120) -
 424         (((widelimb) 1) << 64);
 425     static const widelimb two120m104m64 = (((widelimb) 1) << 120) -
 426         (((widelimb) 1) << 104) - (((widelimb) 1) << 64);
 427
 428     /* Add 0 mod 2^224-2^96+1 to ensure out > in */
 429     out[0] += two120;
 430     out[1] += two120m64;
 431     out[2] += two120m64;
 432     out[3] += two120;
 433     out[4] += two120m104m64;
 434     out[5] += two120m64;
 435     out[6] += two120m64;
 436
 437     out[0] -= in[0];
 438     out[1] -= in[1];
 439     out[2] -= in[2];
 440     out[3] -= in[3];
 441     out[4] -= in[4];
 442     out[5] -= in[5];
 443     out[6] -= in[6];
 444 }
 445
 446 /* Subtract in mixed mode: out128 -= in64 */
 447 /* in[i] < 2^63 */
 448 static void felem_diff_128_64(widefelem out, const felem in)
 449 {
 450     static const widelimb two64p8 = (((widelimb) 1) << 64) +
 451         (((widelimb) 1) << 8);
 452     static const widelimb two64m8 = (((widelimb) 1) << 64) -
 453         (((widelimb) 1) << 8);
 454     static const widelimb two64m48m8 = (((widelimb) 1) << 64) -
 455         (((widelimb) 1) << 48) - (((widelimb) 1) << 8);
 456
 457     /* Add 0 mod 2^224-2^96+1 to ensure out > in */
 458     out[0] += two64p8;
 459     out[1] += two64m48m8;
 460     out[2] += two64m8;
 461     out[3] += two64m8;
 462
 463     out[0] -= in[0];
 464     out[1] -= in[1];
 465     out[2] -= in[2];
 466     out[3] -= in[3];
 467 }
 468
 469 /*
 470  * Multiply a field element by a scalar: out = out * scalar The scalars we
 471  * actually use are small, so results fit without overflow
 472  */
 473 static void felem_scalar(felem out, const limb scalar)
 474 {
 475     out[0] *= scalar;
 476     out[1] *= scalar;
 477     out[2] *= scalar;
 478     out[3] *= scalar;
 479 }
 480
 481 /*
 482  * Multiply an unreduced field element by a scalar: out = out * scalar The
 483  * scalars we actually use are small, so results fit without overflow
 484  */
 485 static void widefelem_scalar(widefelem out, const widelimb scalar)
 486 {
 487     out[0] *= scalar;
 488     out[1] *= scalar;
 489     out[2] *= scalar;
 490     out[3] *= scalar;
 491     out[4] *= scalar;
 492     out[5] *= scalar;
 493     out[6] *= scalar;
 494 }
 495
 496 /* Square a field element: out = in^2 */
 497 static void felem_square(widefelem out, const felem in)
 498 {
 499     limb tmp0, tmp1, tmp2;
 500     tmp0 = 2 * in[0];
 501     tmp1 = 2 * in[1];
 502     tmp2 = 2 * in[2];
 503     out[0] = ((widelimb) in[0]) * in[0];
 504     out[1] = ((widelimb) in[0]) * tmp1;
 505     out[2] = ((widelimb) in[0]) * tmp2 + ((widelimb) in[1]) * in[1];
 506     out[3] = ((widelimb) in[3]) * tmp0 + ((widelimb) in[1]) * tmp2;
 507     out[4] = ((widelimb) in[3]) * tmp1 + ((widelimb) in[2]) * in[2];
 508     out[5] = ((widelimb) in[3]) * tmp2;
 509     out[6] = ((widelimb) in[3]) * in[3];
 510 }
 511
 512 /* Multiply two field elements: out = in1 * in2 */
 513 static void felem_mul(widefelem out, const felem in1, const felem in2)
 514 {
 515     out[0] = ((widelimb) in1[0]) * in2[0];
 516     out[1] = ((widelimb) in1[0]) * in2[1] + ((widelimb) in1[1]) * in2[0];
 517     out[2] = ((widelimb) in1[0]) * in2[2] + ((widelimb) in1[1]) * in2[1] +
 518              ((widelimb) in1[2]) * in2[0];
 519     out[3] = ((widelimb) in1[0]) * in2[3] + ((widelimb) in1[1]) * in2[2] +
 520              ((widelimb) in1[2]) * in2[1] + ((widelimb) in1[3]) * in2[0];
 521     out[4] = ((widelimb) in1[1]) * in2[3] + ((widelimb) in1[2]) * in2[2] +
 522              ((widelimb) in1[3]) * in2[1];
 523     out[5] = ((widelimb) in1[2]) * in2[3] + ((widelimb) in1[3]) * in2[2];
 524     out[6] = ((widelimb) in1[3]) * in2[3];
 525 }
 526
 527 /*-
 528  * Reduce seven 128-bit coefficients to four 64-bit coefficients.
 529  * Requires in[i] < 2^126,
 530  * ensures out[0] < 2^56, out[1] < 2^56, out[2] < 2^56, out[3] <= 2^56 + 2^16 */
 531 static void felem_reduce(felem out, const widefelem in)
 532 {
 533     static const widelimb two127p15 = (((widelimb) 1) << 127) +
 534         (((widelimb) 1) << 15);
 535     static const widelimb two127m71 = (((widelimb) 1) << 127) -
 536         (((widelimb) 1) << 71);
 537     static const widelimb two127m71m55 = (((widelimb) 1) << 127) -
 538         (((widelimb) 1) << 71) - (((widelimb) 1) << 55);
 539     widelimb output[5];
 540
 541     /* Add 0 mod 2^224-2^96+1 to ensure all differences are positive */
 542     output[0] = in[0] + two127p15;
 543     output[1] = in[1] + two127m71m55;
 544     output[2] = in[2] + two127m71;
 545     output[3] = in[3];
 546     output[4] = in[4];
 547
 548     /* Eliminate in[4], in[5], in[6] */
 549     output[4] += in[6] >> 16;
 550     output[3] += (in[6] & 0xffff) << 40;
 551     output[2] -= in[6];
 552
 553     output[3] += in[5] >> 16;
 554     output[2] += (in[5] & 0xffff) << 40;
 555     output[1] -= in[5];
 556
 557     output[2] += output[4] >> 16;
 558     output[1] += (output[4] & 0xffff) << 40;
 559     output[0] -= output[4];
 560
 561     /* Carry 2 -> 3 -> 4 */
 562     output[3] += output[2] >> 56;
 563     output[2] &= 0x00ffffffffffffff;
 564
 565     output[4] = output[3] >> 56;
 566     output[3] &= 0x00ffffffffffffff;
 567
 568     /* Now output[2] < 2^56, output[3] < 2^56, output[4] < 2^72 */
 569
 570     /* Eliminate output[4] */
 571     output[2] += output[4] >> 16;
 572     /* output[2] < 2^56 + 2^56 = 2^57 */
 573     output[1] += (output[4] & 0xffff) << 40;
 574     output[0] -= output[4];
 575
 576     /* Carry 0 -> 1 -> 2 -> 3 */
 577     output[1] += output[0] >> 56;
 578     out[0] = output[0] & 0x00ffffffffffffff;
 579
 580     output[2] += output[1] >> 56;
 581     /* output[2] < 2^57 + 2^72 */
 582     out[1] = output[1] & 0x00ffffffffffffff;
 583     output[3] += output[2] >> 56;
 584     /* output[3] <= 2^56 + 2^16 */
 585     out[2] = output[2] & 0x00ffffffffffffff;
 586
 587     /*-
 588      * out[0] < 2^56, out[1] < 2^56, out[2] < 2^56,
 589      * out[3] <= 2^56 + 2^16 (due to final carry),
 590      * so out < 2*p
 591      */
 592     out[3] = output[3];
 593 }
 594
 595 static void felem_square_reduce(felem out, const felem in)
 596 {
 597     widefelem tmp;
 598     felem_square(tmp, in);
 599     felem_reduce(out, tmp);
 600 }
 601
 602 static void felem_mul_reduce(felem out, const felem in1, const felem in2)
 603 {
 604     widefelem tmp;
 605     felem_mul(tmp, in1, in2);
 606     felem_reduce(out, tmp);
 607 }
 608
 609 /*
 610  * Reduce to unique minimal representation. Requires 0 <= in < 2*p (always
 611  * call felem_reduce first)
 612  */
 613 static void felem_contract(felem out, const felem in)
 614 {
 615     static const int64_t two56 = ((limb) 1) << 56;
 616     /* 0 <= in < 2*p, p = 2^224 - 2^96 + 1 */
 617     /* if in > p , reduce in = in - 2^224 + 2^96 - 1 */
 618     int64_t tmp[4], a;
 619     tmp[0] = in[0];
 620     tmp[1] = in[1];
 621     tmp[2] = in[2];
 622     tmp[3] = in[3];
 623     /* Case 1: a = 1 iff in >= 2^224 */
 624     a = (in[3] >> 56);
 625     tmp[0] -= a;
 626     tmp[1] += a << 40;
 627     tmp[3] &= 0x00ffffffffffffff;
 628     /*
 629      * Case 2: a = 0 iff p <= in < 2^224, i.e., the high 128 bits are all 1
 630      * and the lower part is non-zero
 631      */
 632     a = ((in[3] & in[2] & (in[1] | 0x000000ffffffffff)) + 1) |
 633         (((int64_t) (in[0] + (in[1] & 0x000000ffffffffff)) - 1) >> 63);
 634     a &= 0x00ffffffffffffff;
 635     /* turn a into an all-one mask (if a = 0) or an all-zero mask */
 636     a = (a - 1) >> 63;
 637     /* subtract 2^224 - 2^96 + 1 if a is all-one */
 638     tmp[3] &= a ^ 0xffffffffffffffff;
 639     tmp[2] &= a ^ 0xffffffffffffffff;
 640     tmp[1] &= (a ^ 0xffffffffffffffff) | 0x000000ffffffffff;
 641     tmp[0] -= 1 & a;
 642
 643     /*
 644      * eliminate negative coefficients: if tmp[0] is negative, tmp[1] must be
 645      * non-zero, so we only need one step
 646      */
 647     a = tmp[0] >> 63;
 648     tmp[0] += two56 & a;
 649     tmp[1] -= 1 & a;
 650
 651     /* carry 1 -> 2 -> 3 */
 652     tmp[2] += tmp[1] >> 56;
 653     tmp[1] &= 0x00ffffffffffffff;
 654
 655     tmp[3] += tmp[2] >> 56;
 656     tmp[2] &= 0x00ffffffffffffff;
 657
 658     /* Now 0 <= out < p */
 659     out[0] = tmp[0];
 660     out[1] = tmp[1];
 661     out[2] = tmp[2];
 662     out[3] = tmp[3];
 663 }
 664
 665 /*
 666  * Zero-check: returns 1 if input is 0, and 0 otherwise. We know that field
 667  * elements are reduced to in < 2^225, so we only need to check three cases:
 668  * 0, 2^224 - 2^96 + 1, and 2^225 - 2^97 + 2
 669  */
 670 static limb felem_is_zero(const felem in)
 671 {
 672     limb zero, two224m96p1, two225m97p2;
 673
 674     zero = in[0] | in[1] | in[2] | in[3];
 675     zero = (((int64_t) (zero) - 1) >> 63) & 1;
 676     two224m96p1 = (in[0] ^ 1) | (in[1] ^ 0x00ffff0000000000)
 677         | (in[2] ^ 0x00ffffffffffffff) | (in[3] ^ 0x00ffffffffffffff);
 678     two224m96p1 = (((int64_t) (two224m96p1) - 1) >> 63) & 1;
 679     two225m97p2 = (in[0] ^ 2) | (in[1] ^ 0x00fffe0000000000)
 680         | (in[2] ^ 0x00ffffffffffffff) | (in[3] ^ 0x01ffffffffffffff);
 681     two225m97p2 = (((int64_t) (two225m97p2) - 1) >> 63) & 1;
 682     return (zero | two224m96p1 | two225m97p2);
 683 }
 684
 685 static limb felem_is_zero_int(const felem in)
 686 {
 687     return (int)(felem_is_zero(in) & ((limb) 1));
 688 }
 689
 690 /* Invert a field element */
 691 /* Computation chain copied from djb's code */
 692 static void felem_inv(felem out, const felem in)
 693 {
 694     felem ftmp, ftmp2, ftmp3, ftmp4;
 695     widefelem tmp;
 696     unsigned i;
 697
 698     felem_square(tmp, in);
 699     felem_reduce(ftmp, tmp);    /* 2 */
 700     felem_mul(tmp, in, ftmp);
 701     felem_reduce(ftmp, tmp);    /* 2^2 - 1 */
 702     felem_square(tmp, ftmp);
 703     felem_reduce(ftmp, tmp);    /* 2^3 - 2 */
 704     felem_mul(tmp, in, ftmp);
 705     felem_reduce(ftmp, tmp);    /* 2^3 - 1 */
 706     felem_square(tmp, ftmp);
 707     felem_reduce(ftmp2, tmp);   /* 2^4 - 2 */
 708     felem_square(tmp, ftmp2);
 709     felem_reduce(ftmp2, tmp);   /* 2^5 - 4 */
 710     felem_square(tmp, ftmp2);
 711     felem_reduce(ftmp2, tmp);   /* 2^6 - 8 */
 712     felem_mul(tmp, ftmp2, ftmp);
 713     felem_reduce(ftmp, tmp);    /* 2^6 - 1 */
 714     felem_square(tmp, ftmp);
 715     felem_reduce(ftmp2, tmp);   /* 2^7 - 2 */
 716     for (i = 0; i < 5; ++i) {   /* 2^12 - 2^6 */
 717         felem_square(tmp, ftmp2);
 718         felem_reduce(ftmp2, tmp);
 719     }
 720     felem_mul(tmp, ftmp2, ftmp);
 721     felem_reduce(ftmp2, tmp);   /* 2^12 - 1 */
 722     felem_square(tmp, ftmp2);
 723     felem_reduce(ftmp3, tmp);   /* 2^13 - 2 */
 724     for (i = 0; i < 11; ++i) {  /* 2^24 - 2^12 */
 725         felem_square(tmp, ftmp3);
 726         felem_reduce(ftmp3, tmp);
 727     }
 728     felem_mul(tmp, ftmp3, ftmp2);
 729     felem_reduce(ftmp2, tmp);   /* 2^24 - 1 */
 730     felem_square(tmp, ftmp2);
 731     felem_reduce(ftmp3, tmp);   /* 2^25 - 2 */
 732     for (i = 0; i < 23; ++i) {  /* 2^48 - 2^24 */
 733         felem_square(tmp, ftmp3);
 734         felem_reduce(ftmp3, tmp);
 735     }
 736     felem_mul(tmp, ftmp3, ftmp2);
 737     felem_reduce(ftmp3, tmp);   /* 2^48 - 1 */
 738     felem_square(tmp, ftmp3);
 739     felem_reduce(ftmp4, tmp);   /* 2^49 - 2 */
 740     for (i = 0; i < 47; ++i) {  /* 2^96 - 2^48 */
 741         felem_square(tmp, ftmp4);
 742         felem_reduce(ftmp4, tmp);
 743     }
 744     felem_mul(tmp, ftmp3, ftmp4);
 745     felem_reduce(ftmp3, tmp);   /* 2^96 - 1 */
 746     felem_square(tmp, ftmp3);
 747     felem_reduce(ftmp4, tmp);   /* 2^97 - 2 */
 748     for (i = 0; i < 23; ++i) {  /* 2^120 - 2^24 */
 749         felem_square(tmp, ftmp4);
 750         felem_reduce(ftmp4, tmp);
 751     }
 752     felem_mul(tmp, ftmp2, ftmp4);
 753     felem_reduce(ftmp2, tmp);   /* 2^120 - 1 */
 754     for (i = 0; i < 6; ++i) {   /* 2^126 - 2^6 */
 755         felem_square(tmp, ftmp2);
 756         felem_reduce(ftmp2, tmp);
 757     }
 758     felem_mul(tmp, ftmp2, ftmp);
 759     felem_reduce(ftmp, tmp);    /* 2^126 - 1 */
 760     felem_square(tmp, ftmp);
 761     felem_reduce(ftmp, tmp);    /* 2^127 - 2 */
 762     felem_mul(tmp, ftmp, in);
 763     felem_reduce(ftmp, tmp);    /* 2^127 - 1 */
 764     for (i = 0; i < 97; ++i) {  /* 2^224 - 2^97 */
 765         felem_square(tmp, ftmp);
 766         felem_reduce(ftmp, tmp);
 767     }
 768     felem_mul(tmp, ftmp, ftmp3);
 769     felem_reduce(out, tmp);     /* 2^224 - 2^96 - 1 */
 770 }
 771
 772 /*
 773  * Copy in constant time: if icopy == 1, copy in to out, if icopy == 0, copy
 774  * out to itself.
 775  */
 776 static void copy_conditional(felem out, const felem in, limb icopy)
 777 {
 778     unsigned i;
 779     /*
 780      * icopy is a (64-bit) 0 or 1, so copy is either all-zero or all-one
 781      */
 782     const limb copy = -icopy;
 783     for (i = 0; i < 4; ++i) {
 784         const limb tmp = copy & (in[i] ^ out[i]);
 785         out[i] ^= tmp;
 786     }
 787 }
 788
 789 /******************************************************************************/
 790 /*-
 791  *                       ELLIPTIC CURVE POINT OPERATIONS
 792  *
 793  * Points are represented in Jacobian projective coordinates:
 794  * (X, Y, Z) corresponds to the affine point (X/Z^2, Y/Z^3),
 795  * or to the point at infinity if Z == 0.
 796  *
 797  */
 798
 799 /*-
 800  * Double an elliptic curve point:
 801  * (X', Y', Z') = 2 * (X, Y, Z), where
 802  * X' = (3 * (X - Z^2) * (X + Z^2))^2 - 8 * X * Y^2
 803  * Y' = 3 * (X - Z^2) * (X + Z^2) * (4 * X * Y^2 - X') - 8 * Y^2
 804  * Z' = (Y + Z)^2 - Y^2 - Z^2 = 2 * Y * Z
 805  * Outputs can equal corresponding inputs, i.e., x_out == x_in is allowed,
 806  * while x_out == y_in is not (maybe this works, but it's not tested).
 807  */
 808 static void
 809 point_double(felem x_out, felem y_out, felem z_out,
 810              const felem x_in, const felem y_in, const felem z_in)
 811 {
 812     widefelem tmp, tmp2;
 813     felem delta, gamma, beta, alpha, ftmp, ftmp2;
 814
 815     felem_assign(ftmp, x_in);
 816     felem_assign(ftmp2, x_in);
 817
 818     /* delta = z^2 */
 819     felem_square(tmp, z_in);
 820     felem_reduce(delta, tmp);
 821
 822     /* gamma = y^2 */
 823     felem_square(tmp, y_in);
 824     felem_reduce(gamma, tmp);
 825
 826     /* beta = x*gamma */
 827     felem_mul(tmp, x_in, gamma);
 828     felem_reduce(beta, tmp);
 829
 830     /* alpha = 3*(x-delta)*(x+delta) */
 831     felem_diff(ftmp, delta);
 832     /* ftmp[i] < 2^57 + 2^58 + 2 < 2^59 */
 833     felem_sum(ftmp2, delta);
 834     /* ftmp2[i] < 2^57 + 2^57 = 2^58 */
 835     felem_scalar(ftmp2, 3);
 836     /* ftmp2[i] < 3 * 2^58 < 2^60 */
 837     felem_mul(tmp, ftmp, ftmp2);
 838     /* tmp[i] < 2^60 * 2^59 * 4 = 2^121 */
 839     felem_reduce(alpha, tmp);
 840
 841     /* x' = alpha^2 - 8*beta */
 842     felem_square(tmp, alpha);
 843     /* tmp[i] < 4 * 2^57 * 2^57 = 2^116 */
 844     felem_assign(ftmp, beta);
 845     felem_scalar(ftmp, 8);
 846     /* ftmp[i] < 8 * 2^57 = 2^60 */
 847     felem_diff_128_64(tmp, ftmp);
 848     /* tmp[i] < 2^116 + 2^64 + 8 < 2^117 */
 849     felem_reduce(x_out, tmp);
 850
 851     /* z' = (y + z)^2 - gamma - delta */
 852     felem_sum(delta, gamma);
 853     /* delta[i] < 2^57 + 2^57 = 2^58 */
 854     felem_assign(ftmp, y_in);
 855     felem_sum(ftmp, z_in);
 856     /* ftmp[i] < 2^57 + 2^57 = 2^58 */
 857     felem_square(tmp, ftmp);
 858     /* tmp[i] < 4 * 2^58 * 2^58 = 2^118 */
 859     felem_diff_128_64(tmp, delta);
 860     /* tmp[i] < 2^118 + 2^64 + 8 < 2^119 */
 861     felem_reduce(z_out, tmp);
 862
 863     /* y' = alpha*(4*beta - x') - 8*gamma^2 */
 864     felem_scalar(beta, 4);
 865     /* beta[i] < 4 * 2^57 = 2^59 */
 866     felem_diff(beta, x_out);
 867     /* beta[i] < 2^59 + 2^58 + 2 < 2^60 */
 868     felem_mul(tmp, alpha, beta);
 869     /* tmp[i] < 4 * 2^57 * 2^60 = 2^119 */
 870     felem_square(tmp2, gamma);
 871     /* tmp2[i] < 4 * 2^57 * 2^57 = 2^116 */
 872     widefelem_scalar(tmp2, 8);
 873     /* tmp2[i] < 8 * 2^116 = 2^119 */
 874     widefelem_diff(tmp, tmp2);
 875     /* tmp[i] < 2^119 + 2^120 < 2^121 */
 876     felem_reduce(y_out, tmp);
 877 }
 878
 879 /*-
 880  * Add two elliptic curve points:
 881  * (X_1, Y_1, Z_1) + (X_2, Y_2, Z_2) = (X_3, Y_3, Z_3), where
 882  * X_3 = (Z_1^3 * Y_2 - Z_2^3 * Y_1)^2 - (Z_1^2 * X_2 - Z_2^2 * X_1)^3 -
 883  * 2 * Z_2^2 * X_1 * (Z_1^2 * X_2 - Z_2^2 * X_1)^2
 884  * Y_3 = (Z_1^3 * Y_2 - Z_2^3 * Y_1) * (Z_2^2 * X_1 * (Z_1^2 * X_2 - Z_2^2 * X_1)^2 - X_3) -
 885  *        Z_2^3 * Y_1 * (Z_1^2 * X_2 - Z_2^2 * X_1)^3
 886  * Z_3 = (Z_1^2 * X_2 - Z_2^2 * X_1) * (Z_1 * Z_2)
 887  *
 888  * This runs faster if 'mixed' is set, which requires Z_2 = 1 or Z_2 = 0.
 889  */
 890
 891 /*
 892  * This function is not entirely constant-time: it includes a branch for
 893  * checking whether the two input points are equal, (while not equal to the
 894  * point at infinity). This case never happens during single point
 895  * multiplication, so there is no timing leak for ECDH or ECDSA signing.
 896  */
 897 static void point_add(felem x3, felem y3, felem z3,
 898                       const felem x1, const felem y1, const felem z1,
 899                       const int mixed, const felem x2, const felem y2,
 900                       const felem z2)
 901 {
 902     felem ftmp, ftmp2, ftmp3, ftmp4, ftmp5, x_out, y_out, z_out;
 903     widefelem tmp, tmp2;
 904     limb z1_is_zero, z2_is_zero, x_equal, y_equal;
 905
 906     if (!mixed) {
 907         /* ftmp2 = z2^2 */
 908         felem_square(tmp, z2);
 909         felem_reduce(ftmp2, tmp);
 910
 911         /* ftmp4 = z2^3 */
 912         felem_mul(tmp, ftmp2, z2);
 913         felem_reduce(ftmp4, tmp);
 914
 915         /* ftmp4 = z2^3*y1 */
 916         felem_mul(tmp2, ftmp4, y1);
 917         felem_reduce(ftmp4, tmp2);
 918
 919         /* ftmp2 = z2^2*x1 */
 920         felem_mul(tmp2, ftmp2, x1);
 921         felem_reduce(ftmp2, tmp2);
 922     } else {
 923         /*
 924          * We'll assume z2 = 1 (special case z2 = 0 is handled later)
 925          */
 926
 927         /* ftmp4 = z2^3*y1 */
 928         felem_assign(ftmp4, y1);
 929
 930         /* ftmp2 = z2^2*x1 */
 931         felem_assign(ftmp2, x1);
 932     }
 933
 934     /* ftmp = z1^2 */
 935     felem_square(tmp, z1);
 936     felem_reduce(ftmp, tmp);
 937
 938     /* ftmp3 = z1^3 */
 939     felem_mul(tmp, ftmp, z1);
 940     felem_reduce(ftmp3, tmp);
 941
 942     /* tmp = z1^3*y2 */
 943     felem_mul(tmp, ftmp3, y2);
 944     /* tmp[i] < 4 * 2^57 * 2^57 = 2^116 */
 945
 946     /* ftmp3 = z1^3*y2 - z2^3*y1 */
 947     felem_diff_128_64(tmp, ftmp4);
 948     /* tmp[i] < 2^116 + 2^64 + 8 < 2^117 */
 949     felem_reduce(ftmp3, tmp);
 950
 951     /* tmp = z1^2*x2 */
 952     felem_mul(tmp, ftmp, x2);
 953     /* tmp[i] < 4 * 2^57 * 2^57 = 2^116 */
 954
 955     /* ftmp = z1^2*x2 - z2^2*x1 */
 956     felem_diff_128_64(tmp, ftmp2);
 957     /* tmp[i] < 2^116 + 2^64 + 8 < 2^117 */
 958     felem_reduce(ftmp, tmp);
 959
 960     /*
 961      * the formulae are incorrect if the points are equal so we check for
 962      * this and do doubling if this happens
 963      */
 964     x_equal = felem_is_zero(ftmp);
 965     y_equal = felem_is_zero(ftmp3);
 966     z1_is_zero = felem_is_zero(z1);
 967     z2_is_zero = felem_is_zero(z2);
 968     /* In affine coordinates, (X_1, Y_1) == (X_2, Y_2) */
 969     if (x_equal && y_equal && !z1_is_zero && !z2_is_zero) {
 970         point_double(x3, y3, z3, x1, y1, z1);
 971         return;
 972     }
 973
 974     /* ftmp5 = z1*z2 */
 975     if (!mixed) {
 976         felem_mul(tmp, z1, z2);
 977         felem_reduce(ftmp5, tmp);
 978     } else {
 979         /* special case z2 = 0 is handled later */
 980         felem_assign(ftmp5, z1);
 981     }
 982
 983     /* z_out = (z1^2*x2 - z2^2*x1)*(z1*z2) */
 984     felem_mul(tmp, ftmp, ftmp5);
 985     felem_reduce(z_out, tmp);
 986
 987     /* ftmp = (z1^2*x2 - z2^2*x1)^2 */
 988     felem_assign(ftmp5, ftmp);
 989     felem_square(tmp, ftmp);
 990     felem_reduce(ftmp, tmp);
 991
 992     /* ftmp5 = (z1^2*x2 - z2^2*x1)^3 */
 993     felem_mul(tmp, ftmp, ftmp5);
 994     felem_reduce(ftmp5, tmp);
 995
 996     /* ftmp2 = z2^2*x1*(z1^2*x2 - z2^2*x1)^2 */
 997     felem_mul(tmp, ftmp2, ftmp);
 998     felem_reduce(ftmp2, tmp);
 999
1000     /* tmp = z2^3*y1*(z1^2*x2 - z2^2*x1)^3 */
1001     felem_mul(tmp, ftmp4, ftmp5);
1002     /* tmp[i] < 4 * 2^57 * 2^57 = 2^116 */
1003
1004     /* tmp2 = (z1^3*y2 - z2^3*y1)^2 */
1005     felem_square(tmp2, ftmp3);
1006     /* tmp2[i] < 4 * 2^57 * 2^57 < 2^116 */
1007
1008     /* tmp2 = (z1^3*y2 - z2^3*y1)^2 - (z1^2*x2 - z2^2*x1)^3 */
1009     felem_diff_128_64(tmp2, ftmp5);
1010     /* tmp2[i] < 2^116 + 2^64 + 8 < 2^117 */
1011
1012     /* ftmp5 = 2*z2^2*x1*(z1^2*x2 - z2^2*x1)^2 */
1013     felem_assign(ftmp5, ftmp2);
1014     felem_scalar(ftmp5, 2);
1015     /* ftmp5[i] < 2 * 2^57 = 2^58 */
1016
1017     /*-
1018      * x_out = (z1^3*y2 - z2^3*y1)^2 - (z1^2*x2 - z2^2*x1)^3 -
1019      *  2*z2^2*x1*(z1^2*x2 - z2^2*x1)^2
1020      */
1021     felem_diff_128_64(tmp2, ftmp5);
1022     /* tmp2[i] < 2^117 + 2^64 + 8 < 2^118 */
1023     felem_reduce(x_out, tmp2);
1024
1025     /* ftmp2 = z2^2*x1*(z1^2*x2 - z2^2*x1)^2 - x_out */
1026     felem_diff(ftmp2, x_out);
1027     /* ftmp2[i] < 2^57 + 2^58 + 2 < 2^59 */
1028
1029     /*
1030      * tmp2 = (z1^3*y2 - z2^3*y1)*(z2^2*x1*(z1^2*x2 - z2^2*x1)^2 - x_out)
1031      */
1032     felem_mul(tmp2, ftmp3, ftmp2);
1033     /* tmp2[i] < 4 * 2^57 * 2^59 = 2^118 */
1034
1035     /*-
1036      * y_out = (z1^3*y2 - z2^3*y1)*(z2^2*x1*(z1^2*x2 - z2^2*x1)^2 - x_out) -
1037      *  z2^3*y1*(z1^2*x2 - z2^2*x1)^3
1038      */
1039     widefelem_diff(tmp2, tmp);
1040     /* tmp2[i] < 2^118 + 2^120 < 2^121 */
1041     felem_reduce(y_out, tmp2);
1042
1043     /*
1044      * the result (x_out, y_out, z_out) is incorrect if one of the inputs is
1045      * the point at infinity, so we need to check for this separately
1046      */
1047
1048     /*
1049      * if point 1 is at infinity, copy point 2 to output, and vice versa
1050      */
1051     copy_conditional(x_out, x2, z1_is_zero);
1052     copy_conditional(x_out, x1, z2_is_zero);
1053     copy_conditional(y_out, y2, z1_is_zero);
1054     copy_conditional(y_out, y1, z2_is_zero);
1055     copy_conditional(z_out, z2, z1_is_zero);
1056     copy_conditional(z_out, z1, z2_is_zero);
1057     felem_assign(x3, x_out);
1058     felem_assign(y3, y_out);
1059     felem_assign(z3, z_out);
1060 }
1061
1062 /*
1063  * select_point selects the |idx|th point from a precomputation table and
1064  * copies it to out.
1065  * The pre_comp array argument should be size of |size| argument
1066  */
1067 static void select_point(const u64 idx, unsigned int size,
1068                          const felem pre_comp[][3], felem out[3])
1069 {
1070     unsigned i, j;
1071     limb *outlimbs = &out[0][0];
1072
1073     memset(out, 0, sizeof(*out) * 3);
1074     for (i = 0; i < size; i++) {
1075         const limb *inlimbs = &pre_comp[i][0][0];
1076         u64 mask = i ^ idx;
1077         mask |= mask >> 4;
1078         mask |= mask >> 2;
1079         mask |= mask >> 1;
1080         mask &= 1;
1081         mask--;
1082         for (j = 0; j < 4 * 3; j++)
1083             outlimbs[j] |= inlimbs[j] & mask;
1084     }
1085 }
1086
1087 /* get_bit returns the |i|th bit in |in| */
1088 static char get_bit(const felem_bytearray in, unsigned i)
1089 {
1090     if (i >= 224)
1091         return 0;
1092     return (in[i >> 3] >> (i & 7)) & 1;
1093 }
1094
1095 /*
1096  * Interleaved point multiplication using precomputed point multiples: The
1097  * small point multiples 0*P, 1*P, ..., 16*P are in pre_comp[], the scalars
1098  * in scalars[]. If g_scalar is non-NULL, we also add this multiple of the
1099  * generator, using certain (large) precomputed multiples in g_pre_comp.
1100  * Output point (X, Y, Z) is stored in x_out, y_out, z_out
1101  */
1102 static void batch_mul(felem x_out, felem y_out, felem z_out,
1103                       const felem_bytearray scalars[],
1104                       const unsigned num_points, const u8 *g_scalar,
1105                       const int mixed, const felem pre_comp[][17][3],
1106                       const felem g_pre_comp[2][16][3])
1107 {
1108     int i, skip;
1109     unsigned num;
1110     unsigned gen_mul = (g_scalar != NULL);
1111     felem nq[3], tmp[4];
1112     u64 bits;
1113     u8 sign, digit;
1114
1115     /* set nq to the point at infinity */
1116     memset(nq, 0, sizeof(nq));
1117
1118     /*
1119      * Loop over all scalars msb-to-lsb, interleaving additions of multiples
1120      * of the generator (two in each of the last 28 rounds) and additions of
1121      * other points multiples (every 5th round).
1122      */
1123     skip = 1;                   /* save two point operations in the first
1124                                  * round */
1125     for (i = (num_points ? 220 : 27); i >= 0; --i) {
1126         /* double */
1127         if (!skip)
1128             point_double(nq[0], nq[1], nq[2], nq[0], nq[1], nq[2]);
1129
1130         /* add multiples of the generator */
1131         if (gen_mul && (i <= 27)) {
1132             /* first, look 28 bits upwards */
1133             bits = get_bit(g_scalar, i + 196) << 3;
1134             bits |= get_bit(g_scalar, i + 140) << 2;
1135             bits |= get_bit(g_scalar, i + 84) << 1;
1136             bits |= get_bit(g_scalar, i + 28);
1137             /* select the point to add, in constant time */
1138             select_point(bits, 16, g_pre_comp[1], tmp);
1139
1140             if (!skip) {
1141                 /* value 1 below is argument for "mixed" */
1142                 point_add(nq[0], nq[1], nq[2],
1143                           nq[0], nq[1], nq[2], 1, tmp[0], tmp[1], tmp[2]);
1144             } else {
1145                 memcpy(nq, tmp, 3 * sizeof(felem));
1146                 skip = 0;
1147             }
1148
1149             /* second, look at the current position */
1150             bits = get_bit(g_scalar, i + 168) << 3;
1151             bits |= get_bit(g_scalar, i + 112) << 2;
1152             bits |= get_bit(g_scalar, i + 56) << 1;
1153             bits |= get_bit(g_scalar, i);
1154             /* select the point to add, in constant time */
1155             select_point(bits, 16, g_pre_comp[0], tmp);
1156             point_add(nq[0], nq[1], nq[2],
1157                       nq[0], nq[1], nq[2],
1158                       1 /* mixed */ , tmp[0], tmp[1], tmp[2]);
1159         }
1160
1161         /* do other additions every 5 doublings */
1162         if (num_points && (i % 5 == 0)) {
1163             /* loop over all scalars */
1164             for (num = 0; num < num_points; ++num) {
1165                 bits = get_bit(scalars[num], i + 4) << 5;
1166                 bits |= get_bit(scalars[num], i + 3) << 4;
1167                 bits |= get_bit(scalars[num], i + 2) << 3;
1168                 bits |= get_bit(scalars[num], i + 1) << 2;
1169                 bits |= get_bit(scalars[num], i) << 1;
1170                 bits |= get_bit(scalars[num], i - 1);
1171                 ec_GFp_nistp_recode_scalar_bits(&sign, &digit, bits);
1172
1173                 /* select the point to add or subtract */
1174                 select_point(digit, 17, pre_comp[num], tmp);
1175                 felem_neg(tmp[3], tmp[1]); /* (X, -Y, Z) is the negative
1176                                             * point */
1177                 copy_conditional(tmp[1], tmp[3], sign);
1178
1179                 if (!skip) {
1180                     point_add(nq[0], nq[1], nq[2],
1181                               nq[0], nq[1], nq[2],
1182                               mixed, tmp[0], tmp[1], tmp[2]);
1183                 } else {
1184                     memcpy(nq, tmp, 3 * sizeof(felem));
1185                     skip = 0;
1186                 }
1187             }
1188         }
1189     }
1190     felem_assign(x_out, nq[0]);
1191     felem_assign(y_out, nq[1]);
1192     felem_assign(z_out, nq[2]);
1193 }
1194
1195 /******************************************************************************/
1196 /*
1197  * FUNCTIONS TO MANAGE PRECOMPUTATION
1198  */
1199
1200 static NISTP224_PRE_COMP *nistp224_pre_comp_new()
1201 {
1202     NISTP224_PRE_COMP *ret = OPENSSL_zalloc(sizeof(*ret));
1203
1204     if (!ret) {
1205         ECerr(EC_F_NISTP224_PRE_COMP_NEW, ERR_R_MALLOC_FAILURE);
1206         return ret;
1207     }
1208     ret->references = 1;
1209     return ret;
1210 }
1211
1212 NISTP224_PRE_COMP *EC_nistp224_pre_comp_dup(NISTP224_PRE_COMP *p)
1213 {
1214     if (p != NULL)
1215         CRYPTO_add(&p->references, 1, CRYPTO_LOCK_EC_PRE_COMP);
1216     return p;
1217 }
1218
1219 void EC_nistp224_pre_comp_free(NISTP224_PRE_COMP *p)
1220 {
1221     if (p == NULL
1222         || CRYPTO_add(&p->references, -1, CRYPTO_LOCK_EC_PRE_COMP) > 0)
1223         return;
1224     OPENSSL_free(p);
1225 }
1226
1227 /******************************************************************************/
1228 /*
1229  * OPENSSL EC_METHOD FUNCTIONS
1230  */
1231
1232 int ec_GFp_nistp224_group_init(EC_GROUP *group)
1233 {
1234     int ret;
1235     ret = ec_GFp_simple_group_init(group);
1236     group->a_is_minus3 = 1;
1237     return ret;
1238 }
1239
1240 int ec_GFp_nistp224_group_set_curve(EC_GROUP *group, const BIGNUM *p,
1241                                     const BIGNUM *a, const BIGNUM *b,
1242                                     BN_CTX *ctx)
1243 {
1244     int ret = 0;
1245     BN_CTX *new_ctx = NULL;
1246     BIGNUM *curve_p, *curve_a, *curve_b;
1247
1248     if (ctx == NULL)
1249         if ((ctx = new_ctx = BN_CTX_new()) == NULL)
1250             return 0;
1251     BN_CTX_start(ctx);
1252     if (((curve_p = BN_CTX_get(ctx)) == NULL) ||
1253         ((curve_a = BN_CTX_get(ctx)) == NULL) ||
1254         ((curve_b = BN_CTX_get(ctx)) == NULL))
1255         goto err;
1256     BN_bin2bn(nistp224_curve_params[0], sizeof(felem_bytearray), curve_p);
1257     BN_bin2bn(nistp224_curve_params[1], sizeof(felem_bytearray), curve_a);
1258     BN_bin2bn(nistp224_curve_params[2], sizeof(felem_bytearray), curve_b);
1259     if ((BN_cmp(curve_p, p)) || (BN_cmp(curve_a, a)) || (BN_cmp(curve_b, b))) {
1260         ECerr(EC_F_EC_GFP_NISTP224_GROUP_SET_CURVE,
1261               EC_R_WRONG_CURVE_PARAMETERS);
1262         goto err;
1263     }
1264     group->field_mod_func = BN_nist_mod_224;
1265     ret = ec_GFp_simple_group_set_curve(group, p, a, b, ctx);
1266  err:
1267     BN_CTX_end(ctx);
1268     BN_CTX_free(new_ctx);
1269     return ret;
1270 }
1271
1272 /*
1273  * Takes the Jacobian coordinates (X, Y, Z) of a point and returns (X', Y') =
1274  * (X/Z^2, Y/Z^3)
1275  */
1276 int ec_GFp_nistp224_point_get_affine_coordinates(const EC_GROUP *group,
1277                                                  const EC_POINT *point,
1278                                                  BIGNUM *x, BIGNUM *y,
1279                                                  BN_CTX *ctx)
1280 {
1281     felem z1, z2, x_in, y_in, x_out, y_out;
1282     widefelem tmp;
1283
1284     if (EC_POINT_is_at_infinity(group, point)) {
1285         ECerr(EC_F_EC_GFP_NISTP224_POINT_GET_AFFINE_COORDINATES,
1286               EC_R_POINT_AT_INFINITY);
1287         return 0;
1288     }
1289     if ((!BN_to_felem(x_in, point->X)) || (!BN_to_felem(y_in, point->Y)) ||
1290         (!BN_to_felem(z1, point->Z)))
1291         return 0;
1292     felem_inv(z2, z1);
1293     felem_square(tmp, z2);
1294     felem_reduce(z1, tmp);
1295     felem_mul(tmp, x_in, z1);
1296     felem_reduce(x_in, tmp);
1297     felem_contract(x_out, x_in);
1298     if (x != NULL) {
1299         if (!felem_to_BN(x, x_out)) {
1300             ECerr(EC_F_EC_GFP_NISTP224_POINT_GET_AFFINE_COORDINATES,
1301                   ERR_R_BN_LIB);
1302             return 0;
1303         }
1304     }
1305     felem_mul(tmp, z1, z2);
1306     felem_reduce(z1, tmp);
1307     felem_mul(tmp, y_in, z1);
1308     felem_reduce(y_in, tmp);
1309     felem_contract(y_out, y_in);
1310     if (y != NULL) {
1311         if (!felem_to_BN(y, y_out)) {
1312             ECerr(EC_F_EC_GFP_NISTP224_POINT_GET_AFFINE_COORDINATES,
1313                   ERR_R_BN_LIB);
1314             return 0;
1315         }
1316     }
1317     return 1;
1318 }
1319
1320 static void make_points_affine(size_t num, felem points[ /* num */ ][3],
1321                                felem tmp_felems[ /* num+1 */ ])
1322 {
1323     /*
1324      * Runs in constant time, unless an input is the point at infinity (which
1325      * normally shouldn't happen).
1326      */
1327     ec_GFp_nistp_points_make_affine_internal(num,
1328                                              points,
1329                                              sizeof(felem),
1330                                              tmp_felems,
1331                                              (void (*)(void *))felem_one,
1332                                              (int (*)(const void *))
1333                                              felem_is_zero_int,
1334                                              (void (*)(void *, const void *))
1335                                              felem_assign,
1336                                              (void (*)(void *, const void *))
1337                                              felem_square_reduce, (void (*)
1338                                                                    (void *,
1339                                                                     const void
1340                                                                     *,
1341                                                                     const void
1342                                                                     *))
1343                                              felem_mul_reduce,
1344                                              (void (*)(void *, const void *))
1345                                              felem_inv,
1346                                              (void (*)(void *, const void *))
1347                                              felem_contract);
1348 }
1349
1350 /*
1351  * Computes scalar*generator + \sum scalars[i]*points[i], ignoring NULL
1352  * values Result is stored in r (r can equal one of the inputs).
1353  */
1354 int ec_GFp_nistp224_points_mul(const EC_GROUP *group, EC_POINT *r,
1355                                const BIGNUM *scalar, size_t num,
1356                                const EC_POINT *points[],
1357                                const BIGNUM *scalars[], BN_CTX *ctx)
1358 {
1359     int ret = 0;
1360     int j;
1361     unsigned i;
1362     int mixed = 0;
1363     BN_CTX *new_ctx = NULL;
1364     BIGNUM *x, *y, *z, *tmp_scalar;
1365     felem_bytearray g_secret;
1366     felem_bytearray *secrets = NULL;
1367     felem (*pre_comp)[17][3] = NULL;
1368     felem *tmp_felems = NULL;
1369     felem_bytearray tmp;
1370     unsigned num_bytes;
1371     int have_pre_comp = 0;
1372     size_t num_points = num;
1373     felem x_in, y_in, z_in, x_out, y_out, z_out;
1374     NISTP224_PRE_COMP *pre = NULL;
1375     const felem(*g_pre_comp)[16][3] = NULL;
1376     EC_POINT *generator = NULL;
1377     const EC_POINT *p = NULL;
1378     const BIGNUM *p_scalar = NULL;
1379
1380     if (ctx == NULL)
1381         if ((ctx = new_ctx = BN_CTX_new()) == NULL)
1382             return 0;
1383     BN_CTX_start(ctx);
1384     if (((x = BN_CTX_get(ctx)) == NULL) ||
1385         ((y = BN_CTX_get(ctx)) == NULL) ||
1386         ((z = BN_CTX_get(ctx)) == NULL) ||
1387         ((tmp_scalar = BN_CTX_get(ctx)) == NULL))
1388         goto err;
1389
1390     if (scalar != NULL) {
1391         pre = group->pre_comp.nistp224;
1392         if (pre)
1393             /* we have precomputation, try to use it */
1394             g_pre_comp = (const felem(*)[16][3])pre->g_pre_comp;
1395         else
1396             /* try to use the standard precomputation */
1397             g_pre_comp = &gmul[0];
1398         generator = EC_POINT_new(group);
1399         if (generator == NULL)
1400             goto err;
1401         /* get the generator from precomputation */
1402         if (!felem_to_BN(x, g_pre_comp[0][1][0]) ||
1403             !felem_to_BN(y, g_pre_comp[0][1][1]) ||
1404             !felem_to_BN(z, g_pre_comp[0][1][2])) {
1405             ECerr(EC_F_EC_GFP_NISTP224_POINTS_MUL, ERR_R_BN_LIB);
1406             goto err;
1407         }
1408         if (!EC_POINT_set_Jprojective_coordinates_GFp(group,
1409                                                       generator, x, y, z,
1410                                                       ctx))
1411             goto err;
1412         if (0 == EC_POINT_cmp(group, generator, group->generator, ctx))
1413             /* precomputation matches generator */
1414             have_pre_comp = 1;
1415         else
1416             /*
1417              * we don't have valid precomputation: treat the generator as a
1418              * random point
1419              */
1420             num_points = num_points + 1;
1421     }
1422
1423     if (num_points > 0) {
1424         if (num_points >= 3) {
1425             /*
1426              * unless we precompute multiples for just one or two points,
1427              * converting those into affine form is time well spent
1428              */
1429             mixed = 1;
1430         }
1431         secrets = OPENSSL_zalloc(sizeof(*secrets) * num_points);
1432         pre_comp = OPENSSL_zalloc(sizeof(*pre_comp) * num_points);
1433         if (mixed)
1434             tmp_felems =
1435                 OPENSSL_malloc(sizeof(felem) * (num_points * 17 + 1));
1436         if ((secrets == NULL) || (pre_comp == NULL)
1437             || (mixed && (tmp_felems == NULL))) {
1438             ECerr(EC_F_EC_GFP_NISTP224_POINTS_MUL, ERR_R_MALLOC_FAILURE);
1439             goto err;
1440         }
1441
1442         /*
1443          * we treat NULL scalars as 0, and NULL points as points at infinity,
1444          * i.e., they contribute nothing to the linear combination
1445          */
1446         for (i = 0; i < num_points; ++i) {
1447             if (i == num)
1448                 /* the generator */
1449             {
1450                 p = EC_GROUP_get0_generator(group);
1451                 p_scalar = scalar;
1452             } else
1453                 /* the i^th point */
1454             {
1455                 p = points[i];
1456                 p_scalar = scalars[i];
1457             }
1458             if ((p_scalar != NULL) && (p != NULL)) {
1459                 /* reduce scalar to 0 <= scalar < 2^224 */
1460                 if ((BN_num_bits(p_scalar) > 224)
1461                     || (BN_is_negative(p_scalar))) {
1462                     /*
1463                      * this is an unusual input, and we don't guarantee
1464                      * constant-timeness
1465                      */
1466                     if (!BN_nnmod(tmp_scalar, p_scalar, group->order, ctx)) {
1467                         ECerr(EC_F_EC_GFP_NISTP224_POINTS_MUL, ERR_R_BN_LIB);
1468                         goto err;
1469                     }
1470                     num_bytes = BN_bn2bin(tmp_scalar, tmp);
1471                 } else
1472                     num_bytes = BN_bn2bin(p_scalar, tmp);
1473                 flip_endian(secrets[i], tmp, num_bytes);
1474                 /* precompute multiples */
1475                 if ((!BN_to_felem(x_out, p->X)) ||
1476                     (!BN_to_felem(y_out, p->Y)) ||
1477                     (!BN_to_felem(z_out, p->Z)))
1478                     goto err;
1479                 felem_assign(pre_comp[i][1][0], x_out);
1480                 felem_assign(pre_comp[i][1][1], y_out);
1481                 felem_assign(pre_comp[i][1][2], z_out);
1482                 for (j = 2; j <= 16; ++j) {
1483                     if (j & 1) {
1484                         point_add(pre_comp[i][j][0], pre_comp[i][j][1],
1485                                   pre_comp[i][j][2], pre_comp[i][1][0],
1486                                   pre_comp[i][1][1], pre_comp[i][1][2], 0,
1487                                   pre_comp[i][j - 1][0],
1488                                   pre_comp[i][j - 1][1],
1489                                   pre_comp[i][j - 1][2]);
1490                     } else {
1491                         point_double(pre_comp[i][j][0], pre_comp[i][j][1],
1492                                      pre_comp[i][j][2], pre_comp[i][j / 2][0],
1493                                      pre_comp[i][j / 2][1],
1494                                      pre_comp[i][j / 2][2]);
1495                     }
1496                 }
1497             }
1498         }
1499         if (mixed)
1500             make_points_affine(num_points * 17, pre_comp[0], tmp_felems);
1501     }
1502
1503     /* the scalar for the generator */
1504     if ((scalar != NULL) && (have_pre_comp)) {
1505         memset(g_secret, 0, sizeof(g_secret));
1506         /* reduce scalar to 0 <= scalar < 2^224 */
1507         if ((BN_num_bits(scalar) > 224) || (BN_is_negative(scalar))) {
1508             /*
1509              * this is an unusual input, and we don't guarantee
1510              * constant-timeness
1511              */
1512             if (!BN_nnmod(tmp_scalar, scalar, group->order, ctx)) {
1513                 ECerr(EC_F_EC_GFP_NISTP224_POINTS_MUL, ERR_R_BN_LIB);
1514                 goto err;
1515             }
1516             num_bytes = BN_bn2bin(tmp_scalar, tmp);
1517         } else
1518             num_bytes = BN_bn2bin(scalar, tmp);
1519         flip_endian(g_secret, tmp, num_bytes);
1520         /* do the multiplication with generator precomputation */
1521         batch_mul(x_out, y_out, z_out,
1522                   (const felem_bytearray(*))secrets, num_points,
1523                   g_secret,
1524                   mixed, (const felem(*)[17][3])pre_comp, g_pre_comp);
1525     } else
1526         /* do the multiplication without generator precomputation */
1527         batch_mul(x_out, y_out, z_out,
1528                   (const felem_bytearray(*))secrets, num_points,
1529                   NULL, mixed, (const felem(*)[17][3])pre_comp, NULL);
1530     /* reduce the output to its unique minimal representation */
1531     felem_contract(x_in, x_out);
1532     felem_contract(y_in, y_out);
1533     felem_contract(z_in, z_out);
1534     if ((!felem_to_BN(x, x_in)) || (!felem_to_BN(y, y_in)) ||
1535         (!felem_to_BN(z, z_in))) {
1536         ECerr(EC_F_EC_GFP_NISTP224_POINTS_MUL, ERR_R_BN_LIB);
1537         goto err;
1538     }
1539     ret = EC_POINT_set_Jprojective_coordinates_GFp(group, r, x, y, z, ctx);
1540
1541  err:
1542     BN_CTX_end(ctx);
1543     EC_POINT_free(generator);
1544     BN_CTX_free(new_ctx);
1545     OPENSSL_free(secrets);
1546     OPENSSL_free(pre_comp);
1547     OPENSSL_free(tmp_felems);
1548     return ret;
1549 }
1550
1551 int ec_GFp_nistp224_precompute_mult(EC_GROUP *group, BN_CTX *ctx)
1552 {
1553     int ret = 0;
1554     NISTP224_PRE_COMP *pre = NULL;
1555     int i, j;
1556     BN_CTX *new_ctx = NULL;
1557     BIGNUM *x, *y;
1558     EC_POINT *generator = NULL;
1559     felem tmp_felems[32];
1560
1561     /* throw away old precomputation */
1562     EC_pre_comp_free(group);
1563     if (ctx == NULL)
1564         if ((ctx = new_ctx = BN_CTX_new()) == NULL)
1565             return 0;
1566     BN_CTX_start(ctx);
1567     if (((x = BN_CTX_get(ctx)) == NULL) || ((y = BN_CTX_get(ctx)) == NULL))
1568         goto err;
1569     /* get the generator */
1570     if (group->generator == NULL)
1571         goto err;
1572     generator = EC_POINT_new(group);
1573     if (generator == NULL)
1574         goto err;
1575     BN_bin2bn(nistp224_curve_params[3], sizeof(felem_bytearray), x);
1576     BN_bin2bn(nistp224_curve_params[4], sizeof(felem_bytearray), y);
1577     if (!EC_POINT_set_affine_coordinates_GFp(group, generator, x, y, ctx))
1578         goto err;
1579     if ((pre = nistp224_pre_comp_new()) == NULL)
1580         goto err;
1581     /*
1582      * if the generator is the standard one, use built-in precomputation
1583      */
1584     if (0 == EC_POINT_cmp(group, generator, group->generator, ctx)) {
1585         memcpy(pre->g_pre_comp, gmul, sizeof(pre->g_pre_comp));
1586         ret = 1;
1587         goto err;
1588     }
1589     if ((!BN_to_felem(pre->g_pre_comp[0][1][0], group->generator->X)) ||
1590         (!BN_to_felem(pre->g_pre_comp[0][1][1], group->generator->Y)) ||
1591         (!BN_to_felem(pre->g_pre_comp[0][1][2], group->generator->Z)))
1592         goto err;
1593     /*
1594      * compute 2^56*G, 2^112*G, 2^168*G for the first table, 2^28*G, 2^84*G,
1595      * 2^140*G, 2^196*G for the second one
1596      */
1597     for (i = 1; i <= 8; i <<= 1) {
1598         point_double(pre->g_pre_comp[1][i][0], pre->g_pre_comp[1][i][1],
1599                      pre->g_pre_comp[1][i][2], pre->g_pre_comp[0][i][0],
1600                      pre->g_pre_comp[0][i][1], pre->g_pre_comp[0][i][2]);
1601         for (j = 0; j < 27; ++j) {
1602             point_double(pre->g_pre_comp[1][i][0], pre->g_pre_comp[1][i][1],
1603                          pre->g_pre_comp[1][i][2], pre->g_pre_comp[1][i][0],
1604                          pre->g_pre_comp[1][i][1], pre->g_pre_comp[1][i][2]);
1605         }
1606         if (i == 8)
1607             break;
1608         point_double(pre->g_pre_comp[0][2 * i][0],
1609                      pre->g_pre_comp[0][2 * i][1],
1610                      pre->g_pre_comp[0][2 * i][2], pre->g_pre_comp[1][i][0],
1611                      pre->g_pre_comp[1][i][1], pre->g_pre_comp[1][i][2]);
1612         for (j = 0; j < 27; ++j) {
1613             point_double(pre->g_pre_comp[0][2 * i][0],
1614                          pre->g_pre_comp[0][2 * i][1],
1615                          pre->g_pre_comp[0][2 * i][2],
1616                          pre->g_pre_comp[0][2 * i][0],
1617                          pre->g_pre_comp[0][2 * i][1],
1618                          pre->g_pre_comp[0][2 * i][2]);
1619         }
1620     }
1621     for (i = 0; i < 2; i++) {
1622         /* g_pre_comp[i][0] is the point at infinity */
1623         memset(pre->g_pre_comp[i][0], 0, sizeof(pre->g_pre_comp[i][0]));
1624         /* the remaining multiples */
1625         /* 2^56*G + 2^112*G resp. 2^84*G + 2^140*G */
1626         point_add(pre->g_pre_comp[i][6][0], pre->g_pre_comp[i][6][1],
1627                   pre->g_pre_comp[i][6][2], pre->g_pre_comp[i][4][0],
1628                   pre->g_pre_comp[i][4][1], pre->g_pre_comp[i][4][2],
1629                   0, pre->g_pre_comp[i][2][0], pre->g_pre_comp[i][2][1],
1630                   pre->g_pre_comp[i][2][2]);
1631         /* 2^56*G + 2^168*G resp. 2^84*G + 2^196*G */
1632         point_add(pre->g_pre_comp[i][10][0], pre->g_pre_comp[i][10][1],
1633                   pre->g_pre_comp[i][10][2], pre->g_pre_comp[i][8][0],
1634                   pre->g_pre_comp[i][8][1], pre->g_pre_comp[i][8][2],
1635                   0, pre->g_pre_comp[i][2][0], pre->g_pre_comp[i][2][1],
1636                   pre->g_pre_comp[i][2][2]);
1637         /* 2^112*G + 2^168*G resp. 2^140*G + 2^196*G */
1638         point_add(pre->g_pre_comp[i][12][0], pre->g_pre_comp[i][12][1],
1639                   pre->g_pre_comp[i][12][2], pre->g_pre_comp[i][8][0],
1640                   pre->g_pre_comp[i][8][1], pre->g_pre_comp[i][8][2],
1641                   0, pre->g_pre_comp[i][4][0], pre->g_pre_comp[i][4][1],
1642                   pre->g_pre_comp[i][4][2]);
1643         /*
1644          * 2^56*G + 2^112*G + 2^168*G resp. 2^84*G + 2^140*G + 2^196*G
1645          */
1646         point_add(pre->g_pre_comp[i][14][0], pre->g_pre_comp[i][14][1],
1647                   pre->g_pre_comp[i][14][2], pre->g_pre_comp[i][12][0],
1648                   pre->g_pre_comp[i][12][1], pre->g_pre_comp[i][12][2],
1649                   0, pre->g_pre_comp[i][2][0], pre->g_pre_comp[i][2][1],
1650                   pre->g_pre_comp[i][2][2]);
1651         for (j = 1; j < 8; ++j) {
1652             /* odd multiples: add G resp. 2^28*G */
1653             point_add(pre->g_pre_comp[i][2 * j + 1][0],
1654                       pre->g_pre_comp[i][2 * j + 1][1],
1655                       pre->g_pre_comp[i][2 * j + 1][2],
1656                       pre->g_pre_comp[i][2 * j][0],
1657                       pre->g_pre_comp[i][2 * j][1],
1658                       pre->g_pre_comp[i][2 * j][2], 0,
1659                       pre->g_pre_comp[i][1][0], pre->g_pre_comp[i][1][1],
1660                       pre->g_pre_comp[i][1][2]);
1661         }
1662     }
1663     make_points_affine(31, &(pre->g_pre_comp[0][1]), tmp_felems);
1664
1665     SETPRECOMP(group, nistp224, pre);
1666     pre = NULL;
1667     ret = 1;
1668  err:
1669     BN_CTX_end(ctx);
1670     EC_POINT_free(generator);
1671     BN_CTX_free(new_ctx);
1672     EC_nistp224_pre_comp_free(pre);
1673     return ret;
1674 }
1675
1676 int ec_GFp_nistp224_have_precompute_mult(const EC_GROUP *group)
1677 {
1678     return HAVEPRECOMP(group, nistp224);
1679 }
1680
1681 #else
1682 static void *dummy = &dummy;
1683 #endif