crypto/ec/ec_mult.c

   1 /*
   2  * Copyright 2001-2018 The OpenSSL Project Authors. All Rights Reserved.
   3  * Copyright (c) 2002, Oracle and/or its affiliates. All rights reserved
   4  *
   5  * Licensed under the Apache License 2.0 (the "License").  You may not use
   6  * this file except in compliance with the License.  You can obtain a copy
   7  * in the file LICENSE in the source distribution or at
   8  * https://www.openssl.org/source/license.html
   9  */
  10
  11 /*
  12  * ECDSA low level APIs are deprecated for public use, but still ok for
  13  * internal use.
  14  */
  15 #include "internal/deprecated.h"
  16
  17 #include <string.h>
  18 #include <openssl/err.h>
  19
  20 #include "internal/cryptlib.h"
  21 #include "crypto/bn.h"
  22 #include "ec_local.h"
  23 #include "internal/refcount.h"
  24
  25 /*
  26  * This file implements the wNAF-based interleaving multi-exponentiation method
  27  * Formerly at:
  28  *   http://www.informatik.tu-darmstadt.de/TI/Mitarbeiter/moeller.html#multiexp
  29  * You might now find it here:
  30  *   http://link.springer.com/chapter/10.1007%2F3-540-45537-X_13
  31  *   http://www.bmoeller.de/pdf/TI-01-08.multiexp.pdf
  32  * For multiplication with precomputation, we use wNAF splitting, formerly at:
  33  *   http://www.informatik.tu-darmstadt.de/TI/Mitarbeiter/moeller.html#fastexp
  34  */
  35
/*
 * Structure for precomputed multiples of the generator.
 *
 * Instances are shared through a reference count: see ec_pre_comp_new(),
 * EC_ec_pre_comp_dup() and EC_ec_pre_comp_free().
 */
struct ec_pre_comp_st {
    const EC_GROUP *group;      /* parent EC_GROUP object */
    size_t blocksize;           /* block size for wNAF splitting */
    size_t numblocks;           /* max. number of blocks for which we have
                                 * precomputation */
    size_t w;                   /* window size */
    EC_POINT **points;          /* array with pre-calculated multiples of
                                 * generator: 'num' pointers to EC_POINT
                                 * objects followed by a NULL */
    size_t num;                 /* numblocks * 2^(w-1) */
    CRYPTO_REF_COUNT references; /* reference count; starts at 1 and the
                                  * object is destroyed when it drops to 0 */
    CRYPTO_RWLOCK *lock;        /* lock handed to CRYPTO_UP_REF and
                                 * CRYPTO_DOWN_REF for 'references' */
};
  50
  51 static EC_PRE_COMP *ec_pre_comp_new(const EC_GROUP *group)
  52 {
  53     EC_PRE_COMP *ret = NULL;
  54
  55     if (!group)
  56         return NULL;
  57
  58     ret = OPENSSL_zalloc(sizeof(*ret));
  59     if (ret == NULL) {
  60         ECerr(EC_F_EC_PRE_COMP_NEW, ERR_R_MALLOC_FAILURE);
  61         return ret;
  62     }
  63
  64     ret->group = group;
  65     ret->blocksize = 8;         /* default */
  66     ret->w = 4;                 /* default */
  67     ret->references = 1;
  68
  69     ret->lock = CRYPTO_THREAD_lock_new();
  70     if (ret->lock == NULL) {
  71         ECerr(EC_F_EC_PRE_COMP_NEW, ERR_R_MALLOC_FAILURE);
  72         OPENSSL_free(ret);
  73         return NULL;
  74     }
  75     return ret;
  76 }
  77
  78 EC_PRE_COMP *EC_ec_pre_comp_dup(EC_PRE_COMP *pre)
  79 {
  80     int i;
  81     if (pre != NULL)
  82         CRYPTO_UP_REF(&pre->references, &i, pre->lock);
  83     return pre;
  84 }
  85
  86 void EC_ec_pre_comp_free(EC_PRE_COMP *pre)
  87 {
  88     int i;
  89
  90     if (pre == NULL)
  91         return;
  92
  93     CRYPTO_DOWN_REF(&pre->references, &i, pre->lock);
  94     REF_PRINT_COUNT("EC_ec", pre);
  95     if (i > 0)
  96         return;
  97     REF_ASSERT_ISNT(i < 0);
  98
  99     if (pre->points != NULL) {
 100         EC_POINT **pts;
 101
 102         for (pts = pre->points; *pts != NULL; pts++)
 103             EC_POINT_free(*pts);
 104         OPENSSL_free(pre->points);
 105     }
 106     CRYPTO_THREAD_lock_free(pre->lock);
 107     OPENSSL_free(pre);
 108 }
 109
/*
 * Apply the BIGNUM flags 'flags' to all three coordinates (X, Y and Z) of
 * the point 'P'.  Both arguments are evaluated more than once.
 */
#define EC_POINT_BN_set_flags(P, flags) do { \
    BN_set_flags((P)->X, (flags)); \
    BN_set_flags((P)->Y, (flags)); \
    BN_set_flags((P)->Z, (flags)); \
} while(0)
 115
/*-
 * This function computes a single point multiplication over the EC group,
 * using, at a high level, a Montgomery ladder with conditional swaps, with
 * various timing attack defenses.
 *
 * It performs either a fixed point multiplication
 *          (scalar * generator)
 * when point is NULL, or a variable point multiplication
 *          (scalar * point)
 * when point is not NULL.
 *
 * `scalar` cannot be NULL and should be in the range [0,n) otherwise all
 * constant time bets are off (where n is the cardinality of the EC group).
 *
 * This function expects `group->order` and `group->cofactor` to be well
 * defined and non-zero: it fails with an error code otherwise.
 *
 * NB: This says nothing about the constant-timeness of the ladder step
 * implementation (i.e., the default implementation is based on EC_POINT_add and
 * EC_POINT_dbl, which of course are not constant time themselves) or the
 * underlying multiprecision arithmetic.
 *
 * The product is stored in `r`.
 *
 * This is an internal function: callers are in charge of ensuring that the
 * input parameters `group`, `r`, `scalar` and `ctx` are not NULL.
 *
 * Returns 1 on success, 0 otherwise.
 */
int ec_scalar_mul_ladder(const EC_GROUP *group, EC_POINT *r,
                         const BIGNUM *scalar, const EC_POINT *point,
                         BN_CTX *ctx)
{
    int i, cardinality_bits, group_top, kbit, pbit, Z_is_one;
    EC_POINT *p = NULL;
    EC_POINT *s = NULL;
    BIGNUM *k = NULL;
    BIGNUM *lambda = NULL;
    BIGNUM *cardinality = NULL;
    int ret = 0;

    /* early exit if the input point is the point at infinity */
    if (point != NULL && EC_POINT_is_at_infinity(group, point))
        return EC_POINT_set_to_infinity(group, r);

    if (BN_is_zero(group->order)) {
        ECerr(EC_F_EC_SCALAR_MUL_LADDER, EC_R_UNKNOWN_ORDER);
        return 0;
    }
    if (BN_is_zero(group->cofactor)) {
        ECerr(EC_F_EC_SCALAR_MUL_LADDER, EC_R_UNKNOWN_COFACTOR);
        return 0;
    }

    /* from here on every exit goes through 'err' so the frame is closed */
    BN_CTX_start(ctx);

    if (((p = EC_POINT_new(group)) == NULL)
        || ((s = EC_POINT_new(group)) == NULL)) {
        ECerr(EC_F_EC_SCALAR_MUL_LADDER, ERR_R_MALLOC_FAILURE);
        goto err;
    }

    /* p is a private working copy of the base point (generator or input) */
    if (point == NULL) {
        if (!EC_POINT_copy(p, group->generator)) {
            ECerr(EC_F_EC_SCALAR_MUL_LADDER, ERR_R_EC_LIB);
            goto err;
        }
    } else {
        if (!EC_POINT_copy(p, point)) {
            ECerr(EC_F_EC_SCALAR_MUL_LADDER, ERR_R_EC_LIB);
            goto err;
        }
    }

    EC_POINT_BN_set_flags(p, BN_FLG_CONSTTIME);
    EC_POINT_BN_set_flags(r, BN_FLG_CONSTTIME);
    EC_POINT_BN_set_flags(s, BN_FLG_CONSTTIME);

    /* only the last BN_CTX_get() result is checked for failure */
    cardinality = BN_CTX_get(ctx);
    lambda = BN_CTX_get(ctx);
    k = BN_CTX_get(ctx);
    if (k == NULL) {
        ECerr(EC_F_EC_SCALAR_MUL_LADDER, ERR_R_MALLOC_FAILURE);
        goto err;
    }

    /* cardinality := order * cofactor */
    if (!BN_mul(cardinality, group->order, group->cofactor, ctx)) {
        ECerr(EC_F_EC_SCALAR_MUL_LADDER, ERR_R_BN_LIB);
        goto err;
    }

    /*
     * Group cardinalities are often on a word boundary.
     * So when we pad the scalar, some timing diff might
     * pop if it needs to be expanded due to carries.
     * So expand ahead of time.
     */
    cardinality_bits = BN_num_bits(cardinality);
    group_top = bn_get_top(cardinality);
    if ((bn_wexpand(k, group_top + 2) == NULL)
        || (bn_wexpand(lambda, group_top + 2) == NULL)) {
        ECerr(EC_F_EC_SCALAR_MUL_LADDER, ERR_R_BN_LIB);
        goto err;
    }

    if (!BN_copy(k, scalar)) {
        ECerr(EC_F_EC_SCALAR_MUL_LADDER, ERR_R_BN_LIB);
        goto err;
    }

    BN_set_flags(k, BN_FLG_CONSTTIME);

    if ((BN_num_bits(k) > cardinality_bits) || (BN_is_negative(k))) {
        /*-
         * this is an unusual input, and we don't guarantee
         * constant-timeness
         */
        if (!BN_nnmod(k, k, cardinality, ctx)) {
            ECerr(EC_F_EC_SCALAR_MUL_LADDER, ERR_R_BN_LIB);
            goto err;
        }
    }

    if (!BN_add(lambda, k, cardinality)) {
        ECerr(EC_F_EC_SCALAR_MUL_LADDER, ERR_R_BN_LIB);
        goto err;
    }
    BN_set_flags(lambda, BN_FLG_CONSTTIME);
    if (!BN_add(k, lambda, cardinality)) {
        ECerr(EC_F_EC_SCALAR_MUL_LADDER, ERR_R_BN_LIB);
        goto err;
    }
    /*
     * lambda := scalar + cardinality
     * k := scalar + 2*cardinality
     *
     * Using a conditional swap rather than a branch, keep in k the value
     * lambda when bit 'cardinality_bits' of lambda is set, and
     * scalar + 2*cardinality otherwise.
     */
    kbit = BN_is_bit_set(lambda, cardinality_bits);
    BN_consttime_swap(kbit, k, lambda, group_top + 2);

    /*
     * group_top is reused: from here on it is the word count of the field,
     * which is the width used for the coordinate swaps below.
     */
    group_top = bn_get_top(group->field);
    if ((bn_wexpand(s->X, group_top) == NULL)
        || (bn_wexpand(s->Y, group_top) == NULL)
        || (bn_wexpand(s->Z, group_top) == NULL)
        || (bn_wexpand(r->X, group_top) == NULL)
        || (bn_wexpand(r->Y, group_top) == NULL)
        || (bn_wexpand(r->Z, group_top) == NULL)
        || (bn_wexpand(p->X, group_top) == NULL)
        || (bn_wexpand(p->Y, group_top) == NULL)
        || (bn_wexpand(p->Z, group_top) == NULL)) {
        ECerr(EC_F_EC_SCALAR_MUL_LADDER, ERR_R_BN_LIB);
        goto err;
    }

    /* ensure input point is in affine coords for ladder step efficiency */
    if (!p->Z_is_one && !EC_POINT_make_affine(group, p, ctx)) {
            ECerr(EC_F_EC_SCALAR_MUL_LADDER, ERR_R_EC_LIB);
            goto err;
    }

    /* Initialize the Montgomery ladder */
    if (!ec_point_ladder_pre(group, r, s, p, ctx)) {
        ECerr(EC_F_EC_SCALAR_MUL_LADDER, EC_R_LADDER_PRE_FAILURE);
        goto err;
    }

    /* top bit is a 1, in a fixed pos */
    pbit = 1;

/*
 * Swap the coordinates of points a and b when c is non-zero, over w words;
 * the Z_is_one flags are exchanged the same way, using t as scratch.
 */
#define EC_POINT_CSWAP(c, a, b, w, t) do {         \
        BN_consttime_swap(c, (a)->X, (b)->X, w);   \
        BN_consttime_swap(c, (a)->Y, (b)->Y, w);   \
        BN_consttime_swap(c, (a)->Z, (b)->Z, w);   \
        t = ((a)->Z_is_one ^ (b)->Z_is_one) & (c); \
        (a)->Z_is_one ^= (t);                      \
        (b)->Z_is_one ^= (t);                      \
} while(0)

    /*-
     * The ladder step, with branches, is
     *
     * k[i] == 0: S = add(R, S), R = dbl(R)
     * k[i] == 1: R = add(S, R), S = dbl(S)
     *
     * Swapping R, S conditionally on k[i] leaves you with state
     *
     * k[i] == 0: T, U = R, S
     * k[i] == 1: T, U = S, R
     *
     * Then perform the ECC ops.
     *
     * U = add(T, U)
     * T = dbl(T)
     *
     * Which leaves you with state
     *
     * k[i] == 0: U = add(R, S), T = dbl(R)
     * k[i] == 1: U = add(S, R), T = dbl(S)
     *
     * Swapping T, U conditionally on k[i] leaves you with state
     *
     * k[i] == 0: R, S = T, U
     * k[i] == 1: R, S = U, T
     *
     * Which leaves you with state
     *
     * k[i] == 0: S = add(R, S), R = dbl(R)
     * k[i] == 1: R = add(S, R), S = dbl(S)
     *
     * So we get the same logic, but instead of a branch it's a
     * conditional swap, followed by ECC ops, then another conditional swap.
     *
     * Optimization: The end of iteration i and start of i-1 looks like
     *
     * ...
     * CSWAP(k[i], R, S)
     * ECC
     * CSWAP(k[i], R, S)
     * (next iteration)
     * CSWAP(k[i-1], R, S)
     * ECC
     * CSWAP(k[i-1], R, S)
     * ...
     *
     * So instead of two contiguous swaps, you can merge the condition
     * bits and do a single swap.
     *
     * k[i]   k[i-1]    Outcome
     * 0      0         No Swap
     * 0      1         Swap
     * 1      0         Swap
     * 1      1         No Swap
     *
     * This is XOR. pbit tracks the previous bit of k.
     */

    for (i = cardinality_bits - 1; i >= 0; i--) {
        kbit = BN_is_bit_set(k, i) ^ pbit;
        EC_POINT_CSWAP(kbit, r, s, group_top, Z_is_one);

        /* Perform a single step of the Montgomery ladder */
        if (!ec_point_ladder_step(group, r, s, p, ctx)) {
            ECerr(EC_F_EC_SCALAR_MUL_LADDER, EC_R_LADDER_STEP_FAILURE);
            goto err;
        }
        /*
         * pbit logic merges this cswap with that of the
         * next iteration
         */
        pbit ^= kbit;
    }
    /* one final cswap to move the right value into r */
    EC_POINT_CSWAP(pbit, r, s, group_top, Z_is_one);
#undef EC_POINT_CSWAP

    /* Finalize ladder (and recover full point coordinates) */
    if (!ec_point_ladder_post(group, r, s, p, ctx)) {
        ECerr(EC_F_EC_SCALAR_MUL_LADDER, EC_R_LADDER_POST_FAILURE);
        goto err;
    }

    ret = 1;

 err:
    EC_POINT_free(p);
    /* s is released with the clearing variant of the free routine */
    EC_POINT_clear_free(s);
    BN_CTX_end(ctx);

    return ret;
}
 385
 386 #undef EC_POINT_BN_set_flags
 387
 388 /*
 389  * TODO: table should be optimised for the wNAF-based implementation,
 390  * sometimes smaller windows will give better performance (thus the
 391  * boundaries should be increased)
 392  */
 393 #define EC_window_bits_for_scalar_size(b) \
 394                 ((size_t) \
 395                  ((b) >= 2000 ? 6 : \
 396                   (b) >=  800 ? 5 : \
 397                   (b) >=  300 ? 4 : \
 398                   (b) >=   70 ? 3 : \
 399                   (b) >=   20 ? 2 : \
 400                   1))
 401
/*-
 * Compute
 *      \sum scalars[i]*points[i],
 * also including
 *      scalar*generator
 * in the addition if scalar != NULL
 *
 * 'num' is the number of entries in 'points' and 'scalars'.  The result is
 * stored in 'r'.  Returns 1 on success, 0 on error.
 *
 * When the group order and cofactor are both non-zero, the two single-scalar
 * cases (scalar*generator with num == 0, and scalars[0]*points[0] with
 * scalar == NULL and num == 1) are delegated to ec_scalar_mul_ladder(),
 * unless the scalar pointer is group->order itself.
 *
 * Cleanup at 'err' relies on NULL pivots: 'wNAF' and 'val' are walked until
 * the first NULL entry, which is why a terminator is written ahead of every
 * element that gets filled in.
 */
int ec_wNAF_mul(const EC_GROUP *group, EC_POINT *r, const BIGNUM *scalar,
                size_t num, const EC_POINT *points[], const BIGNUM *scalars[],
                BN_CTX *ctx)
{
    const EC_POINT *generator = NULL;
    EC_POINT *tmp = NULL;
    size_t totalnum;
    size_t blocksize = 0, numblocks = 0; /* for wNAF splitting */
    size_t pre_points_per_block = 0;
    size_t i, j;
    int k;
    int r_is_inverted = 0;
    int r_is_at_infinity = 1;
    size_t *wsize = NULL;       /* individual window sizes */
    signed char **wNAF = NULL;  /* individual wNAFs */
    size_t *wNAF_len = NULL;
    size_t max_len = 0;
    size_t num_val;
    EC_POINT **val = NULL;      /* precomputation */
    EC_POINT **v;
    EC_POINT ***val_sub = NULL; /* pointers to sub-arrays of 'val' or
                                 * 'pre_comp->points' */
    const EC_PRE_COMP *pre_comp = NULL;
    int num_scalar = 0;         /* flag: will be set to 1 if 'scalar' must be
                                 * treated like other scalars, i.e.
                                 * precomputation is not available */
    int ret = 0;

    if (!BN_is_zero(group->order) && !BN_is_zero(group->cofactor)) {
        /*-
         * Handle the common cases where the scalar is secret, enforcing a
         * scalar multiplication implementation based on a Montgomery ladder,
         * with various timing attack defenses.
         */
        if ((scalar != group->order) && (scalar != NULL) && (num == 0)) {
            /*-
             * In this case we want to compute scalar * GeneratorPoint: this
             * codepath is reached most prominently by (ephemeral) key
             * generation of EC cryptosystems (i.e. ECDSA keygen and sign setup,
             * ECDH keygen/first half), where the scalar is always secret. This
             * is why we ignore if BN_FLG_CONSTTIME is actually set and we
             * always call the ladder version.
             */
            return ec_scalar_mul_ladder(group, r, scalar, NULL, ctx);
        }
        if ((scalar == NULL) && (num == 1) && (scalars[0] != group->order)) {
            /*-
             * In this case we want to compute scalar * VariablePoint: this
             * codepath is reached most prominently by the second half of ECDH,
             * where the secret scalar is multiplied by the peer's public point.
             * To protect the secret scalar, we ignore if BN_FLG_CONSTTIME is
             * actually set and we always call the ladder version.
             */
            return ec_scalar_mul_ladder(group, r, scalars[0], points[0], ctx);
        }
    }

    if (scalar != NULL) {
        generator = EC_GROUP_get0_generator(group);
        if (generator == NULL) {
            ECerr(EC_F_EC_WNAF_MUL, EC_R_UNDEFINED_GENERATOR);
            goto err;
        }

        /* look if we can use precomputed multiples of generator */

        pre_comp = group->pre_comp.ec;
        if (pre_comp && pre_comp->numblocks
            && (EC_POINT_cmp(group, generator, pre_comp->points[0], ctx) ==
                0)) {
            blocksize = pre_comp->blocksize;

            /*
             * determine maximum number of blocks that wNAF splitting may
             * yield (NB: maximum wNAF length is bit length plus one)
             */
            numblocks = (BN_num_bits(scalar) / blocksize) + 1;

            /*
             * we cannot use more blocks than we have precomputation for
             */
            if (numblocks > pre_comp->numblocks)
                numblocks = pre_comp->numblocks;

            pre_points_per_block = (size_t)1 << (pre_comp->w - 1);

            /* check that pre_comp looks sane */
            if (pre_comp->num != (pre_comp->numblocks * pre_points_per_block)) {
                ECerr(EC_F_EC_WNAF_MUL, ERR_R_INTERNAL_ERROR);
                goto err;
            }
        } else {
            /* can't use precomputation */
            pre_comp = NULL;
            numblocks = 1;
            num_scalar = 1;     /* treat 'scalar' like 'num'-th element of
                                 * 'scalars' */
        }
    }

    /* one slot per input point plus one per generator block */
    totalnum = num + numblocks;

    wsize = OPENSSL_malloc(totalnum * sizeof(wsize[0]));
    wNAF_len = OPENSSL_malloc(totalnum * sizeof(wNAF_len[0]));
    /* include space for pivot */
    wNAF = OPENSSL_malloc((totalnum + 1) * sizeof(wNAF[0]));
    val_sub = OPENSSL_malloc(totalnum * sizeof(val_sub[0]));

    /* Ensure wNAF is initialised in case we end up going to err */
    if (wNAF != NULL)
        wNAF[0] = NULL;         /* preliminary pivot */

    if (wsize == NULL || wNAF_len == NULL || wNAF == NULL || val_sub == NULL) {
        ECerr(EC_F_EC_WNAF_MUL, ERR_R_MALLOC_FAILURE);
        goto err;
    }

    /*
     * num_val will be the total number of temporarily precomputed points
     */
    num_val = 0;

    for (i = 0; i < num + num_scalar; i++) {
        size_t bits;

        bits = i < num ? BN_num_bits(scalars[i]) : BN_num_bits(scalar);
        wsize[i] = EC_window_bits_for_scalar_size(bits);
        num_val += (size_t)1 << (wsize[i] - 1);
        wNAF[i + 1] = NULL;     /* make sure we always have a pivot */
        wNAF[i] =
            bn_compute_wNAF((i < num ? scalars[i] : scalar), wsize[i],
                            &wNAF_len[i]);
        if (wNAF[i] == NULL)
            goto err;
        if (wNAF_len[i] > max_len)
            max_len = wNAF_len[i];
    }

    if (numblocks) {
        /* we go here iff scalar != NULL */

        if (pre_comp == NULL) {
            if (num_scalar != 1) {
                ECerr(EC_F_EC_WNAF_MUL, ERR_R_INTERNAL_ERROR);
                goto err;
            }
            /* we have already generated a wNAF for 'scalar' */
        } else {
            signed char *tmp_wNAF = NULL;
            size_t tmp_len = 0;

            if (num_scalar != 0) {
                ECerr(EC_F_EC_WNAF_MUL, ERR_R_INTERNAL_ERROR);
                goto err;
            }

            /*
             * use the window size for which we have precomputation
             */
            wsize[num] = pre_comp->w;
            tmp_wNAF = bn_compute_wNAF(scalar, wsize[num], &tmp_len);
            if (!tmp_wNAF)
                goto err;

            if (tmp_len <= max_len) {
                /*
                 * One of the other wNAFs is at least as long as the wNAF
                 * belonging to the generator, so wNAF splitting will not buy
                 * us anything.
                 */

                numblocks = 1;
                totalnum = num + 1; /* don't use wNAF splitting */
                /* tmp_wNAF is now owned by the wNAF array */
                wNAF[num] = tmp_wNAF;
                wNAF[num + 1] = NULL;
                wNAF_len[num] = tmp_len;
                /*
                 * pre_comp->points starts with the points that we need here:
                 */
                val_sub[num] = pre_comp->points;
            } else {
                /*
                 * don't include tmp_wNAF directly into wNAF array - use wNAF
                 * splitting and include the blocks
                 */

                signed char *pp;
                EC_POINT **tmp_points;

                if (tmp_len < numblocks * blocksize) {
                    /*
                     * possibly we can do with fewer blocks than estimated
                     */
                    numblocks = (tmp_len + blocksize - 1) / blocksize;
                    if (numblocks > pre_comp->numblocks) {
                        ECerr(EC_F_EC_WNAF_MUL, ERR_R_INTERNAL_ERROR);
                        OPENSSL_free(tmp_wNAF);
                        goto err;
                    }
                    totalnum = num + numblocks;
                }

                /* split wNAF in 'numblocks' parts */
                pp = tmp_wNAF;
                tmp_points = pre_comp->points;

                for (i = num; i < totalnum; i++) {
                    if (i < totalnum - 1) {
                        wNAF_len[i] = blocksize;
                        if (tmp_len < blocksize) {
                            ECerr(EC_F_EC_WNAF_MUL, ERR_R_INTERNAL_ERROR);
                            OPENSSL_free(tmp_wNAF);
                            goto err;
                        }
                        tmp_len -= blocksize;
                    } else
                        /*
                         * last block gets whatever is left (this could be
                         * more or less than 'blocksize'!)
                         */
                        wNAF_len[i] = tmp_len;

                    /* terminate first so cleanup stops at this slot */
                    wNAF[i + 1] = NULL;
                    wNAF[i] = OPENSSL_malloc(wNAF_len[i]);
                    if (wNAF[i] == NULL) {
                        ECerr(EC_F_EC_WNAF_MUL, ERR_R_MALLOC_FAILURE);
                        OPENSSL_free(tmp_wNAF);
                        goto err;
                    }
                    memcpy(wNAF[i], pp, wNAF_len[i]);
                    if (wNAF_len[i] > max_len)
                        max_len = wNAF_len[i];

                    if (*tmp_points == NULL) {
                        ECerr(EC_F_EC_WNAF_MUL, ERR_R_INTERNAL_ERROR);
                        OPENSSL_free(tmp_wNAF);
                        goto err;
                    }
                    val_sub[i] = tmp_points;
                    tmp_points += pre_points_per_block;
                    pp += blocksize;
                }
                /* every block has been copied out, the full wNAF can go */
                OPENSSL_free(tmp_wNAF);
            }
        }
    }

    /*
     * All points we precompute now go into a single array 'val'.
     * 'val_sub[i]' is a pointer to the subarray for the i-th point, or to a
     * subarray of 'pre_comp->points' if we already have precomputation.
     */
    val = OPENSSL_malloc((num_val + 1) * sizeof(val[0]));
    if (val == NULL) {
        ECerr(EC_F_EC_WNAF_MUL, ERR_R_MALLOC_FAILURE);
        goto err;
    }
    val[num_val] = NULL;        /* pivot element */

    /*
     * allocate points for precomputation; a failed EC_POINT_new() stores
     * NULL in the slot, which is where the cleanup loop at 'err' stops
     */
    v = val;
    for (i = 0; i < num + num_scalar; i++) {
        val_sub[i] = v;
        for (j = 0; j < ((size_t)1 << (wsize[i] - 1)); j++) {
            *v = EC_POINT_new(group);
            if (*v == NULL)
                goto err;
            v++;
        }
    }
    if (!(v == val + num_val)) {
        ECerr(EC_F_EC_WNAF_MUL, ERR_R_INTERNAL_ERROR);
        goto err;
    }

    if ((tmp = EC_POINT_new(group)) == NULL)
        goto err;

    /*-
     * prepare precomputed values:
     *    val_sub[i][0] :=     points[i]
     *    val_sub[i][1] := 3 * points[i]
     *    val_sub[i][2] := 5 * points[i]
     *    ...
     */
    for (i = 0; i < num + num_scalar; i++) {
        if (i < num) {
            if (!EC_POINT_copy(val_sub[i][0], points[i]))
                goto err;
        } else {
            if (!EC_POINT_copy(val_sub[i][0], generator))
                goto err;
        }

        if (wsize[i] > 1) {
            /* tmp := 2 * point; each further entry adds tmp to the last */
            if (!EC_POINT_dbl(group, tmp, val_sub[i][0], ctx))
                goto err;
            for (j = 1; j < ((size_t)1 << (wsize[i] - 1)); j++) {
                if (!EC_POINT_add
                    (group, val_sub[i][j], val_sub[i][j - 1], tmp, ctx))
                    goto err;
            }
        }
    }

    if (!EC_POINTs_make_affine(group, num_val, val, ctx))
        goto err;

    r_is_at_infinity = 1;

    /*
     * Walk the digits from the most significant position down: double the
     * accumulator once per position, then add the table entry selected by
     * each non-zero digit.  Negative digits are handled by tracking whether
     * r currently holds the negated sum (r_is_inverted).
     */
    for (k = max_len - 1; k >= 0; k--) {
        if (!r_is_at_infinity) {
            if (!EC_POINT_dbl(group, r, r, ctx))
                goto err;
        }

        for (i = 0; i < totalnum; i++) {
            if (wNAF_len[i] > (size_t)k) {
                int digit = wNAF[i][k];
                int is_neg;

                if (digit) {
                    is_neg = digit < 0;

                    if (is_neg)
                        digit = -digit;

                    if (is_neg != r_is_inverted) {
                        if (!r_is_at_infinity) {
                            if (!EC_POINT_invert(group, r, ctx))
                                goto err;
                        }
                        r_is_inverted = !r_is_inverted;
                    }

                    /* digit > 0 */

                    if (r_is_at_infinity) {
                        if (!EC_POINT_copy(r, val_sub[i][digit >> 1]))
                            goto err;
                        r_is_at_infinity = 0;
                    } else {
                        if (!EC_POINT_add
                            (group, r, r, val_sub[i][digit >> 1], ctx))
                            goto err;
                    }
                }
            }
        }
    }

    if (r_is_at_infinity) {
        if (!EC_POINT_set_to_infinity(group, r))
            goto err;
    } else {
        /* undo a pending negation so r holds the actual sum */
        if (r_is_inverted)
            if (!EC_POINT_invert(group, r, ctx))
                goto err;
    }

    ret = 1;

 err:
    EC_POINT_free(tmp);
    OPENSSL_free(wsize);
    OPENSSL_free(wNAF_len);
    if (wNAF != NULL) {
        signed char **w;

        /* free entries up to the NULL pivot */
        for (w = wNAF; *w != NULL; w++)
            OPENSSL_free(*w);

        OPENSSL_free(wNAF);
    }
    if (val != NULL) {
        for (v = val; *v != NULL; v++)
            EC_POINT_clear_free(*v);

        OPENSSL_free(val);
    }
    /* val_sub only holds pointers into 'val' or 'pre_comp->points' */
    OPENSSL_free(val_sub);
    return ret;
}
 792
 793 /*-
 794  * ec_wNAF_precompute_mult()
 795  * creates an EC_PRE_COMP object with preprecomputed multiples of the generator
 796  * for use with wNAF splitting as implemented in ec_wNAF_mul().
 797  *
 798  * 'pre_comp->points' is an array of multiples of the generator
 799  * of the following form:
 800  * points[0] =     generator;
 801  * points[1] = 3 * generator;
 802  * ...
 803  * points[2^(w-1)-1] =     (2^(w-1)-1) * generator;
 804  * points[2^(w-1)]   =     2^blocksize * generator;
 805  * points[2^(w-1)+1] = 3 * 2^blocksize * generator;
 806  * ...
 807  * points[2^(w-1)*(numblocks-1)-1] = (2^(w-1)) *  2^(blocksize*(numblocks-2)) * generator
 808  * points[2^(w-1)*(numblocks-1)]   =              2^(blocksize*(numblocks-1)) * generator
 809  * ...
 810  * points[2^(w-1)*numblocks-1]     = (2^(w-1)) *  2^(blocksize*(numblocks-1)) * generator
 811  * points[2^(w-1)*numblocks]       = NULL
 812  */
 813 int ec_wNAF_precompute_mult(EC_GROUP *group, BN_CTX *ctx)
 814 {
 815     const EC_POINT *generator;
 816     EC_POINT *tmp_point = NULL, *base = NULL, **var;
 817     const BIGNUM *order;
 818     size_t i, bits, w, pre_points_per_block, blocksize, numblocks, num;
 819     EC_POINT **points = NULL;
 820     EC_PRE_COMP *pre_comp;
 821     int ret = 0;
 822 #ifndef FIPS_MODE
 823     BN_CTX *new_ctx = NULL;
 824 #endif
 825
 826     /* if there is an old EC_PRE_COMP object, throw it away */
 827     EC_pre_comp_free(group);
 828     if ((pre_comp = ec_pre_comp_new(group)) == NULL)
 829         return 0;
 830
 831     generator = EC_GROUP_get0_generator(group);
 832     if (generator == NULL) {
 833         ECerr(EC_F_EC_WNAF_PRECOMPUTE_MULT, EC_R_UNDEFINED_GENERATOR);
 834         goto err;
 835     }
 836
 837 #ifndef FIPS_MODE
 838     if (ctx == NULL)
 839         ctx = new_ctx = BN_CTX_new();
 840 #endif
 841     if (ctx == NULL)
 842         goto err;
 843
 844     BN_CTX_start(ctx);
 845
 846     order = EC_GROUP_get0_order(group);
 847     if (order == NULL)
 848         goto err;
 849     if (BN_is_zero(order)) {
 850         ECerr(EC_F_EC_WNAF_PRECOMPUTE_MULT, EC_R_UNKNOWN_ORDER);
 851         goto err;
 852     }
 853
 854     bits = BN_num_bits(order);
 855     /*
 856      * The following parameters mean we precompute (approximately) one point
 857      * per bit. TBD: The combination 8, 4 is perfect for 160 bits; for other
 858      * bit lengths, other parameter combinations might provide better
 859      * efficiency.
 860      */
 861     blocksize = 8;
 862     w = 4;
 863     if (EC_window_bits_for_scalar_size(bits) > w) {
 864         /* let's not make the window too small ... */
 865         w = EC_window_bits_for_scalar_size(bits);
 866     }
 867
 868     numblocks = (bits + blocksize - 1) / blocksize; /* max. number of blocks
 869                                                      * to use for wNAF
 870                                                      * splitting */
 871
 872     pre_points_per_block = (size_t)1 << (w - 1);
 873     num = pre_points_per_block * numblocks; /* number of points to compute
 874                                              * and store */
 875
 876     points = OPENSSL_malloc(sizeof(*points) * (num + 1));
 877     if (points == NULL) {
 878         ECerr(EC_F_EC_WNAF_PRECOMPUTE_MULT, ERR_R_MALLOC_FAILURE);
 879         goto err;
 880     }
 881
 882     var = points;
 883     var[num] = NULL;            /* pivot */
 884     for (i = 0; i < num; i++) {
 885         if ((var[i] = EC_POINT_new(group)) == NULL) {
 886             ECerr(EC_F_EC_WNAF_PRECOMPUTE_MULT, ERR_R_MALLOC_FAILURE);
 887             goto err;
 888         }
 889     }
 890
 891     if ((tmp_point = EC_POINT_new(group)) == NULL
 892         || (base = EC_POINT_new(group)) == NULL) {
 893         ECerr(EC_F_EC_WNAF_PRECOMPUTE_MULT, ERR_R_MALLOC_FAILURE);
 894         goto err;
 895     }
 896
 897     if (!EC_POINT_copy(base, generator))
 898         goto err;
 899
 900     /* do the precomputation */
 901     for (i = 0; i < numblocks; i++) {
 902         size_t j;
 903
 904         if (!EC_POINT_dbl(group, tmp_point, base, ctx))
 905             goto err;
 906
 907         if (!EC_POINT_copy(*var++, base))
 908             goto err;
 909
 910         for (j = 1; j < pre_points_per_block; j++, var++) {
 911             /*
 912              * calculate odd multiples of the current base point
 913              */
 914             if (!EC_POINT_add(group, *var, tmp_point, *(var - 1), ctx))
 915                 goto err;
 916         }
 917
 918         if (i < numblocks - 1) {
 919             /*
 920              * get the next base (multiply current one by 2^blocksize)
 921              */
 922             size_t k;
 923
 924             if (blocksize <= 2) {
 925                 ECerr(EC_F_EC_WNAF_PRECOMPUTE_MULT, ERR_R_INTERNAL_ERROR);
 926                 goto err;
 927             }
 928
 929             if (!EC_POINT_dbl(group, base, tmp_point, ctx))
 930                 goto err;
 931             for (k = 2; k < blocksize; k++) {
 932                 if (!EC_POINT_dbl(group, base, base, ctx))
 933                     goto err;
 934             }
 935         }
 936     }
 937
 938     if (!EC_POINTs_make_affine(group, num, points, ctx))
 939         goto err;
 940
 941     pre_comp->group = group;
 942     pre_comp->blocksize = blocksize;
 943     pre_comp->numblocks = numblocks;
 944     pre_comp->w = w;
 945     pre_comp->points = points;
 946     points = NULL;
 947     pre_comp->num = num;
 948     SETPRECOMP(group, ec, pre_comp);
 949     pre_comp = NULL;
 950     ret = 1;
 951
 952  err:
 953     BN_CTX_end(ctx);
 954 #ifndef FIPS_MODE
 955     BN_CTX_free(new_ctx);
 956 #endif
 957     EC_ec_pre_comp_free(pre_comp);
 958     if (points) {
 959         EC_POINT **p;
 960
 961         for (p = points; *p != NULL; p++)
 962             EC_POINT_free(*p);
 963         OPENSSL_free(points);
 964     }
 965     EC_POINT_free(tmp_point);
 966     EC_POINT_free(base);
 967     return ret;
 968 }
 969
 970 int ec_wNAF_have_precompute_mult(const EC_GROUP *group)
 971 {
 972     return HAVEPRECOMP(group, ec);
 973 }