2020-11-08  Niels Möller  <nisse@lysator.liu.se>
* ecc-mul-m.c (ecc_mul_m): Reduce scratch need.
+ (ecc_mul_m): Optimize swapping, with only a single mpn_cnd_swap
+ per iteration.
+
* ecc-add-jja.c (ecc_add_jja): Reduce scratch need.
* ecc-add-jjj.c (ecc_add_jjj): Reduce scratch need.
* ecc-internal.h (ECC_ADD_JJA_ITCH, ECC_ADD_JJJ_ITCH): Now 5*size.
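The swap optimization referred to above replaces the pair of conditional swaps
around each ladder step with a single swap keyed on whether the current key bit
differs from the previous one, plus one fixup swap after the loop. Below is a
minimal standalone sketch of that pattern, under stated assumptions: cnd_swap is
a hypothetical stand-in for mpn_cnd_swap, and ladder_sketch, its parameters and
the elided step arithmetic are illustrative only, not Nettle's actual code.

#include <stddef.h>
#include <stdint.h>

/* Hypothetical stand-in for mpn_cnd_swap: constant-time conditional
   swap of two n-limb arrays.  No branch or memory access pattern
   depends on the secret cnd. */
static void
cnd_swap (uint64_t cnd, uint64_t *ap, uint64_t *bp, size_t n)
{
  uint64_t mask = -cnd;	/* all ones if cnd == 1, all zeros if cnd == 0 */
  size_t i;
  for (i = 0; i < n; i++)
    {
      uint64_t t = mask & (ap[i] ^ bp[i]);
      ap[i] ^= t;
      bp[i] ^= t;
    }
}

/* Sketch of the deferred-swap ladder loop (illustrative only).
   Assumes bit_low >= 1, as in ecc_mul_m's callers, so the unsigned
   loop index terminates. */
static void
ladder_sketch (const uint8_t *n, unsigned bit_high, unsigned bit_low,
	       uint64_t *x2, uint64_t *x3, size_t size)
{
  uint64_t swap = 0;
  unsigned i;

  for (i = bit_high; i >= bit_low; i--)
    {
      uint64_t bit = (n[i/8] >> (i & 7)) & 1;

      /* One conditional swap per iteration: swap only when this bit
	 differs from the previous one.  The net effect over the loop
	 equals swapping before and after every iteration. */
      cnd_swap (swap ^ bit, x2, x3, size);
      swap = bit;

      /* ... Montgomery ladder step on (x2, z2), (x3, z3) ... */
    }
  /* Undo the last swap, if the final bit was set. */
  cnd_swap (swap, x2, x3, size);
}

The previous code did two mpn_cnd_swap passes over 2*m->size limbs per
iteration; folding them into one, with a single fixup after the loop, roughly
halves that memory traffic while keeping the swap sequence independent of the
secret bits.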
mp_limb_t *scratch)
{
unsigned i;
- mp_limb_t cy;
+ mp_limb_t cy, swap;
#define x2 (scratch)
#define z2 (scratch + m->size)
ecc_mod_addmul_1 (m, AA, E, a24);
ecc_mod_mul (m, z3, E, AA, tp);
- for (i = bit_high; i >= bit_low; i--)
+ for (i = bit_high, swap = 0; i >= bit_low; i--)
{
- int bit = (n[i/8] >> (i & 7)) & 1;
+ mp_limb_t bit = (n[i/8] >> (i & 7)) & 1;
- mpn_cnd_swap (bit, x2, x3, 2*m->size);
+ /* Defer the swap-back: swap only when the current bit differs
+    from the previous one. */
+ mpn_cnd_swap (swap ^ bit, x2, x3, 2*m->size);
+ swap = bit;
ecc_mod_add (m, A, x2, z2);
ecc_mod_sub (m, D, x3, z3);
ecc_mod_sub (m, z3, DA, z3); /* DA - CB */
ecc_mod_sqr (m, z3, z3, tp);
ecc_mod_mul (m, z3, z3, px, tp);
-
- /* FIXME: Could be combined with the loop's initial mpn_cnd_swap. */
- mpn_cnd_swap (bit, x2, x3, 2*m->size);
}
+ /* Swap back, if needed */
+ mpn_cnd_swap (swap, x2, x3, 2*m->size);
+
/* Do the low zero bits, just duplicating x2 */
for (i = 0; i < bit_low; i++)
{