2020-11-01 Niels Möller <nisse@lysator.liu.se>
+ * ecc-mod-arith.c (ecc_mod_mul, ecc_mod_sqr): Separate argument
+ for scratch area, reducing required size of result area. Update
+ all callers to naïvely keep using the result area as scratch.
+ (ecc_mod_pow_2k, ecc_mod_pow_2k_mul): Simplified, also reducing
+ required size of result area.
+
* testsuite/testutils.c (test_ecc_point): Show curve bits on failure.
2020-10-31 Niels Möller <nisse@lysator.liu.se>
ecc->p.invert (&ecc->p, t1, t0, t2 + ecc->p.size);
ecc_mod_add (&ecc->p, t0, wp, vp);
- ecc_mod_mul (&ecc->p, t2, t0, t1);
+ ecc_mod_mul (&ecc->p, t2, t0, t1, t2);
cy = mpn_sub_n (xp, t2, ecc->p.m, ecc->p.size);
cnd_copy (cy, xp, t2, ecc->p.size);
*/
/* Needs a total of 9*size storage. */
ecc->p.invert (&ecc->p, t0, p, t1 + ecc->p.size);
- ecc_mod_mul (&ecc->p, t1, t0, vp);
- ecc_mod_mul (&ecc->p, t2, t1, t1);
+ ecc_mod_mul (&ecc->p, t1, t0, vp, t1);
+ ecc_mod_mul (&ecc->p, t2, t1, t1, t2);
cy = mpn_sub_n (xp, t2, ecc->p.m, ecc->p.size);
cnd_copy (cy, xp, t2, ecc->p.size);
#define F D
#define G E
- ecc_mod_mul (&ecc->p, C, x1, x2);
- ecc_mod_mul (&ecc->p, D, y1, y2);
+ ecc_mod_mul (&ecc->p, C, x1, x2, C);
+ ecc_mod_mul (&ecc->p, D, y1, y2, D);
ecc_mod_add (&ecc->p, x3, x1, y1);
ecc_mod_add (&ecc->p, y3, x2, y2);
- ecc_mod_mul (&ecc->p, T, x3, y3);
+ ecc_mod_mul (&ecc->p, T, x3, y3, T);
ecc_mod_sub (&ecc->p, T, T, C);
ecc_mod_sub (&ecc->p, T, T, D);
- ecc_mod_mul (&ecc->p, x3, C, D);
- ecc_mod_mul (&ecc->p, E, x3, ecc->b);
+ ecc_mod_mul (&ecc->p, x3, C, D, x3);
+ ecc_mod_mul (&ecc->p, E, x3, ecc->b, E);
ecc_mod_sub (&ecc->p, C, D, C);
- ecc_mod_sqr (&ecc->p, B, z1);
+ ecc_mod_sqr (&ecc->p, B, z1, B);
ecc_mod_sub (&ecc->p, F, B, E);
ecc_mod_add (&ecc->p, G, B, E);
/* x3 */
- ecc_mod_mul (&ecc->p, B, F, T);
- ecc_mod_mul (&ecc->p, x3, B, z1);
+ ecc_mod_mul (&ecc->p, B, F, T, B);
+ ecc_mod_mul (&ecc->p, x3, B, z1, x3);
/* y3 */
- ecc_mod_mul (&ecc->p, B, G, z1);
- ecc_mod_mul (&ecc->p, y3, B, C); /* Clobbers z1 in case r == p. */
+ ecc_mod_mul (&ecc->p, B, G, z1, B);
+ ecc_mod_mul (&ecc->p, y3, B, C, y3); /* Clobbers z1 in case r == p. */
/* z3 */
- ecc_mod_mul (&ecc->p, B, F, G);
+ ecc_mod_mul (&ecc->p, B, F, G, B);
mpn_copyi (z3, B, ecc->p.size);
}
#define F D
#define G E
- ecc_mod_mul (&ecc->p, C, x1, x2);
- ecc_mod_mul (&ecc->p, D, y1, y2);
+ ecc_mod_mul (&ecc->p, C, x1, x2, C);
+ ecc_mod_mul (&ecc->p, D, y1, y2, D);
ecc_mod_add (&ecc->p, A, x1, y1);
ecc_mod_add (&ecc->p, B, x2, y2);
- ecc_mod_mul (&ecc->p, T, A, B);
+ ecc_mod_mul (&ecc->p, T, A, B, T);
ecc_mod_sub (&ecc->p, T, T, C);
ecc_mod_sub (&ecc->p, T, T, D);
- ecc_mod_mul (&ecc->p, x3, C, D);
- ecc_mod_mul (&ecc->p, E, x3, ecc->b);
+ ecc_mod_mul (&ecc->p, x3, C, D, x3);
+ ecc_mod_mul (&ecc->p, E, x3, ecc->b, E);
ecc_mod_sub (&ecc->p, C, D, C);
- ecc_mod_mul (&ecc->p, A, z1, z2);
- ecc_mod_sqr (&ecc->p, B, A);
+ ecc_mod_mul (&ecc->p, A, z1, z2, A);
+ ecc_mod_sqr (&ecc->p, B, A, B);
ecc_mod_sub (&ecc->p, F, B, E);
ecc_mod_add (&ecc->p, G, B, E);
/* x3 */
- ecc_mod_mul (&ecc->p, B, F, T);
- ecc_mod_mul (&ecc->p, x3, B, A);
+ ecc_mod_mul (&ecc->p, B, F, T, B);
+ ecc_mod_mul (&ecc->p, x3, B, A, x3);
/* y3 */
- ecc_mod_mul (&ecc->p, B, G, C);
- ecc_mod_mul (&ecc->p, y3, B, A);
+ ecc_mod_mul (&ecc->p, B, G, C, B);
+ ecc_mod_mul (&ecc->p, y3, B, A, y3);
/* z3 */
- ecc_mod_mul (&ecc->p, B, F, G);
+ ecc_mod_mul (&ecc->p, B, F, G, B);
mpn_copyi (z3, B, ecc->p.size);
}
#define y2 (q + ecc->p.size)
/* zz */
- ecc_mod_sqr (&ecc->p, zz, z1);
+ ecc_mod_sqr (&ecc->p, zz, z1, zz);
/* h*/
- ecc_mod_mul (&ecc->p, h, x2, zz);
+ ecc_mod_mul (&ecc->p, h, x2, zz, h);
ecc_mod_sub (&ecc->p, h, h, x1);
/* hh */
- ecc_mod_sqr (&ecc->p, hh, h);
+ ecc_mod_sqr (&ecc->p, hh, h, hh);
/* Do z^3 early, store at w. */
- ecc_mod_mul (&ecc->p, w, zz, z1);
+ ecc_mod_mul (&ecc->p, w, zz, z1, w);
/* z_3, use j area for scratch */
ecc_mod_add (&ecc->p, r + 2*ecc->p.size, p + 2*ecc->p.size, h);
- ecc_mod_sqr (&ecc->p, j, r + 2*ecc->p.size);
+ ecc_mod_sqr (&ecc->p, j, r + 2*ecc->p.size, j);
ecc_mod_sub (&ecc->p, j, j, zz);
ecc_mod_sub (&ecc->p, r + 2*ecc->p.size, j, hh);
/* w */
- ecc_mod_mul (&ecc->p, j, y2, w);
+ ecc_mod_mul (&ecc->p, j, y2, w, j);
ecc_mod_sub (&ecc->p, w, j, y1);
ecc_mod_mul_1 (&ecc->p, w, w, 2);
/* i replaces hh, j */
ecc_mod_mul_1 (&ecc->p, hh, hh, 4);
- ecc_mod_mul (&ecc->p, j, hh, h);
+ ecc_mod_mul (&ecc->p, j, hh, h, j);
/* v */
- ecc_mod_mul (&ecc->p, v, x1, hh);
+ ecc_mod_mul (&ecc->p, v, x1, hh, v);
/* x_3, use (h, hh) as sqratch */
- ecc_mod_sqr (&ecc->p, h, w);
+ ecc_mod_sqr (&ecc->p, h, w, h);
ecc_mod_sub (&ecc->p, r, h, j);
ecc_mod_submul_1 (&ecc->p, r, v, 2);
/* y_3, use (h, hh) as sqratch */
- ecc_mod_mul (&ecc->p, h, y1, j); /* frees j */
+ ecc_mod_mul (&ecc->p, h, y1, j, h); /* frees j */
ecc_mod_sub (&ecc->p, r + ecc->p.size, v, r);
- ecc_mod_mul (&ecc->p, j, r + ecc->p.size, w);
+ ecc_mod_mul (&ecc->p, j, r + ecc->p.size, w, j);
ecc_mod_submul_1 (&ecc->p, j, h, 2);
mpn_copyi (r + ecc->p.size, j, ecc->p.size);
}
mp_limb_t *v = scratch + 6*ecc->p.size;
/* z1^2, z2^2, u1 = x1 x2^2, u2 = x2 z1^2 - u1 */
- ecc_mod_sqr (&ecc->p, z1z1, p + 2*ecc->p.size);
- ecc_mod_sqr (&ecc->p, z2z2, q + 2*ecc->p.size);
- ecc_mod_mul (&ecc->p, u1, p, z2z2);
- ecc_mod_mul (&ecc->p, u2, q, z1z1);
+ ecc_mod_sqr (&ecc->p, z1z1, p + 2*ecc->p.size, z1z1);
+ ecc_mod_sqr (&ecc->p, z2z2, q + 2*ecc->p.size, z2z2);
+ ecc_mod_mul (&ecc->p, u1, p, z2z2, u1);
+ ecc_mod_mul (&ecc->p, u2, q, z1z1, u2);
ecc_mod_sub (&ecc->p, u2, u2, u1); /* Store h in u2 */
/* z3, use i, j, v as scratch, result at i. */
ecc_mod_add (&ecc->p, i, p + 2*ecc->p.size, q + 2*ecc->p.size);
- ecc_mod_sqr (&ecc->p, v, i);
+ ecc_mod_sqr (&ecc->p, v, i, v);
ecc_mod_sub (&ecc->p, v, v, z1z1);
ecc_mod_sub (&ecc->p, v, v, z2z2);
- ecc_mod_mul (&ecc->p, i, v, u2);
+ ecc_mod_mul (&ecc->p, i, v, u2, i);
/* Delayed write, to support in-place operation. */
/* s1 = y1 z2^3, s2 = y2 z1^3, scratch at j and v */
- ecc_mod_mul (&ecc->p, j, z1z1, p + 2*ecc->p.size); /* z1^3 */
- ecc_mod_mul (&ecc->p, v, z2z2, q + 2*ecc->p.size); /* z2^3 */
- ecc_mod_mul (&ecc->p, s1, p + ecc->p.size, v);
- ecc_mod_mul (&ecc->p, v, j, q + ecc->p.size);
+ ecc_mod_mul (&ecc->p, j, z1z1, p + 2*ecc->p.size, j); /* z1^3 */
+ ecc_mod_mul (&ecc->p, v, z2z2, q + 2*ecc->p.size, v); /* z2^3 */
+ ecc_mod_mul (&ecc->p, s1, p + ecc->p.size, v, s1);
+ ecc_mod_mul (&ecc->p, v, j, q + ecc->p.size, v);
ecc_mod_sub (&ecc->p, s2, v, s1);
ecc_mod_mul_1 (&ecc->p, s2, s2, 2);
mpn_copyi (r + 2*ecc->p.size, i, ecc->p.size);
/* i, j, v */
- ecc_mod_sqr (&ecc->p, i, u2);
+ ecc_mod_sqr (&ecc->p, i, u2, i);
ecc_mod_mul_1 (&ecc->p, i, i, 4);
- ecc_mod_mul (&ecc->p, j, u2, i);
- ecc_mod_mul (&ecc->p, v, u1, i);
+ ecc_mod_mul (&ecc->p, j, u2, i, j);
+ ecc_mod_mul (&ecc->p, v, u1, i, v);
/* now, u1, u2 and i are free for reuse .*/
/* x3, use u1, u2 as scratch */
- ecc_mod_sqr (&ecc->p, u1, s2);
+ ecc_mod_sqr (&ecc->p, u1, s2, u1);
ecc_mod_sub (&ecc->p, r, u1, j);
ecc_mod_submul_1 (&ecc->p, r, v, 2);
/* y3 */
- ecc_mod_mul (&ecc->p, u1, s1, j); /* Frees j */
+ ecc_mod_mul (&ecc->p, u1, s1, j, u1); /* Frees j */
ecc_mod_sub (&ecc->p, u2, v, r); /* Frees v */
- ecc_mod_mul (&ecc->p, i, s2, u2);
+ ecc_mod_mul (&ecc->p, i, s2, u2, i);
ecc_mod_submul_1 (&ecc->p, i, u1, 2);
mpn_copyi (r + ecc->p.size, i, ecc->p.size);
}
#define F D
#define G E
- ecc_mod_mul (&ecc->p, C, x1, x2);
- ecc_mod_mul (&ecc->p, D, y1, y2);
+ ecc_mod_mul (&ecc->p, C, x1, x2, C);
+ ecc_mod_mul (&ecc->p, D, y1, y2, D);
ecc_mod_add (&ecc->p, x3, x1, y1);
ecc_mod_add (&ecc->p, y3, x2, y2);
- ecc_mod_mul (&ecc->p, T, x3, y3);
+ ecc_mod_mul (&ecc->p, T, x3, y3, T);
ecc_mod_sub (&ecc->p, T, T, C);
ecc_mod_sub (&ecc->p, T, T, D);
- ecc_mod_mul (&ecc->p, x3, C, D);
- ecc_mod_mul (&ecc->p, E, x3, ecc->b);
+ ecc_mod_mul (&ecc->p, x3, C, D, x3);
+ ecc_mod_mul (&ecc->p, E, x3, ecc->b, E);
ecc_mod_add (&ecc->p, C, D, C);
- ecc_mod_sqr (&ecc->p, B, z1);
+ ecc_mod_sqr (&ecc->p, B, z1, B);
ecc_mod_sub (&ecc->p, F, B, E);
ecc_mod_add (&ecc->p, G, B, E);
/* x3 */
- ecc_mod_mul (&ecc->p, B, G, T);
- ecc_mod_mul (&ecc->p, x3, B, z1);
+ ecc_mod_mul (&ecc->p, B, G, T, B);
+ ecc_mod_mul (&ecc->p, x3, B, z1, x3);
/* y3 */
- ecc_mod_mul (&ecc->p, B, F, z1);
- ecc_mod_mul (&ecc->p, y3, B, C); /* Clobbers z1 in case r == p. */
+ ecc_mod_mul (&ecc->p, B, F, z1, B);
+ ecc_mod_mul (&ecc->p, y3, B, C, y3); /* Clobbers z1 in case r == p. */
/* z3 */
- ecc_mod_mul (&ecc->p, B, F, G);
+ ecc_mod_mul (&ecc->p, B, F, G, B);
mpn_copyi (z3, B, ecc->p.size);
}
#define F D
#define G E
- ecc_mod_mul (&ecc->p, C, x1, x2);
- ecc_mod_mul (&ecc->p, D, y1, y2);
+ ecc_mod_mul (&ecc->p, C, x1, x2, C);
+ ecc_mod_mul (&ecc->p, D, y1, y2, D);
ecc_mod_add (&ecc->p, A, x1, y1);
ecc_mod_add (&ecc->p, B, x2, y2);
- ecc_mod_mul (&ecc->p, T, A, B);
+ ecc_mod_mul (&ecc->p, T, A, B, T);
ecc_mod_sub (&ecc->p, T, T, C);
ecc_mod_sub (&ecc->p, T, T, D);
- ecc_mod_mul (&ecc->p, x3, C, D);
- ecc_mod_mul (&ecc->p, E, x3, ecc->b);
+ ecc_mod_mul (&ecc->p, x3, C, D, x3);
+ ecc_mod_mul (&ecc->p, E, x3, ecc->b, E);
ecc_mod_add (&ecc->p, C, D, C);
- ecc_mod_mul (&ecc->p, A, z1, z2);
- ecc_mod_sqr (&ecc->p, B, A);
+ ecc_mod_mul (&ecc->p, A, z1, z2, A);
+ ecc_mod_sqr (&ecc->p, B, A, B);
ecc_mod_sub (&ecc->p, F, B, E);
ecc_mod_add (&ecc->p, G, B, E);
/* x3 */
- ecc_mod_mul (&ecc->p, B, G, T);
- ecc_mod_mul (&ecc->p, x3, B, A);
+ ecc_mod_mul (&ecc->p, B, G, T, B);
+ ecc_mod_mul (&ecc->p, x3, B, A, x3);
/* y3 */
- ecc_mod_mul (&ecc->p, B, F, C);
- ecc_mod_mul (&ecc->p, y3, B, A);
+ ecc_mod_mul (&ecc->p, B, F, C, B);
+ ecc_mod_mul (&ecc->p, y3, B, A, y3);
/* z3 */
- ecc_mod_mul (&ecc->p, B, F, G);
+ ecc_mod_mul (&ecc->p, B, F, G, B);
mpn_copyi (z3, B, ecc->p.size);
}
*/
ecc_mod_pow_2kp1 (m, t0, ap, 1, t1); /* a^3 */
- ecc_mod_sqr (m, rp, t0); /* a^6 */
- ecc_mod_mul (m, a7, rp, ap); /* a^7 */
+ ecc_mod_sqr (m, rp, t0, rp); /* a^6 */
+ ecc_mod_mul (m, a7, rp, ap, a7); /* a^7 */
ecc_mod_pow_2kp1 (m, rp, a7, 3, t0); /* a^63 = a^{2^6-1} */
- ecc_mod_sqr (m, t0, rp); /* a^{2^7-2} */
- ecc_mod_mul (m, rp, t0, ap); /* a^{2^7-1} */
+ ecc_mod_sqr (m, t0, rp, t0); /* a^{2^7-2} */
+ ecc_mod_mul (m, rp, t0, ap, rp); /* a^{2^7-1} */
ecc_mod_pow_2kp1 (m, t0, rp, 7, t1); /* a^{2^14-1}*/
ecc_mod_pow_2kp1 (m, rp, t0, 14, t1); /* a^{2^28-1} */
- ecc_mod_sqr (m, t0, rp); /* a^{2^29-2} */
- ecc_mod_sqr (m, t1, t0); /* a^{2^30-4} */
- ecc_mod_sqr (m, t0, t1); /* a^{2^31-8} */
- ecc_mod_mul (m, rp, t0, a7); /* a^{2^31-1} */
+ ecc_mod_sqr (m, t0, rp, t0); /* a^{2^29-2} */
+ ecc_mod_sqr (m, t1, t0, t1); /* a^{2^30-4} */
+ ecc_mod_sqr (m, t0, t1, t0); /* a^{2^31-8} */
+ ecc_mod_mul (m, rp, t0, a7, rp); /* a^{2^31-1} */
ecc_mod_pow_2kp1 (m, t0, rp, 31, t1); /* a^{2^62-1} */
ecc_mod_pow_2kp1 (m, rp, t0, 62, t1); /* a^{2^124-1}*/
- ecc_mod_sqr (m, t0, rp); /* a^{2^125-2} */
- ecc_mod_mul (m, rp, t0, ap); /* a^{2^125-1} */
+ ecc_mod_sqr (m, t0, rp, t0); /* a^{2^125-2} */
+ ecc_mod_mul (m, rp, t0, ap, rp); /* a^{2^125-1} */
ecc_mod_pow_2kp1 (m, t0, rp, 125, t1);/* a^{2^250-1} */
- ecc_mod_sqr (m, rp, t0); /* a^{2^251-2} */
- ecc_mod_sqr (m, t0, rp); /* a^{2^252-4} */
- ecc_mod_mul (m, rp, t0, ap); /* a^{2^252-3} */
+ ecc_mod_sqr (m, rp, t0, rp); /* a^{2^251-2} */
+ ecc_mod_sqr (m, t0, rp, t0); /* a^{2^252-4} */
+ ecc_mod_mul (m, rp, t0, ap, rp); /* a^{2^252-3} */
#undef t0
#undef t1
#undef a7
= 1 + 2 (1 + 4 (2^{252}-3))
*/
ecc_mod_pow_252m3 (p, rp, ap, t0);
- ecc_mod_sqr (p, t0, rp);
- ecc_mod_sqr (p, rp, t0);
- ecc_mod_mul (p, t0, ap, rp);
- ecc_mod_sqr (p, rp, t0);
- ecc_mod_mul (p, t0, ap, rp);
+ ecc_mod_sqr (p, t0, rp, t0);
+ ecc_mod_sqr (p, rp, t0, rp);
+ ecc_mod_mul (p, t0, ap, rp, t0);
+ ecc_mod_sqr (p, rp, t0, rp);
+ ecc_mod_mul (p, t0, ap, rp, t0);
mpn_copyi (rp, t0, ECC_LIMB_SIZE); /* FIXME: Eliminate copy? */
#undef t0
}
#define t0 (scratch + 2*ECC_LIMB_SIZE)
/* Live values */
- ecc_mod_sqr (p, v2, vp); /* v2 */
- ecc_mod_mul (p, uv, up, vp); /* uv, v2 */
- ecc_mod_mul (p, uv3, uv, v2); /* uv3, v2 */
- ecc_mod_sqr (p, v4, v2); /* uv3, v4 */
- ecc_mod_mul (p, uv7, uv3, v4); /* uv3, uv7 */
+ ecc_mod_sqr (p, v2, vp, v2); /* v2 */
+ ecc_mod_mul (p, uv, up, vp, uv); /* uv, v2 */
+ ecc_mod_mul (p, uv3, uv, v2, uv3); /* uv3, v2 */
+ ecc_mod_sqr (p, v4, v2, v4); /* uv3, v4 */
+ ecc_mod_mul (p, uv7, uv3, v4, uv7); /* uv3, uv7 */
ecc_mod_pow_252m3 (p, uv7p, uv7, scratch_out); /* uv3, uv7p */
- ecc_mod_mul (p, rp, uv7p, uv3); /* none */
+ ecc_mod_mul (p, rp, uv7p, uv3, rp); /* none */
/* Check sign. If square root exists, have v x^2 = ±u */
- ecc_mod_sqr (p, x2, rp);
- ecc_mod_mul (p, vx2, x2, vp);
+ ecc_mod_sqr (p, x2, rp, x2);
+ ecc_mod_mul (p, vx2, x2, vp, vx2);
ecc_mod_add (p, t0, vx2, up);
neg = ecc_curve25519_zero_p (p, t0);
ecc_mod_sub (p, t0, up, vx2);
pos = ecc_curve25519_zero_p (p, t0);
- ecc_mod_mul (p, t0, rp, ecc_sqrt_z);
+ ecc_mod_mul (p, t0, rp, ecc_sqrt_z, t0);
cnd_copy (neg, rp, t0, ECC_LIMB_SIZE);
return pos | neg;
#define t1 (scratch + 1*ECC_LIMB_SIZE)
#define t2 (scratch + 3*ECC_LIMB_SIZE)
- ecc_mod_sqr (p, rp, ap); /* a^2 */
- ecc_mod_mul (p, t0, ap, rp); /* a^3 */
- ecc_mod_sqr (p, rp, t0); /* a^6 */
- ecc_mod_mul (p, t0, ap, rp); /* a^{2^3-1} */
+ ecc_mod_sqr (p, rp, ap, rp); /* a^2 */
+ ecc_mod_mul (p, t0, ap, rp, t0); /* a^3 */
+ ecc_mod_sqr (p, rp, t0, rp); /* a^6 */
+ ecc_mod_mul (p, t0, ap, rp,t0); /* a^{2^3-1} */
ecc_mod_pow_2kp1 (p, t1, t0, 3, rp); /* a^{2^6-1} */
ecc_mod_pow_2k (p, rp, t1, 3, t2); /* a^{2^9-2^3} */
- ecc_mod_mul (p, t2, t0, rp); /* a^{2^9-1} */
+ ecc_mod_mul (p, t2, t0, rp, t2); /* a^{2^9-1} */
ecc_mod_pow_2kp1 (p, t0, t2, 9, rp); /* a^{2^18-1} */
- ecc_mod_sqr (p, t1, t0); /* a^{2^19-2} */
- ecc_mod_mul (p, rp, ap, t1); /* a^{2^19-1} */
+ ecc_mod_sqr (p, t1, t0, t1); /* a^{2^19-2} */
+ ecc_mod_mul (p, rp, ap, t1, rp); /* a^{2^19-1} */
ecc_mod_pow_2k (p, t1, rp, 18, t2); /* a^{2^37-2^18} */
- ecc_mod_mul (p, rp, t0, t1); /* a^{2^37-1} */
+ ecc_mod_mul (p, rp, t0, t1, rp); /* a^{2^37-1} */
mpn_copyi (t0, rp, p->size);
ecc_mod_pow_2kp1 (p, rp, t0, 37, t2); /* a^{2^74-1} */
ecc_mod_pow_2k (p, t1, rp, 37, t2); /* a^{2^111-2^37} */
- ecc_mod_mul (p, rp, t0, t1); /* a^{2^111-1} */
+ ecc_mod_mul (p, rp, t0, t1, rp); /* a^{2^111-1} */
ecc_mod_pow_2kp1 (p, t0, rp, 111, t2);/* a^{2^222-1} */
- ecc_mod_sqr (p, t1, t0); /* a^{2^223-2} */
- ecc_mod_mul (p, rp, ap, t1); /* a^{2^223-1} */
+ ecc_mod_sqr (p, t1, t0, t1); /* a^{2^223-2} */
+ ecc_mod_mul (p, rp, ap, t1, rp); /* a^{2^223-1} */
ecc_mod_pow_2k (p, t1, rp, 223, t2); /* a^{2^446-2^223} */
- ecc_mod_mul (p, rp, t0, t1); /* a^{2^446-2^222-1} */
+ ecc_mod_mul (p, rp, t0, t1, rp); /* a^{2^446-2^222-1} */
#undef t0
#undef t1
#undef t2
#define t0 scratch
ecc_mod_pow_446m224m1 (p, rp, ap, scratch); /* a^{2^446-2^222-1} */
- ecc_mod_sqr (p, t0, rp); /* a^{2^447-2^223-2} */
- ecc_mod_sqr (p, rp, t0); /* a^{2^448-2^224-4} */
- ecc_mod_mul (p, t0, ap, rp); /* a^{2^448-2^224-3} */
+ ecc_mod_sqr (p, t0, rp, t0); /* a^{2^447-2^223-2} */
+ ecc_mod_sqr (p, rp, t0, rp); /* a^{2^448-2^224-4} */
+ ecc_mod_mul (p, t0, ap, rp, t0); /* a^{2^448-2^224-3} */
mpn_copyi (rp, t0, ECC_LIMB_SIZE); /* FIXME: Eliminate copy? */
#undef t0
#define t0 (scratch + 2*ECC_LIMB_SIZE)
/* Live values */
- ecc_mod_sqr (p, u2, up); /* u2 */
- ecc_mod_mul (p, u3, u2, up); /* u3 */
- ecc_mod_mul (p, u3v, u3, vp); /* u3v */
- ecc_mod_mul (p, uv, up, vp); /* u3v, uv */
- ecc_mod_sqr (p, u2v2, uv); /* u3v, u2v2 */
- ecc_mod_mul (p, u5v3, u3v, u2v2); /* u3v, u5v3 */
+ ecc_mod_sqr (p, u2, up, u2); /* u2 */
+ ecc_mod_mul (p, u3, u2, up, u3); /* u3 */
+ ecc_mod_mul (p, u3v, u3, vp, u3v); /* u3v */
+ ecc_mod_mul (p, uv, up, vp, uv); /* u3v, uv */
+ ecc_mod_sqr (p, u2v2, uv, u2v2); /* u3v, u2v2 */
+ ecc_mod_mul (p, u5v3, u3v, u2v2, u5v3); /* u3v, u5v3 */
ecc_mod_pow_446m224m1 (p, u5v3p, u5v3, scratch_out); /* u3v, u5v3p */
- ecc_mod_mul (p, rp, u5v3p, u3v); /* none */
+ ecc_mod_mul (p, rp, u5v3p, u3v, rp); /* none */
/* If square root exists, have v x^2 = u */
- ecc_mod_sqr (p, x2, rp);
- ecc_mod_mul (p, vx2, x2, vp);
+ ecc_mod_sqr (p, x2, rp, x2);
+ ecc_mod_mul (p, vx2, x2, vp, vx2);
ecc_mod_sub (p, t0, vx2, up);
return ecc_curve448_zero_p (p, t0);
/* b */
ecc_mod_add (&ecc->p, e, p, p + ecc->p.size);
- ecc_mod_sqr (&ecc->p, b, e);
+ ecc_mod_sqr (&ecc->p, b, e, b);
/* c */
- ecc_mod_sqr (&ecc->p, c, p);
+ ecc_mod_sqr (&ecc->p, c, p, c);
/* d */
- ecc_mod_sqr (&ecc->p, d, p + ecc->p.size);
+ ecc_mod_sqr (&ecc->p, d, p + ecc->p.size, d);
/* h, can use r as scratch, even for in-place operation. */
- ecc_mod_sqr (&ecc->p, r, p + 2*ecc->p.size);
+ ecc_mod_sqr (&ecc->p, r, p + 2*ecc->p.size, r);
/* e, */
ecc_mod_add (&ecc->p, e, c, d);
/* j */
/* x' */
ecc_mod_sub (&ecc->p, b, b, e);
- ecc_mod_mul (&ecc->p, r, b, j);
+ ecc_mod_mul (&ecc->p, r, b, j, r);
/* y' */
ecc_mod_sub (&ecc->p, c, c, d); /* Redundant */
- ecc_mod_mul (&ecc->p, r + ecc->p.size, e, c);
+ ecc_mod_mul (&ecc->p, r + ecc->p.size, e, c, r + ecc->p.size);
/* z' */
- ecc_mod_mul (&ecc->p, b, e, j);
+ ecc_mod_mul (&ecc->p, b, e, j, b);
mpn_copyi (r + 2*ecc->p.size, b, ecc->p.size);
}
#define zp (p + 2*ecc->p.size)
/* delta */
- ecc_mod_sqr (&ecc->p, delta, zp);
+ ecc_mod_sqr (&ecc->p, delta, zp, delta);
/* gamma */
- ecc_mod_sqr (&ecc->p, gamma, yp);
+ ecc_mod_sqr (&ecc->p, gamma, yp, gamma);
/* z'. Can use beta area as scratch. */
ecc_mod_add (&ecc->p, r + 2*ecc->p.size, yp, zp);
- ecc_mod_sqr (&ecc->p, beta, r + 2*ecc->p.size);
+ ecc_mod_sqr (&ecc->p, beta, r + 2*ecc->p.size, beta);
ecc_mod_sub (&ecc->p, beta, beta, gamma);
ecc_mod_sub (&ecc->p, r + 2*ecc->p.size, beta, delta);
/* alpha. Can use beta area as scratch, and overwrite delta. */
ecc_mod_add (&ecc->p, sum, xp, delta);
ecc_mod_sub (&ecc->p, delta, xp, delta);
- ecc_mod_mul (&ecc->p, beta, sum, delta);
+ ecc_mod_mul (&ecc->p, beta, sum, delta, beta);
ecc_mod_mul_1 (&ecc->p, alpha, beta, 3);
/* beta */
- ecc_mod_mul (&ecc->p, beta, xp, gamma);
+ ecc_mod_mul (&ecc->p, beta, xp, gamma, beta);
/* Do gamma^2 and 4*beta early, to get them out of the way. We can
then use the old area at gamma as scratch. */
- ecc_mod_sqr (&ecc->p, g2, gamma);
+ ecc_mod_sqr (&ecc->p, g2, gamma, g2);
ecc_mod_mul_1 (&ecc->p, sum, beta, 4);
/* x' */
- ecc_mod_sqr (&ecc->p, gamma, alpha); /* Overwrites gamma and beta */
+ ecc_mod_sqr (&ecc->p, gamma, alpha, gamma); /* Overwrites gamma and beta */
ecc_mod_submul_1 (&ecc->p, gamma, sum, 2);
mpn_copyi (r, gamma, ecc->p.size);
/* y' */
ecc_mod_sub (&ecc->p, sum, sum, r);
- ecc_mod_mul (&ecc->p, gamma, sum, alpha);
+ ecc_mod_mul (&ecc->p, gamma, sum, alpha, gamma);
ecc_mod_submul_1 (&ecc->p, gamma, g2, 8);
mpn_copyi (r + ecc->p.size, gamma, ecc->p.size);
}
/* B */
ecc_mod_add (&ecc->p, F, p, p + ecc->p.size);
- ecc_mod_sqr (&ecc->p, B, F);
+ ecc_mod_sqr (&ecc->p, B, F, B);
/* C */
- ecc_mod_sqr (&ecc->p, C, p);
+ ecc_mod_sqr (&ecc->p, C, p, C);
/* D */
- ecc_mod_sqr (&ecc->p, D, p + ecc->p.size);
+ ecc_mod_sqr (&ecc->p, D, p + ecc->p.size, D);
/* Can use r as scratch, even for in-place operation. */
- ecc_mod_sqr (&ecc->p, r, p + 2*ecc->p.size);
+ ecc_mod_sqr (&ecc->p, r, p + 2*ecc->p.size, r);
/* F, */
ecc_mod_sub (&ecc->p, F, D, C);
/* B - C - D */
ecc_mod_sub (&ecc->p, J, r, F);
/* x' */
- ecc_mod_mul (&ecc->p, r, B, J);
+ ecc_mod_mul (&ecc->p, r, B, J, r);
/* y' */
- ecc_mod_mul (&ecc->p, r + ecc->p.size, F, C);
+ ecc_mod_mul (&ecc->p, r + ecc->p.size, F, C, r + ecc->p.size);
/* z' */
- ecc_mod_mul (&ecc->p, B, F, J);
+ ecc_mod_mul (&ecc->p, B, F, J, B);
mpn_copyi (r + 2*ecc->p.size, B, ecc->p.size);
}
/* Process hash digest */
ecc_hash (&ecc->q, hp, length, digest);
- ecc_mod_mul (&ecc->q, tp, zp, rp);
+ ecc_mod_mul (&ecc->q, tp, zp, rp, tp);
ecc_mod_add (&ecc->q, hp, hp, tp);
- ecc_mod_mul (&ecc->q, tp, hp, kinv);
+ ecc_mod_mul (&ecc->q, tp, hp, kinv, tp);
mpn_copyi (sp, tp, ecc->p.size);
#undef P
/* u1 = h / s, P1 = u1 * G */
ecc_hash (&ecc->q, hp, length, digest);
- ecc_mod_mul (&ecc->q, u1, hp, sinv);
+ ecc_mod_mul (&ecc->q, u1, hp, sinv, u1);
/* u2 = r / s, P2 = u2 * Y */
- ecc_mod_mul (&ecc->q, u2, rp, sinv);
+ ecc_mod_mul (&ecc->q, u2, rp, sinv, u2);
/* Total storage: 5*ecc->p.size + ecc->mul_itch */
ecc->mul (ecc, P2, u2, pp, u2 + ecc->p.size);
/* Needs 2*size + scratch for the invert call. */
ecc->p.invert (&ecc->p, izp, zp, tp + ecc->p.size);
- ecc_mod_mul (&ecc->p, tp, xp, izp);
+ ecc_mod_mul (&ecc->p, tp, xp, izp, tp);
cy = mpn_sub_n (r, tp, ecc->p.m, ecc->p.size);
cnd_copy (cy, r, tp, ecc->p.size);
- ecc_mod_mul (&ecc->p, tp, yp, izp);
+ ecc_mod_mul (&ecc->p, tp, yp, izp, tp);
cy = mpn_sub_n (r + ecc->p.size, tp, ecc->p.m, ecc->p.size);
cnd_copy (cy, r + ecc->p.size, tp, ecc->p.size);
}
if (mpn_zero_p (hp, ecc->p.size))
mpn_add_1 (hp, hp, ecc->p.size, 1);
- ecc_mod_mul (&ecc->q, tp, rp, zp);
- ecc_mod_mul (&ecc->q, t2p, kp, hp);
+ ecc_mod_mul (&ecc->q, tp, rp, zp, tp);
+ ecc_mod_mul (&ecc->q, t2p, kp, hp, t2p);
ecc_mod_add (&ecc->q, sp, tp, t2p);
/* Also reduce mod ecc->q. It should already be < 2*ecc->q,
ecc->q.invert (&ecc->q, vp, hp, vp + 2*ecc->p.size);
/* z1 = s / h, P1 = z1 * G */
- ecc_mod_mul (&ecc->q, z1, sp, vp);
+ ecc_mod_mul (&ecc->q, z1, sp, vp, z1);
/* z2 = - r / h, P2 = z2 * Y */
- ecc_mod_mul (&ecc->q, z2, rp, vp);
+ ecc_mod_mul (&ecc->q, z2, rp, vp, z2);
mpn_sub_n (z2, ecc->q.m, z2, ecc->p.size);
/* Total storage: 5*ecc->p.size + ecc->mul_itch */
ecc_mod_submul_1 (const struct ecc_modulo *m, mp_limb_t *rp,
const mp_limb_t *ap, mp_limb_t b);
-/* The mul and sqr functions need 2*m->size limbs at rp */
+/* The mul and sqr functions need 2*m->size limbs at tp. rp may overlap
+ ap or bp, and may equal tp or tp + m->size, but no other overlap
+ with tp is allowed. */
void
ecc_mod_mul (const struct ecc_modulo *m, mp_limb_t *rp,
- const mp_limb_t *ap, const mp_limb_t *bp);
+ const mp_limb_t *ap, const mp_limb_t *bp, mp_limb_t *tp);
void
ecc_mod_sqr (const struct ecc_modulo *m, mp_limb_t *rp,
- const mp_limb_t *ap);
+ const mp_limb_t *ap, mp_limb_t *tp);
-/* The pow functions needs 2*m->size limbs at both rp and tp. */
-/* R <-- X^{2^k} */
+/* R <-- X^{2^k} mod M. Requires k >= 1. Needs 2*m->size limbs of
+ scratch space at tp, same overlap requirements as mul and sqr above. */
void
ecc_mod_pow_2k (const struct ecc_modulo *m,
mp_limb_t *rp, const mp_limb_t *xp,
unsigned k, mp_limb_t *tp);
-/* R <-- X^{2^k} Y */
+/* R <-- X^{2^k} Y mod M. Similar requirements as ecc_mod_pow_2k, but
+ rp and yp can't overlap. */
void
ecc_mod_pow_2k_mul (const struct ecc_modulo *m,
mp_limb_t *rp, const mp_limb_t *xp,
mp_limb_t cy;
ecc->p.invert (&ecc->p, izp, p+2*ecc->p.size, izp + 2 * ecc->p.size);
- ecc_mod_sqr (&ecc->p, iz2p, izp);
+ ecc_mod_sqr (&ecc->p, iz2p, izp, iz2p);
if (ecc->use_redc)
{
}
/* r_x <-- x / z^2 */
- ecc_mod_mul (&ecc->p, iz3p, iz2p, p);
+ ecc_mod_mul (&ecc->p, iz3p, iz2p, p, iz3p);
/* ecc_mod (and ecc_mod_mul) may return a value up to 2p - 1, so
do a conditional subtraction. */
cy = mpn_sub_n (r, iz3p, ecc->p.m, ecc->p.size);
}
return;
}
- ecc_mod_mul (&ecc->p, iz3p, iz2p, izp);
- ecc_mod_mul (&ecc->p, tp, iz3p, p + ecc->p.size);
+ ecc_mod_mul (&ecc->p, iz3p, iz2p, izp, iz3p);
+ ecc_mod_mul (&ecc->p, tp, iz3p, p + ecc->p.size, tp);
/* And a similar subtraction. */
cy = mpn_sub_n (r + ecc->p.size, tp, ecc->p.m, ecc->p.size);
cnd_copy (cy, r + ecc->p.size, tp, ecc->p.size);
assert (hi == 0);
}
-/* NOTE: mul and sqr needs 2*m->size limbs at rp */
void
ecc_mod_mul (const struct ecc_modulo *m, mp_limb_t *rp,
- const mp_limb_t *ap, const mp_limb_t *bp)
+ const mp_limb_t *ap, const mp_limb_t *bp, mp_limb_t *tp)
{
- mpn_mul_n (rp, ap, bp, m->size);
- m->reduce (m, rp, rp);
+ mpn_mul_n (tp, ap, bp, m->size);
+ m->reduce (m, rp, tp);
}
void
ecc_mod_sqr (const struct ecc_modulo *m, mp_limb_t *rp,
- const mp_limb_t *ap)
+ const mp_limb_t *ap, mp_limb_t *tp)
{
- mpn_sqr (rp, ap, m->size);
- m->reduce (m, rp, rp);
+ mpn_sqr (tp, ap, m->size);
+ m->reduce (m, rp, tp);
}
-/* Compute R <-- X^{2^k} mod M. Needs 2*ecc->size limbs at rp, and
- 2*ecc->size additional limbs of scratch space. No overlap
- allowed. */
void
ecc_mod_pow_2k (const struct ecc_modulo *m,
mp_limb_t *rp, const mp_limb_t *xp,
unsigned k, mp_limb_t *tp)
{
- if (k & 1)
- {
- ecc_mod_sqr (m, rp, xp);
- k--;
- }
- else
- {
- ecc_mod_sqr (m, tp, xp);
- ecc_mod_sqr (m, rp, tp);
- k -= 2;
- }
- while (k > 0)
- {
- ecc_mod_sqr (m, tp, rp);
- ecc_mod_sqr (m, rp, tp);
- k -= 2;
- }
+ ecc_mod_sqr (m, rp, xp, tp);
+ while (--k > 0)
+ ecc_mod_sqr (m, rp, rp, tp);
}
-/* Computes R <-- X^{2^k} * Y. Scratch requirements as ecc_mod_pow_2k. */
void
ecc_mod_pow_2k_mul (const struct ecc_modulo *m,
mp_limb_t *rp, const mp_limb_t *xp,
unsigned k, const mp_limb_t *yp,
mp_limb_t *tp)
{
- ecc_mod_pow_2k (m, tp, xp, k, rp);
- ecc_mod_mul (m, rp, tp, yp);
+ ecc_mod_pow_2k (m, rp, xp, k, tp);
+ ecc_mod_mul (m, rp, rp, yp, tp);
}
/* Get x3, z3 from doubling. Since most significant bit is forced to 1. */
ecc_mod_add (m, A, x2, z2);
ecc_mod_sub (m, B, x2, z2);
- ecc_mod_sqr (m, AA, A);
- ecc_mod_sqr (m, BB, B);
- ecc_mod_mul (m, x3, AA, BB);
+ ecc_mod_sqr (m, AA, A, AA);
+ ecc_mod_sqr (m, BB, B, BB);
+ ecc_mod_mul (m, x3, AA, BB, x3);
ecc_mod_sub (m, E, AA, BB);
ecc_mod_addmul_1 (m, AA, E, a24);
- ecc_mod_mul (m, z3, E, AA);
+ ecc_mod_mul (m, z3, E, AA, z3);
for (i = bit_high; i >= bit_low; i--)
{
limbs. */
ecc_mod_add (m, A, x2, z2);
ecc_mod_sub (m, B, x2, z2);
- ecc_mod_sqr (m, AA, A);
- ecc_mod_sqr (m, BB, B);
- ecc_mod_mul (m, x2, AA, BB); /* Last use of BB */
+ ecc_mod_sqr (m, AA, A, AA);
+ ecc_mod_sqr (m, BB, B, BB);
+ ecc_mod_mul (m, x2, AA, BB, x2); /* Last use of BB */
ecc_mod_sub (m, E, AA, BB);
ecc_mod_addmul_1 (m, AA, E, a24);
ecc_mod_add (m, C, x3, z3);
ecc_mod_sub (m, D, x3, z3);
- ecc_mod_mul (m, z2, E, AA); /* Last use of E and AA */
- ecc_mod_mul (m, DA, D, A); /* Last use of D, A. FIXME: could
- let CB overlap. */
- ecc_mod_mul (m, CB, C, B);
+ ecc_mod_mul (m, z2, E, AA, z2); /* Last use of E and AA */
+ ecc_mod_mul (m, DA, D, A, DA); /* Last use of D, A. FIXME: could
+ let CB overlap. */
+ ecc_mod_mul (m, CB, C, B, CB);
ecc_mod_add (m, C, DA, CB);
- ecc_mod_sqr (m, x3, C);
+ ecc_mod_sqr (m, x3, C, x3);
ecc_mod_sub (m, C, DA, CB);
- ecc_mod_sqr (m, DA, C);
- ecc_mod_mul (m, z3, DA, px);
+ ecc_mod_sqr (m, DA, C, DA);
+ ecc_mod_mul (m, z3, DA, px, z3);
/* FIXME: Could be combined with the loop's initial mpn_cnd_swap. */
mpn_cnd_swap (bit, x2, x3, 2*m->size);
{
ecc_mod_add (m, A, x2, z2);
ecc_mod_sub (m, B, x2, z2);
- ecc_mod_sqr (m, AA, A);
- ecc_mod_sqr (m, BB, B);
- ecc_mod_mul (m, x2, AA, BB);
+ ecc_mod_sqr (m, AA, A, AA);
+ ecc_mod_sqr (m, BB, B, BB);
+ ecc_mod_mul (m, x2, AA, BB, x2);
ecc_mod_sub (m, E, AA, BB);
ecc_mod_addmul_1 (m, AA, E, a24);
- ecc_mod_mul (m, z2, E, AA);
+ ecc_mod_mul (m, z2, E, AA, z2);
}
assert (m->invert_itch <= 7 * m->size);
m->invert (m, x3, z2, z3 + m->size);
- ecc_mod_mul (m, z3, x2, x3);
+ ecc_mod_mul (m, z3, x2, x3, z3);
cy = mpn_sub_n (qx, z3, m->m, m->size);
cnd_copy (cy, qx, z3, m->size);
}
/* For a valid input, y < p, so subtraction should underflow. */
res &= mpn_sub_n (scratch, scratch, ecc->p.m, ecc->p.size);
- ecc_mod_sqr (&ecc->p, y2, yp);
- ecc_mod_mul (&ecc->p, vp, y2, ecc->b);
+ ecc_mod_sqr (&ecc->p, y2, yp, y2);
+ ecc_mod_mul (&ecc->p, vp, y2, ecc->b, vp);
ecc_mod_sub (&ecc->p, vp, vp, ecc->unit);
/* The sign is different between curve25519 and curve448. */
if (ecc->p.bit_size == 255)
eddsa->digest (ctx, 2*nbytes, hash);
_eddsa_hash (&ecc->q, hp, 2*nbytes, hash);
- ecc_mod_mul (&ecc->q, sp, hp, k2);
+ ecc_mod_mul (&ecc->q, sp, hp, k2, sp);
ecc_mod_add (&ecc->q, sp, sp, rp); /* FIXME: Can be plain add */
if (ecc->p.bit_size == 255)
{
#define t0 scratch
#define t1 (scratch + p->size)
- ecc_mod_mul (p, t0, x1, z2);
+ ecc_mod_mul (p, t0, x1, z2, t0);
if (mpn_cmp (t0, p->m, p->size) >= 0)
mpn_sub_n (t0, t0, p->m, p->size);
- ecc_mod_mul (p, t1, x2, z1);
+ ecc_mod_mul (p, t1, x2, z1, t1);
if (mpn_cmp (t1, p->m, p->size) >= 0)
mpn_sub_n (t1, t1, p->m, p->size);
if (mpn_zero_p (UKM, size))
UKM[0] = 1;
- ecc_mod_mul (&ecc->q, TEMP, priv->p, UKM); /* TEMP = UKM * priv */
+ ecc_mod_mul (&ecc->q, TEMP, priv->p, UKM, TEMP); /* TEMP = UKM * priv */
ecc->mul (ecc, XYZ, TEMP, pub->p, scratch + 4*size); /* XYZ = UKM * priv * pub */
ecc->h_to_a (ecc, 0, TEMP, XYZ, scratch + 5*size); /* TEMP = XYZ */
mpn_get_base256_le (out, bsize, TEMP, size);