+2014-09-23 Niels Möller <nisse@lysator.liu.se>
+
+ * ecc-mod-inv.c (ecc_mod_inv): Interface change, make ap input
+ const, and require 2n limbs at rp. Preparing for powm-based
+ alternative implementations. Drop #if:ed out code and dp
+ temporary. Updated all callers, more complicated cases described
+ below.
+ * ecc-internal.h (typedef ecc_mod_inv_func): Added const to input
+ argument.
+ (ECC_MOD_INV_ITCH): Renamed, was ECC_MODINV_ITCH, and reduced to
+ 2*n.
+ * ecc-ecdsa-verify.c (ecc_ecdsa_verify): Overhauled allocation,
+ putting mod_inv scratch at the end.
+
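A minimal sketch of the new calling convention (illustrative only; the wrapper
name example_mod_inv is not part of the patch, sizes follow the ChangeLog):

  #include "ecc-internal.h"

  /* The result area must hold 2*n limbs: ecc_mod_inv uses the high half
     as working storage and leaves the inverse in the low half.  A further
     ECC_MOD_INV_ITCH (n) = 2*n limbs of scratch are needed, and the input
     is no longer clobbered. */
  static void
  example_mod_inv (const struct ecc_modulo *m,
                   mp_limb_t *rp,          /* 2*m->size limbs */
                   const mp_limb_t *ap,    /* m->size limbs, const */
                   mp_limb_t *scratch)     /* ECC_MOD_INV_ITCH (m->size) */
  {
    ecc_mod_inv (m, rp, ap, scratch);
    /* a^-1 mod m is now in rp[0 .. m->size - 1]; the high half of rp
       holds garbage. */
  }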
2014-09-22 Niels Möller <nisse@lysator.liu.se>
* ecc-random.c (ecc_mod_random): Renamed, and take a const struct
x = 0, and we should be fine, since ecc_modp_inv returns 0
in this case. */
ecc_modp_sub (ecc, t0, wp, vp);
- /* Needs 3*size scratch, for a total of 5*size */
- ecc->p.invert (&ecc->p, t1, t0, t2);
+ /* Needs a total of 5*size storage. */
+ ecc->p.invert (&ecc->p, t1, t0, t2 + ecc->p.size);
ecc_modp_add (ecc, t0, wp, vp);
ecc_modp_mul (ecc, t2, t0, t1);
ecc_modp_addmul_1 (ecc, AA, E, 121665);
ecc_modp_mul (ecc, z2, E, AA);
}
- ecc->p.invert (&ecc->p, x3, z2, z3);
+ ecc->p.invert (&ecc->p, x3, z2, z3 + ecc->p.size);
ecc_modp_mul (ecc, z3, x2, x3);
cy = mpn_sub_n (x2, z3, ecc->p.m, ecc->p.size);
cnd_copy (cy, x2, z3, ecc->p.size);
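The "+ ecc->p.size" offsets above follow from the new 2*size-limb result
area; a sketch, under the assumption that t0, t1, t2 (and likewise x2, z2,
x3, z3) are consecutive size-limb regions in one scratch area:

  /* t1's result area now covers t1[0 .. 2*size-1], i.e. it spills into
     the low half of t2.  The invert's own 2*size scratch limbs must
     therefore start one region later, at t2 + size, giving the 5*size
     total mentioned in the updated comment. */
  mp_limb_t *t0 = scratch;
  mp_limb_t *t1 = scratch + ecc->p.size;
  mp_limb_t *t2 = scratch + 2*ecc->p.size;

  ecc->p.invert (&ecc->p, t1, t0, t2 + ecc->p.size);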
/* x coordinate only, modulo q */
ecc->h_to_a (ecc, 2, rp, P, P + 3*ecc->p.size);
- /* Invert k, uses 5 * ecc->p.size including scratch */
- mpn_copyi (hp, kp, ecc->p.size);
- ecc->q.invert (&ecc->q, kinv, hp, tp);
+ /* Invert k, uses 4 * ecc->p.size including scratch */
+ ecc->q.invert (&ecc->q, kinv, kp, tp); /* NOTE: Also clobbers hp */
/* Process hash digest */
ecc_hash (ecc, hp, length, digest);
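Why the invert clobbers hp (the offsets below are an assumption consistent
with the "4 * ecc->p.size" accounting above, not quoted from
ecc-ecdsa-sign.c):

  mp_limb_t *kinv = scratch;                 /* result area: 2*size limbs */
  mp_limb_t *hp   = scratch + ecc->p.size;   /* lies in kinv's high half  */
  mp_limb_t *tp   = scratch + 2*ecc->p.size; /* invert scratch: 2*size    */

  ecc->q.invert (&ecc->q, kinv, kp, tp); /* writes 2*size limbs at kinv   */
  ecc_hash (ecc, hp, length, digest);    /* so load the digest afterwards */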
*/
#define P2 scratch
-#define P1 (scratch + 3*ecc->p.size)
-#define sinv (scratch + 3*ecc->p.size)
+#define u1 (scratch + 3*ecc->p.size)
#define u2 (scratch + 4*ecc->p.size)
-#define hp (scratch + 4*ecc->p.size)
-#define u1 (scratch + 6*ecc->p.size)
+
+#define P1 (scratch + 4*ecc->p.size)
+#define sinv (scratch)
+#define hp (scratch + ecc->p.size)
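How the overlapping defines avoid conflicts; the phases below are inferred
from the call sequence in this function, as a reading aid:

  /* Offsets in units of ecc->p.size limbs:

     Phase 1: sinv @ 0 (2-unit result area), hp @ 1 inside it, so the
              hash is loaded only after the invert; u1 @ 3 and u2 @ 4
              are then computed while sinv is live.
     Phase 2: P2 @ 0..2 overwrites sinv/hp (now dead); ecc->mul reads
              u2 @ 4 and gets scratch from offset 5.
     Phase 3: P1 @ 4..6 overwrites u2 (now dead); mul_g, add_hhh and
              h_to_a read u1 @ 3 and get scratch from offset 7, hence
              the 7*ecc->p.size + itch totals below. */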
if (! (ecdsa_in_range (ecc, rp)
&& ecdsa_in_range (ecc, sp)))
/* FIXME: Micro optimizations: Either simultaneous multiplication.
Or convert to projective coordinates (can be done without
division, I think), and write an ecc_add_ppp. */
-
- /* Compute sinv, use P2 as scratch */
- mpn_copyi (sinv + ecc->p.size, sp, ecc->p.size);
- ecc->q.invert (&ecc->q, sinv, sinv + ecc->p.size, P2);
+
+ /* Compute sinv */
+ ecc->q.invert (&ecc->q, sinv, sp, sinv + 2*ecc->p.size);
+
+ /* u1 = h / s, P1 = u1 * G */
+ ecc_hash (ecc, hp, length, digest);
+ ecc_modq_mul (ecc, u1, hp, sinv);
/* u2 = r / s, P2 = u2 * Y */
ecc_modq_mul (ecc, u2, rp, sinv);
/* Total storage: 5*ecc->p.size + ecc->mul_itch */
ecc->mul (ecc, P2, u2, pp, u2 + ecc->p.size);
- /* u1 = h / s, P1 = u1 * G */
- ecc_hash (ecc, hp, length, digest);
- ecc_modq_mul (ecc, u1, hp, sinv);
-
  /* u1 = 0 can happen only if h = 0 or h = q, which is extremely
     unlikely. */
if (!zero_p (u1, ecc->p.size))
{
- /* Total storage: 6*ecc->p.size + ecc->mul_g_itch (ecc->p.size) */
- ecc->mul_g (ecc, P1, u1, u1 + ecc->p.size);
+ /* Total storage: 7*ecc->p.size + ecc->mul_g_itch (ecc->p.size) */
+ ecc->mul_g (ecc, P1, u1, P1 + 3*ecc->p.size);
      /* NOTE: ecc_add_jjj and/or ecc_j_to_a will produce garbage in
	 case u1 G = +/- u2 V. However, that can happen only when the
	 private key is z = +/- u1 / u2 (mod q), so anyone able to
	 trigger it could just as well have found the private key by
	 guessing. */
- /* Total storage: 6*ecc->p.size + ecc->add_hhh_itch */
- ecc->add_hhh (ecc, P1, P1, P2, u1);
+ /* Total storage: 7*ecc->p.size + ecc->add_hhh_itch */
+ ecc->add_hhh (ecc, P1, P1, P2, P1 + 3*ecc->p.size);
}
/* x coordinate only, modulo q */
- ecc->h_to_a (ecc, 2, P2, P1, u1);
+ ecc->h_to_a (ecc, 2, P2, P1, P1 + 3*ecc->p.size);
return (mpn_cmp (rp, P2, ecc->p.size) == 0);
#undef P2
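For reference, the routine implements the textbook ECDSA check; in the
notation of the code, with h the truncated hash, (r, s) the signature and
Y = z G the public point:

  u1 = h / s mod q,   u2 = r / s mod q
  accept  iff  x(u1 G + u2 Y) mod q == r

which succeeds for an honest signature since s = (h + r z) / k mod q gives
u1 G + u2 Y = ((h + r z) / s) G = k G.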
mp_limb_t cy;
- mpn_copyi (tp, zp, ecc->p.size);
- /* Needs 3*size scratch */
- ecc->p.invert (&ecc->p, izp, tp, tp + ecc->p.size);
+ /* Needs 2*size scratch */
+ ecc->p.invert (&ecc->p, izp, zp, tp + ecc->p.size);
ecc_modp_mul (ecc, tp, xp, izp);
cy = mpn_sub_n (r, tp, ecc->p.m, ecc->p.size);
typedef void ecc_mod_func (const struct ecc_modulo *m, mp_limb_t *rp);
typedef void ecc_mod_inv_func (const struct ecc_modulo *m,
- mp_limb_t *vp, mp_limb_t *ap,
+ mp_limb_t *vp, const mp_limb_t *ap,
mp_limb_t *scratch);
typedef void ecc_add_func (const struct ecc_curve *ecc,
mp_limb_t *scratch);
/* Current scratch needs: */
-#define ECC_MODINV_ITCH(size) (3*(size))
+#define ECC_MOD_INV_ITCH(size) (2*(size))
#define ECC_J_TO_A_ITCH(size) (5*(size))
#define ECC_EH_TO_A_ITCH(size) (4*(size))
#define ECC_DUP_JJ_ITCH(size) (5*(size))
mp_limb_t *scratch)
{
#define izp scratch
-#define up (scratch + ecc->p.size)
+#define up (scratch + 2*ecc->p.size)
#define iz2p (scratch + ecc->p.size)
#define iz3p (scratch + 2*ecc->p.size)
#define izBp (scratch + 3*ecc->p.size)
/* Set v = (r_z / B^2)^-1,
r_x = p_x v^2 / B^3 = ((v/B * v)/B * p_x)/B
- r_y = p_y v^3 / B^4 = (((v/B * v)/B * v)/B * p_x)/B
-
- Skip the first redc, if we want to stay in Montgomery
- representation.
+ r_y = p_y v^3 / B^4 = (((v/B * v)/B * v)/B * p_y)/B
*/
mpn_copyi (up, p + 2*ecc->p.size, ecc->p.size);
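Why up moved from scratch + size to scratch + 2*size (inferred from the new
2*size result area, not stated in the patch):

  /* Assumed ecc_j_to_a scratch plan, 5*size limbs total, matching
     ECC_J_TO_A_ITCH:

       izp @ 0    result area for the invert, 2*size limbs
       up  @ 2    copy of the z coordinate, now a const invert input,
                  kept clear of izp's result area; the invert's own
                  2*size scratch limbs can follow at up + size
       iz2p @ 1, iz3p @ 2, izBp @ 3 reuse the space only after the
       invert has finished. */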
/* Compute a^{-1} mod m, with running time depending only on the size.
Returns zero if a == 0 (mod m), to be consistent with a^{phi(m)-1}.
- Also needs (m+1)/2, and m must be odd. */
+ Also needs (m+1)/2, and m must be odd.
+
+ Needs 2n limbs available at rp, and 2n additional scratch limbs.
+*/
/* FIXME: Could use mpn_sec_invert (in GMP-6), but with a bit more
scratch need since it doesn't precompute (m+1)/2. */
void
ecc_mod_inv (const struct ecc_modulo *m,
- mp_limb_t *vp, mp_limb_t *ap,
+ mp_limb_t *vp, const mp_limb_t *in_ap,
mp_limb_t *scratch)
{
-#define bp scratch
-#define dp (scratch + n)
-#define up (scratch + 2*n)
+#define ap scratch
+#define bp (scratch + n)
+#define up (vp + n)
mp_size_t n = m->size;
/* Avoid the mp_bitcnt_t type for compatibility with older GMP
mpn_zero (up+1, n - 1);
mpn_copyi (bp, m->m, n);
mpn_zero (vp, n);
+ mpn_copyi (ap, in_ap, n);
for (i = m->bit_size + GMP_NUMB_BITS * n; i-- > 0; )
{
assert (bp[0] & 1);
odd = ap[0] & 1;
- /* Which variant is fastest depends on the speed of the various
- cnd_* functions. Assembly implementation would help. */
-#if 1
swap = cnd_sub_n (odd, ap, bp, n);
cnd_add_n (swap, bp, ap, n);
cnd_neg (swap, ap, ap, n);
-#else
- swap = odd & mpn_sub_n (dp, ap, bp, n);
- cnd_copy (swap, bp, ap, n);
- cnd_neg (swap, dp, dp, n);
- cnd_copy (odd, ap, dp, n);
-#endif
-#if 1
cnd_swap (swap, up, vp, n);
cy = cnd_sub_n (odd, up, vp, n);
cy -= cnd_add_n (cy, up, m->m, n);
-#else
- cy = cnd_sub_n (odd, up, vp, n);
- cnd_add_n (swap, vp, up, n);
- cnd_neg (swap, up, up, n);
- cnd_add_n (cy ^ swap, up, m->p, n);
-#endif
+
cy = mpn_rshift (ap, ap, n, 1);
assert (cy == 0);
      cy = mpn_rshift (up, up, n, 1);
      cy = cnd_add_n (cy, up, m->mp1h, n);
      assert (cy == 0);
}
assert ( (ap[0] | ap[n-1]) == 0);
+#undef ap
#undef bp
-#undef dp
#undef up
}
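As a reading aid, a variable-time mpz sketch of the same right-shift binary
extended gcd (my reconstruction of the invariants the constant-time loop
maintains, not code from the patch):

  #include <gmp.h>

  /* Computes v = a0^-1 mod m for odd m, or v = 0 if gcd(a0, m) != 1.
     Invariants, as in ecc_mod_inv: b stays odd, and
       u * a0 == a (mod m),  v * a0 == b (mod m). */
  static void
  ref_mod_inv (mpz_t v, const mpz_t a0, const mpz_t m)
  {
    mpz_t a, b, u;
    mpz_inits (a, b, u, NULL);
    mpz_mod (a, a0, m);
    mpz_set (b, m);
    mpz_set_ui (u, 1);
    mpz_set_ui (v, 0);

    while (mpz_sgn (a) != 0)
      {
        if (mpz_odd_p (a))
          {
            if (mpz_cmp (a, b) < 0)
              {
                /* Corresponds to the cnd_* swap: keep a >= b >= 0. */
                mpz_swap (a, b);
                mpz_swap (u, v);
              }
            mpz_sub (a, a, b);        /* odd - odd: a becomes even */
            mpz_sub (u, u, v);
            mpz_mod (u, u, m);
          }
        mpz_tdiv_q_2exp (a, a, 1);    /* a /= 2 */
        if (mpz_odd_p (u))            /* u = u/2 mod m; the (m+1)/2 */
          mpz_add (u, u, m);          /* trick, since m is odd      */
        mpz_tdiv_q_2exp (u, u, 1);
      }
    if (mpz_cmp_ui (b, 1) != 0)       /* not invertible */
      mpz_set_ui (v, 0);
    mpz_clears (a, b, u, NULL);
  }

The constant-time loop above replaces these branches with cnd_sub_n,
cnd_add_n and cnd_swap, and always runs bit_size + GMP_NUMB_BITS*n
iterations, so its timing is independent of the operand values.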
bench_modinv (void *p)
{
struct ecc_ctx *ctx = (struct ecc_ctx *) p;
- mpn_copyi (ctx->rp + ctx->ecc->p.size, ctx->ap, ctx->ecc->p.size);
- ctx->ecc->p.invert (&ctx->ecc->p, ctx->rp, ctx->rp + ctx->ecc->p.size, ctx->tp);
+ ctx->ecc->p.invert (&ctx->ecc->p, ctx->rp, ctx->ap, ctx->tp);
}
#if !NETTLE_USE_MINI_GMP
const struct ecc_modulo *m)
{
mp_limb_t a[MAX_ECC_SIZE];
- mp_limb_t ai[MAX_ECC_SIZE];
+ mp_limb_t ai[2*MAX_ECC_SIZE];
mp_limb_t ref[MAX_ECC_SIZE];
- mp_limb_t scratch[ECC_MODINV_ITCH (MAX_ECC_SIZE)];
+ mp_limb_t scratch[ECC_MOD_INV_ITCH (MAX_ECC_SIZE)];
unsigned j;
mpz_t r;
}
/* Check behaviour for a = m */
- mpn_copyi (a, m->m, m->size);
memset (ai, 17, m->size * sizeof(*ai));
- m->invert (m, ai, a, scratch);
+ m->invert (m, ai, m->m, scratch);
if (!mpn_zero_p (ai, m->size))
{
fprintf (stderr, "%s->invert failed for a = p input (bit size %u):\n",