From: Niels Möller Date: Thu, 5 Nov 2020 19:37:11 +0000 (+0100) Subject: Reduce scratch need for ecc_dup_jj X-Git-Tag: nettle_3.7rc1~52^2~10 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=001f561974b823418c8353df770b3b1b5129cde0;p=thirdparty%2Fnettle.git Reduce scratch need for ecc_dup_jj --- diff --git a/ChangeLog b/ChangeLog index 12a3b62d..4409d6f9 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,8 @@ +2020-11-05 Niels Möller + + * ecc-dup-jj.c (ecc_dup_jj): Reduce scratch need. + * ecc-internal.h (ECC_DUP_JJ_ITCH): Now 4*size. + 2020-11-03 Niels Möller * ecc-dup-eh.c (ecc_dup_eh): Reduce scratch need. diff --git a/ecc-dup-jj.c b/ecc-dup-jj.c index c338971b..c793097a 100644 --- a/ecc-dup-jj.c +++ b/ecc-dup-jj.c @@ -47,6 +47,14 @@ ecc_dup_jj (const struct ecc_curve *ecc, mp_limb_t *r, const mp_limb_t *p, mp_limb_t *scratch) { +#define x1 p +#define y1 (p + ecc->p.size) +#define z1 (p + 2*ecc->p.size) + +#define x2 r +#define y2 (r + ecc->p.size) +#define z2 (r + 2*ecc->p.size) + /* Formulas (from djb, http://www.hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-3.html#doubling-dbl-2001-b): @@ -60,51 +68,37 @@ ecc_dup_jj (const struct ecc_curve *ecc, y' = alpha*(4*beta-x')-8*gamma^2 mul, sqr */ -#define delta scratch -#define gamma (scratch + ecc->p.size) -#define beta (scratch + 2*ecc->p.size) -#define g2 (scratch + 3*ecc->p.size) -#define sum (scratch + 4*ecc->p.size) -#define alpha scratch /* Overlap delta */ - -#define xp p -#define yp (p + ecc->p.size) -#define zp (p + 2*ecc->p.size) - - /* delta */ - ecc_mod_sqr (&ecc->p, delta, zp, delta); - - /* gamma */ - ecc_mod_sqr (&ecc->p, gamma, yp, gamma); - - /* z'. Can use beta area as scratch. */ - ecc_mod_add (&ecc->p, r + 2*ecc->p.size, yp, zp); - ecc_mod_sqr (&ecc->p, beta, r + 2*ecc->p.size, beta); - ecc_mod_sub (&ecc->p, beta, beta, gamma); - ecc_mod_sub (&ecc->p, r + 2*ecc->p.size, beta, delta); - - /* alpha. Can use beta area as scratch, and overwrite delta. */ - ecc_mod_add (&ecc->p, sum, xp, delta); - ecc_mod_sub (&ecc->p, delta, xp, delta); - ecc_mod_mul (&ecc->p, beta, sum, delta, beta); - ecc_mod_mul_1 (&ecc->p, alpha, beta, 3); - - /* beta */ - ecc_mod_mul (&ecc->p, beta, xp, gamma, beta); - - /* Do gamma^2 and 4*beta early, to get them out of the way. We can - then use the old area at gamma as scratch. */ - ecc_mod_sqr (&ecc->p, g2, gamma, g2); - ecc_mod_mul_1 (&ecc->p, sum, beta, 4); - - /* x' */ - ecc_mod_sqr (&ecc->p, gamma, alpha, gamma); /* Overwrites gamma and beta */ - ecc_mod_submul_1 (&ecc->p, gamma, sum, 2); - mpn_copyi (r, gamma, ecc->p.size); - - /* y' */ - ecc_mod_sub (&ecc->p, sum, sum, r); - ecc_mod_mul (&ecc->p, gamma, sum, alpha, gamma); - ecc_mod_submul_1 (&ecc->p, gamma, g2, 8); - mpn_copyi (r + ecc->p.size, gamma, ecc->p.size); +#define gamma scratch +#define delta (scratch + ecc->p.size) +#define alpha delta + +#define beta (scratch + 2*ecc->p.size) +#define sum (scratch + 3*ecc->p.size) + + ecc_mod_sqr (&ecc->p, gamma, y1, gamma); /* x, y, z, gamma */ + ecc_mod_sqr (&ecc->p, delta, z1, delta); /* x, y, z, gamma, delta */ + + ecc_mod_add (&ecc->p, sum, z1, y1); /* x, gamma, delta, s */ + ecc_mod_sqr (&ecc->p, sum, sum, y2); /* Can use y-z as scratch */ + ecc_mod_sub (&ecc->p, z2, sum, delta); /* x, z, gamma, delta */ + ecc_mod_sub (&ecc->p, z2, z2, gamma); + + ecc_mod_mul (&ecc->p, beta, x1, gamma, beta); /* x, z, gamma, delta, beta */ + + ecc_mod_add (&ecc->p, sum, x1, delta); /* x, sum, z', gamma, delta, beta */ + ecc_mod_sub (&ecc->p, delta, x1, delta); /* sum, z', gamma, delta, beta */ + /* This multiplication peaks the storage need; can use x-y for scratch. */ + ecc_mod_mul (&ecc->p, alpha, sum, delta, x2); /* z', gamma, alpha, beta */ + ecc_mod_mul_1 (&ecc->p, alpha, alpha, 3); + + ecc_mod_mul_1 (&ecc->p, y2, beta, 4); + + /* From now on, can use beta as scratch. */ + ecc_mod_sqr (&ecc->p, x2, alpha, beta); /* alpha^2 */ + ecc_mod_submul_1 (&ecc->p, x2, y2, 2); /* alpha^2 - 8 beta */ + + ecc_mod_sub (&ecc->p, y2, y2, x2); /* 4 beta - x' */ + ecc_mod_mul (&ecc->p, y2, y2, alpha, beta); + ecc_mod_sqr (&ecc->p, gamma, gamma, beta); + ecc_mod_submul_1 (&ecc->p, y2, gamma, 8); } diff --git a/ecc-internal.h b/ecc-internal.h index 24c73155..a84387a9 100644 --- a/ecc-internal.h +++ b/ecc-internal.h @@ -444,7 +444,7 @@ curve448_eh_to_x (mp_limb_t *xp, const mp_limb_t *p, /* Only valid when using the general ecc_mod_inv/ecc_mod_inv_redc ! */ #define ECC_J_TO_A_ITCH(size) (4*(size)) #define ECC_EH_TO_A_ITCH(size, inv) (2*(size)+(inv)) -#define ECC_DUP_JJ_ITCH(size) (5*(size)) +#define ECC_DUP_JJ_ITCH(size) (4*(size)) #define ECC_DUP_EH_ITCH(size) (3*(size)) #define ECC_DUP_TH_ITCH(size) (3*(size)) #define ECC_ADD_JJA_ITCH(size) (6*(size))