From: Niels Möller Date: Tue, 3 Nov 2020 21:28:57 +0000 (+0100) Subject: Reduce scratch need for ecc_dup_th X-Git-Tag: nettle_3.7rc1~52^2~12 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=98eae4144069bb7d96b783e0e80e0307aaa19421;p=thirdparty%2Fnettle.git Reduce scratch need for ecc_dup_th --- diff --git a/ChangeLog b/ChangeLog index af84b05e..80697281 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,7 +1,8 @@ 2020-11-03 Niels Möller * ecc-dup-eh.c (ecc_dup_eh): Reduce scratch need. - * ecc-internal.h (ECC_DUP_EH_ITCH): Now 3*size. + * ecc-dup-th.c (ecc_dup_th): Analogous changes. + * ecc-internal.h (ECC_DUP_EH_ITCH, ECC_DUP_TH_ITCH): Now 3*size. * ecc-internal.h (ecc_add_func): Document in-place operation. * ecc-mul-a-eh.c (ecc_mul_a_eh): Fix call to ecc->add_hhh accordingly. diff --git a/ecc-dup-th.c b/ecc-dup-th.c index c1582cd2..72b42458 100644 --- a/ecc-dup-th.c +++ b/ecc-dup-th.c @@ -42,6 +42,14 @@ ecc_dup_th (const struct ecc_curve *ecc, mp_limb_t *r, const mp_limb_t *p, mp_limb_t *scratch) { +#define x1 p +#define y1 (p + ecc->p.size) +#define z1 (p + 2*ecc->p.size) + +#define x2 r +#define y2 (r + ecc->p.size) +#define z2 (r + 2*ecc->p.size) + /* Formulas (from djb, http://www.hyperelliptic.org/EFD/g1p/auto-twisted-projective.html#doubling-dbl-2008-bbjlp): @@ -73,37 +81,32 @@ ecc_dup_th (const struct ecc_curve *ecc, 3M+4S */ - /* FIXME: Could reduce scratch need by reusing D storage. */ -#define B scratch -#define C (scratch + ecc->p.size) -#define D (scratch + 2*ecc->p.size) -#define F (scratch + 3*ecc->p.size) -#define J (scratch + 4*ecc->p.size) - - /* B */ - ecc_mod_add (&ecc->p, F, p, p + ecc->p.size); - ecc_mod_sqr (&ecc->p, B, F, B); - - /* C */ - ecc_mod_sqr (&ecc->p, C, p, C); - /* D */ - ecc_mod_sqr (&ecc->p, D, p + ecc->p.size, D); - /* Can use r as scratch, even for in-place operation. 
*/ - ecc_mod_sqr (&ecc->p, r, p + 2*ecc->p.size, r); - /* F, */ - ecc_mod_sub (&ecc->p, F, D, C); - /* B - C - D */ - ecc_mod_add (&ecc->p, C, C, D); - ecc_mod_sub (&ecc->p, B, B, C); - /* J */ - ecc_mod_add (&ecc->p, r, r, r); - ecc_mod_sub (&ecc->p, J, r, F); - - /* x' */ - ecc_mod_mul (&ecc->p, r, B, J, r); - /* y' */ - ecc_mod_mul (&ecc->p, r + ecc->p.size, F, C, r + ecc->p.size); - /* z' */ - ecc_mod_mul (&ecc->p, B, F, J, B); - mpn_copyi (r + 2*ecc->p.size, B, ecc->p.size); + +#define C scratch +#define D (scratch + 1*ecc->p.size) +#define B (scratch + 2*ecc->p.size) + +#define F C + + ecc_mod_sqr (&ecc->p, C, x1, C); /* C */ + ecc_mod_sqr (&ecc->p, D, y1, D); /* C, D */ + ecc_mod_add (&ecc->p, B, x1, y1); + ecc_mod_sqr (&ecc->p, B, B, x2); /* C, D, B */ + + /* C+D stored at y' */ + ecc_mod_add (&ecc->p, y2, C, D); + /* B - C - D stored at x' */ + ecc_mod_sub (&ecc->p, x2, B, y2); + + ecc_mod_sub (&ecc->p, F, D, C); /* F */ + + /* Use D as scratch for the following multiplies. */ + ecc_mod_mul (&ecc->p, y2, y2, F, D); + + /* H and J stored at z' */ + ecc_mod_sqr (&ecc->p, z2, z1, D); + ecc_mod_add (&ecc->p, z2, z2, z2); + ecc_mod_sub (&ecc->p, z2, z2, F); + ecc_mod_mul (&ecc->p, x2, x2, z2, D); + ecc_mod_mul (&ecc->p, z2, z2, F, D); } diff --git a/ecc-internal.h b/ecc-internal.h index 29a8c7c3..ff8c6f6a 100644 --- a/ecc-internal.h +++ b/ecc-internal.h @@ -446,7 +446,7 @@ curve448_eh_to_x (mp_limb_t *xp, const mp_limb_t *p, #define ECC_EH_TO_A_ITCH(size, inv) (2*(size)+(inv)) #define ECC_DUP_JJ_ITCH(size) (5*(size)) #define ECC_DUP_EH_ITCH(size) (3*(size)) -#define ECC_DUP_TH_ITCH(size) (5*(size)) +#define ECC_DUP_TH_ITCH(size) (3*(size)) #define ECC_ADD_JJA_ITCH(size) (6*(size)) #define ECC_ADD_JJJ_ITCH(size) (8*(size)) #define ECC_ADD_EH_ITCH(size) (4*(size))