From: Niels Möller Date: Mon, 19 Oct 2020 18:12:56 +0000 (+0200) Subject: Optimize modular inversion for secp521r1. X-Git-Tag: nettle_3.7rc1~48 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=8b3f84f8c47e3f425d5a1d322cea3557cefc5c89;p=thirdparty%2Fnettle.git Optimize modular inversion for secp521r1. * ecc-secp521r1.c (ecc_secp521r1_inv): New function, modular inverse using powering. (_nettle_secp_521r1): Analogous updates. Increases signing performance roughly 15% on x86_64. --- diff --git a/ChangeLog b/ChangeLog index 02d4361b..0f71d045 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,10 @@ +2020-10-19 Niels Möller + + * ecc-secp521r1.c (ecc_secp521r1_inv): New function, modular + inverse using powering. + (_nettle_secp_521r1): Analogous updates. Increases signing + performance roughly 15% on x86_64. + 2020-10-15 Niels Möller * ecc-secp192r1.c (ecc_secp192r1_inv): New function, modular diff --git a/ecc-secp521r1.c b/ecc-secp521r1.c index ec875dbf..0d9f88fc 100644 --- a/ecc-secp521r1.c +++ b/ecc-secp521r1.c @@ -75,6 +75,53 @@ ecc_secp521r1_modp (const struct ecc_modulo *m UNUSED, mp_limb_t *rp, mp_limb_t } #endif +#define ECC_SECP521R1_INV_ITCH (3*ECC_LIMB_SIZE) + +static void +ecc_secp521r1_inv (const struct ecc_modulo *p, + mp_limb_t *rp, const mp_limb_t *ap, + mp_limb_t *scratch) +{ +#define t0 scratch +#define tp (scratch + ECC_LIMB_SIZE) + + /* Addition chain for p - 2: + + 2^{521} - 3 + = 1 + 2^2(2^519 - 1) + = 1 + 2^2(1 + 2 (2^518 - 1)) + = 1 + 2^2(1 + 2 (2^259 + 1) (1 + 2(2^258 - 1))) + = 1 + 2^2(1 + 2 (2^259 + 1) (1 + 2(2^129 + 1) (2^129 - 1))) + = 1 + 2^2(1 + 2 (2^259 + 1) (1 + 2(2^129 + 1) (1 + 2 (2^128 - 1)))) + + where + + 2^{128} - 1 = (2^64 + 1) (2^32+1) (2^16 + 1) (2^8 + 1) (2^4 + 1) (2^2 + 1) (2 + 1) + + This addition chain needs 520 squarings and 13 multiplies. 
+ */ + + ecc_mod_sqr (p, rp, ap, tp); /* a^2 */ + ecc_mod_mul (p, rp, ap, rp, tp); /* a^3 = a^{2^2 - 1} */ + ecc_mod_pow_2kp1 (p, t0, rp, 2, tp); /* a^15 = a^{2^4 - 1} */ + ecc_mod_pow_2kp1 (p, rp, t0, 4, tp); /* a^{2^8 - 1} */ + ecc_mod_pow_2kp1 (p, t0, rp, 8, tp); /* a^{2^16 - 1} */ + ecc_mod_pow_2kp1 (p, rp, t0, 16, tp); /* a^{2^32 - 1} */ + ecc_mod_pow_2kp1 (p, t0, rp, 32, tp); /* a^{2^64 - 1} */ + ecc_mod_pow_2kp1 (p, rp, t0, 64, tp); /* a^{2^128 - 1} */ + ecc_mod_sqr (p, rp, rp, tp); /* a^{2^129 - 2} */ + ecc_mod_mul (p, rp, rp, ap, tp); /* a^{2^129 - 1} */ + ecc_mod_pow_2kp1 (p, t0, rp, 129, tp);/* a^{2^258 - 1} */ + ecc_mod_sqr (p, rp, t0, tp); /* a^{2^259 - 2} */ + ecc_mod_mul (p, rp, rp, ap, tp); /* a^{2^259 - 1} */ + ecc_mod_pow_2kp1 (p, t0, rp, 259, tp);/* a^{2^518 - 1} */ + ecc_mod_sqr (p, rp, t0, tp); /* a^{2^519 - 2} */ + ecc_mod_mul (p, rp, rp, ap, tp); /* a^{2^519 - 1} */ + ecc_mod_sqr (p, rp, rp, tp); /* a^{2^520 - 2} */ + ecc_mod_sqr (p, rp, rp, tp); /* a^{2^521 - 4} */ + ecc_mod_mul (p, rp, rp, ap, tp); /* a^{2^521 - 3} = a^{p - 2} */ +} + const struct ecc_curve _nettle_secp_521r1 = { { @@ -82,7 +129,7 @@ const struct ecc_curve _nettle_secp_521r1 = ECC_LIMB_SIZE, ECC_BMODP_SIZE, ECC_REDC_SIZE, - ECC_MOD_INV_ITCH (ECC_LIMB_SIZE), + ECC_SECP521R1_INV_ITCH, 0, ecc_p, @@ -93,7 +140,7 @@ const struct ecc_curve _nettle_secp_521r1 = ecc_secp521r1_modp, ecc_secp521r1_modp, - ecc_mod_inv, + ecc_secp521r1_inv, NULL, }, { @@ -125,7 +172,7 @@ const struct ecc_curve _nettle_secp_521r1 = ECC_DUP_JJ_ITCH (ECC_LIMB_SIZE), ECC_MUL_A_ITCH (ECC_LIMB_SIZE), ECC_MUL_G_ITCH (ECC_LIMB_SIZE), - ECC_J_TO_A_ITCH (ECC_LIMB_SIZE), + 2*ECC_LIMB_SIZE + ECC_SECP521R1_INV_ITCH, ecc_add_jja, ecc_add_jjj,