mpn_cnd_add_n (cy, rp, xp, q->m, ECC_LIMB_SIZE);
}
-/* Computes a^{(p-5)/8} = a^{2^{252}-3} mod m. Needs 5 * n scratch
+/* Computes a^{(p-5)/8} = a^{2^{252}-3} mod m, leaving the result in
+   rp. The scratch area is laid out as a7 (offset 0), t0 (offset
+   ECC_LIMB_SIZE) and tp (offset 2*ECC_LIMB_SIZE); tp is what gets
+   passed as the scratch argument of every helper call below, and n
+   is presumably ECC_LIMB_SIZE. Needs 4 * n scratch
space. */
static void
ecc_mod_pow_252m3 (const struct ecc_modulo *m,
{
#define a7 scratch
#define t0 (scratch + ECC_LIMB_SIZE)
-#define t1 (scratch + 3*ECC_LIMB_SIZE)
+#define tp (scratch + 2*ECC_LIMB_SIZE)
/* a^{2^252 - 3} = a^{(p-5)/8}, using the addition chain
2^252 - 3
= 1 + 4 (2^125+1)(1+2(2^62+1)(2^31+1)(7+8(2^14+1)(2^7+1)(1+2(2^3+1)*7)))
+ As the per-line exponents below show, each
+ ecc_mod_pow_2kp1 (m, r, x, k, tp) step yields r = x^{2^k+1}, which
+ supplies one (2^k+1) factor of the chain.
*/
- ecc_mod_pow_2kp1 (m, t0, ap, 1, t1); /* a^3 */
- ecc_mod_sqr (m, rp, t0, rp); /* a^6 */
- ecc_mod_mul (m, a7, rp, ap, a7); /* a^7 */
- ecc_mod_pow_2kp1 (m, rp, a7, 3, t0); /* a^63 = a^{2^6-1} */
- ecc_mod_sqr (m, t0, rp, t0); /* a^{2^7-2} */
- ecc_mod_mul (m, rp, t0, ap, rp); /* a^{2^7-1} */
- ecc_mod_pow_2kp1 (m, t0, rp, 7, t1); /* a^{2^14-1}*/
- ecc_mod_pow_2kp1 (m, rp, t0, 14, t1); /* a^{2^28-1} */
- ecc_mod_sqr (m, t0, rp, t0); /* a^{2^29-2} */
- ecc_mod_sqr (m, t1, t0, t1); /* a^{2^30-4} */
- ecc_mod_sqr (m, t0, t1, t0); /* a^{2^31-8} */
- ecc_mod_mul (m, rp, t0, a7, rp); /* a^{2^31-1} */
- ecc_mod_pow_2kp1 (m, t0, rp, 31, t1); /* a^{2^62-1} */
- ecc_mod_pow_2kp1 (m, rp, t0, 62, t1); /* a^{2^124-1}*/
- ecc_mod_sqr (m, t0, rp, t0); /* a^{2^125-2} */
- ecc_mod_mul (m, rp, t0, ap, rp); /* a^{2^125-1} */
- ecc_mod_pow_2kp1 (m, t0, rp, 125, t1);/* a^{2^250-1} */
- ecc_mod_sqr (m, rp, t0, rp); /* a^{2^251-2} */
- ecc_mod_sqr (m, t0, rp, t0); /* a^{2^252-4} */
- ecc_mod_mul (m, rp, t0, ap, rp); /* a^{2^252-3} */
-#undef t0
-#undef t1
+ ecc_mod_pow_2kp1 (m, a7, ap, 1, tp); /* a^3 = a^{2^1+1} */
+ ecc_mod_sqr (m, a7, a7, tp); /* a^6 */
+ ecc_mod_mul (m, a7, a7, ap, tp); /* a^7, kept in a7 for the 2^31-1 step */
+ ecc_mod_pow_2kp1 (m, rp, a7, 3, tp); /* a^63 = a^{2^6-1} */
+ ecc_mod_sqr (m, rp, rp, tp); /* a^{2^7-2} */
+ ecc_mod_mul (m, rp, rp, ap, tp); /* a^{2^7-1} */
+ ecc_mod_pow_2kp1 (m, t0, rp, 7, tp); /* a^{2^14-1} */
+ ecc_mod_pow_2kp1 (m, rp, t0, 14, tp); /* a^{2^28-1} */
+ ecc_mod_sqr (m, rp, rp, tp); /* a^{2^29-2} */
+ ecc_mod_sqr (m, rp, rp, tp); /* a^{2^30-4} */
+ ecc_mod_sqr (m, rp, rp, tp); /* a^{2^31-8} */
+ ecc_mod_mul (m, rp, rp, a7, tp); /* a^{2^31-8} * a^7 = a^{2^31-1} */
+ ecc_mod_pow_2kp1 (m, t0, rp, 31, tp); /* a^{2^62-1} */
+ ecc_mod_pow_2kp1 (m, rp, t0, 62, tp); /* a^{2^124-1} */
+ ecc_mod_sqr (m, rp, rp, tp); /* a^{2^125-2} */
+ ecc_mod_mul (m, rp, rp, ap, tp); /* a^{2^125-1} */
+ ecc_mod_pow_2kp1 (m, t0, rp, 125, tp);/* a^{2^250-1} */
+ ecc_mod_sqr (m, rp, t0, tp); /* a^{2^251-2} */
+ ecc_mod_sqr (m, rp, rp, tp); /* a^{2^252-4} */
+ ecc_mod_mul (m, rp, rp, ap, tp); /* a^{2^252-3}, the final result */
#undef a7
+#undef t0
+#undef tp
}
-/* Needs 5*ECC_LIMB_SIZE scratch space. */
-#define ECC_25519_INV_ITCH (5*ECC_LIMB_SIZE)
+/* Scratch size, in limbs, presumably for ecc_curve25519_inv below.
+   Same as for ecc_mod_pow_252m3 above, which receives the scratch
+   pointer unchanged. */
+#define ECC_25519_INV_ITCH (4*ECC_LIMB_SIZE)
-static void ecc_curve25519_inv (const struct ecc_modulo *p,
- mp_limb_t *rp, const mp_limb_t *ap,
- mp_limb_t *scratch)
+static void
+ecc_curve25519_inv (const struct ecc_modulo *p,
+ mp_limb_t *rp, const mp_limb_t *ap,
+ mp_limb_t *scratch)
{
-#define t0 scratch
-
/* Addition chain
p - 2 = 2^{255} - 21
= 1 + 2 (1 + 4 (2^{252}-3))
+ so rp ends up holding a^{p-2}, which for a prime modulus is the
+ inverse of a (Fermat's little theorem). Every step writes rp in
+ place, so no copy out of the scratch area is needed.
*/
- ecc_mod_pow_252m3 (p, rp, ap, t0);
- ecc_mod_sqr (p, t0, rp, t0);
- ecc_mod_sqr (p, rp, t0, rp);
- ecc_mod_mul (p, t0, ap, rp, t0);
- ecc_mod_sqr (p, rp, t0, rp);
- ecc_mod_mul (p, t0, ap, rp, t0);
- mpn_copyi (rp, t0, ECC_LIMB_SIZE); /* FIXME: Eliminate copy? */
-#undef t0
+ ecc_mod_pow_252m3 (p, rp, ap, scratch); /* a^{2^252-3} */
+ ecc_mod_sqr (p, rp, rp, scratch); /* a^{2^253-6} */
+ ecc_mod_sqr (p, rp, rp, scratch); /* a^{2^254-12} */
+ ecc_mod_mul (p, rp, ap, rp, scratch); /* a^{2^254-11} */
+ ecc_mod_sqr (p, rp, rp, scratch); /* a^{2^255-22} */
+ ecc_mod_mul (p, rp, ap, rp, scratch); /* a^{2^255-21} = a^{p-2} */
}
/* First, do a canonical reduction, then check if zero */