+2020-10-30 Niels Möller <nisse@lysator.liu.se>
+
+ * ecc-internal.h (typedef ecc_mod_func): Add separate result
+ argument. Updated all C implementations and callers.
+
2020-10-29 Niels Möller <nisse@lysator.liu.se>
* ecc-mod.c (ecc_mod): More unified handling of final carry
mpn_copyd (r + ecc->p.size, p, 2*ecc->p.size);
mpn_zero (r, ecc->p.size);
- ecc->p.mod (&ecc->p, r);
+ ecc->p.mod (&ecc->p, r, r);
mpn_zero (r + ecc->p.size, ecc->p.size);
- ecc->p.mod (&ecc->p, r + ecc->p.size);
+ ecc->p.mod (&ecc->p, r + ecc->p.size, r + ecc->p.size);
}
else if (r != p)
mpn_copyi (r, p, 2*ecc->p.size);
#define ecc_curve25519_modp _nettle_ecc_curve25519_modp
void
-ecc_curve25519_modp (const struct ecc_modulo *m, mp_limb_t *rp);
+ecc_curve25519_modp (const struct ecc_modulo *m, mp_limb_t *rp, mp_limb_t *xp);
#else
#if PHIGH_BITS == 0
#endif
static void
-ecc_curve25519_modp(const struct ecc_modulo *m UNUSED, mp_limb_t *rp)
+ecc_curve25519_modp(const struct ecc_modulo *m UNUSED, mp_limb_t *rp, mp_limb_t *xp)
{
mp_limb_t hi, cy;
- cy = mpn_addmul_1 (rp, rp + ECC_LIMB_SIZE, ECC_LIMB_SIZE,
+ cy = mpn_addmul_1 (xp, xp + ECC_LIMB_SIZE, ECC_LIMB_SIZE,
(mp_limb_t) 19 << PHIGH_BITS);
- hi = rp[ECC_LIMB_SIZE-1];
+ hi = xp[ECC_LIMB_SIZE-1];
cy = (cy << PHIGH_BITS) + (hi >> (GMP_NUMB_BITS - PHIGH_BITS));
rp[ECC_LIMB_SIZE-1] = (hi & (GMP_NUMB_MASK >> PHIGH_BITS))
- + sec_add_1 (rp, rp, ECC_LIMB_SIZE - 1, 19 * cy);
+ + sec_add_1 (rp, xp, ECC_LIMB_SIZE - 1, 19 * cy);
}
#endif /* HAVE_NATIVE_ecc_curve25519_modp */
#endif
static void
-ecc_curve25519_modq (const struct ecc_modulo *q, mp_limb_t *rp)
+ecc_curve25519_modq (const struct ecc_modulo *q, mp_limb_t *rp, mp_limb_t *xp)
{
mp_size_t n;
mp_limb_t cy;
/* n is the offset where we add in the next term */
for (n = ECC_LIMB_SIZE; n-- > 0;)
{
- cy = mpn_submul_1 (rp + n,
+ cy = mpn_submul_1 (xp + n,
q->B_shifted, ECC_LIMB_SIZE,
- rp[n + ECC_LIMB_SIZE]);
+ xp[n + ECC_LIMB_SIZE]);
/* Top limb of B_shifted is zero, so we get cy == 0 or 1 */
assert (cy < 2);
- mpn_cnd_add_n (cy, rp+n, rp+n, q->m, ECC_LIMB_SIZE);
+ mpn_cnd_add_n (cy, xp+n, xp+n, q->m, ECC_LIMB_SIZE);
}
- cy = mpn_submul_1 (rp, q->m, ECC_LIMB_SIZE,
- rp[ECC_LIMB_SIZE-1] >> (GMP_NUMB_BITS - QHIGH_BITS));
+ cy = mpn_submul_1 (xp, q->m, ECC_LIMB_SIZE,
+ xp[ECC_LIMB_SIZE-1] >> (GMP_NUMB_BITS - QHIGH_BITS));
assert (cy < 2);
- mpn_cnd_add_n (cy, rp, rp, q->m, ECC_LIMB_SIZE);
+ mpn_cnd_add_n (cy, rp, xp, q->m, ECC_LIMB_SIZE);
}
/* Computes a^{(p-5)/8} = a^{2^{252}-3} mod m. Needs 5 * n scratch
#if HAVE_NATIVE_ecc_curve448_modp
#define ecc_curve448_modp _nettle_ecc_curve448_modp
void
-ecc_curve448_modp (const struct ecc_modulo *m, mp_limb_t *rp);
+ecc_curve448_modp (const struct ecc_modulo *m, mp_limb_t *rp, mp_limb_t *xp);
#elif GMP_NUMB_BITS == 64
static void
-ecc_curve448_modp(const struct ecc_modulo *m, mp_limb_t *rp)
+ecc_curve448_modp(const struct ecc_modulo *m, mp_limb_t *rp, mp_limb_t *xp)
{
/* Let B = 2^64, b = 2^32 = sqrt(B).
p = B^7 - b B^3 - 1 ==> B^7 = b B^3 + 1
+----+----+----+----+----+----+----+
*/
mp_limb_t c3, c4, c7;
- mp_limb_t *tp = rp + 7;
+ mp_limb_t *tp = xp + 7;
- c4 = mpn_add_n (rp, rp, rp + 7, 4);
- c7 = mpn_addmul_1 (rp + 4, rp + 11, 3, 2);
- c3 = mpn_addmul_1 (rp, rp + 11, 3, (mp_limb_t) 1 << 32);
- c7 += mpn_addmul_1 (rp + 3, rp + 7, 4, (mp_limb_t) 1 << 32);
+ c4 = mpn_add_n (xp, xp, xp + 7, 4);
+ c7 = mpn_addmul_1 (xp + 4, xp + 11, 3, 2);
+ c3 = mpn_addmul_1 (xp, xp + 11, 3, (mp_limb_t) 1 << 32);
+ c7 += mpn_addmul_1 (xp + 3, xp + 7, 4, (mp_limb_t) 1 << 32);
tp[0] = c7;
tp[1] = tp[2] = 0;
tp[3] = c3 + (c7 << 32);
tp[4] = c4 + (c7 >> 32) + (tp[3] < c3);
tp[5] = tp[6] = 0;
- c7 = mpn_add_n (rp, rp, tp, 7);
+ c7 = mpn_add_n (rp, xp, tp, 7);
c7 = mpn_cnd_add_n (c7, rp, rp, m->B, 7);
assert (c7 == 0);
}
#include "ecc-gost-gc256b.h"
static void
-ecc_gost_gc256b_modp (const struct ecc_modulo *m, mp_limb_t *rp)
+ecc_gost_gc256b_modp (const struct ecc_modulo *m, mp_limb_t *rp, mp_limb_t *xp)
{
mp_size_t mn = m->size;
mp_limb_t hi;
- hi = mpn_addmul_1(rp, rp + mn, mn, 0x269);
- hi = sec_add_1 (rp, rp, mn, hi * 0x269);
- hi = sec_add_1 (rp, rp, mn, hi * 0x269);
+ hi = mpn_addmul_1(xp, xp + mn, mn, 0x269);
+ hi = sec_add_1 (xp, xp, mn, hi * 0x269);
+ hi = sec_add_1 (rp, xp, mn, hi * 0x269);
assert(hi == 0);
}
#include "ecc-gost-gc512a.h"
static void
-ecc_gost_gc512a_modp (const struct ecc_modulo *m, mp_limb_t *rp)
+ecc_gost_gc512a_modp (const struct ecc_modulo *m, mp_limb_t *rp, mp_limb_t *xp)
{
mp_size_t mn = m->size;
mp_limb_t hi;
- hi = mpn_addmul_1(rp, rp + mn, mn, 0x239);
- hi = sec_add_1 (rp, rp, mn, hi * 0x239);
- hi = sec_add_1 (rp, rp, mn, hi * 0x239);
+ hi = mpn_addmul_1(xp, xp + mn, mn, 0x239);
+ hi = sec_add_1 (xp, xp, mn, hi * 0x239);
+ hi = sec_add_1 (rp, xp, mn, hi * 0x239);
assert(hi == 0);
}
/* Reduces from 2*ecc->size to ecc->size. */
/* Required to return a result < 2q. This property is inherited by
- mod_mul and mod_sqr. */
-typedef void ecc_mod_func (const struct ecc_modulo *m, mp_limb_t *rp);
+ mod_mul and mod_sqr. May clobber input xp. rp may point to the
+ start or the middle of the xp area, but no other overlap is
+ allowed. */
+typedef void ecc_mod_func (const struct ecc_modulo *m, mp_limb_t *rp, mp_limb_t *xp);
typedef void ecc_mod_inv_func (const struct ecc_modulo *m,
mp_limb_t *vp, const mp_limb_t *ap,
/* Divide this common factor by B, instead of applying redc to
both x and y outputs. */
mpn_zero (iz2p + ecc->p.size, ecc->p.size);
- ecc->p.reduce (&ecc->p, iz2p);
+ ecc->p.reduce (&ecc->p, iz2p, iz2p);
}
/* r_x <-- x / z^2 */
const mp_limb_t *ap, const mp_limb_t *bp)
{
mpn_mul_n (rp, ap, bp, m->size);
- m->reduce (m, rp);
+ m->reduce (m, rp, rp);
}
void
const mp_limb_t *ap)
{
mpn_sqr (rp, ap, m->size);
- m->reduce (m, rp);
+ m->reduce (m, rp, rp);
}
/* Compute R <-- X^{2^k} mod M. Needs 2*ecc->size limbs at rp, and
mpn_copyi (scratch, ap, m->size);
mpn_zero (scratch + m->size, m->size);
- m->reduce (m, scratch);
+ m->reduce (m, scratch, scratch);
mpn_zero (scratch + m->size, m->size);
- m->reduce (m, scratch);
+ m->reduce (m, scratch, scratch);
ecc_mod_inv_destructive (m, vp, scratch);
}
#include "ecc-internal.h"
-/* Computes r mod m, input 2*m->size, output m->size. */
+/* Computes r <-- x mod m, input 2*m->size, output m->size. It's
+ * allowed to have rp == xp or rp == xp + m->size, but no other kind
+ * of overlap is allowed. */
void
-ecc_mod (const struct ecc_modulo *m, mp_limb_t *rp)
+ecc_mod (const struct ecc_modulo *m, mp_limb_t *rp, mp_limb_t *xp)
{
mp_limb_t hi;
mp_size_t mn = m->size;
rn -= sn;
for (i = 0; i <= sn; i++)
- rp[rn+i-1] = mpn_addmul_1 (rp + rn - mn - 1 + i, m->B, bn, rp[rn+i-1]);
- rp[rn-1] = rp[rn+sn-1]
- + mpn_add_n (rp + rn - sn - 1, rp + rn - sn - 1, rp + rn - 1, sn);
+ xp[rn+i-1] = mpn_addmul_1 (xp + rn - mn - 1 + i, m->B, bn, xp[rn+i-1]);
+ xp[rn-1] = xp[rn+sn-1]
+ + mpn_add_n (xp + rn - sn - 1, xp + rn - sn - 1, xp + rn - 1, sn);
}
}
else
rn -= sn;
for (i = 0; i < sn; i++)
- rp[rn+i] = mpn_addmul_1 (rp + rn - mn + i, m->B, bn, rp[rn+i]);
+ xp[rn+i] = mpn_addmul_1 (xp + rn - mn + i, m->B, bn, xp[rn+i]);
- hi = mpn_add_n (rp + rn - sn, rp + rn - sn, rp + rn, sn);
- hi = mpn_cnd_add_n (hi, rp + rn - mn, rp + rn - mn, m->B, mn);
+ hi = mpn_add_n (xp + rn - sn, xp + rn - sn, xp + rn, sn);
+ hi = mpn_cnd_add_n (hi, xp + rn - mn, xp + rn - mn, m->B, mn);
assert (hi == 0);
}
}
assert (rn <= sn);
for (i = 0; i < rn; i++)
- rp[mn+i] = mpn_addmul_1 (rp + i, m->B, bn, rp[mn+i]);
+ xp[mn+i] = mpn_addmul_1 (xp + i, m->B, bn, xp[mn+i]);
- hi = mpn_add_n (rp + bn, rp + bn, rp + mn, rn);
+ hi = mpn_add_n (xp + bn, xp + bn, xp + mn, rn);
if (rn < sn)
- hi = sec_add_1 (rp + bn + rn, rp + bn + rn, sn - rn, hi);
+ hi = sec_add_1 (xp + bn + rn, xp + bn + rn, sn - rn, hi);
shift = m->size * GMP_NUMB_BITS - m->bit_size;
if (shift > 0)
{
/* Combine hi with top bits, add in */
- hi = (hi << shift) | (rp[mn-1] >> (GMP_NUMB_BITS - shift));
- rp[mn-1] = (rp[mn-1] & (((mp_limb_t) 1 << (GMP_NUMB_BITS - shift)) - 1))
- + mpn_addmul_1 (rp, m->B_shifted, mn-1, hi);
+ hi = (hi << shift) | (xp[mn-1] >> (GMP_NUMB_BITS - shift));
+ xp[mn-1] = (xp[mn-1] & (((mp_limb_t) 1 << (GMP_NUMB_BITS - shift)) - 1))
+ + mpn_addmul_1 (xp, m->B_shifted, mn-1, hi);
+ /* FIXME: Can this copying be eliminated? */
+ if (rp != xp)
+ mpn_copyi (rp, xp, mn);
}
else
{
- hi = mpn_cnd_add_n (hi, rp, rp, m->B, mn);
+ hi = mpn_cnd_add_n (hi, rp, xp, m->B, mn);
assert (hi == 0);
}
}
/* Use that 1 = - (p - 1) (mod p), and that at least one low limb of p
- 1 is zero. */
void
-ecc_pm1_redc (const struct ecc_modulo *m, mp_limb_t *rp)
+ecc_pm1_redc (const struct ecc_modulo *m, mp_limb_t *rp, mp_limb_t *xp)
{
unsigned i;
mp_limb_t hi, cy;
mp_size_t k = m->redc_size;
for (i = 0; i < m->size; i++)
- rp[i] = mpn_submul_1 (rp + i + k,
- m->redc_mpm1, m->size - k, rp[i]);
- hi = mpn_sub_n (rp, rp + m->size, rp, m->size);
- cy = mpn_cnd_add_n (hi, rp, rp, m->m, m->size);
+ xp[i] = mpn_submul_1 (xp + i + k,
+ m->redc_mpm1, m->size - k, xp[i]);
+ hi = mpn_sub_n (xp, xp + m->size, xp, m->size);
+ cy = mpn_cnd_add_n (hi, rp, xp, m->m, m->size);
assert (cy == hi);
if (shift > 0)
/* Use that 1 = p + 1 (mod p), and that at least one low limb of p + 1
is zero. */
void
-ecc_pp1_redc (const struct ecc_modulo *m, mp_limb_t *rp)
+ecc_pp1_redc (const struct ecc_modulo *m, mp_limb_t *rp, mp_limb_t *xp)
{
unsigned i;
mp_limb_t hi, cy;
mp_size_t k = m->redc_size;
for (i = 0; i < m->size; i++)
- rp[i] = mpn_addmul_1 (rp + i + k,
- m->redc_mpm1, m->size - k, rp[i]);
- hi = mpn_add_n (rp, rp, rp + m->size, m->size);
+ xp[i] = mpn_addmul_1 (xp + i + k,
+ m->redc_mpm1, m->size - k, xp[i]);
+ hi = mpn_add_n (rp, xp, xp + m->size, m->size);
if (shift > 0)
{
hi = (hi << shift) | (rp[m->size - 1] >> (GMP_NUMB_BITS - shift));
#define ecc_secp192r1_modp _nettle_ecc_secp192r1_modp
void
-ecc_secp192r1_modp (const struct ecc_modulo *m, mp_limb_t *rp);
+ecc_secp192r1_modp (const struct ecc_modulo *m, mp_limb_t *rp, mp_limb_t *xp);
/* Use that p = 2^{192} - 2^64 - 1, to eliminate 128 bits at a time. */
#elif GMP_NUMB_BITS == 32
/* p is 6 limbs, p = B^6 - B^2 - 1 */
static void
-ecc_secp192r1_modp (const struct ecc_modulo *m UNUSED, mp_limb_t *rp)
+ecc_secp192r1_modp (const struct ecc_modulo *m UNUSED, mp_limb_t *rp, mp_limb_t *xp)
{
mp_limb_t cy;
/* Reduce from 12 to 9 limbs (top limb small)*/
- cy = mpn_add_n (rp + 2, rp + 2, rp + 8, 4);
- cy = sec_add_1 (rp + 6, rp + 6, 2, cy);
- cy += mpn_add_n (rp + 4, rp + 4, rp + 8, 4);
+ cy = mpn_add_n (xp + 2, xp + 2, xp + 8, 4);
+ cy = sec_add_1 (xp + 6, xp + 6, 2, cy);
+ cy += mpn_add_n (xp + 4, xp + 4, xp + 8, 4);
assert (cy <= 2);
- rp[8] = cy;
+ xp[8] = cy;
/* Reduce from 9 to 6 limbs */
- cy = mpn_add_n (rp, rp, rp + 6, 3);
- cy = sec_add_1 (rp + 3, rp + 3, 2, cy);
- cy += mpn_add_n (rp + 2, rp + 2, rp + 6, 3);
- cy = sec_add_1 (rp + 5, rp + 5, 1, cy);
+ cy = mpn_add_n (xp, xp, xp + 6, 3);
+ cy = sec_add_1 (xp + 3, xp + 3, 2, cy);
+ cy += mpn_add_n (xp + 2, xp + 2, xp + 6, 3);
+ cy = sec_add_1 (xp + 5, xp + 5, 1, cy);
assert (cy <= 1);
- cy = mpn_cnd_add_n (cy, rp, rp, ecc_Bmodp, 6);
+ cy = mpn_cnd_add_n (cy, rp, xp, ecc_Bmodp, 6);
assert (cy == 0);
}
#elif GMP_NUMB_BITS == 64
/* p is 3 limbs, p = B^3 - B - 1 */
static void
-ecc_secp192r1_modp (const struct ecc_modulo *m UNUSED, mp_limb_t *rp)
+ecc_secp192r1_modp (const struct ecc_modulo *m UNUSED, mp_limb_t *rp, mp_limb_t *xp)
{
mp_limb_t cy;
/* Reduce from 6 to 5 limbs (top limb small)*/
- cy = mpn_add_n (rp + 1, rp + 1, rp + 4, 2);
- cy = sec_add_1 (rp + 3, rp + 3, 1, cy);
- cy += mpn_add_n (rp + 2, rp + 2, rp + 4, 2);
+ cy = mpn_add_n (xp + 1, xp + 1, xp + 4, 2);
+ cy = sec_add_1 (xp + 3, xp + 3, 1, cy);
+ cy += mpn_add_n (xp + 2, xp + 2, xp + 4, 2);
assert (cy <= 2);
- rp[4] = cy;
+ xp[4] = cy;
/* Reduce from 5 to 4 limbs (high limb small) */
- cy = mpn_add_n (rp, rp, rp + 3, 2);
- cy = sec_add_1 (rp + 2, rp + 2, 1, cy);
- cy += mpn_add_n (rp + 1, rp + 1, rp + 3, 2);
+ cy = mpn_add_n (xp, xp, xp + 3, 2);
+ cy = sec_add_1 (xp + 2, xp + 2, 1, cy);
+ cy += mpn_add_n (xp + 1, xp + 1, xp + 3, 2);
assert (cy <= 1);
- cy = mpn_cnd_add_n (cy, rp, rp, ecc_Bmodp, 3);
+ cy = mpn_cnd_add_n (cy, rp, xp, ecc_Bmodp, 3);
assert (cy == 0);
}
#define USE_REDC 0
#define ecc_secp224r1_modp _nettle_ecc_secp224r1_modp
void
-ecc_secp224r1_modp (const struct ecc_modulo *m, mp_limb_t *rp);
+ecc_secp224r1_modp (const struct ecc_modulo *m, mp_limb_t *rp, mp_limb_t *xp);
#else
#define USE_REDC (ECC_REDC_SIZE != 0)
#if HAVE_NATIVE_ecc_secp256r1_redc
# define ecc_secp256r1_redc _nettle_ecc_secp256r1_redc
void
-ecc_secp256r1_redc (const struct ecc_modulo *p, mp_limb_t *rp);
+ecc_secp256r1_redc (const struct ecc_modulo *p, mp_limb_t *rp, mp_limb_t *xp);
#else /* !HAVE_NATIVE_ecc_secp256r1_redc */
# if ECC_REDC_SIZE > 0
# define ecc_secp256r1_redc ecc_pp1_redc
#elif GMP_NUMB_BITS == 64
static void
-ecc_secp256r1_modp (const struct ecc_modulo *p, mp_limb_t *rp)
+ecc_secp256r1_modp (const struct ecc_modulo *p, mp_limb_t *rp, mp_limb_t *xp)
{
mp_limb_t u1, u0;
mp_size_t n;
n = 2*p->size;
- u1 = rp[--n];
- u0 = rp[n-1];
+ u1 = xp[--n];
+ u0 = xp[n-1];
/* This is not particularly fast, but should work well with assembly implementation. */
for (; n >= p->size; n--)
We multiply by two low limbs of p, 2^96 - 1, so we could use
shifts rather than mul.
*/
- t = mpn_submul_1 (rp + n - 4, p->m, 2, q1);
- t += mpn_cnd_sub_n (q2, rp + n - 3, rp + n - 3, p->m, 1);
+ t = mpn_submul_1 (xp + n - 4, p->m, 2, q1);
+ t += mpn_cnd_sub_n (q2, xp + n - 3, xp + n - 3, p->m, 1);
t += (-q2) & 0xffffffff;
- u0 = rp[n-2];
+ u0 = xp[n-2];
cy = (u0 < t);
u0 -= t;
t = (u1 < cy);
u1 -= cy;
- cy = mpn_cnd_add_n (t, rp + n - 4, rp + n - 4, p->m, 2);
+ cy = mpn_cnd_add_n (t, xp + n - 4, xp + n - 4, p->m, 2);
u0 += cy;
u1 += (u0 < cy);
u1 -= (-t) & 0xffffffff;
}
+ rp[0] = xp[0];
+ rp[1] = xp[1];
rp[2] = u0;
rp[3] = u1;
}
static void
-ecc_secp256r1_modq (const struct ecc_modulo *q, mp_limb_t *rp)
+ecc_secp256r1_modq (const struct ecc_modulo *q, mp_limb_t *rp, mp_limb_t *xp)
{
mp_limb_t u2, u1, u0;
mp_size_t n;
n = 2*q->size;
- u2 = rp[--n];
- u1 = rp[n-1];
+ u2 = xp[--n];
+ u1 = xp[n-1];
/* This is not particularly fast, but should work well with assembly implementation. */
for (; n >= q->size; n--)
{
mp_limb_t q2, q1, q0, t, c1, c0;
- u0 = rp[n-2];
+ u0 = xp[n-2];
/* <q2, q1, q0> = v * u2 + <u2,u1>, same method as above.
assert (q2 < 2);
- c0 = mpn_cnd_sub_n (q2, rp + n - 3, rp + n - 3, q->m, 1);
+ c0 = mpn_cnd_sub_n (q2, xp + n - 3, xp + n - 3, q->m, 1);
c0 += (-q2) & q->m[1];
- t = mpn_submul_1 (rp + n - 4, q->m, 2, q1);
+ t = mpn_submul_1 (xp + n - 4, q->m, 2, q1);
c0 += t;
c1 = c0 < t;
u1 += t;
u2 += (t<<32) + (u1 < t);
- t = mpn_cnd_add_n (t, rp + n - 4, rp + n - 4, q->m, 2);
+ t = mpn_cnd_add_n (t, xp + n - 4, xp + n - 4, q->m, 2);
u1 += t;
u2 += (u1 < t);
}
+ rp[0] = xp[0];
+ rp[1] = xp[1];
rp[2] = u1;
rp[3] = u2;
}
#if HAVE_NATIVE_ecc_secp384r1_modp
#define ecc_secp384r1_modp _nettle_ecc_secp384r1_modp
void
-ecc_secp384r1_modp (const struct ecc_modulo *m, mp_limb_t *rp);
+ecc_secp384r1_modp (const struct ecc_modulo *m, mp_limb_t *rp, mp_limb_t *xp);
#elif GMP_NUMB_BITS == 32
/* Use that 2^{384} = 2^{128} + 2^{96} - 2^{32} + 1, and eliminate 256
almost 8 at a time. Do only 7, to avoid additional carry
propagation, followed by 5. */
static void
-ecc_secp384r1_modp (const struct ecc_modulo *p, mp_limb_t *rp)
+ecc_secp384r1_modp (const struct ecc_modulo *p, mp_limb_t *rp, mp_limb_t *xp)
{
mp_limb_t cy, bw;
/* Reduce from 24 to 17 limbs. */
- cy = mpn_add_n (rp + 4, rp + 4, rp + 16, 8);
- cy = sec_add_1 (rp + 12, rp + 12, 3, cy);
+ cy = mpn_add_n (xp + 4, xp + 4, xp + 16, 8);
+ cy = sec_add_1 (xp + 12, xp + 12, 3, cy);
- bw = mpn_sub_n (rp + 5, rp + 5, rp + 16, 8);
- bw = sec_sub_1 (rp + 13, rp + 13, 3, bw);
+ bw = mpn_sub_n (xp + 5, xp + 5, xp + 16, 8);
+ bw = sec_sub_1 (xp + 13, xp + 13, 3, bw);
- cy += mpn_add_n (rp + 7, rp + 7, rp + 16, 8);
- cy = sec_add_1 (rp + 15, rp + 15, 1, cy);
+ cy += mpn_add_n (xp + 7, xp + 7, xp + 16, 8);
+ cy = sec_add_1 (xp + 15, xp + 15, 1, cy);
- cy += mpn_add_n (rp + 8, rp + 8, rp + 16, 8);
+ cy += mpn_add_n (xp + 8, xp + 8, xp + 16, 8);
assert (bw <= cy);
cy -= bw;
assert (cy <= 2);
- rp[16] = cy;
+ xp[16] = cy;
/* Reduce from 17 to 12 limbs */
- cy = mpn_add_n (rp, rp, rp + 12, 5);
- cy = sec_add_1 (rp + 5, rp + 5, 3, cy);
+ cy = mpn_add_n (xp, xp, xp + 12, 5);
+ cy = sec_add_1 (xp + 5, xp + 5, 3, cy);
- bw = mpn_sub_n (rp + 1, rp + 1, rp + 12, 5);
- bw = sec_sub_1 (rp + 6, rp + 6, 6, bw);
+ bw = mpn_sub_n (xp + 1, xp + 1, xp + 12, 5);
+ bw = sec_sub_1 (xp + 6, xp + 6, 6, bw);
- cy += mpn_add_n (rp + 3, rp + 3, rp + 12, 5);
- cy = sec_add_1 (rp + 8, rp + 8, 1, cy);
+ cy += mpn_add_n (xp + 3, xp + 3, xp + 12, 5);
+ cy = sec_add_1 (xp + 8, xp + 8, 1, cy);
- cy += mpn_add_n (rp + 4, rp + 4, rp + 12, 5);
- cy = sec_add_1 (rp + 9, rp + 9, 3, cy);
+ cy += mpn_add_n (xp + 4, xp + 4, xp + 12, 5);
+ cy = sec_add_1 (xp + 9, xp + 9, 3, cy);
assert (cy >= bw);
cy -= bw;
assert (cy <= 1);
- cy = mpn_cnd_add_n (cy, rp, rp, p->B, ECC_LIMB_SIZE);
+ cy = mpn_cnd_add_n (cy, rp, xp, p->B, ECC_LIMB_SIZE);
assert (cy == 0);
}
#elif GMP_NUMB_BITS == 64
/* p is 6 limbs, and B^6 - p = B^2 + 2^32 (B - 1) + 1. Eliminate 3
(almost 4) limbs at a time. */
static void
-ecc_secp384r1_modp (const struct ecc_modulo *p, mp_limb_t *rp)
+ecc_secp384r1_modp (const struct ecc_modulo *p, mp_limb_t *rp, mp_limb_t *xp)
{
mp_limb_t tp[6];
mp_limb_t cy;
/* Reduce from 12 to 9 limbs */
tp[0] = 0; /* FIXME: Could use mpn_sub_nc */
- mpn_copyi (tp + 1, rp + 8, 3);
- tp[4] = rp[11] - mpn_sub_n (tp, tp, rp + 8, 4);
+ mpn_copyi (tp + 1, xp + 8, 3);
+ tp[4] = xp[11] - mpn_sub_n (tp, tp, xp + 8, 4);
tp[5] = mpn_lshift (tp, tp, 5, 32);
- cy = mpn_add_n (rp + 2, rp + 2, rp + 8, 4);
- cy = sec_add_1 (rp + 6, rp + 6, 2, cy);
+ cy = mpn_add_n (xp + 2, xp + 2, xp + 8, 4);
+ cy = sec_add_1 (xp + 6, xp + 6, 2, cy);
- cy += mpn_add_n (rp + 2, rp + 2, tp, 6);
- cy += mpn_add_n (rp + 4, rp + 4, rp + 8, 4);
+ cy += mpn_add_n (xp + 2, xp + 2, tp, 6);
+ cy += mpn_add_n (xp + 4, xp + 4, xp + 8, 4);
assert (cy <= 2);
- rp[8] = cy;
+ xp[8] = cy;
/* Reduce from 9 to 6 limbs */
tp[0] = 0;
- mpn_copyi (tp + 1, rp + 6, 2);
- tp[3] = rp[8] - mpn_sub_n (tp, tp, rp + 6, 3);
+ mpn_copyi (tp + 1, xp + 6, 2);
+ tp[3] = xp[8] - mpn_sub_n (tp, tp, xp + 6, 3);
tp[4] = mpn_lshift (tp, tp, 4, 32);
- cy = mpn_add_n (rp, rp, rp + 6, 3);
- cy = sec_add_1 (rp + 3, rp + 3, 2, cy);
- cy += mpn_add_n (rp, rp, tp, 5);
- cy += mpn_add_n (rp + 2, rp + 2, rp + 6, 3);
+ cy = mpn_add_n (xp, xp, xp + 6, 3);
+ cy = sec_add_1 (xp + 3, xp + 3, 2, cy);
+ cy += mpn_add_n (xp, xp, tp, 5);
+ cy += mpn_add_n (xp + 2, xp + 2, xp + 6, 3);
- cy = sec_add_1 (rp + 5, rp + 5, 1, cy);
+ cy = sec_add_1 (xp + 5, xp + 5, 1, cy);
assert (cy <= 1);
- cy = mpn_cnd_add_n (cy, rp, rp, p->B, ECC_LIMB_SIZE);
- assert (cy == 0);
+ cy = mpn_cnd_add_n (cy, xp, xp, p->B, ECC_LIMB_SIZE);
+ assert (cy == 0);
+ mpn_copyi (rp, xp, ECC_LIMB_SIZE);
}
#else
#define ecc_secp384r1_modp ecc_mod
#endif
-
+
const struct ecc_curve _nettle_secp_384r1 =
{
{
#if HAVE_NATIVE_ecc_secp521r1_modp
#define ecc_secp521r1_modp _nettle_ecc_secp521r1_modp
void
-ecc_secp521r1_modp (const struct ecc_modulo *m, mp_limb_t *rp);
+ecc_secp521r1_modp (const struct ecc_modulo *m, mp_limb_t *rp, mp_limb_t *xp);
#else
/* Result may be *slightly* larger than 2^521 */
static void
-ecc_secp521r1_modp (const struct ecc_modulo *m UNUSED, mp_limb_t *rp)
+ecc_secp521r1_modp (const struct ecc_modulo *m UNUSED, mp_limb_t *rp, mp_limb_t *xp)
{
/* FIXME: Should use mpn_addlsh_n_ip1 */
mp_limb_t hi;
/* Reduce from 2*ECC_LIMB_SIZE to ECC_LIMB_SIZE + 1 */
- rp[ECC_LIMB_SIZE]
- = mpn_addmul_1 (rp, rp + ECC_LIMB_SIZE, ECC_LIMB_SIZE, BMODP);
- hi = mpn_addmul_1 (rp, rp + ECC_LIMB_SIZE, 1, BMODP);
- hi = sec_add_1 (rp + 1, rp + 1, ECC_LIMB_SIZE - 1, hi);
+ xp[ECC_LIMB_SIZE]
+ = mpn_addmul_1 (xp, xp + ECC_LIMB_SIZE, ECC_LIMB_SIZE, BMODP);
+ hi = mpn_addmul_1 (xp, xp + ECC_LIMB_SIZE, 1, BMODP);
+ hi = sec_add_1 (xp + 1, xp + 1, ECC_LIMB_SIZE - 1, hi);
/* Combine hi with top bits, and add in. */
- hi = (hi << BMODP_SHIFT) | (rp[ECC_LIMB_SIZE-1] >> B_SHIFT);
- rp[ECC_LIMB_SIZE-1] = (rp[ECC_LIMB_SIZE-1]
+ hi = (hi << BMODP_SHIFT) | (xp[ECC_LIMB_SIZE-1] >> B_SHIFT);
+ rp[ECC_LIMB_SIZE-1] = (xp[ECC_LIMB_SIZE-1]
& (((mp_limb_t) 1 << B_SHIFT)-1))
- + sec_add_1 (rp, rp, ECC_LIMB_SIZE - 1, hi);
+ + sec_add_1 (rp, xp, ECC_LIMB_SIZE - 1, hi);
}
#endif
hi = mpn_cnd_add_n (hi, rp + m->size, rp + m->size, m->B, m->size);
assert (hi == 0);
}
- m->mod (m, rp);
+ m->mod (m, rp, rp);
}
{
struct ecc_ctx *ctx = (struct ecc_ctx *) p;
mpn_copyi (ctx->rp, ctx->ap, 2*ctx->ecc->p.size);
- ctx->ecc->p.mod (&ctx->ecc->p, ctx->rp);
+ ctx->ecc->p.mod (&ctx->ecc->p, ctx->rp, ctx->rp);
}
static void
{
struct ecc_ctx *ctx = (struct ecc_ctx *) p;
mpn_copyi (ctx->rp, ctx->ap, 2*ctx->ecc->p.size);
- ctx->ecc->p.reduce (&ctx->ecc->p, ctx->rp);
+ ctx->ecc->p.reduce (&ctx->ecc->p, ctx->rp, ctx->rp);
}
static void
{
struct ecc_ctx *ctx = (struct ecc_ctx *) p;
mpn_copyi (ctx->rp, ctx->ap, 2*ctx->ecc->p.size);
- ctx->ecc->q.mod(&ctx->ecc->q, ctx->rp);
+ ctx->ecc->q.mod(&ctx->ecc->q, ctx->rp, ctx->rp);
}
static void
#define MAX_SIZE (2*MAX_ECC_SIZE)
#define COUNT 50000
+/* Destructively normalize tp, then compare */
+static int
+mod_equal(const struct ecc_modulo *m, const mp_limb_t *ref, mp_limb_t *tp)
+{
+ if (mpn_cmp (tp, m->m, m->size) >= 0)
+ mpn_sub_n (tp, tp, m->m, m->size);
+ return mpn_cmp (ref, tp, m->size) == 0;
+}
+
static void
test_one(const char *name,
const struct ecc_modulo *m,
ref_mod (ref, a, m->m, m->size);
mpn_copyi (t, a, 2*m->size);
- m->mod (m, t);
- if (mpn_cmp (t, m->m, m->size) >= 0)
- mpn_sub_n (t, t, m->m, m->size);
-
- if (mpn_cmp (t, ref, m->size))
+ m->mod (m, t, t);
+ if (!mod_equal (m, ref, t))
{
- fprintf (stderr, "m->mod %s failed: bit_size = %u\n",
+ fprintf (stderr, "m->mod %s failed: bit_size = %u, rp == xp\n",
name, m->bit_size);
fprintf (stderr, "a = ");
abort ();
}
+ mpn_copyi (t, a, 2*m->size);
+ m->mod (m, t + m->size, t);
+ if (!mod_equal (m, ref, t + m->size))
+ {
+ fprintf (stderr, "m->mod %s failed: bit_size = %u, rp == xp + size\n",
+ name, m->bit_size);
+
+ fprintf (stderr, "a = ");
+ mpn_out_str (stderr, 16, a, 2*m->size);
+ fprintf (stderr, "\nt = ");
+ mpn_out_str (stderr, 16, t + m->size, m->size);
+ fprintf (stderr, " (bad)\nref = ");
+ mpn_out_str (stderr, 16, ref, m->size);
+ fprintf (stderr, "\n");
+ abort ();
+ }
+
if (m->B_size < m->size)
{
mpn_copyi (t, a, 2*m->size);
- ecc_mod (m, t);
- if (mpn_cmp (t, m->m, m->size) >= 0)
- mpn_sub_n (t, t, m->m, m->size);
-
- if (mpn_cmp (t, ref, m->size))
+ ecc_mod (m, t, t);
+ if (!mod_equal (m, ref, t))
{
- fprintf (stderr, "ecc_mod %s failed: bit_size = %u\n",
+ fprintf (stderr, "ecc_mod %s failed: bit_size = %u, rp == xp\n",
name, m->bit_size);
fprintf (stderr, "a = ");
mpn_out_str (stderr, 16, a, 2*m->size);
fprintf (stderr, "\n");
abort ();
}
+
+ mpn_copyi (t, a, 2*m->size);
+ ecc_mod (m, t + m->size, t);
+ if (!mod_equal (m, ref, t + m->size))
+ {
+ fprintf (stderr, "ecc_mod %s failed: bit_size = %u, rp == xp + size\n",
+ name, m->bit_size);
+ fprintf (stderr, "a = ");
+ mpn_out_str (stderr, 16, a, 2*m->size);
+ fprintf (stderr, "\nt = ");
+ mpn_out_str (stderr, 16, t + m->size, m->size);
+ fprintf (stderr, " (bad)\nref = ");
+ mpn_out_str (stderr, 16, ref, m->size);
+ fprintf (stderr, "\n");
+ abort ();
+ }
}
}
if (ecc->p.reduce != ecc->p.mod)
{
mpn_copyi (m, a, 2*ecc->p.size);
- ecc->p.reduce (&ecc->p, m);
+ ecc->p.reduce (&ecc->p, m, m);
if (mpn_cmp (m, ecc->p.m, ecc->p.size) >= 0)
mpn_sub_n (m, m, ecc->p.m, ecc->p.size);
{
mpn_copyi (m, a, 2*ecc->p.size);
if (ecc->p.m[0] == 1)
- ecc_pm1_redc (&ecc->p, m);
+ ecc_pm1_redc (&ecc->p, m, m);
else
- ecc_pp1_redc (&ecc->p, m);
+ ecc_pp1_redc (&ecc->p, m, m);
if (mpn_cmp (m, ecc->p.m, ecc->p.size) >= 0)
mpn_sub_n (m, m, ecc->p.m, ecc->p.size);