From: Richard Levitte Date: Mon, 24 Nov 2025 07:52:53 +0000 (+0100) Subject: BIGNUM: separate out word-only helper functions from bn_mul.c X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=4eb452f181c22033e5a797ae50e2d90f9fa793f2;p=thirdparty%2Fopenssl.git BIGNUM: separate out word-only helper functions from bn_mul.c This separation will allow us to use the word-only helper functions from OSSL_FN functions without pulling in functions that operate on BIGNUMs. This also starts the collection of source files with word-only BN functions that haven't found their way into bn_asm.c for various reasons. To recognise them, they are prefixed 'bnw_' instead of 'bn_'. While at it, constification is done where appropriate among the word helpers. Reviewed-by: Matt Caswell Reviewed-by: Tomas Mraz (Merged from https://github.com/openssl/openssl/pull/29203) --- diff --git a/crypto/bn/asm/x86_64-gcc.c b/crypto/bn/asm/x86_64-gcc.c index 7edb77806ed..c25b06e770c 100644 --- a/crypto/bn/asm/x86_64-gcc.c +++ b/crypto/bn/asm/x86_64-gcc.c @@ -261,7 +261,7 @@ BN_ULONG bn_sub_words(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, #else /* Simics 1.4<7 has buggy sbbq:-( */ #define BN_MASK2 0xffffffffffffffffL -BN_ULONG bn_sub_words(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n) +BN_ULONG bn_sub_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b, int n) { BN_ULONG t1, t2; int c = 0; @@ -408,7 +408,7 @@ BN_ULONG bn_sub_words(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n) #define sqr_add_c2(a, i, j, c0, c1, c2) \ mul_add_c2((a)[i], (a)[j], c0, c1, c2) -void bn_mul_comba8(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b) +void bn_mul_comba8(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b) { BN_ULONG c1, c2, c3; @@ -511,7 +511,7 @@ void bn_mul_comba8(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b) r[15] = c1; } -void bn_mul_comba4(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b) +void bn_mul_comba4(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b) { BN_ULONG c1, c2, c3; diff --git 
a/crypto/bn/bn_asm.c b/crypto/bn/bn_asm.c index 0a77f1b070e..72649d8c6f0 100644 --- a/crypto/bn/bn_asm.c +++ b/crypto/bn/bn_asm.c @@ -622,7 +622,7 @@ BN_ULONG bn_sub_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b, mul_add_c2((a)[i], (a)[j], c0, c1, c2) #endif /* !BN_LLONG */ -void bn_mul_comba8(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b) +void bn_mul_comba8(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b) { BN_ULONG c1, c2, c3; @@ -725,7 +725,7 @@ void bn_mul_comba8(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b) r[15] = c1; } -void bn_mul_comba4(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b) +void bn_mul_comba4(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b) { BN_ULONG c1, c2, c3; @@ -1006,7 +1006,7 @@ void bn_sqr_comba8(BN_ULONG *r, const BN_ULONG *a) bn_sqr_normal(r, a, 8, t); } -void bn_mul_comba4(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b) +void bn_mul_comba4(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b) { r[4] = bn_mul_words(&(r[0]), a, 4, b[0]); r[5] = bn_mul_add_words(&(r[1]), a, 4, b[1]); @@ -1014,7 +1014,7 @@ void bn_mul_comba4(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b) r[7] = bn_mul_add_words(&(r[3]), a, 4, b[3]); } -void bn_mul_comba8(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b) +void bn_mul_comba8(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b) { r[8] = bn_mul_words(&(r[0]), a, 8, b[0]); r[9] = bn_mul_add_words(&(r[1]), a, 8, b[1]); diff --git a/crypto/bn/bn_local.h b/crypto/bn/bn_local.h index e33badd3602..540ff447926 100644 --- a/crypto/bn/bn_local.h +++ b/crypto/bn/bn_local.h @@ -658,21 +658,21 @@ void BN_RECP_CTX_init(BN_RECP_CTX *recp); void BN_MONT_CTX_init(BN_MONT_CTX *ctx); void bn_init(BIGNUM *a); -void bn_mul_normal(BN_ULONG *r, BN_ULONG *a, int na, BN_ULONG *b, int nb); -void bn_mul_comba8(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b); -void bn_mul_comba4(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b); +void bn_mul_normal(BN_ULONG *r, const BN_ULONG *a, int na, const BN_ULONG *b, int nb); +void bn_mul_comba8(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b); +void 
bn_mul_comba4(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b); void bn_sqr_normal(BN_ULONG *r, const BN_ULONG *a, int n, BN_ULONG *tmp); void bn_sqr_comba8(BN_ULONG *r, const BN_ULONG *a); void bn_sqr_comba4(BN_ULONG *r, const BN_ULONG *a); int bn_cmp_words(const BN_ULONG *a, const BN_ULONG *b, int n); int bn_cmp_part_words(const BN_ULONG *a, const BN_ULONG *b, int cl, int dl); -void bn_mul_recursive(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n2, +void bn_mul_recursive(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b, int n2, int dna, int dnb, BN_ULONG *t); -void bn_mul_part_recursive(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, +void bn_mul_part_recursive(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b, int n, int tna, int tnb, BN_ULONG *t); void bn_sqr_recursive(BN_ULONG *r, const BN_ULONG *a, int n2, BN_ULONG *t); -void bn_mul_low_normal(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n); -void bn_mul_low_recursive(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n2, +void bn_mul_low_normal(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b, int n); +void bn_mul_low_recursive(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b, int n2, BN_ULONG *t); BN_ULONG bn_sub_part_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b, int cl, int dl); diff --git a/crypto/bn/bn_mul.c b/crypto/bn/bn_mul.c index 5b751c6fcbd..265f1106ebb 100644 --- a/crypto/bn/bn_mul.c +++ b/crypto/bn/bn_mul.c @@ -11,489 +11,6 @@ #include "internal/cryptlib.h" #include "bn_local.h" -#if defined(OPENSSL_NO_ASM) || !defined(OPENSSL_BN_ASM_PART_WORDS) -/* - * Here follows specialised variants of bn_add_words() and bn_sub_words(). - * They have the property performing operations on arrays of different sizes. - * The sizes of those arrays is expressed through cl, which is the common - * length ( basically, min(len(a),len(b)) ), and dl, which is the delta - * between the two lengths, calculated as len(a)-len(b). All lengths are the - * number of BN_ULONGs... 
For the operations that require a result array as - * parameter, it must have the length cl+abs(dl). These functions should - * probably end up in bn_asm.c as soon as there are assembler counterparts - * for the systems that use assembler files. - */ - -BN_ULONG bn_sub_part_words(BN_ULONG *r, - const BN_ULONG *a, const BN_ULONG *b, - int cl, int dl) -{ - BN_ULONG c, t; - - assert(cl >= 0); - c = bn_sub_words(r, a, b, cl); - - if (dl == 0) - return c; - - r += cl; - a += cl; - b += cl; - - if (dl < 0) { - for (;;) { - t = b[0]; - r[0] = (0 - t - c) & BN_MASK2; - if (t != 0) - c = 1; - if (++dl >= 0) - break; - - t = b[1]; - r[1] = (0 - t - c) & BN_MASK2; - if (t != 0) - c = 1; - if (++dl >= 0) - break; - - t = b[2]; - r[2] = (0 - t - c) & BN_MASK2; - if (t != 0) - c = 1; - if (++dl >= 0) - break; - - t = b[3]; - r[3] = (0 - t - c) & BN_MASK2; - if (t != 0) - c = 1; - if (++dl >= 0) - break; - - b += 4; - r += 4; - } - } else { - int save_dl = dl; - while (c) { - t = a[0]; - r[0] = (t - c) & BN_MASK2; - if (t != 0) - c = 0; - if (--dl <= 0) - break; - - t = a[1]; - r[1] = (t - c) & BN_MASK2; - if (t != 0) - c = 0; - if (--dl <= 0) - break; - - t = a[2]; - r[2] = (t - c) & BN_MASK2; - if (t != 0) - c = 0; - if (--dl <= 0) - break; - - t = a[3]; - r[3] = (t - c) & BN_MASK2; - if (t != 0) - c = 0; - if (--dl <= 0) - break; - - save_dl = dl; - a += 4; - r += 4; - } - if (dl > 0) { - if (save_dl > dl) { - switch (save_dl - dl) { - case 1: - r[1] = a[1]; - if (--dl <= 0) - break; - /* fall through */ - case 2: - r[2] = a[2]; - if (--dl <= 0) - break; - /* fall through */ - case 3: - r[3] = a[3]; - if (--dl <= 0) - break; - } - a += 4; - r += 4; - } - } - if (dl > 0) { - for (;;) { - r[0] = a[0]; - if (--dl <= 0) - break; - r[1] = a[1]; - if (--dl <= 0) - break; - r[2] = a[2]; - if (--dl <= 0) - break; - r[3] = a[3]; - if (--dl <= 0) - break; - - a += 4; - r += 4; - } - } - } - return c; -} -#endif - -#ifdef BN_RECURSION -/* - * Karatsuba recursive multiplication algorithm 
(cf. Knuth, The Art of - * Computer Programming, Vol. 2) - */ - -/*- - * r is 2*n2 words in size, - * a and b are both n2 words in size. - * n2 must be a power of 2. - * We multiply and return the result. - * t must be 2*n2 words in size - * We calculate - * a[0]*b[0] - * a[0]*b[0]+a[1]*b[1]+(a[0]-a[1])*(b[1]-b[0]) - * a[1]*b[1] - */ -/* dnX may not be positive, but n2/2+dnX has to be */ -void bn_mul_recursive(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n2, - int dna, int dnb, BN_ULONG *t) -{ - int n = n2 / 2, c1, c2; - int tna = n + dna, tnb = n + dnb; - unsigned int neg, zero; - BN_ULONG ln, lo, *p; - -#ifdef BN_MUL_COMBA -#if 0 - if (n2 == 4) { - bn_mul_comba4(r, a, b); - return; - } -#endif - /* - * Only call bn_mul_comba 8 if n2 == 8 and the two arrays are complete - * [steve] - */ - if (n2 == 8 && dna == 0 && dnb == 0) { - bn_mul_comba8(r, a, b); - return; - } -#endif /* BN_MUL_COMBA */ - /* Else do normal multiply */ - if (n2 < BN_MUL_RECURSIVE_SIZE_NORMAL) { - bn_mul_normal(r, a, n2 + dna, b, n2 + dnb); - if ((dna + dnb) < 0) - memset(&r[2 * n2 + dna + dnb], 0, - sizeof(BN_ULONG) * -(dna + dnb)); - return; - } - /* r=(a[0]-a[1])*(b[1]-b[0]) */ - c1 = bn_cmp_part_words(a, &(a[n]), tna, n - tna); - c2 = bn_cmp_part_words(&(b[n]), b, tnb, tnb - n); - zero = neg = 0; - switch (c1 * 3 + c2) { - case -4: - bn_sub_part_words(t, &(a[n]), a, tna, tna - n); /* - */ - bn_sub_part_words(&(t[n]), b, &(b[n]), tnb, n - tnb); /* - */ - break; - case -3: - zero = 1; - break; - case -2: - bn_sub_part_words(t, &(a[n]), a, tna, tna - n); /* - */ - bn_sub_part_words(&(t[n]), &(b[n]), b, tnb, tnb - n); /* + */ - neg = 1; - break; - case -1: - case 0: - case 1: - zero = 1; - break; - case 2: - bn_sub_part_words(t, a, &(a[n]), tna, n - tna); /* + */ - bn_sub_part_words(&(t[n]), b, &(b[n]), tnb, n - tnb); /* - */ - neg = 1; - break; - case 3: - zero = 1; - break; - case 4: - bn_sub_part_words(t, a, &(a[n]), tna, n - tna); - bn_sub_part_words(&(t[n]), &(b[n]), b, tnb, tnb - n); - 
break; - } - -#ifdef BN_MUL_COMBA - if (n == 4 && dna == 0 && dnb == 0) { /* XXX: bn_mul_comba4 could take - * extra args to do this well */ - if (!zero) - bn_mul_comba4(&(t[n2]), t, &(t[n])); - else - memset(&t[n2], 0, sizeof(*t) * 8); - - bn_mul_comba4(r, a, b); - bn_mul_comba4(&(r[n2]), &(a[n]), &(b[n])); - } else if (n == 8 && dna == 0 && dnb == 0) { /* XXX: bn_mul_comba8 could - * take extra args to do - * this well */ - if (!zero) - bn_mul_comba8(&(t[n2]), t, &(t[n])); - else - memset(&t[n2], 0, sizeof(*t) * 16); - - bn_mul_comba8(r, a, b); - bn_mul_comba8(&(r[n2]), &(a[n]), &(b[n])); - } else -#endif /* BN_MUL_COMBA */ - { - p = &(t[n2 * 2]); - if (!zero) - bn_mul_recursive(&(t[n2]), t, &(t[n]), n, 0, 0, p); - else - memset(&t[n2], 0, sizeof(*t) * n2); - bn_mul_recursive(r, a, b, n, 0, 0, p); - bn_mul_recursive(&(r[n2]), &(a[n]), &(b[n]), n, dna, dnb, p); - } - - /*- - * t[32] holds (a[0]-a[1])*(b[1]-b[0]), c1 is the sign - * r[10] holds (a[0]*b[0]) - * r[32] holds (b[1]*b[1]) - */ - - c1 = (int)(bn_add_words(t, r, &(r[n2]), n2)); - - if (neg) { /* if t[32] is negative */ - c1 -= (int)(bn_sub_words(&(t[n2]), t, &(t[n2]), n2)); - } else { - /* Might have a carry */ - c1 += (int)(bn_add_words(&(t[n2]), &(t[n2]), t, n2)); - } - - /*- - * t[32] holds (a[0]-a[1])*(b[1]-b[0])+(a[0]*b[0])+(a[1]*b[1]) - * r[10] holds (a[0]*b[0]) - * r[32] holds (b[1]*b[1]) - * c1 holds the carry bits - */ - c1 += (int)(bn_add_words(&(r[n]), &(r[n]), &(t[n2]), n2)); - if (c1) { - p = &(r[n + n2]); - lo = *p; - ln = (lo + c1) & BN_MASK2; - *p = ln; - - /* - * The overflow will stop before we over write words we should not - * overwrite - */ - if (ln < (BN_ULONG)c1) { - do { - p++; - lo = *p; - ln = (lo + 1) & BN_MASK2; - *p = ln; - } while (ln == 0); - } - } -} - -/* - * n+tn is the word length t needs to be n*4 is size, as does r - */ -/* tnX may not be negative but less than n */ -void bn_mul_part_recursive(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n, - int tna, int tnb, BN_ULONG 
*t) -{ - int i, j, n2 = n * 2; - int c1, c2, neg; - BN_ULONG ln, lo, *p; - - if (n < 8) { - bn_mul_normal(r, a, n + tna, b, n + tnb); - return; - } - - /* r=(a[0]-a[1])*(b[1]-b[0]) */ - c1 = bn_cmp_part_words(a, &(a[n]), tna, n - tna); - c2 = bn_cmp_part_words(&(b[n]), b, tnb, tnb - n); - neg = 0; - switch (c1 * 3 + c2) { - case -4: - bn_sub_part_words(t, &(a[n]), a, tna, tna - n); /* - */ - bn_sub_part_words(&(t[n]), b, &(b[n]), tnb, n - tnb); /* - */ - break; - case -3: - case -2: - bn_sub_part_words(t, &(a[n]), a, tna, tna - n); /* - */ - bn_sub_part_words(&(t[n]), &(b[n]), b, tnb, tnb - n); /* + */ - neg = 1; - break; - case -1: - case 0: - case 1: - case 2: - bn_sub_part_words(t, a, &(a[n]), tna, n - tna); /* + */ - bn_sub_part_words(&(t[n]), b, &(b[n]), tnb, n - tnb); /* - */ - neg = 1; - break; - case 3: - case 4: - bn_sub_part_words(t, a, &(a[n]), tna, n - tna); - bn_sub_part_words(&(t[n]), &(b[n]), b, tnb, tnb - n); - break; - } - /* - * The zero case isn't yet implemented here. The speedup would probably - * be negligible. 
- */ -#if 0 - if (n == 4) { - bn_mul_comba4(&(t[n2]), t, &(t[n])); - bn_mul_comba4(r, a, b); - bn_mul_normal(&(r[n2]), &(a[n]), tn, &(b[n]), tn); - memset(&r[n2 + tn * 2], 0, sizeof(*r) * (n2 - tn * 2)); - } else -#endif - if (n == 8) { - bn_mul_comba8(&(t[n2]), t, &(t[n])); - bn_mul_comba8(r, a, b); - bn_mul_normal(&(r[n2]), &(a[n]), tna, &(b[n]), tnb); - memset(&r[n2 + tna + tnb], 0, sizeof(*r) * (n2 - tna - tnb)); - } else { - p = &(t[n2 * 2]); - bn_mul_recursive(&(t[n2]), t, &(t[n]), n, 0, 0, p); - bn_mul_recursive(r, a, b, n, 0, 0, p); - i = n / 2; - /* - * If there is only a bottom half to the number, just do it - */ - if (tna > tnb) - j = tna - i; - else - j = tnb - i; - if (j == 0) { - bn_mul_recursive(&(r[n2]), &(a[n]), &(b[n]), - i, tna - i, tnb - i, p); - memset(&r[n2 + i * 2], 0, sizeof(*r) * (n2 - i * 2)); - } else if (j > 0) { /* eg, n == 16, i == 8 and tn == 11 */ - bn_mul_part_recursive(&(r[n2]), &(a[n]), &(b[n]), - i, tna - i, tnb - i, p); - memset(&(r[n2 + tna + tnb]), 0, - sizeof(BN_ULONG) * (n2 - tna - tnb)); - } else { /* (j < 0) eg, n == 16, i == 8 and tn == 5 */ - - memset(&r[n2], 0, sizeof(*r) * n2); - if (tna < BN_MUL_RECURSIVE_SIZE_NORMAL - && tnb < BN_MUL_RECURSIVE_SIZE_NORMAL) { - bn_mul_normal(&(r[n2]), &(a[n]), tna, &(b[n]), tnb); - } else { - for (;;) { - i /= 2; - /* - * these simplified conditions work exclusively because - * difference between tna and tnb is 1 or 0 - */ - if (i < tna || i < tnb) { - bn_mul_part_recursive(&(r[n2]), - &(a[n]), &(b[n]), - i, tna - i, tnb - i, p); - break; - } else if (i == tna || i == tnb) { - bn_mul_recursive(&(r[n2]), - &(a[n]), &(b[n]), - i, tna - i, tnb - i, p); - break; - } - } - } - } - } - - /*- - * t[32] holds (a[0]-a[1])*(b[1]-b[0]), c1 is the sign - * r[10] holds (a[0]*b[0]) - * r[32] holds (b[1]*b[1]) - */ - - c1 = (int)(bn_add_words(t, r, &(r[n2]), n2)); - - if (neg) { /* if t[32] is negative */ - c1 -= (int)(bn_sub_words(&(t[n2]), t, &(t[n2]), n2)); - } else { - /* Might have a carry */ - 
c1 += (int)(bn_add_words(&(t[n2]), &(t[n2]), t, n2)); - } - - /*- - * t[32] holds (a[0]-a[1])*(b[1]-b[0])+(a[0]*b[0])+(a[1]*b[1]) - * r[10] holds (a[0]*b[0]) - * r[32] holds (b[1]*b[1]) - * c1 holds the carry bits - */ - c1 += (int)(bn_add_words(&(r[n]), &(r[n]), &(t[n2]), n2)); - if (c1) { - p = &(r[n + n2]); - lo = *p; - ln = (lo + c1) & BN_MASK2; - *p = ln; - - /* - * The overflow will stop before we over write words we should not - * overwrite - */ - if (ln < (BN_ULONG)c1) { - do { - p++; - lo = *p; - ln = (lo + 1) & BN_MASK2; - *p = ln; - } while (ln == 0); - } - } -} - -/*- - * a and b must be the same size, which is n2. - * r needs to be n2 words and t needs to be n2*2 - */ -void bn_mul_low_recursive(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n2, - BN_ULONG *t) -{ - int n = n2 / 2; - - bn_mul_recursive(r, a, b, n, 0, 0, &(t[0])); - if (n >= BN_MUL_LOW_RECURSIVE_SIZE_NORMAL) { - bn_mul_low_recursive(&(t[0]), &(a[0]), &(b[n]), n, &(t[n2])); - bn_add_words(&(r[n]), &(r[n]), &(t[0]), n); - bn_mul_low_recursive(&(t[0]), &(a[n]), &(b[0]), n, &(t[n2])); - bn_add_words(&(r[n]), &(r[n]), &(t[0]), n); - } else { - bn_mul_low_normal(&(t[0]), &(a[0]), &(b[n]), n); - bn_mul_low_normal(&(t[n]), &(a[n]), &(b[0]), n); - bn_add_words(&(r[n]), &(r[n]), &(t[0]), n); - bn_add_words(&(r[n]), &(r[n]), &(t[n]), n); - } -} -#endif /* BN_RECURSION */ - int BN_mul(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, BN_CTX *ctx) { int ret = bn_mul_fixed_top(r, a, b, ctx); @@ -626,66 +143,3 @@ err: BN_CTX_end(ctx); return ret; } - -void bn_mul_normal(BN_ULONG *r, BN_ULONG *a, int na, BN_ULONG *b, int nb) -{ - BN_ULONG *rr; - - if (na < nb) { - int itmp; - BN_ULONG *ltmp; - - itmp = na; - na = nb; - nb = itmp; - ltmp = a; - a = b; - b = ltmp; - } - rr = &(r[na]); - if (nb <= 0) { - (void)bn_mul_words(r, a, na, 0); - return; - } else - rr[0] = bn_mul_words(r, a, na, b[0]); - - for (;;) { - if (--nb <= 0) - return; - rr[1] = bn_mul_add_words(&(r[1]), a, na, b[1]); - if (--nb <= 0) - return; - 
rr[2] = bn_mul_add_words(&(r[2]), a, na, b[2]); - if (--nb <= 0) - return; - rr[3] = bn_mul_add_words(&(r[3]), a, na, b[3]); - if (--nb <= 0) - return; - rr[4] = bn_mul_add_words(&(r[4]), a, na, b[4]); - rr += 4; - r += 4; - b += 4; - } -} - -void bn_mul_low_normal(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n) -{ - bn_mul_words(r, a, n, b[0]); - - for (;;) { - if (--n <= 0) - return; - bn_mul_add_words(&(r[1]), a, n, b[1]); - if (--n <= 0) - return; - bn_mul_add_words(&(r[2]), a, n, b[2]); - if (--n <= 0) - return; - bn_mul_add_words(&(r[3]), a, n, b[3]); - if (--n <= 0) - return; - bn_mul_add_words(&(r[4]), a, n, b[4]); - r += 4; - b += 4; - } -} diff --git a/crypto/bn/bnw_mul.c b/crypto/bn/bnw_mul.c new file mode 100644 index 00000000000..616bea535f9 --- /dev/null +++ b/crypto/bn/bnw_mul.c @@ -0,0 +1,415 @@ +/* + * Copyright 1995-2025 The OpenSSL Project Authors. All Rights Reserved. + * + * Licensed under the Apache License 2.0 (the "License"). You may not use + * this file except in compliance with the License. You can obtain a copy + * in the file LICENSE in the source distribution or at + * https://www.openssl.org/source/license.html + */ + +#include +#include "bn_local.h" + +#ifdef BN_RECURSION +/* + * Karatsuba recursive multiplication algorithm (cf. Knuth, The Art of + * Computer Programming, Vol. 2) + */ + +/*- + * r is 2*n2 words in size, + * a and b are both n2 words in size. + * n2 must be a power of 2. + * We multiply and return the result. 
+ * t must be 2*n2 words in size + * We calculate + * a[0]*b[0] + * a[0]*b[0]+a[1]*b[1]+(a[0]-a[1])*(b[1]-b[0]) + * a[1]*b[1] + */ +/* dnX may not be positive, but n2/2+dnX has to be */ +void bn_mul_recursive(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b, int n2, + int dna, int dnb, BN_ULONG *t) +{ + int n = n2 / 2, c1, c2; + int tna = n + dna, tnb = n + dnb; + unsigned int neg, zero; + BN_ULONG ln, lo, *p; + +#ifdef BN_MUL_COMBA +#if 0 + if (n2 == 4) { + bn_mul_comba4(r, a, b); + return; + } +#endif + /* + * Only call bn_mul_comba 8 if n2 == 8 and the two arrays are complete + * [steve] + */ + if (n2 == 8 && dna == 0 && dnb == 0) { + bn_mul_comba8(r, a, b); + return; + } +#endif /* BN_MUL_COMBA */ + /* Else do normal multiply */ + if (n2 < BN_MUL_RECURSIVE_SIZE_NORMAL) { + bn_mul_normal(r, a, n2 + dna, b, n2 + dnb); + if ((dna + dnb) < 0) + memset(&r[2 * n2 + dna + dnb], 0, + sizeof(BN_ULONG) * -(dna + dnb)); + return; + } + /* r=(a[0]-a[1])*(b[1]-b[0]) */ + c1 = bn_cmp_part_words(a, &(a[n]), tna, n - tna); + c2 = bn_cmp_part_words(&(b[n]), b, tnb, tnb - n); + zero = neg = 0; + switch (c1 * 3 + c2) { + case -4: + bn_sub_part_words(t, &(a[n]), a, tna, tna - n); /* - */ + bn_sub_part_words(&(t[n]), b, &(b[n]), tnb, n - tnb); /* - */ + break; + case -3: + zero = 1; + break; + case -2: + bn_sub_part_words(t, &(a[n]), a, tna, tna - n); /* - */ + bn_sub_part_words(&(t[n]), &(b[n]), b, tnb, tnb - n); /* + */ + neg = 1; + break; + case -1: + case 0: + case 1: + zero = 1; + break; + case 2: + bn_sub_part_words(t, a, &(a[n]), tna, n - tna); /* + */ + bn_sub_part_words(&(t[n]), b, &(b[n]), tnb, n - tnb); /* - */ + neg = 1; + break; + case 3: + zero = 1; + break; + case 4: + bn_sub_part_words(t, a, &(a[n]), tna, n - tna); + bn_sub_part_words(&(t[n]), &(b[n]), b, tnb, tnb - n); + break; + } + +#ifdef BN_MUL_COMBA + if (n == 4 && dna == 0 && dnb == 0) { /* XXX: bn_mul_comba4 could take + * extra args to do this well */ + if (!zero) + bn_mul_comba4(&(t[n2]), t, &(t[n])); + 
else + memset(&t[n2], 0, sizeof(*t) * 8); + + bn_mul_comba4(r, a, b); + bn_mul_comba4(&(r[n2]), &(a[n]), &(b[n])); + } else if (n == 8 && dna == 0 && dnb == 0) { /* XXX: bn_mul_comba8 could + * take extra args to do + * this well */ + if (!zero) + bn_mul_comba8(&(t[n2]), t, &(t[n])); + else + memset(&t[n2], 0, sizeof(*t) * 16); + + bn_mul_comba8(r, a, b); + bn_mul_comba8(&(r[n2]), &(a[n]), &(b[n])); + } else +#endif /* BN_MUL_COMBA */ + { + p = &(t[n2 * 2]); + if (!zero) + bn_mul_recursive(&(t[n2]), t, &(t[n]), n, 0, 0, p); + else + memset(&t[n2], 0, sizeof(*t) * n2); + bn_mul_recursive(r, a, b, n, 0, 0, p); + bn_mul_recursive(&(r[n2]), &(a[n]), &(b[n]), n, dna, dnb, p); + } + + /*- + * t[32] holds (a[0]-a[1])*(b[1]-b[0]), c1 is the sign + * r[10] holds (a[0]*b[0]) + * r[32] holds (b[1]*b[1]) + */ + + c1 = (int)(bn_add_words(t, r, &(r[n2]), n2)); + + if (neg) { /* if t[32] is negative */ + c1 -= (int)(bn_sub_words(&(t[n2]), t, &(t[n2]), n2)); + } else { + /* Might have a carry */ + c1 += (int)(bn_add_words(&(t[n2]), &(t[n2]), t, n2)); + } + + /*- + * t[32] holds (a[0]-a[1])*(b[1]-b[0])+(a[0]*b[0])+(a[1]*b[1]) + * r[10] holds (a[0]*b[0]) + * r[32] holds (b[1]*b[1]) + * c1 holds the carry bits + */ + c1 += (int)(bn_add_words(&(r[n]), &(r[n]), &(t[n2]), n2)); + if (c1) { + p = &(r[n + n2]); + lo = *p; + ln = (lo + c1) & BN_MASK2; + *p = ln; + + /* + * The overflow will stop before we over write words we should not + * overwrite + */ + if (ln < (BN_ULONG)c1) { + do { + p++; + lo = *p; + ln = (lo + 1) & BN_MASK2; + *p = ln; + } while (ln == 0); + } + } +} + +/* + * n+tn is the word length t needs to be n*4 is size, as does r + */ +/* tnX may not be negative but less than n */ +void bn_mul_part_recursive(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b, + int n, int tna, int tnb, BN_ULONG *t) +{ + int i, j, n2 = n * 2; + int c1, c2, neg; + BN_ULONG ln, lo, *p; + + if (n < 8) { + bn_mul_normal(r, a, n + tna, b, n + tnb); + return; + } + + /* r=(a[0]-a[1])*(b[1]-b[0]) */ 
+ c1 = bn_cmp_part_words(a, &(a[n]), tna, n - tna); + c2 = bn_cmp_part_words(&(b[n]), b, tnb, tnb - n); + neg = 0; + switch (c1 * 3 + c2) { + case -4: + bn_sub_part_words(t, &(a[n]), a, tna, tna - n); /* - */ + bn_sub_part_words(&(t[n]), b, &(b[n]), tnb, n - tnb); /* - */ + break; + case -3: + case -2: + bn_sub_part_words(t, &(a[n]), a, tna, tna - n); /* - */ + bn_sub_part_words(&(t[n]), &(b[n]), b, tnb, tnb - n); /* + */ + neg = 1; + break; + case -1: + case 0: + case 1: + case 2: + bn_sub_part_words(t, a, &(a[n]), tna, n - tna); /* + */ + bn_sub_part_words(&(t[n]), b, &(b[n]), tnb, n - tnb); /* - */ + neg = 1; + break; + case 3: + case 4: + bn_sub_part_words(t, a, &(a[n]), tna, n - tna); + bn_sub_part_words(&(t[n]), &(b[n]), b, tnb, tnb - n); + break; + } + /* + * The zero case isn't yet implemented here. The speedup would probably + * be negligible. + */ +#if 0 + if (n == 4) { + bn_mul_comba4(&(t[n2]), t, &(t[n])); + bn_mul_comba4(r, a, b); + bn_mul_normal(&(r[n2]), &(a[n]), tn, &(b[n]), tn); + memset(&r[n2 + tn * 2], 0, sizeof(*r) * (n2 - tn * 2)); + } else +#endif + if (n == 8) { + bn_mul_comba8(&(t[n2]), t, &(t[n])); + bn_mul_comba8(r, a, b); + bn_mul_normal(&(r[n2]), &(a[n]), tna, &(b[n]), tnb); + memset(&r[n2 + tna + tnb], 0, sizeof(*r) * (n2 - tna - tnb)); + } else { + p = &(t[n2 * 2]); + bn_mul_recursive(&(t[n2]), t, &(t[n]), n, 0, 0, p); + bn_mul_recursive(r, a, b, n, 0, 0, p); + i = n / 2; + /* + * If there is only a bottom half to the number, just do it + */ + if (tna > tnb) + j = tna - i; + else + j = tnb - i; + if (j == 0) { + bn_mul_recursive(&(r[n2]), &(a[n]), &(b[n]), + i, tna - i, tnb - i, p); + memset(&r[n2 + i * 2], 0, sizeof(*r) * (n2 - i * 2)); + } else if (j > 0) { /* eg, n == 16, i == 8 and tn == 11 */ + bn_mul_part_recursive(&(r[n2]), &(a[n]), &(b[n]), + i, tna - i, tnb - i, p); + memset(&(r[n2 + tna + tnb]), 0, + sizeof(BN_ULONG) * (n2 - tna - tnb)); + } else { /* (j < 0) eg, n == 16, i == 8 and tn == 5 */ + + memset(&r[n2], 0, sizeof(*r) 
* n2); + if (tna < BN_MUL_RECURSIVE_SIZE_NORMAL + && tnb < BN_MUL_RECURSIVE_SIZE_NORMAL) { + bn_mul_normal(&(r[n2]), &(a[n]), tna, &(b[n]), tnb); + } else { + for (;;) { + i /= 2; + /* + * these simplified conditions work exclusively because + * difference between tna and tnb is 1 or 0 + */ + if (i < tna || i < tnb) { + bn_mul_part_recursive(&(r[n2]), + &(a[n]), &(b[n]), + i, tna - i, tnb - i, p); + break; + } else if (i == tna || i == tnb) { + bn_mul_recursive(&(r[n2]), + &(a[n]), &(b[n]), + i, tna - i, tnb - i, p); + break; + } + } + } + } + } + + /*- + * t[32] holds (a[0]-a[1])*(b[1]-b[0]), c1 is the sign + * r[10] holds (a[0]*b[0]) + * r[32] holds (b[1]*b[1]) + */ + + c1 = (int)(bn_add_words(t, r, &(r[n2]), n2)); + + if (neg) { /* if t[32] is negative */ + c1 -= (int)(bn_sub_words(&(t[n2]), t, &(t[n2]), n2)); + } else { + /* Might have a carry */ + c1 += (int)(bn_add_words(&(t[n2]), &(t[n2]), t, n2)); + } + + /*- + * t[32] holds (a[0]-a[1])*(b[1]-b[0])+(a[0]*b[0])+(a[1]*b[1]) + * r[10] holds (a[0]*b[0]) + * r[32] holds (b[1]*b[1]) + * c1 holds the carry bits + */ + c1 += (int)(bn_add_words(&(r[n]), &(r[n]), &(t[n2]), n2)); + if (c1) { + p = &(r[n + n2]); + lo = *p; + ln = (lo + c1) & BN_MASK2; + *p = ln; + + /* + * The overflow will stop before we over write words we should not + * overwrite + */ + if (ln < (BN_ULONG)c1) { + do { + p++; + lo = *p; + ln = (lo + 1) & BN_MASK2; + *p = ln; + } while (ln == 0); + } + } +} + +/*- + * a and b must be the same size, which is n2. 
+ * r needs to be n2 words and t needs to be n2*2 + */ +void bn_mul_low_recursive(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b, + int n2, BN_ULONG *t) +{ + int n = n2 / 2; + + bn_mul_recursive(r, a, b, n, 0, 0, &(t[0])); + if (n >= BN_MUL_LOW_RECURSIVE_SIZE_NORMAL) { + bn_mul_low_recursive(&(t[0]), &(a[0]), &(b[n]), n, &(t[n2])); + bn_add_words(&(r[n]), &(r[n]), &(t[0]), n); + bn_mul_low_recursive(&(t[0]), &(a[n]), &(b[0]), n, &(t[n2])); + bn_add_words(&(r[n]), &(r[n]), &(t[0]), n); + } else { + bn_mul_low_normal(&(t[0]), &(a[0]), &(b[n]), n); + bn_mul_low_normal(&(t[n]), &(a[n]), &(b[0]), n); + bn_add_words(&(r[n]), &(r[n]), &(t[0]), n); + bn_add_words(&(r[n]), &(r[n]), &(t[n]), n); + } +} +#endif /* BN_RECURSION */ + +void bn_mul_normal(BN_ULONG *r, const BN_ULONG *a, int na, const BN_ULONG *b, + int nb) +{ + BN_ULONG *rr; + + if (na < nb) { + int itmp; + const BN_ULONG *ltmp; + + itmp = na; + na = nb; + nb = itmp; + ltmp = a; + a = b; + b = ltmp; + } + rr = &(r[na]); + if (nb <= 0) { + (void)bn_mul_words(r, a, na, 0); + return; + } else + rr[0] = bn_mul_words(r, a, na, b[0]); + + for (;;) { + if (--nb <= 0) + return; + rr[1] = bn_mul_add_words(&(r[1]), a, na, b[1]); + if (--nb <= 0) + return; + rr[2] = bn_mul_add_words(&(r[2]), a, na, b[2]); + if (--nb <= 0) + return; + rr[3] = bn_mul_add_words(&(r[3]), a, na, b[3]); + if (--nb <= 0) + return; + rr[4] = bn_mul_add_words(&(r[4]), a, na, b[4]); + rr += 4; + r += 4; + b += 4; + } +} + +void bn_mul_low_normal(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b, int n) +{ + bn_mul_words(r, a, n, b[0]); + + for (;;) { + if (--n <= 0) + return; + bn_mul_add_words(&(r[1]), a, n, b[1]); + if (--n <= 0) + return; + bn_mul_add_words(&(r[2]), a, n, b[2]); + if (--n <= 0) + return; + bn_mul_add_words(&(r[3]), a, n, b[3]); + if (--n <= 0) + return; + bn_mul_add_words(&(r[4]), a, n, b[4]); + r += 4; + b += 4; + } +} diff --git a/crypto/bn/bnw_sub.c b/crypto/bn/bnw_sub.c new file mode 100644 index 00000000000..f989878ce01 
--- /dev/null +++ b/crypto/bn/bnw_sub.c @@ -0,0 +1,155 @@ +/* + * Copyright 1995-2025 The OpenSSL Project Authors. All Rights Reserved. + * + * Licensed under the Apache License 2.0 (the "License"). You may not use + * this file except in compliance with the License. You can obtain a copy + * in the file LICENSE in the source distribution or at + * https://www.openssl.org/source/license.html + */ + +#include +#include "bn_local.h" + +#if defined(OPENSSL_NO_ASM) || !defined(OPENSSL_BN_ASM_PART_WORDS) +/* + * Here follows specialised variants of bn_add_words() and bn_sub_words(). + * They have the property performing operations on arrays of different sizes. + * The sizes of those arrays is expressed through cl, which is the common + * length ( basically, min(len(a),len(b)) ), and dl, which is the delta + * between the two lengths, calculated as len(a)-len(b). All lengths are the + * number of BN_ULONGs... For the operations that require a result array as + * parameter, it must have the length cl+abs(dl). + * + * These functions should probably end up in bn_asm.c as soon as there are + * assembler counterparts for the systems that use assembler files. 
+ */ + +BN_ULONG bn_sub_part_words(BN_ULONG *r, + const BN_ULONG *a, const BN_ULONG *b, + int cl, int dl) +{ + BN_ULONG c, t; + + assert(cl >= 0); + c = bn_sub_words(r, a, b, cl); + + if (dl == 0) + return c; + + r += cl; + a += cl; + b += cl; + + if (dl < 0) { + for (;;) { + t = b[0]; + r[0] = (0 - t - c) & BN_MASK2; + if (t != 0) + c = 1; + if (++dl >= 0) + break; + + t = b[1]; + r[1] = (0 - t - c) & BN_MASK2; + if (t != 0) + c = 1; + if (++dl >= 0) + break; + + t = b[2]; + r[2] = (0 - t - c) & BN_MASK2; + if (t != 0) + c = 1; + if (++dl >= 0) + break; + + t = b[3]; + r[3] = (0 - t - c) & BN_MASK2; + if (t != 0) + c = 1; + if (++dl >= 0) + break; + + b += 4; + r += 4; + } + } else { + int save_dl = dl; + while (c) { + t = a[0]; + r[0] = (t - c) & BN_MASK2; + if (t != 0) + c = 0; + if (--dl <= 0) + break; + + t = a[1]; + r[1] = (t - c) & BN_MASK2; + if (t != 0) + c = 0; + if (--dl <= 0) + break; + + t = a[2]; + r[2] = (t - c) & BN_MASK2; + if (t != 0) + c = 0; + if (--dl <= 0) + break; + + t = a[3]; + r[3] = (t - c) & BN_MASK2; + if (t != 0) + c = 0; + if (--dl <= 0) + break; + + save_dl = dl; + a += 4; + r += 4; + } + if (dl > 0) { + if (save_dl > dl) { + switch (save_dl - dl) { + case 1: + r[1] = a[1]; + if (--dl <= 0) + break; + /* fall through */ + case 2: + r[2] = a[2]; + if (--dl <= 0) + break; + /* fall through */ + case 3: + r[3] = a[3]; + if (--dl <= 0) + break; + } + a += 4; + r += 4; + } + } + if (dl > 0) { + for (;;) { + r[0] = a[0]; + if (--dl <= 0) + break; + r[1] = a[1]; + if (--dl <= 0) + break; + r[2] = a[2]; + if (--dl <= 0) + break; + r[3] = a[3]; + if (--dl <= 0) + break; + + a += 4; + r += 4; + } + } + } + return c; +} +#endif diff --git a/crypto/bn/build.info b/crypto/bn/build.info index 01e98e45443..10680e11004 100644 --- a/crypto/bn/build.info +++ b/crypto/bn/build.info @@ -105,11 +105,18 @@ IF[{- !$disabled{asm} -}] ENDIF ENDIF -$COMMON=bn_add.c bn_div.c bn_exp.c bn_lib.c bn_ctx.c bn_mul.c \ +$COMMON_BN=bn_add.c bn_div.c bn_exp.c bn_lib.c 
bn_ctx.c bn_mul.c \ bn_mod.c bn_conv.c bn_rand.c bn_shift.c bn_word.c bn_blind.c \ bn_kron.c bn_sqrt.c bn_gcd.c bn_prime.c bn_sqr.c \ bn_recp.c bn_mont.c bn_mpi.c bn_exp2.c bn_gf2m.c bn_nist.c \ bn_intern.c bn_dh.c bn_rsa_fips186_5.c bn_const.c +# bnw_*.c is a growing collection of files with routines that operate on +# BN_ULONG only. They were helper routines dispersed in bn_*.c, and are +# often routines that don't have an assembler implementation, and therefore +# didn't fit into bn_asm.c. +$COMMON_BNW=bnw_sub.c bnw_mul.c +$COMMON=$COMMON_BN $COMMON_BNW + SOURCE[../../libcrypto]=$COMMON $BNASM bn_print.c bn_err.c bn_srp.c DEFINE[../../libcrypto]=$BNDEF IF[{- !$disabled{'deprecated-0.9.8'} -}]