From: Richard Levitte <levitte@openssl.org>
Date: Mon, 24 Nov 2025 07:52:53 +0000 (+0100)
Subject: BIGNUM: separate out word-only helper functions from bn_mul.c
X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=4eb452f181c22033e5a797ae50e2d90f9fa793f2;p=thirdparty%2Fopenssl.git

BIGNUM: separate out word-only helper functions from bn_mul.c

This separation will allow us to use the word-only helper functions
from OSSL_FN functions without pulling in functions that operate on
BIGNUMs.

This also starts the collection of source files with word-only BN
functions that haven't found their way into bn_asm.c for various
reasons.
To recognise these files, their names are prefixed 'bnw_' instead of 'bn_'.

While at it, constification is done where appropriate among the word
helpers.

Reviewed-by: Matt Caswell <matt@openssl.org>
Reviewed-by: Tomas Mraz <tomas@openssl.org>
(Merged from https://github.com/openssl/openssl/pull/29203)
---

diff --git a/crypto/bn/asm/x86_64-gcc.c b/crypto/bn/asm/x86_64-gcc.c
index 7edb77806ed..c25b06e770c 100644
--- a/crypto/bn/asm/x86_64-gcc.c
+++ b/crypto/bn/asm/x86_64-gcc.c
@@ -261,7 +261,7 @@ BN_ULONG bn_sub_words(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,
 #else
 /* Simics 1.4<7 has buggy sbbq:-( */
 #define BN_MASK2 0xffffffffffffffffL
-BN_ULONG bn_sub_words(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n)
+BN_ULONG bn_sub_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b, int n)
 {
     BN_ULONG t1, t2;
     int c = 0;
@@ -408,7 +408,7 @@ BN_ULONG bn_sub_words(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n)
 #define sqr_add_c2(a, i, j, c0, c1, c2) \
     mul_add_c2((a)[i], (a)[j], c0, c1, c2)
 
-void bn_mul_comba8(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b)
+void bn_mul_comba8(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b)
 {
     BN_ULONG c1, c2, c3;
 
@@ -511,7 +511,7 @@ void bn_mul_comba8(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b)
     r[15] = c1;
 }
 
-void bn_mul_comba4(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b)
+void bn_mul_comba4(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b)
 {
     BN_ULONG c1, c2, c3;
 
diff --git a/crypto/bn/bn_asm.c b/crypto/bn/bn_asm.c
index 0a77f1b070e..72649d8c6f0 100644
--- a/crypto/bn/bn_asm.c
+++ b/crypto/bn/bn_asm.c
@@ -622,7 +622,7 @@ BN_ULONG bn_sub_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b,
     mul_add_c2((a)[i], (a)[j], c0, c1, c2)
 #endif /* !BN_LLONG */
 
-void bn_mul_comba8(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b)
+void bn_mul_comba8(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b)
 {
     BN_ULONG c1, c2, c3;
 
@@ -725,7 +725,7 @@ void bn_mul_comba8(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b)
     r[15] = c1;
 }
 
-void bn_mul_comba4(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b)
+void bn_mul_comba4(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b)
 {
     BN_ULONG c1, c2, c3;
 
@@ -1006,7 +1006,7 @@ void bn_sqr_comba8(BN_ULONG *r, const BN_ULONG *a)
     bn_sqr_normal(r, a, 8, t);
 }
 
-void bn_mul_comba4(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b)
+void bn_mul_comba4(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b)
 {
     r[4] = bn_mul_words(&(r[0]), a, 4, b[0]);
     r[5] = bn_mul_add_words(&(r[1]), a, 4, b[1]);
@@ -1014,7 +1014,7 @@ void bn_mul_comba4(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b)
     r[7] = bn_mul_add_words(&(r[3]), a, 4, b[3]);
 }
 
-void bn_mul_comba8(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b)
+void bn_mul_comba8(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b)
 {
     r[8] = bn_mul_words(&(r[0]), a, 8, b[0]);
     r[9] = bn_mul_add_words(&(r[1]), a, 8, b[1]);
diff --git a/crypto/bn/bn_local.h b/crypto/bn/bn_local.h
index e33badd3602..540ff447926 100644
--- a/crypto/bn/bn_local.h
+++ b/crypto/bn/bn_local.h
@@ -658,21 +658,21 @@ void BN_RECP_CTX_init(BN_RECP_CTX *recp);
 void BN_MONT_CTX_init(BN_MONT_CTX *ctx);
 
 void bn_init(BIGNUM *a);
-void bn_mul_normal(BN_ULONG *r, BN_ULONG *a, int na, BN_ULONG *b, int nb);
-void bn_mul_comba8(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b);
-void bn_mul_comba4(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b);
+void bn_mul_normal(BN_ULONG *r, const BN_ULONG *a, int na, const BN_ULONG *b, int nb);
+void bn_mul_comba8(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b);
+void bn_mul_comba4(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b);
 void bn_sqr_normal(BN_ULONG *r, const BN_ULONG *a, int n, BN_ULONG *tmp);
 void bn_sqr_comba8(BN_ULONG *r, const BN_ULONG *a);
 void bn_sqr_comba4(BN_ULONG *r, const BN_ULONG *a);
 int bn_cmp_words(const BN_ULONG *a, const BN_ULONG *b, int n);
 int bn_cmp_part_words(const BN_ULONG *a, const BN_ULONG *b, int cl, int dl);
-void bn_mul_recursive(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n2,
+void bn_mul_recursive(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b, int n2,
     int dna, int dnb, BN_ULONG *t);
-void bn_mul_part_recursive(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b,
+void bn_mul_part_recursive(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b,
     int n, int tna, int tnb, BN_ULONG *t);
 void bn_sqr_recursive(BN_ULONG *r, const BN_ULONG *a, int n2, BN_ULONG *t);
-void bn_mul_low_normal(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n);
-void bn_mul_low_recursive(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n2,
+void bn_mul_low_normal(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b, int n);
+void bn_mul_low_recursive(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b, int n2,
     BN_ULONG *t);
 BN_ULONG bn_sub_part_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b,
     int cl, int dl);
diff --git a/crypto/bn/bn_mul.c b/crypto/bn/bn_mul.c
index 5b751c6fcbd..265f1106ebb 100644
--- a/crypto/bn/bn_mul.c
+++ b/crypto/bn/bn_mul.c
@@ -11,489 +11,6 @@
 #include "internal/cryptlib.h"
 #include "bn_local.h"
 
-#if defined(OPENSSL_NO_ASM) || !defined(OPENSSL_BN_ASM_PART_WORDS)
-/*
- * Here follows specialised variants of bn_add_words() and bn_sub_words().
- * They have the property performing operations on arrays of different sizes.
- * The sizes of those arrays is expressed through cl, which is the common
- * length ( basically, min(len(a),len(b)) ), and dl, which is the delta
- * between the two lengths, calculated as len(a)-len(b). All lengths are the
- * number of BN_ULONGs...  For the operations that require a result array as
- * parameter, it must have the length cl+abs(dl). These functions should
- * probably end up in bn_asm.c as soon as there are assembler counterparts
- * for the systems that use assembler files.
- */
-
-BN_ULONG bn_sub_part_words(BN_ULONG *r,
-    const BN_ULONG *a, const BN_ULONG *b,
-    int cl, int dl)
-{
-    BN_ULONG c, t;
-
-    assert(cl >= 0);
-    c = bn_sub_words(r, a, b, cl);
-
-    if (dl == 0)
-        return c;
-
-    r += cl;
-    a += cl;
-    b += cl;
-
-    if (dl < 0) {
-        for (;;) {
-            t = b[0];
-            r[0] = (0 - t - c) & BN_MASK2;
-            if (t != 0)
-                c = 1;
-            if (++dl >= 0)
-                break;
-
-            t = b[1];
-            r[1] = (0 - t - c) & BN_MASK2;
-            if (t != 0)
-                c = 1;
-            if (++dl >= 0)
-                break;
-
-            t = b[2];
-            r[2] = (0 - t - c) & BN_MASK2;
-            if (t != 0)
-                c = 1;
-            if (++dl >= 0)
-                break;
-
-            t = b[3];
-            r[3] = (0 - t - c) & BN_MASK2;
-            if (t != 0)
-                c = 1;
-            if (++dl >= 0)
-                break;
-
-            b += 4;
-            r += 4;
-        }
-    } else {
-        int save_dl = dl;
-        while (c) {
-            t = a[0];
-            r[0] = (t - c) & BN_MASK2;
-            if (t != 0)
-                c = 0;
-            if (--dl <= 0)
-                break;
-
-            t = a[1];
-            r[1] = (t - c) & BN_MASK2;
-            if (t != 0)
-                c = 0;
-            if (--dl <= 0)
-                break;
-
-            t = a[2];
-            r[2] = (t - c) & BN_MASK2;
-            if (t != 0)
-                c = 0;
-            if (--dl <= 0)
-                break;
-
-            t = a[3];
-            r[3] = (t - c) & BN_MASK2;
-            if (t != 0)
-                c = 0;
-            if (--dl <= 0)
-                break;
-
-            save_dl = dl;
-            a += 4;
-            r += 4;
-        }
-        if (dl > 0) {
-            if (save_dl > dl) {
-                switch (save_dl - dl) {
-                case 1:
-                    r[1] = a[1];
-                    if (--dl <= 0)
-                        break;
-                    /* fall through */
-                case 2:
-                    r[2] = a[2];
-                    if (--dl <= 0)
-                        break;
-                    /* fall through */
-                case 3:
-                    r[3] = a[3];
-                    if (--dl <= 0)
-                        break;
-                }
-                a += 4;
-                r += 4;
-            }
-        }
-        if (dl > 0) {
-            for (;;) {
-                r[0] = a[0];
-                if (--dl <= 0)
-                    break;
-                r[1] = a[1];
-                if (--dl <= 0)
-                    break;
-                r[2] = a[2];
-                if (--dl <= 0)
-                    break;
-                r[3] = a[3];
-                if (--dl <= 0)
-                    break;
-
-                a += 4;
-                r += 4;
-            }
-        }
-    }
-    return c;
-}
-#endif
-
-#ifdef BN_RECURSION
-/*
- * Karatsuba recursive multiplication algorithm (cf. Knuth, The Art of
- * Computer Programming, Vol. 2)
- */
-
-/*-
- * r is 2*n2 words in size,
- * a and b are both n2 words in size.
- * n2 must be a power of 2.
- * We multiply and return the result.
- * t must be 2*n2 words in size
- * We calculate
- * a[0]*b[0]
- * a[0]*b[0]+a[1]*b[1]+(a[0]-a[1])*(b[1]-b[0])
- * a[1]*b[1]
- */
-/* dnX may not be positive, but n2/2+dnX has to be */
-void bn_mul_recursive(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n2,
-    int dna, int dnb, BN_ULONG *t)
-{
-    int n = n2 / 2, c1, c2;
-    int tna = n + dna, tnb = n + dnb;
-    unsigned int neg, zero;
-    BN_ULONG ln, lo, *p;
-
-#ifdef BN_MUL_COMBA
-#if 0
-    if (n2 == 4) {
-        bn_mul_comba4(r, a, b);
-        return;
-    }
-#endif
-    /*
-     * Only call bn_mul_comba 8 if n2 == 8 and the two arrays are complete
-     * [steve]
-     */
-    if (n2 == 8 && dna == 0 && dnb == 0) {
-        bn_mul_comba8(r, a, b);
-        return;
-    }
-#endif /* BN_MUL_COMBA */
-    /* Else do normal multiply */
-    if (n2 < BN_MUL_RECURSIVE_SIZE_NORMAL) {
-        bn_mul_normal(r, a, n2 + dna, b, n2 + dnb);
-        if ((dna + dnb) < 0)
-            memset(&r[2 * n2 + dna + dnb], 0,
-                sizeof(BN_ULONG) * -(dna + dnb));
-        return;
-    }
-    /* r=(a[0]-a[1])*(b[1]-b[0]) */
-    c1 = bn_cmp_part_words(a, &(a[n]), tna, n - tna);
-    c2 = bn_cmp_part_words(&(b[n]), b, tnb, tnb - n);
-    zero = neg = 0;
-    switch (c1 * 3 + c2) {
-    case -4:
-        bn_sub_part_words(t, &(a[n]), a, tna, tna - n); /* - */
-        bn_sub_part_words(&(t[n]), b, &(b[n]), tnb, n - tnb); /* - */
-        break;
-    case -3:
-        zero = 1;
-        break;
-    case -2:
-        bn_sub_part_words(t, &(a[n]), a, tna, tna - n); /* - */
-        bn_sub_part_words(&(t[n]), &(b[n]), b, tnb, tnb - n); /* + */
-        neg = 1;
-        break;
-    case -1:
-    case 0:
-    case 1:
-        zero = 1;
-        break;
-    case 2:
-        bn_sub_part_words(t, a, &(a[n]), tna, n - tna); /* + */
-        bn_sub_part_words(&(t[n]), b, &(b[n]), tnb, n - tnb); /* - */
-        neg = 1;
-        break;
-    case 3:
-        zero = 1;
-        break;
-    case 4:
-        bn_sub_part_words(t, a, &(a[n]), tna, n - tna);
-        bn_sub_part_words(&(t[n]), &(b[n]), b, tnb, tnb - n);
-        break;
-    }
-
-#ifdef BN_MUL_COMBA
-    if (n == 4 && dna == 0 && dnb == 0) { /* XXX: bn_mul_comba4 could take
-                                           * extra args to do this well */
-        if (!zero)
-            bn_mul_comba4(&(t[n2]), t, &(t[n]));
-        else
-            memset(&t[n2], 0, sizeof(*t) * 8);
-
-        bn_mul_comba4(r, a, b);
-        bn_mul_comba4(&(r[n2]), &(a[n]), &(b[n]));
-    } else if (n == 8 && dna == 0 && dnb == 0) { /* XXX: bn_mul_comba8 could
-                                                  * take extra args to do
-                                                  * this well */
-        if (!zero)
-            bn_mul_comba8(&(t[n2]), t, &(t[n]));
-        else
-            memset(&t[n2], 0, sizeof(*t) * 16);
-
-        bn_mul_comba8(r, a, b);
-        bn_mul_comba8(&(r[n2]), &(a[n]), &(b[n]));
-    } else
-#endif /* BN_MUL_COMBA */
-    {
-        p = &(t[n2 * 2]);
-        if (!zero)
-            bn_mul_recursive(&(t[n2]), t, &(t[n]), n, 0, 0, p);
-        else
-            memset(&t[n2], 0, sizeof(*t) * n2);
-        bn_mul_recursive(r, a, b, n, 0, 0, p);
-        bn_mul_recursive(&(r[n2]), &(a[n]), &(b[n]), n, dna, dnb, p);
-    }
-
-    /*-
-     * t[32] holds (a[0]-a[1])*(b[1]-b[0]), c1 is the sign
-     * r[10] holds (a[0]*b[0])
-     * r[32] holds (b[1]*b[1])
-     */
-
-    c1 = (int)(bn_add_words(t, r, &(r[n2]), n2));
-
-    if (neg) { /* if t[32] is negative */
-        c1 -= (int)(bn_sub_words(&(t[n2]), t, &(t[n2]), n2));
-    } else {
-        /* Might have a carry */
-        c1 += (int)(bn_add_words(&(t[n2]), &(t[n2]), t, n2));
-    }
-
-    /*-
-     * t[32] holds (a[0]-a[1])*(b[1]-b[0])+(a[0]*b[0])+(a[1]*b[1])
-     * r[10] holds (a[0]*b[0])
-     * r[32] holds (b[1]*b[1])
-     * c1 holds the carry bits
-     */
-    c1 += (int)(bn_add_words(&(r[n]), &(r[n]), &(t[n2]), n2));
-    if (c1) {
-        p = &(r[n + n2]);
-        lo = *p;
-        ln = (lo + c1) & BN_MASK2;
-        *p = ln;
-
-        /*
-         * The overflow will stop before we over write words we should not
-         * overwrite
-         */
-        if (ln < (BN_ULONG)c1) {
-            do {
-                p++;
-                lo = *p;
-                ln = (lo + 1) & BN_MASK2;
-                *p = ln;
-            } while (ln == 0);
-        }
-    }
-}
-
-/*
- * n+tn is the word length t needs to be n*4 is size, as does r
- */
-/* tnX may not be negative but less than n */
-void bn_mul_part_recursive(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n,
-    int tna, int tnb, BN_ULONG *t)
-{
-    int i, j, n2 = n * 2;
-    int c1, c2, neg;
-    BN_ULONG ln, lo, *p;
-
-    if (n < 8) {
-        bn_mul_normal(r, a, n + tna, b, n + tnb);
-        return;
-    }
-
-    /* r=(a[0]-a[1])*(b[1]-b[0]) */
-    c1 = bn_cmp_part_words(a, &(a[n]), tna, n - tna);
-    c2 = bn_cmp_part_words(&(b[n]), b, tnb, tnb - n);
-    neg = 0;
-    switch (c1 * 3 + c2) {
-    case -4:
-        bn_sub_part_words(t, &(a[n]), a, tna, tna - n); /* - */
-        bn_sub_part_words(&(t[n]), b, &(b[n]), tnb, n - tnb); /* - */
-        break;
-    case -3:
-    case -2:
-        bn_sub_part_words(t, &(a[n]), a, tna, tna - n); /* - */
-        bn_sub_part_words(&(t[n]), &(b[n]), b, tnb, tnb - n); /* + */
-        neg = 1;
-        break;
-    case -1:
-    case 0:
-    case 1:
-    case 2:
-        bn_sub_part_words(t, a, &(a[n]), tna, n - tna); /* + */
-        bn_sub_part_words(&(t[n]), b, &(b[n]), tnb, n - tnb); /* - */
-        neg = 1;
-        break;
-    case 3:
-    case 4:
-        bn_sub_part_words(t, a, &(a[n]), tna, n - tna);
-        bn_sub_part_words(&(t[n]), &(b[n]), b, tnb, tnb - n);
-        break;
-    }
-    /*
-     * The zero case isn't yet implemented here. The speedup would probably
-     * be negligible.
-     */
-#if 0
-    if (n == 4) {
-        bn_mul_comba4(&(t[n2]), t, &(t[n]));
-        bn_mul_comba4(r, a, b);
-        bn_mul_normal(&(r[n2]), &(a[n]), tn, &(b[n]), tn);
-        memset(&r[n2 + tn * 2], 0, sizeof(*r) * (n2 - tn * 2));
-    } else
-#endif
-    if (n == 8) {
-        bn_mul_comba8(&(t[n2]), t, &(t[n]));
-        bn_mul_comba8(r, a, b);
-        bn_mul_normal(&(r[n2]), &(a[n]), tna, &(b[n]), tnb);
-        memset(&r[n2 + tna + tnb], 0, sizeof(*r) * (n2 - tna - tnb));
-    } else {
-        p = &(t[n2 * 2]);
-        bn_mul_recursive(&(t[n2]), t, &(t[n]), n, 0, 0, p);
-        bn_mul_recursive(r, a, b, n, 0, 0, p);
-        i = n / 2;
-        /*
-         * If there is only a bottom half to the number, just do it
-         */
-        if (tna > tnb)
-            j = tna - i;
-        else
-            j = tnb - i;
-        if (j == 0) {
-            bn_mul_recursive(&(r[n2]), &(a[n]), &(b[n]),
-                i, tna - i, tnb - i, p);
-            memset(&r[n2 + i * 2], 0, sizeof(*r) * (n2 - i * 2));
-        } else if (j > 0) { /* eg, n == 16, i == 8 and tn == 11 */
-            bn_mul_part_recursive(&(r[n2]), &(a[n]), &(b[n]),
-                i, tna - i, tnb - i, p);
-            memset(&(r[n2 + tna + tnb]), 0,
-                sizeof(BN_ULONG) * (n2 - tna - tnb));
-        } else { /* (j < 0) eg, n == 16, i == 8 and tn == 5 */
-
-            memset(&r[n2], 0, sizeof(*r) * n2);
-            if (tna < BN_MUL_RECURSIVE_SIZE_NORMAL
-                && tnb < BN_MUL_RECURSIVE_SIZE_NORMAL) {
-                bn_mul_normal(&(r[n2]), &(a[n]), tna, &(b[n]), tnb);
-            } else {
-                for (;;) {
-                    i /= 2;
-                    /*
-                     * these simplified conditions work exclusively because
-                     * difference between tna and tnb is 1 or 0
-                     */
-                    if (i < tna || i < tnb) {
-                        bn_mul_part_recursive(&(r[n2]),
-                            &(a[n]), &(b[n]),
-                            i, tna - i, tnb - i, p);
-                        break;
-                    } else if (i == tna || i == tnb) {
-                        bn_mul_recursive(&(r[n2]),
-                            &(a[n]), &(b[n]),
-                            i, tna - i, tnb - i, p);
-                        break;
-                    }
-                }
-            }
-        }
-    }
-
-    /*-
-     * t[32] holds (a[0]-a[1])*(b[1]-b[0]), c1 is the sign
-     * r[10] holds (a[0]*b[0])
-     * r[32] holds (b[1]*b[1])
-     */
-
-    c1 = (int)(bn_add_words(t, r, &(r[n2]), n2));
-
-    if (neg) { /* if t[32] is negative */
-        c1 -= (int)(bn_sub_words(&(t[n2]), t, &(t[n2]), n2));
-    } else {
-        /* Might have a carry */
-        c1 += (int)(bn_add_words(&(t[n2]), &(t[n2]), t, n2));
-    }
-
-    /*-
-     * t[32] holds (a[0]-a[1])*(b[1]-b[0])+(a[0]*b[0])+(a[1]*b[1])
-     * r[10] holds (a[0]*b[0])
-     * r[32] holds (b[1]*b[1])
-     * c1 holds the carry bits
-     */
-    c1 += (int)(bn_add_words(&(r[n]), &(r[n]), &(t[n2]), n2));
-    if (c1) {
-        p = &(r[n + n2]);
-        lo = *p;
-        ln = (lo + c1) & BN_MASK2;
-        *p = ln;
-
-        /*
-         * The overflow will stop before we over write words we should not
-         * overwrite
-         */
-        if (ln < (BN_ULONG)c1) {
-            do {
-                p++;
-                lo = *p;
-                ln = (lo + 1) & BN_MASK2;
-                *p = ln;
-            } while (ln == 0);
-        }
-    }
-}
-
-/*-
- * a and b must be the same size, which is n2.
- * r needs to be n2 words and t needs to be n2*2
- */
-void bn_mul_low_recursive(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n2,
-    BN_ULONG *t)
-{
-    int n = n2 / 2;
-
-    bn_mul_recursive(r, a, b, n, 0, 0, &(t[0]));
-    if (n >= BN_MUL_LOW_RECURSIVE_SIZE_NORMAL) {
-        bn_mul_low_recursive(&(t[0]), &(a[0]), &(b[n]), n, &(t[n2]));
-        bn_add_words(&(r[n]), &(r[n]), &(t[0]), n);
-        bn_mul_low_recursive(&(t[0]), &(a[n]), &(b[0]), n, &(t[n2]));
-        bn_add_words(&(r[n]), &(r[n]), &(t[0]), n);
-    } else {
-        bn_mul_low_normal(&(t[0]), &(a[0]), &(b[n]), n);
-        bn_mul_low_normal(&(t[n]), &(a[n]), &(b[0]), n);
-        bn_add_words(&(r[n]), &(r[n]), &(t[0]), n);
-        bn_add_words(&(r[n]), &(r[n]), &(t[n]), n);
-    }
-}
-#endif /* BN_RECURSION */
-
 int BN_mul(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, BN_CTX *ctx)
 {
     int ret = bn_mul_fixed_top(r, a, b, ctx);
@@ -626,66 +143,3 @@ err:
     BN_CTX_end(ctx);
     return ret;
 }
-
-void bn_mul_normal(BN_ULONG *r, BN_ULONG *a, int na, BN_ULONG *b, int nb)
-{
-    BN_ULONG *rr;
-
-    if (na < nb) {
-        int itmp;
-        BN_ULONG *ltmp;
-
-        itmp = na;
-        na = nb;
-        nb = itmp;
-        ltmp = a;
-        a = b;
-        b = ltmp;
-    }
-    rr = &(r[na]);
-    if (nb <= 0) {
-        (void)bn_mul_words(r, a, na, 0);
-        return;
-    } else
-        rr[0] = bn_mul_words(r, a, na, b[0]);
-
-    for (;;) {
-        if (--nb <= 0)
-            return;
-        rr[1] = bn_mul_add_words(&(r[1]), a, na, b[1]);
-        if (--nb <= 0)
-            return;
-        rr[2] = bn_mul_add_words(&(r[2]), a, na, b[2]);
-        if (--nb <= 0)
-            return;
-        rr[3] = bn_mul_add_words(&(r[3]), a, na, b[3]);
-        if (--nb <= 0)
-            return;
-        rr[4] = bn_mul_add_words(&(r[4]), a, na, b[4]);
-        rr += 4;
-        r += 4;
-        b += 4;
-    }
-}
-
-void bn_mul_low_normal(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n)
-{
-    bn_mul_words(r, a, n, b[0]);
-
-    for (;;) {
-        if (--n <= 0)
-            return;
-        bn_mul_add_words(&(r[1]), a, n, b[1]);
-        if (--n <= 0)
-            return;
-        bn_mul_add_words(&(r[2]), a, n, b[2]);
-        if (--n <= 0)
-            return;
-        bn_mul_add_words(&(r[3]), a, n, b[3]);
-        if (--n <= 0)
-            return;
-        bn_mul_add_words(&(r[4]), a, n, b[4]);
-        r += 4;
-        b += 4;
-    }
-}
diff --git a/crypto/bn/bnw_mul.c b/crypto/bn/bnw_mul.c
new file mode 100644
index 00000000000..616bea535f9
--- /dev/null
+++ b/crypto/bn/bnw_mul.c
@@ -0,0 +1,415 @@
+/*
+ * Copyright 1995-2025 The OpenSSL Project Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License 2.0 (the "License").  You may not use
+ * this file except in compliance with the License.  You can obtain a copy
+ * in the file LICENSE in the source distribution or at
+ * https://www.openssl.org/source/license.html
+ */
+
+#include <assert.h>
+#include "bn_local.h"
+
+#ifdef BN_RECURSION
+/*
+ * Karatsuba recursive multiplication algorithm (cf. Knuth, The Art of
+ * Computer Programming, Vol. 2)
+ */
+
+/*-
+ * r is 2*n2 words in size,
+ * a and b are both n2 words in size.
+ * n2 must be a power of 2.
+ * We multiply and return the result.
+ * t must be 2*n2 words in size
+ * We calculate
+ * a[0]*b[0]
+ * a[0]*b[0]+a[1]*b[1]+(a[0]-a[1])*(b[1]-b[0])
+ * a[1]*b[1]
+ */
+/* dnX may not be positive, but n2/2+dnX has to be */
+void bn_mul_recursive(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b, int n2,
+    int dna, int dnb, BN_ULONG *t)
+{
+    int n = n2 / 2, c1, c2;
+    int tna = n + dna, tnb = n + dnb;
+    unsigned int neg, zero;
+    BN_ULONG ln, lo, *p;
+
+#ifdef BN_MUL_COMBA
+#if 0
+    if (n2 == 4) {
+        bn_mul_comba4(r, a, b);
+        return;
+    }
+#endif
+    /*
+     * Only call bn_mul_comba 8 if n2 == 8 and the two arrays are complete
+     * [steve]
+     */
+    if (n2 == 8 && dna == 0 && dnb == 0) {
+        bn_mul_comba8(r, a, b);
+        return;
+    }
+#endif /* BN_MUL_COMBA */
+    /* Else do normal multiply */
+    if (n2 < BN_MUL_RECURSIVE_SIZE_NORMAL) {
+        bn_mul_normal(r, a, n2 + dna, b, n2 + dnb);
+        if ((dna + dnb) < 0)
+            memset(&r[2 * n2 + dna + dnb], 0,
+                sizeof(BN_ULONG) * -(dna + dnb));
+        return;
+    }
+    /* r=(a[0]-a[1])*(b[1]-b[0]) */
+    c1 = bn_cmp_part_words(a, &(a[n]), tna, n - tna);
+    c2 = bn_cmp_part_words(&(b[n]), b, tnb, tnb - n);
+    zero = neg = 0;
+    switch (c1 * 3 + c2) {
+    case -4:
+        bn_sub_part_words(t, &(a[n]), a, tna, tna - n); /* - */
+        bn_sub_part_words(&(t[n]), b, &(b[n]), tnb, n - tnb); /* - */
+        break;
+    case -3:
+        zero = 1;
+        break;
+    case -2:
+        bn_sub_part_words(t, &(a[n]), a, tna, tna - n); /* - */
+        bn_sub_part_words(&(t[n]), &(b[n]), b, tnb, tnb - n); /* + */
+        neg = 1;
+        break;
+    case -1:
+    case 0:
+    case 1:
+        zero = 1;
+        break;
+    case 2:
+        bn_sub_part_words(t, a, &(a[n]), tna, n - tna); /* + */
+        bn_sub_part_words(&(t[n]), b, &(b[n]), tnb, n - tnb); /* - */
+        neg = 1;
+        break;
+    case 3:
+        zero = 1;
+        break;
+    case 4:
+        bn_sub_part_words(t, a, &(a[n]), tna, n - tna);
+        bn_sub_part_words(&(t[n]), &(b[n]), b, tnb, tnb - n);
+        break;
+    }
+
+#ifdef BN_MUL_COMBA
+    if (n == 4 && dna == 0 && dnb == 0) { /* XXX: bn_mul_comba4 could take
+                                           * extra args to do this well */
+        if (!zero)
+            bn_mul_comba4(&(t[n2]), t, &(t[n]));
+        else
+            memset(&t[n2], 0, sizeof(*t) * 8);
+
+        bn_mul_comba4(r, a, b);
+        bn_mul_comba4(&(r[n2]), &(a[n]), &(b[n]));
+    } else if (n == 8 && dna == 0 && dnb == 0) { /* XXX: bn_mul_comba8 could
+                                                  * take extra args to do
+                                                  * this well */
+        if (!zero)
+            bn_mul_comba8(&(t[n2]), t, &(t[n]));
+        else
+            memset(&t[n2], 0, sizeof(*t) * 16);
+
+        bn_mul_comba8(r, a, b);
+        bn_mul_comba8(&(r[n2]), &(a[n]), &(b[n]));
+    } else
+#endif /* BN_MUL_COMBA */
+    {
+        p = &(t[n2 * 2]);
+        if (!zero)
+            bn_mul_recursive(&(t[n2]), t, &(t[n]), n, 0, 0, p);
+        else
+            memset(&t[n2], 0, sizeof(*t) * n2);
+        bn_mul_recursive(r, a, b, n, 0, 0, p);
+        bn_mul_recursive(&(r[n2]), &(a[n]), &(b[n]), n, dna, dnb, p);
+    }
+
+    /*-
+     * t[32] holds (a[0]-a[1])*(b[1]-b[0]), c1 is the sign
+     * r[10] holds (a[0]*b[0])
+     * r[32] holds (b[1]*b[1])
+     */
+
+    c1 = (int)(bn_add_words(t, r, &(r[n2]), n2));
+
+    if (neg) { /* if t[32] is negative */
+        c1 -= (int)(bn_sub_words(&(t[n2]), t, &(t[n2]), n2));
+    } else {
+        /* Might have a carry */
+        c1 += (int)(bn_add_words(&(t[n2]), &(t[n2]), t, n2));
+    }
+
+    /*-
+     * t[32] holds (a[0]-a[1])*(b[1]-b[0])+(a[0]*b[0])+(a[1]*b[1])
+     * r[10] holds (a[0]*b[0])
+     * r[32] holds (b[1]*b[1])
+     * c1 holds the carry bits
+     */
+    c1 += (int)(bn_add_words(&(r[n]), &(r[n]), &(t[n2]), n2));
+    if (c1) {
+        p = &(r[n + n2]);
+        lo = *p;
+        ln = (lo + c1) & BN_MASK2;
+        *p = ln;
+
+        /*
+         * The overflow will stop before we over write words we should not
+         * overwrite
+         */
+        if (ln < (BN_ULONG)c1) {
+            do {
+                p++;
+                lo = *p;
+                ln = (lo + 1) & BN_MASK2;
+                *p = ln;
+            } while (ln == 0);
+        }
+    }
+}
+
+/*
+ * n+tn is the word length t needs to be n*4 is size, as does r
+ */
+/* tnX may not be negative but less than n */
+void bn_mul_part_recursive(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b,
+    int n, int tna, int tnb, BN_ULONG *t)
+{
+    int i, j, n2 = n * 2;
+    int c1, c2, neg;
+    BN_ULONG ln, lo, *p;
+
+    if (n < 8) {
+        bn_mul_normal(r, a, n + tna, b, n + tnb);
+        return;
+    }
+
+    /* r=(a[0]-a[1])*(b[1]-b[0]) */
+    c1 = bn_cmp_part_words(a, &(a[n]), tna, n - tna);
+    c2 = bn_cmp_part_words(&(b[n]), b, tnb, tnb - n);
+    neg = 0;
+    switch (c1 * 3 + c2) {
+    case -4:
+        bn_sub_part_words(t, &(a[n]), a, tna, tna - n); /* - */
+        bn_sub_part_words(&(t[n]), b, &(b[n]), tnb, n - tnb); /* - */
+        break;
+    case -3:
+    case -2:
+        bn_sub_part_words(t, &(a[n]), a, tna, tna - n); /* - */
+        bn_sub_part_words(&(t[n]), &(b[n]), b, tnb, tnb - n); /* + */
+        neg = 1;
+        break;
+    case -1:
+    case 0:
+    case 1:
+    case 2:
+        bn_sub_part_words(t, a, &(a[n]), tna, n - tna); /* + */
+        bn_sub_part_words(&(t[n]), b, &(b[n]), tnb, n - tnb); /* - */
+        neg = 1;
+        break;
+    case 3:
+    case 4:
+        bn_sub_part_words(t, a, &(a[n]), tna, n - tna);
+        bn_sub_part_words(&(t[n]), &(b[n]), b, tnb, tnb - n);
+        break;
+    }
+    /*
+     * The zero case isn't yet implemented here. The speedup would probably
+     * be negligible.
+     */
+#if 0
+    if (n == 4) {
+        bn_mul_comba4(&(t[n2]), t, &(t[n]));
+        bn_mul_comba4(r, a, b);
+        bn_mul_normal(&(r[n2]), &(a[n]), tn, &(b[n]), tn);
+        memset(&r[n2 + tn * 2], 0, sizeof(*r) * (n2 - tn * 2));
+    } else
+#endif
+    if (n == 8) {
+        bn_mul_comba8(&(t[n2]), t, &(t[n]));
+        bn_mul_comba8(r, a, b);
+        bn_mul_normal(&(r[n2]), &(a[n]), tna, &(b[n]), tnb);
+        memset(&r[n2 + tna + tnb], 0, sizeof(*r) * (n2 - tna - tnb));
+    } else {
+        p = &(t[n2 * 2]);
+        bn_mul_recursive(&(t[n2]), t, &(t[n]), n, 0, 0, p);
+        bn_mul_recursive(r, a, b, n, 0, 0, p);
+        i = n / 2;
+        /*
+         * If there is only a bottom half to the number, just do it
+         */
+        if (tna > tnb)
+            j = tna - i;
+        else
+            j = tnb - i;
+        if (j == 0) {
+            bn_mul_recursive(&(r[n2]), &(a[n]), &(b[n]),
+                i, tna - i, tnb - i, p);
+            memset(&r[n2 + i * 2], 0, sizeof(*r) * (n2 - i * 2));
+        } else if (j > 0) { /* eg, n == 16, i == 8 and tn == 11 */
+            bn_mul_part_recursive(&(r[n2]), &(a[n]), &(b[n]),
+                i, tna - i, tnb - i, p);
+            memset(&(r[n2 + tna + tnb]), 0,
+                sizeof(BN_ULONG) * (n2 - tna - tnb));
+        } else { /* (j < 0) eg, n == 16, i == 8 and tn == 5 */
+
+            memset(&r[n2], 0, sizeof(*r) * n2);
+            if (tna < BN_MUL_RECURSIVE_SIZE_NORMAL
+                && tnb < BN_MUL_RECURSIVE_SIZE_NORMAL) {
+                bn_mul_normal(&(r[n2]), &(a[n]), tna, &(b[n]), tnb);
+            } else {
+                for (;;) {
+                    i /= 2;
+                    /*
+                     * these simplified conditions work exclusively because
+                     * difference between tna and tnb is 1 or 0
+                     */
+                    if (i < tna || i < tnb) {
+                        bn_mul_part_recursive(&(r[n2]),
+                            &(a[n]), &(b[n]),
+                            i, tna - i, tnb - i, p);
+                        break;
+                    } else if (i == tna || i == tnb) {
+                        bn_mul_recursive(&(r[n2]),
+                            &(a[n]), &(b[n]),
+                            i, tna - i, tnb - i, p);
+                        break;
+                    }
+                }
+            }
+        }
+    }
+
+    /*-
+     * t[32] holds (a[0]-a[1])*(b[1]-b[0]), c1 is the sign
+     * r[10] holds (a[0]*b[0])
+     * r[32] holds (b[1]*b[1])
+     */
+
+    c1 = (int)(bn_add_words(t, r, &(r[n2]), n2));
+
+    if (neg) { /* if t[32] is negative */
+        c1 -= (int)(bn_sub_words(&(t[n2]), t, &(t[n2]), n2));
+    } else {
+        /* Might have a carry */
+        c1 += (int)(bn_add_words(&(t[n2]), &(t[n2]), t, n2));
+    }
+
+    /*-
+     * t[32] holds (a[0]-a[1])*(b[1]-b[0])+(a[0]*b[0])+(a[1]*b[1])
+     * r[10] holds (a[0]*b[0])
+     * r[32] holds (b[1]*b[1])
+     * c1 holds the carry bits
+     */
+    c1 += (int)(bn_add_words(&(r[n]), &(r[n]), &(t[n2]), n2));
+    if (c1) {
+        p = &(r[n + n2]);
+        lo = *p;
+        ln = (lo + c1) & BN_MASK2;
+        *p = ln;
+
+        /*
+         * The overflow will stop before we over write words we should not
+         * overwrite
+         */
+        if (ln < (BN_ULONG)c1) {
+            do {
+                p++;
+                lo = *p;
+                ln = (lo + 1) & BN_MASK2;
+                *p = ln;
+            } while (ln == 0);
+        }
+    }
+}
+
+/*-
+ * a and b must be the same size, which is n2.
+ * r needs to be n2 words and t needs to be n2*2
+ */
+void bn_mul_low_recursive(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b,
+    int n2, BN_ULONG *t)
+{
+    int n = n2 / 2;
+
+    bn_mul_recursive(r, a, b, n, 0, 0, &(t[0]));
+    if (n >= BN_MUL_LOW_RECURSIVE_SIZE_NORMAL) {
+        bn_mul_low_recursive(&(t[0]), &(a[0]), &(b[n]), n, &(t[n2]));
+        bn_add_words(&(r[n]), &(r[n]), &(t[0]), n);
+        bn_mul_low_recursive(&(t[0]), &(a[n]), &(b[0]), n, &(t[n2]));
+        bn_add_words(&(r[n]), &(r[n]), &(t[0]), n);
+    } else {
+        bn_mul_low_normal(&(t[0]), &(a[0]), &(b[n]), n);
+        bn_mul_low_normal(&(t[n]), &(a[n]), &(b[0]), n);
+        bn_add_words(&(r[n]), &(r[n]), &(t[0]), n);
+        bn_add_words(&(r[n]), &(r[n]), &(t[n]), n);
+    }
+}
+#endif /* BN_RECURSION */
+
+void bn_mul_normal(BN_ULONG *r, const BN_ULONG *a, int na, const BN_ULONG *b,
+    int nb)
+{
+    BN_ULONG *rr;
+
+    if (na < nb) {
+        int itmp;
+        const BN_ULONG *ltmp;
+
+        itmp = na;
+        na = nb;
+        nb = itmp;
+        ltmp = a;
+        a = b;
+        b = ltmp;
+    }
+    rr = &(r[na]);
+    if (nb <= 0) {
+        (void)bn_mul_words(r, a, na, 0);
+        return;
+    } else
+        rr[0] = bn_mul_words(r, a, na, b[0]);
+
+    for (;;) {
+        if (--nb <= 0)
+            return;
+        rr[1] = bn_mul_add_words(&(r[1]), a, na, b[1]);
+        if (--nb <= 0)
+            return;
+        rr[2] = bn_mul_add_words(&(r[2]), a, na, b[2]);
+        if (--nb <= 0)
+            return;
+        rr[3] = bn_mul_add_words(&(r[3]), a, na, b[3]);
+        if (--nb <= 0)
+            return;
+        rr[4] = bn_mul_add_words(&(r[4]), a, na, b[4]);
+        rr += 4;
+        r += 4;
+        b += 4;
+    }
+}
+
+void bn_mul_low_normal(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b, int n)
+{
+    bn_mul_words(r, a, n, b[0]);
+
+    for (;;) {
+        if (--n <= 0)
+            return;
+        bn_mul_add_words(&(r[1]), a, n, b[1]);
+        if (--n <= 0)
+            return;
+        bn_mul_add_words(&(r[2]), a, n, b[2]);
+        if (--n <= 0)
+            return;
+        bn_mul_add_words(&(r[3]), a, n, b[3]);
+        if (--n <= 0)
+            return;
+        bn_mul_add_words(&(r[4]), a, n, b[4]);
+        r += 4;
+        b += 4;
+    }
+}
diff --git a/crypto/bn/bnw_sub.c b/crypto/bn/bnw_sub.c
new file mode 100644
index 00000000000..f989878ce01
--- /dev/null
+++ b/crypto/bn/bnw_sub.c
@@ -0,0 +1,155 @@
+/*
+ * Copyright 1995-2025 The OpenSSL Project Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License 2.0 (the "License").  You may not use
+ * this file except in compliance with the License.  You can obtain a copy
+ * in the file LICENSE in the source distribution or at
+ * https://www.openssl.org/source/license.html
+ */
+
+#include <assert.h>
+#include "bn_local.h"
+
+#if defined(OPENSSL_NO_ASM) || !defined(OPENSSL_BN_ASM_PART_WORDS)
+/*
+ * Here follow specialised variants of bn_add_words() and bn_sub_words().
+ * They have the property of performing operations on arrays of different
+ * sizes.  The sizes of those arrays are expressed through cl, which is the
+ * common length ( basically, min(len(a),len(b)) ), and dl, which is the
+ * delta between the two lengths, calculated as len(a)-len(b).  All lengths
+ * are counted in BN_ULONGs.  For the operations that require a result array
+ * as parameter, it must have the length cl+abs(dl).
+ *
+ * These functions should probably end up in bn_asm.c as soon as there are
+ * assembler counterparts for the systems that use assembler files.
+ */
+
+/*
+ * Subtract b from a, where the two arrays may differ in length, and store
+ * the difference in r.  Returns the final borrow.
+ *
+ * cl is the number of words both operands have in common and dl is
+ * len(a) - len(b), so r receives cl + abs(dl) words:
+ *
+ *  - the low cl words are handled by bn_sub_words();
+ *  - if dl < 0, b is the longer operand and the missing words of a count
+ *    as zero: r[i] = 0 - b[i] - borrow;
+ *  - if dl > 0, a is the longer operand and the missing words of b count
+ *    as zero: r[i] = a[i] - borrow, which turns into a plain copy once
+ *    the borrow is gone.
+ */
+BN_ULONG bn_sub_part_words(BN_ULONG *r,
+    const BN_ULONG *a, const BN_ULONG *b,
+    int cl, int dl)
+{
+    BN_ULONG c, t;
+
+    assert(cl >= 0);
+    c = bn_sub_words(r, a, b, cl);
+
+    if (dl == 0)
+        return c;
+
+    /* Skip the common part; only the longer operand has words left */
+    r += cl;
+    a += cl;
+    b += cl;
+
+    if (dl < 0) {
+        /* A non-zero word of b forces a borrow, a zero word passes c on */
+        for (;;) {
+            t = b[0];
+            r[0] = (0 - t - c) & BN_MASK2;
+            if (t != 0)
+                c = 1;
+            if (++dl >= 0)
+                break;
+
+            t = b[1];
+            r[1] = (0 - t - c) & BN_MASK2;
+            if (t != 0)
+                c = 1;
+            if (++dl >= 0)
+                break;
+
+            t = b[2];
+            r[2] = (0 - t - c) & BN_MASK2;
+            if (t != 0)
+                c = 1;
+            if (++dl >= 0)
+                break;
+
+            t = b[3];
+            r[3] = (0 - t - c) & BN_MASK2;
+            if (t != 0)
+                c = 1;
+            if (++dl >= 0)
+                break;
+
+            b += 4;
+            r += 4;
+        }
+    } else {
+        /*
+         * Propagate the borrow, four words per round.  c is only tested per
+         * round; if it clears mid-round the remaining words are just copied.
+         */
+        while (c) {
+            t = a[0];
+            r[0] = (t - c) & BN_MASK2;
+            if (t != 0)
+                c = 0;
+            if (--dl <= 0)
+                break;
+
+            t = a[1];
+            r[1] = (t - c) & BN_MASK2;
+            if (t != 0)
+                c = 0;
+            if (--dl <= 0)
+                break;
+
+            t = a[2];
+            r[2] = (t - c) & BN_MASK2;
+            if (t != 0)
+                c = 0;
+            if (--dl <= 0)
+                break;
+
+            t = a[3];
+            r[3] = (t - c) & BN_MASK2;
+            if (t != 0)
+                c = 0;
+            if (--dl <= 0)
+                break;
+
+            a += 4;
+            r += 4;
+        }
+        /*
+         * dl > 0 means the loop above stopped on a round boundary (or never
+         * ran), so a and r already point at the first word left to copy.
+         */
+        if (dl > 0) {
+            for (;;) {
+                r[0] = a[0];
+                if (--dl <= 0)
+                    break;
+                r[1] = a[1];
+                if (--dl <= 0)
+                    break;
+                r[2] = a[2];
+                if (--dl <= 0)
+                    break;
+                r[3] = a[3];
+                if (--dl <= 0)
+                    break;
+
+                a += 4;
+                r += 4;
+            }
+        }
+    }
+    return c;
+}
+#endif
diff --git a/crypto/bn/build.info b/crypto/bn/build.info
index 01e98e45443..10680e11004 100644
--- a/crypto/bn/build.info
+++ b/crypto/bn/build.info
@@ -105,11 +105,18 @@ IF[{- !$disabled{asm} -}]
   ENDIF
 ENDIF
 
-$COMMON=bn_add.c bn_div.c bn_exp.c bn_lib.c bn_ctx.c bn_mul.c \
+$COMMON_BN=bn_add.c bn_div.c bn_exp.c bn_lib.c bn_ctx.c bn_mul.c \
         bn_mod.c bn_conv.c bn_rand.c bn_shift.c bn_word.c bn_blind.c \
         bn_kron.c bn_sqrt.c bn_gcd.c bn_prime.c bn_sqr.c \
         bn_recp.c bn_mont.c bn_mpi.c bn_exp2.c bn_gf2m.c bn_nist.c \
         bn_intern.c bn_dh.c bn_rsa_fips186_5.c bn_const.c
+# bnw_*.c is a growing collection of files with routines that operate on
+# BN_ULONG only.  They were helper routines dispersed in bn_*.c, and are
+# often routines that don't have an assembler implementation, and therefore
+# didn't fit into bn_asm.c.
+$COMMON_BNW=bnw_sub.c bnw_mul.c
+$COMMON=$COMMON_BN $COMMON_BNW
+
 SOURCE[../../libcrypto]=$COMMON $BNASM bn_print.c bn_err.c bn_srp.c
 DEFINE[../../libcrypto]=$BNDEF
 IF[{- !$disabled{'deprecated-0.9.8'} -}]