From: Xi Ruoyao Date: Sat, 25 Nov 2023 08:14:35 +0000 (+0800) Subject: bn_nist: Fix strict-aliasing violations in little-endian optimizations X-Git-Tag: openssl-3.1.5~125 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=aa78b15b71c8996189d8bc02facbbf7975b75246;p=thirdparty%2Fopenssl.git bn_nist: Fix strict-aliasing violations in little-endian optimizations The little-endian optimization is doing some type-punning in a way violating the C standard aliasing rule by loading or storing through a lvalue with type "unsigned int" but the memory location has effective type "unsigned long" or "unsigned long long" (BN_ULONG). Convert these accesses to use memcpy instead, as memcpy is defined as-is "accessing through the lvalues with type char" and char is aliasing with all types. GCC does a good job to optimize away the temporary copies introduced with the change. Ideally copying to a temporary unsigned int array, doing the calculation, and then copying back to `r_d` will make the code look better, but unfortunately GCC would fail to optimize away this temporary array then. I've not touched the LE optimization in BN_nist_mod_224 because it's guarded by BN_BITS2!=64, then BN_BITS2 must be 32 and BN_ULONG must be unsigned int, thus there is no aliasing issue in BN_nist_mod_224. Fixes #12247. Reviewed-by: Tom Cosgrove Reviewed-by: Tomas Mraz (Merged from https://github.com/openssl/openssl/pull/22816) (cherry picked from commit 990d9ff508070757912c000f0c4132dbb5a0bb0a) --- diff --git a/crypto/bn/bn_nist.c b/crypto/bn/bn_nist.c index 3d4d9a2fb2d..d761e5702da 100644 --- a/crypto/bn/bn_nist.c +++ b/crypto/bn/bn_nist.c @@ -319,6 +319,28 @@ static void nist_cp_bn(BN_ULONG *dst, const BN_ULONG *src, int top) # endif #endif /* BN_BITS2 != 64 */ +#ifdef NIST_INT64 +/* Helpers to load/store a 32-bit word (uint32_t) from/into a memory + * location and avoid potential aliasing issue. */ +static ossl_inline uint32_t load_u32(const void *ptr) +{ + uint32_t tmp; + + memcpy(&tmp, ptr, sizeof(tmp)); + return tmp; +} + +static ossl_inline void store_lo32(void *ptr, NIST_INT64 val) +{ + /* A cast is needed for big-endian system: on a 32-bit BE system + * NIST_INT64 may be defined as well if the compiler supports 64-bit + * long long. */ + uint32_t tmp = (uint32_t)val; + + memcpy(ptr, &tmp, sizeof(tmp)); +} +#endif /* NIST_INT64 */ + #define nist_set_192(to, from, a1, a2, a3) \ { \ bn_cp_64(to, 0, from, (a3) - 3) \ @@ -374,42 +396,42 @@ int BN_nist_mod_192(BIGNUM *r, const BIGNUM *a, const BIGNUM *field, unsigned int *rp = (unsigned int *)r_d; const unsigned int *bp = (const unsigned int *)buf.ui; - acc = rp[0]; + acc = load_u32(&rp[0]); acc += bp[3 * 2 - 6]; acc += bp[5 * 2 - 6]; - rp[0] = (unsigned int)acc; + store_lo32(&rp[0], acc); acc >>= 32; - acc += rp[1]; + acc += load_u32(&rp[1]); acc += bp[3 * 2 - 5]; acc += bp[5 * 2 - 5]; - rp[1] = (unsigned int)acc; + store_lo32(&rp[1], acc); acc >>= 32; - acc += rp[2]; + acc += load_u32(&rp[2]); acc += bp[3 * 2 - 6]; acc += bp[4 * 2 - 6]; acc += bp[5 * 2 - 6]; - rp[2] = (unsigned int)acc; + store_lo32(&rp[2], acc); acc >>= 32; - acc += rp[3]; + acc += load_u32(&rp[3]); acc += bp[3 * 2 - 5]; acc += bp[4 * 2 - 5]; acc += bp[5 * 2 - 5]; - rp[3] = (unsigned int)acc; + store_lo32(&rp[3], acc); acc >>= 32; - acc += rp[4]; + acc += load_u32(&rp[4]); acc += bp[4 * 2 - 6]; acc += bp[5 * 2 - 6]; - rp[4] = (unsigned int)acc; + store_lo32(&rp[4], acc); acc >>= 32; - acc += rp[5]; + acc += load_u32(&rp[5]); acc += bp[4 * 2 - 5]; acc += bp[5 * 2 - 5]; - rp[5] = (unsigned int)acc; + store_lo32(&rp[5], acc); carry = (int)(acc >> 32); } @@ -683,36 +705,36 @@ int BN_nist_mod_256(BIGNUM *r, const BIGNUM *a, const BIGNUM *field, unsigned int *rp = (unsigned int *)r_d; const unsigned int *bp = (const unsigned int *)buf.ui; - acc = rp[0]; + acc = load_u32(&rp[0]); acc += bp[8 - 8]; acc += bp[9 - 8]; acc -= bp[11 - 8]; acc -= bp[12 - 8]; acc -= bp[13 - 8]; acc -= bp[14 - 8]; - rp[0] = (unsigned int)acc; + store_lo32(&rp[0], acc); acc >>= 32; - acc += rp[1]; + acc += load_u32(&rp[1]); acc += bp[9 - 8]; acc += bp[10 - 8]; acc -= bp[12 - 8]; acc -= bp[13 - 8]; acc -= bp[14 - 8]; acc -= bp[15 - 8]; - rp[1] = (unsigned int)acc; + store_lo32(&rp[1], acc); acc >>= 32; - acc += rp[2]; + acc += load_u32(&rp[2]); acc += bp[10 - 8]; acc += bp[11 - 8]; acc -= bp[13 - 8]; acc -= bp[14 - 8]; acc -= bp[15 - 8]; - rp[2] = (unsigned int)acc; + store_lo32(&rp[2], acc); acc >>= 32; - acc += rp[3]; + acc += load_u32(&rp[3]); acc += bp[11 - 8]; acc += bp[11 - 8]; acc += bp[12 - 8]; @@ -721,10 +743,10 @@ int BN_nist_mod_256(BIGNUM *r, const BIGNUM *a, const BIGNUM *field, acc -= bp[15 - 8]; acc -= bp[8 - 8]; acc -= bp[9 - 8]; - rp[3] = (unsigned int)acc; + store_lo32(&rp[3], acc); acc >>= 32; - acc += rp[4]; + acc += load_u32(&rp[4]); acc += bp[12 - 8]; acc += bp[12 - 8]; acc += bp[13 - 8]; @@ -732,10 +754,10 @@ int BN_nist_mod_256(BIGNUM *r, const BIGNUM *a, const BIGNUM *field, acc += bp[14 - 8]; acc -= bp[9 - 8]; acc -= bp[10 - 8]; - rp[4] = (unsigned int)acc; + store_lo32(&rp[4], acc); acc >>= 32; - acc += rp[5]; + acc += load_u32(&rp[5]); acc += bp[13 - 8]; acc += bp[13 - 8]; acc += bp[14 - 8]; @@ -743,10 +765,10 @@ int BN_nist_mod_256(BIGNUM *r, const BIGNUM *a, const BIGNUM *field, acc += bp[15 - 8]; acc -= bp[10 - 8]; acc -= bp[11 - 8]; - rp[5] = (unsigned int)acc; + store_lo32(&rp[5], acc); acc >>= 32; - acc += rp[6]; + acc += load_u32(&rp[6]); acc += bp[14 - 8]; acc += bp[14 - 8]; acc += bp[15 - 8]; @@ -755,10 +777,10 @@ int BN_nist_mod_256(BIGNUM *r, const BIGNUM *a, const BIGNUM *field, acc += bp[13 - 8]; acc -= bp[8 - 8]; acc -= bp[9 - 8]; - rp[6] = (unsigned int)acc; + store_lo32(&rp[6], acc); acc >>= 32; - acc += rp[7]; + acc += load_u32(&rp[7]); acc += bp[15 - 8]; acc += bp[15 - 8]; acc += bp[15 - 8]; @@ -767,7 +789,7 @@ int BN_nist_mod_256(BIGNUM *r, const BIGNUM *a, const BIGNUM *field, acc -= bp[11 - 8]; acc -= bp[12 - 8]; acc -= bp[13 - 8]; - rp[7] = (unsigned int)acc; + store_lo32(&rp[7], acc); carry = (int)(acc >> 32); } @@ -920,32 +942,32 @@ int BN_nist_mod_384(BIGNUM *r, const BIGNUM *a, const BIGNUM *field, unsigned int *rp = (unsigned int *)r_d; const unsigned int *bp = (const unsigned int *)buf.ui; - acc = rp[0]; + acc = load_u32(&rp[0]); acc += bp[12 - 12]; acc += bp[21 - 12]; acc += bp[20 - 12]; acc -= bp[23 - 12]; - rp[0] = (unsigned int)acc; + store_lo32(&rp[0], acc); acc >>= 32; - acc += rp[1]; + acc += load_u32(&rp[1]); acc += bp[13 - 12]; acc += bp[22 - 12]; acc += bp[23 - 12]; acc -= bp[12 - 12]; acc -= bp[20 - 12]; - rp[1] = (unsigned int)acc; + store_lo32(&rp[1], acc); acc >>= 32; - acc += rp[2]; + acc += load_u32(&rp[2]); acc += bp[14 - 12]; acc += bp[23 - 12]; acc -= bp[13 - 12]; acc -= bp[21 - 12]; - rp[2] = (unsigned int)acc; + store_lo32(&rp[2], acc); acc >>= 32; - acc += rp[3]; + acc += load_u32(&rp[3]); acc += bp[15 - 12]; acc += bp[12 - 12]; acc += bp[20 - 12]; @@ -953,10 +975,10 @@ int BN_nist_mod_384(BIGNUM *r, const BIGNUM *a, const BIGNUM *field, acc -= bp[14 - 12]; acc -= bp[22 - 12]; acc -= bp[23 - 12]; - rp[3] = (unsigned int)acc; + store_lo32(&rp[3], acc); acc >>= 32; - acc += rp[4]; + acc += load_u32(&rp[4]); acc += bp[21 - 12]; acc += bp[21 - 12]; acc += bp[16 - 12]; @@ -967,10 +989,10 @@ int BN_nist_mod_384(BIGNUM *r, const BIGNUM *a, const BIGNUM *field, acc -= bp[15 - 12]; acc -= bp[23 - 12]; acc -= bp[23 - 12]; - rp[4] = (unsigned int)acc; + store_lo32(&rp[4], acc); acc >>= 32; - acc += rp[5]; + acc += load_u32(&rp[5]); acc += bp[22 - 12]; acc += bp[22 - 12]; acc += bp[17 - 12]; @@ -979,10 +1001,10 @@ int BN_nist_mod_384(BIGNUM *r, const BIGNUM *a, const BIGNUM *field, acc += bp[21 - 12]; acc += bp[23 - 12]; acc -= bp[16 - 12]; - rp[5] = (unsigned int)acc; + store_lo32(&rp[5], acc); acc >>= 32; - acc += rp[6]; + acc += load_u32(&rp[6]); acc += bp[23 - 12]; acc += bp[23 - 12]; acc += bp[18 - 12]; @@ -990,48 +1012,48 @@ int BN_nist_mod_384(BIGNUM *r, const BIGNUM *a, const BIGNUM *field, acc += bp[14 - 12]; acc += bp[22 - 12]; acc -= bp[17 - 12]; - rp[6] = (unsigned int)acc; + store_lo32(&rp[6], acc); acc >>= 32; - acc += rp[7]; + acc += load_u32(&rp[7]); acc += bp[19 - 12]; acc += bp[16 - 12]; acc += bp[15 - 12]; acc += bp[23 - 12]; acc -= bp[18 - 12]; - rp[7] = (unsigned int)acc; + store_lo32(&rp[7], acc); acc >>= 32; - acc += rp[8]; + acc += load_u32(&rp[8]); acc += bp[20 - 12]; acc += bp[17 - 12]; acc += bp[16 - 12]; acc -= bp[19 - 12]; - rp[8] = (unsigned int)acc; + store_lo32(&rp[8], acc); acc >>= 32; - acc += rp[9]; + acc += load_u32(&rp[9]); acc += bp[21 - 12]; acc += bp[18 - 12]; acc += bp[17 - 12]; acc -= bp[20 - 12]; - rp[9] = (unsigned int)acc; + store_lo32(&rp[9], acc); acc >>= 32; - acc += rp[10]; + acc += load_u32(&rp[10]); acc += bp[22 - 12]; acc += bp[19 - 12]; acc += bp[18 - 12]; acc -= bp[21 - 12]; - rp[10] = (unsigned int)acc; + store_lo32(&rp[10], acc); acc >>= 32; - acc += rp[11]; + acc += load_u32(&rp[11]); acc += bp[23 - 12]; acc += bp[20 - 12]; acc += bp[19 - 12]; acc -= bp[22 - 12]; - rp[11] = (unsigned int)acc; + store_lo32(&rp[11], acc); carry = (int)(acc >> 32); }