From: Viktor Dukhovni Date: Thu, 16 Apr 2026 11:41:07 +0000 (+1000) Subject: Drop value barrier from ML-DSA reduce_once X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=a2079b82ba9b78f8a9e76c2f3387c39c0b44d28a;p=thirdparty%2Fopenssl.git Drop value barrier from ML-DSA reduce_once This mirrors the corresponding code in ML-KEM and works under the same conditions/assumptions. Also adjusted related functions that used two layers of constant_time selects where one suffices (now also matching BoringSSL). Intentionally uses the constant time instrumentation PR as its merge-base, so it is to be merged after that has baked in for a few days and shows working CT tests in daily CI runs. Sample before/after performance pairs and percent throughput increases for one X86_64 CPU: keygens/s sign/s verify/s ML-DSA-44 18728.3 6061.2 23251.6 ML-DSA-44 21077.2 7392.4 27244.3 ML-DSA-44 12.5% 22.0% 17.2% ML-DSA-65 10084.3 3603.0 13988.6 ML-DSA-65 11197.9 4549.7 16208.4 ML-DSA-65 11.0% 26.3% 15.9% ML-DSA-87 7184.8 2917.3 8141.0 ML-DSA-87 8132.4 3693.7 9430.7 ML-DSA-87 13.2% 26.6% 15.8% and here's the same for an Apple silicon M2: keygens/s sign/s verify/s ML-DSA-44 17235.7 3099.3 15744.5 ML-DSA-44 21855.2 4907.6 22849.0 ML-DSA-44 26.8% 58.3% 45.1% ML-DSA-65 9165.8 1908.5 10058.3 ML-DSA-65 11262.7 3069.6 14348.1 ML-DSA-65 22.9% 60.8% 42.6% ML-DSA-87 6596.1 1563.6 6330.8 ML-DSA-87 8404.9 2584.6 8767.6 ML-DSA-87 27.4% 65.3% 38.5% Reviewed-by: Eugene Syromiatnikov Reviewed-by: Nikola Pajkovsky MergeDate: Thu Apr 23 13:55:05 2026 (Merged from https://github.com/openssl/openssl/pull/30864) --- diff --git a/crypto/ml_dsa/ml_dsa_local.h b/crypto/ml_dsa/ml_dsa_local.h index d4f63f7e991..bbaa6dafc75 100644 --- a/crypto/ml_dsa/ml_dsa_local.h +++ b/crypto/ml_dsa/ml_dsa_local.h @@ -101,20 +101,26 @@ int ossl_ml_dsa_poly_decode_expand_mask(POLY *out, const uint8_t *in, size_t in_len, uint32_t gamma1); -/* - * @brief Reduces x mod q in constant time +/*- + * @brief Reduces 0 <= x 
< 2*q, mod q. * i.e. return x < q ? x : x - q; * - * @param x Where x is assumed to be in the range 0 <= x < 2*q + * Subtract |q| if the input is larger, without exposing a side-channel, + * avoiding the "clangover" attack. See |constish_time_true| for a discussion + * on why the value barrier is by default omitted. + * * @returns the difference in the range 0..q-1 */ -static ossl_inline ossl_unused uint32_t reduce_once(uint32_t x) +static ossl_inline ossl_unused __owur uint32_t reduce_once(uint32_t x) { - return constant_time_select_32(constant_time_lt_32(x, ML_DSA_Q), x, x - ML_DSA_Q); + const uint32_t subtracted = x - ML_DSA_Q; + uint32_t mask = constish_time_true(subtracted >> 31); + + return (mask & x) | (~mask & subtracted); } /* - * @brief Calculate The positive value of (a-b) mod q in constant time. + * @brief Calculates the positive value of (a-b) mod q in constant time. * * a - b mod q gives a value in the range -(q-1)..(q-1) * By adding q we get a range of 1..(2q-1). @@ -131,21 +137,25 @@ static ossl_inline ossl_unused uint32_t mod_sub(uint32_t a, uint32_t b) /* * @brief Returns the absolute value in constant time. - * i.e. return is_positive(x) ? x : -x; + * i.e. return is_negative(x) ? -x : x; */ static ossl_inline ossl_unused uint32_t abs_signed(uint32_t x) { - return constant_time_select_32(constant_time_lt_32(x, 0x80000000), x, 0u - x); + uint32_t mask = 0u - (x >> 31); + + return constant_time_select_32(mask, 0u - x, x); } /* * @brief Returns the absolute value modulo q in constant time - * i.e return x > (q - 1) / 2 ? q - x : x; + * i.e return x <= (q-1)/2 ? 
x : q - x; */ static ossl_inline ossl_unused uint32_t abs_mod_prime(uint32_t x) { - return constant_time_select_32(constant_time_lt_32(ML_DSA_Q_MINUS1_DIV2, x), - ML_DSA_Q - x, x); + uint32_t mask = x - ML_DSA_Q_MINUS1_DIV2; + + mask = 0u - (mask >> 31); + return constant_time_select_32(mask, x, ML_DSA_Q - x); } /* @@ -154,7 +164,9 @@ static ossl_inline ossl_unused uint32_t abs_mod_prime(uint32_t x) */ static ossl_inline ossl_unused uint32_t maximum(uint32_t x, uint32_t y) { - return constant_time_select_int(constant_time_lt(x, y), y, x); + uint32_t mask = x - y; + mask = 0u - (mask >> 31); + return constant_time_select_int(mask, y, x); } #endif /* OSSL_CRYPTO_ML_DSA_LOCAL_H */