lib/crypto: x86/poly1305: Fix register corruption in no-SIMD contexts
author		Eric Biggers <ebiggers@kernel.org>
		Sun, 6 Jul 2025 23:10:59 +0000 (16:10 -0700)
committer	Eric Biggers <ebiggers@kernel.org>
		Fri, 11 Jul 2025 21:29:42 +0000 (14:29 -0700)
Restore the SIMD usability check and base conversion that were removed
by commit 318c53ae02f2 ("crypto: x86/poly1305 - Add block-only
interface").

This safety check is cheap, and it is well worth it for the footgun it
eliminates.
While the Poly1305 functions should not be called when SIMD registers
are unusable, if they are anyway, they should just do the right thing
instead of corrupting random tasks' registers and/or computing incorrect
MACs.  Fixing this is also needed for poly1305_kunit to pass.

Just use irq_fpu_usable() instead of the original crypto_simd_usable(),
since poly1305_kunit won't rely on crypto_simd_disabled_for_test.
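
For context, the restored guard follows the usual x86 kernel pattern for
optional SIMD use: only enter SIMD code when irq_fpu_usable() says the
FPU/SIMD registers may be touched, and bracket that code with
kernel_fpu_begin()/kernel_fpu_end().  A minimal sketch of the pattern is
below; the wrapper name poly1305_blocks_dispatch() is made up for
illustration, and the chunking that the real poly1305_blocks_arch() does
between kernel_fpu_begin()/kernel_fpu_end() calls (note the SZ_4K
BUILD_BUG_ON in the diff) is omitted.

/* Illustrative sketch only -- not part of this patch. */
static void poly1305_blocks_dispatch(void *ctx, const u8 *inp,
				     unsigned int len, u32 padbit)
{
	if (static_branch_likely(&poly1305_use_avx) &&
	    irq_fpu_usable()) {
		kernel_fpu_begin();	/* claim the FPU/SIMD state */
		poly1305_blocks_avx(ctx, inp, len, padbit);
		kernel_fpu_end();	/* hand it back */
	} else {
		/* No SIMD here: convert back and stay on the scalar path. */
		convert_to_base2_64(ctx);
		poly1305_blocks_x86_64(ctx, inp, len, padbit);
	}
}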

Fixes: 318c53ae02f2 ("crypto: x86/poly1305 - Add block-only interface")
Cc: stable@vger.kernel.org
Reviewed-by: Ard Biesheuvel <ardb@kernel.org>
Link: https://lore.kernel.org/r/20250706231100.176113-5-ebiggers@kernel.org
Signed-off-by: Eric Biggers <ebiggers@kernel.org>
lib/crypto/x86/poly1305_glue.c

index b7e78a583e07f7dce6f2eca63d9be7c444c07ef5..968d846776319c42039ba7a70d04bc8586ab881f 100644 (file)
@@ -25,6 +25,42 @@ struct poly1305_arch_internal {
        struct { u32 r2, r1, r4, r3; } rn[9];
 };
 
+/*
+ * The AVX code uses base 2^26, while the scalar code uses base 2^64. If we hit
+ * the unfortunate situation of using AVX and then having to go back to scalar
+ * -- because the user is silly and has called the update function from two
+ * separate contexts -- then we need to convert back to the original base before
+ * proceeding. It is possible to reason that the initial reduction below is
+ * sufficient given the implementation invariants. However, for an avoidance of
+ * doubt and because this is not performance critical, we do the full reduction
+ * anyway. Z3 proof of below function: https://xn--4db.cc/ltPtHCKN/py
+ */
+static void convert_to_base2_64(void *ctx)
+{
+       struct poly1305_arch_internal *state = ctx;
+       u32 cy;
+
+       if (!state->is_base2_26)
+               return;
+
+       cy = state->h[0] >> 26; state->h[0] &= 0x3ffffff; state->h[1] += cy;
+       cy = state->h[1] >> 26; state->h[1] &= 0x3ffffff; state->h[2] += cy;
+       cy = state->h[2] >> 26; state->h[2] &= 0x3ffffff; state->h[3] += cy;
+       cy = state->h[3] >> 26; state->h[3] &= 0x3ffffff; state->h[4] += cy;
+       state->hs[0] = ((u64)state->h[2] << 52) | ((u64)state->h[1] << 26) | state->h[0];
+       state->hs[1] = ((u64)state->h[4] << 40) | ((u64)state->h[3] << 14) | (state->h[2] >> 12);
+       state->hs[2] = state->h[4] >> 24;
+       /* Unsigned Less Than: branchlessly produces 1 if a < b, else 0. */
+#define ULT(a, b) ((a ^ ((a ^ b) | ((a - b) ^ b))) >> (sizeof(a) * 8 - 1))
+       cy = (state->hs[2] >> 2) + (state->hs[2] & ~3ULL);
+       state->hs[2] &= 3;
+       state->hs[0] += cy;
+       state->hs[1] += (cy = ULT(state->hs[0], cy));
+       state->hs[2] += ULT(state->hs[1], cy);
+#undef ULT
+       state->is_base2_26 = 0;
+}
+
 asmlinkage void poly1305_block_init_arch(
        struct poly1305_block_state *state,
        const u8 raw_key[POLY1305_BLOCK_SIZE]);
@@ -62,7 +98,9 @@ void poly1305_blocks_arch(struct poly1305_block_state *state, const u8 *inp,
        BUILD_BUG_ON(SZ_4K < POLY1305_BLOCK_SIZE ||
                     SZ_4K % POLY1305_BLOCK_SIZE);
 
-       if (!static_branch_likely(&poly1305_use_avx)) {
+       if (!static_branch_likely(&poly1305_use_avx) ||
+           unlikely(!irq_fpu_usable())) {
+               convert_to_base2_64(ctx);
                poly1305_blocks_x86_64(ctx, inp, len, padbit);
                return;
        }
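
As a quick sanity check of the limb repacking above, the following
standalone userspace sketch (not part of the patch; the limb values and
the main() harness are illustrative only) packs five base 2^26 limbs into
base 2^64 words exactly as convert_to_base2_64() does and confirms that
the represented value is preserved:

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	/* Arbitrary limbs, each < 2^26; h[4] < 2^24 so the value fits in 128 bits. */
	uint32_t h[5] = { 0x3ffffff, 0x1234567, 0x2abcdef, 0x0badf00, 0x00fffff };
	uint64_t hs[3];
	unsigned __int128 val = 0, repacked;
	int i;

	/* Value represented in base 2^26: sum of h[i] * 2^(26*i). */
	for (i = 4; i >= 0; i--)
		val = (val << 26) + h[i];

	/* Same packing as convert_to_base2_64(). */
	hs[0] = ((uint64_t)h[2] << 52) | ((uint64_t)h[1] << 26) | h[0];
	hs[1] = ((uint64_t)h[4] << 40) | ((uint64_t)h[3] << 14) | (h[2] >> 12);
	hs[2] = h[4] >> 24;	/* zero here because h[4] < 2^24 */

	repacked = ((unsigned __int128)hs[1] << 64) | hs[0];
	assert(hs[2] == 0);
	assert(repacked == val);
	printf("base 2^26 -> base 2^64 repacking preserved the value\n");
	return 0;
}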