From 2f22115709fc7ebcfa40af3367a508fbbd2f71e9 Mon Sep 17 00:00:00 2001
From: Eric Biggers
Date: Sun, 2 Nov 2025 15:42:04 -0800
Subject: [PATCH] lib/crypto: x86/blake2s: Fix 32-bit arg treated as 64-bit

In the C code, the 'inc' argument to the assembly functions
blake2s_compress_ssse3() and blake2s_compress_avx512() is declared with
type u32, matching blake2s_compress(). The assembly code then reads it
from the 64-bit %rcx. However, the ABI doesn't guarantee zero-extension
to 64 bits, nor do gcc or clang guarantee it. Therefore, fix these
functions to read this argument from the 32-bit %ecx.

In theory, this bug could have caused the wrong 'inc' value to be used,
causing incorrect BLAKE2s hashes. In practice, probably not: I've fixed
essentially this same bug in many other assembly files too, but there's
never been a real report of it having caused a problem. In x86_64, all
writes to 32-bit registers are zero-extended to 64 bits. That results in
zero-extension in nearly all situations. I've only been able to
demonstrate a lack of zero-extension with a somewhat contrived example
involving truncation, e.g. when the C code has a u64 variable holding
0x1234567800000040 and passes it as a u32 expecting it to be truncated
to 0x40 (64). But that's not what the real code does, of course.

Fixes: ed0356eda153 ("crypto: blake2s - x86_64 SIMD implementation") Cc: stable@vger.kernel.org Reviewed-by: Ard Biesheuvel Link: https://lore.kernel.org/r/20251102234209.62133-2-ebiggers@kernel.org Signed-off-by: Eric Biggers --- lib/crypto/x86/blake2s-core.S | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/crypto/x86/blake2s-core.S b/lib/crypto/x86/blake2s-core.S index ef8e9f427aab3..093e7814f3879 100644 --- a/lib/crypto/x86/blake2s-core.S +++ b/lib/crypto/x86/blake2s-core.S @@ -52,7 +52,7 @@ SYM_FUNC_START(blake2s_compress_ssse3) movdqa ROT16(%rip),%xmm12 movdqa ROR328(%rip),%xmm13 movdqu 0x20(%rdi),%xmm14 - movq %rcx,%xmm15 + movd %ecx,%xmm15 leaq SIGMA+0xa0(%rip),%r8 jmp .Lbeginofloop .align 32 @@ -176,7 +176,7 @@ SYM_FUNC_START(blake2s_compress_avx512) vmovdqu (%rdi),%xmm0 vmovdqu 0x10(%rdi),%xmm1 vmovdqu 0x20(%rdi),%xmm4 - vmovq %rcx,%xmm5 + vmovd %ecx,%xmm5 vmovdqa IV(%rip),%xmm14 vmovdqa IV+16(%rip),%xmm15 jmp .Lblake2s_compress_avx512_mainloop -- 2.47.3