Just before returning, blake2s_compress_ssse3() and
blake2s_compress_avx512() store updated values to the 'h', 't', and 'f'
fields of struct blake2s_ctx. But 'f' is always unchanged (which is
correct; only the C code changes it). So, there's no need to write to
'f'. Use 64-bit stores (movq and vmovq) instead of 128-bit stores
(movdqu and vmovdqu) so that only 't' is written.
Reviewed-by: Ard Biesheuvel <ardb@kernel.org>
Link: https://lore.kernel.org/r/20251102234209.62133-6-ebiggers@kernel.org
Signed-off-by: Eric Biggers <ebiggers@kernel.org>
movdqu %xmm0,(CTX) // Store new h[0..3]
movdqu %xmm1,16(CTX) // Store new h[4..7]
- movdqu %xmm14,32(CTX) // Store new t and f
+ movq %xmm14,32(CTX) // Store new t (f is unchanged)
RET
SYM_FUNC_END(blake2s_compress_ssse3)
vmovdqu %xmm0,(CTX) // Store new h[0..3]
vmovdqu %xmm1,16(CTX) // Store new h[4..7]
- vmovdqu %xmm4,32(CTX) // Store new t and f
+ vmovq %xmm4,32(CTX) // Store new t (f is unchanged)
vzeroupper
RET
SYM_FUNC_END(blake2s_compress_avx512)