From: Mika T. Lindqvist Date: Sat, 31 Jan 2026 19:44:33 +0000 (+0200) Subject: Slide 32 hash entries per loop iteration when using AVX2. X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=fd0536885c4eefa89b6feedd708a65f7c9988a63;p=thirdparty%2Fzlib-ng.git Slide 32 hash entries per loop iteration when using AVX2. --- diff --git a/arch/x86/slide_hash_avx2.c b/arch/x86/slide_hash_avx2.c index 523dda88f..241ea305e 100644 --- a/arch/x86/slide_hash_avx2.c +++ b/arch/x86/slide_hash_avx2.c @@ -19,17 +19,20 @@ static inline void slide_hash_chain(Pos *table, uint32_t entries, const __m256i wsize) { table += entries; - table -= 16; + table -= 32; do { - __m256i value, result; + __m256i value1, value2, result1, result2; - value = _mm256_load_si256((__m256i *)table); - result = _mm256_subs_epu16(value, wsize); - _mm256_store_si256((__m256i *)table, result); + value1 = _mm256_load_si256((__m256i *)table); + value2 = _mm256_load_si256((__m256i *)(table+16)); + result1 = _mm256_subs_epu16(value1, wsize); + result2 = _mm256_subs_epu16(value2, wsize); + _mm256_store_si256((__m256i *)table, result1); + _mm256_store_si256((__m256i *)(table+16), result2); - table -= 16; - entries -= 16; + table -= 32; + entries -= 32; } while (entries > 0); }