From: Hans Kristian Rosbach Date: Sun, 3 Aug 2025 21:04:20 +0000 (+0200) Subject: Use aligned load/store in AVX2 Slide Hash. X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=refs%2Fheads%2Fslide-hash-avx2-aligned-load;p=thirdparty%2Fzlib-ng.git Use aligned load/store in AVX2 Slide Hash. Also test slidehash from 512 bytes, the minimum window size we use. --- diff --git a/arch/x86/slide_hash_avx2.c b/arch/x86/slide_hash_avx2.c index 85334732..10000123 100644 --- a/arch/x86/slide_hash_avx2.c +++ b/arch/x86/slide_hash_avx2.c @@ -21,9 +21,9 @@ static inline void slide_hash_chain(Pos *table, uint32_t entries, const __m256i do { __m256i value, result; - value = _mm256_loadu_si256((__m256i *)table); + value = _mm256_load_si256((__m256i *)table); result = _mm256_subs_epu16(value, wsize); - _mm256_storeu_si256((__m256i *)table, result); + _mm256_store_si256((__m256i *)table, result); table -= 16; entries -= 16; diff --git a/test/benchmarks/benchmark_slidehash.cc b/test/benchmarks/benchmark_slidehash.cc index 9d98420b..53e95168 100644 --- a/test/benchmarks/benchmark_slidehash.cc +++ b/test/benchmarks/benchmark_slidehash.cc @@ -25,13 +25,13 @@ private: public: void SetUp(const ::benchmark::State& state) { - l0 = (uint16_t *)zng_alloc(HASH_SIZE * sizeof(uint16_t)); + l0 = (uint16_t *)zng_alloc_aligned(HASH_SIZE * sizeof(uint16_t), 64); for (uint32_t i = 0; i < HASH_SIZE; i++) { l0[i] = rand(); } - l1 = (uint16_t *)zng_alloc(MAX_RANDOM_INTS * sizeof(uint16_t)); + l1 = (uint16_t *)zng_alloc_aligned(MAX_RANDOM_INTS * sizeof(uint16_t), 64); for (int32_t i = 0; i < MAX_RANDOM_INTS; i++) { l1[i] = rand(); @@ -53,8 +53,8 @@ public: } void TearDown(const ::benchmark::State& state) { - zng_free(l0); - zng_free(l1); + zng_free_aligned(l0); + zng_free_aligned(l1); free(s_g); } }; @@ -66,7 +66,7 @@ public: } \ Bench(state, fptr); \ } \ - BENCHMARK_REGISTER_F(slide_hash, name)->RangeMultiplier(2)->Range(1024, MAX_RANDOM_INTS); + BENCHMARK_REGISTER_F(slide_hash, name)->RangeMultiplier(2)->Range(512, MAX_RANDOM_INTS); BENCHMARK_SLIDEHASH(c, slide_hash_c, 1);