From 434efaa2dcee39a8023f72140f1731374cfbae28 Mon Sep 17 00:00:00 2001
From: Hans Kristian Rosbach
Date: Sun, 3 Aug 2025 23:04:20 +0200
Subject: [PATCH] Use aligned load/store in AVX2 Slide Hash.

Also test slidehash from 512 bytes, the minimum window size we use.
---
 arch/x86/slide_hash_avx2.c             |  4 ++--
 test/benchmarks/benchmark_slidehash.cc | 10 +++++-----
 2 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/arch/x86/slide_hash_avx2.c b/arch/x86/slide_hash_avx2.c
index 85334732..10000123 100644
--- a/arch/x86/slide_hash_avx2.c
+++ b/arch/x86/slide_hash_avx2.c
@@ -21,9 +21,9 @@ static inline void slide_hash_chain(Pos *table, uint32_t entries, const __m256i
     do {
         __m256i value, result;
 
-        value = _mm256_loadu_si256((__m256i *)table);
+        value = _mm256_load_si256((__m256i *)table);
         result = _mm256_subs_epu16(value, wsize);
-        _mm256_storeu_si256((__m256i *)table, result);
+        _mm256_store_si256((__m256i *)table, result);
 
         table -= 16;
         entries -= 16;
diff --git a/test/benchmarks/benchmark_slidehash.cc b/test/benchmarks/benchmark_slidehash.cc
index 9d98420b..53e95168 100644
--- a/test/benchmarks/benchmark_slidehash.cc
+++ b/test/benchmarks/benchmark_slidehash.cc
@@ -25,13 +25,13 @@ private:
 
 public:
     void SetUp(const ::benchmark::State& state) {
-        l0 = (uint16_t *)zng_alloc(HASH_SIZE * sizeof(uint16_t));
+        l0 = (uint16_t *)zng_alloc_aligned(HASH_SIZE * sizeof(uint16_t), 64);
 
         for (uint32_t i = 0; i < HASH_SIZE; i++) {
             l0[i] = rand();
         }
 
-        l1 = (uint16_t *)zng_alloc(MAX_RANDOM_INTS * sizeof(uint16_t));
+        l1 = (uint16_t *)zng_alloc_aligned(MAX_RANDOM_INTS * sizeof(uint16_t), 64);
 
         for (int32_t i = 0; i < MAX_RANDOM_INTS; i++) {
             l1[i] = rand();
@@ -53,8 +53,8 @@ public:
     }
 
     void TearDown(const ::benchmark::State& state) {
-        zng_free(l0);
-        zng_free(l1);
+        zng_free_aligned(l0);
+        zng_free_aligned(l1);
         free(s_g);
     }
 };
@@ -66,7 +66,7 @@ public:
         } \
         Bench(state, fptr); \
     } \
-    BENCHMARK_REGISTER_F(slide_hash, name)->RangeMultiplier(2)->Range(1024, MAX_RANDOM_INTS);
+    BENCHMARK_REGISTER_F(slide_hash, name)->RangeMultiplier(2)->Range(512, MAX_RANDOM_INTS);
 
 BENCHMARK_SLIDEHASH(c, slide_hash_c, 1);
 
-- 
2.47.2