Use aligned load/store in AVX2 Slide Hash.

author Hans Kristian Rosbach <hk-git@circlestorm.org>

Sun, 3 Aug 2025 21:04:20 +0000 (23:04 +0200)

committer Hans Kristian Rosbach <hk-git@circlestorm.org>

Sat, 23 Aug 2025 17:15:02 +0000 (19:15 +0200)
author Hans Kristian Rosbach <hk-git@circlestorm.org>
Sun, 3 Aug 2025 21:04:20 +0000 (23:04 +0200)
committer Hans Kristian Rosbach <hk-git@circlestorm.org>
Sat, 23 Aug 2025 17:15:02 +0000 (19:15 +0200)
diff --git a/arch/x86/slide_hash_avx2.c b/arch/x86/slide_hash_avx2.c

index 853347323436eb43e14653a44fbee979e2ed8bb7..1000012392a773babaec4048d176bd639f504734 100644 (file)
--- a/arch/x86/slide_hash_avx2.c
+++ b/arch/x86/slide_hash_avx2.c
@@ -21,9 +21,9 @@ static inline void slide_hash_chain(Pos *table, uint32_t entries, const __m256i
      do {
          __m256i value, result;
  
-        value = _mm256_loadu_si256((__m256i *)table);
+        value = _mm256_load_si256((__m256i *)table);
          result = _mm256_subs_epu16(value, wsize);
-        _mm256_storeu_si256((__m256i *)table, result);
+        _mm256_store_si256((__m256i *)table, result);
  
          table -= 16;
          entries -= 16;
diff --git a/test/benchmarks/benchmark_slidehash.cc b/test/benchmarks/benchmark_slidehash.cc

index 9d98420b16724b620168496e6a5fa79901ff737c..53e9516817770141a7e4d03d53de7b15143d03cf 100644 (file)
--- a/test/benchmarks/benchmark_slidehash.cc
+++ b/test/benchmarks/benchmark_slidehash.cc
@@ -25,13 +25,13 @@ private:
  
  public:
      void SetUp(const ::benchmark::State& state) {
-        l0 = (uint16_t *)zng_alloc(HASH_SIZE * sizeof(uint16_t));
+        l0 = (uint16_t *)zng_alloc_aligned(HASH_SIZE * sizeof(uint16_t), 64);
  
          for (uint32_t i = 0; i < HASH_SIZE; i++) {
              l0[i] = rand();
          }
  
-        l1 = (uint16_t *)zng_alloc(MAX_RANDOM_INTS * sizeof(uint16_t));
+        l1 = (uint16_t *)zng_alloc_aligned(MAX_RANDOM_INTS * sizeof(uint16_t), 64);
  
          for (int32_t i = 0; i < MAX_RANDOM_INTS; i++) {
              l1[i] = rand();
@@ -53,8 +53,8 @@ public:
      }
  
      void TearDown(const ::benchmark::State& state) {
-        zng_free(l0);
-        zng_free(l1);
+        zng_free_aligned(l0);
+        zng_free_aligned(l1);
          free(s_g);
      }
  };
@@ -66,7 +66,7 @@ public:
          } \
          Bench(state, fptr); \
      } \
-    BENCHMARK_REGISTER_F(slide_hash, name)->RangeMultiplier(2)->Range(1024, MAX_RANDOM_INTS);
+    BENCHMARK_REGISTER_F(slide_hash, name)->RangeMultiplier(2)->Range(512, MAX_RANDOM_INTS);
  
  BENCHMARK_SLIDEHASH(c, slide_hash_c, 1);
author	Hans Kristian Rosbach <hk-git@circlestorm.org>
	Sun, 3 Aug 2025 21:04:20 +0000 (23:04 +0200)
committer	Hans Kristian Rosbach <hk-git@circlestorm.org>
	Sat, 23 Aug 2025 17:15:02 +0000 (19:15 +0200)
arch/x86/slide_hash_avx2.c		patch | blob | blame | history
test/benchmarks/benchmark_slidehash.cc		patch | blob | blame | history