do {
__m256i value, result;
- value = _mm256_loadu_si256((__m256i *)table);
+ value = _mm256_load_si256((__m256i *)table);
result = _mm256_subs_epu16(value, wsize);
- _mm256_storeu_si256((__m256i *)table, result);
+ _mm256_store_si256((__m256i *)table, result);
table -= 16;
entries -= 16;
public:
void SetUp(const ::benchmark::State& state) {
- l0 = (uint16_t *)zng_alloc(HASH_SIZE * sizeof(uint16_t));
+ l0 = (uint16_t *)zng_alloc_aligned(HASH_SIZE * sizeof(uint16_t), 64);
for (uint32_t i = 0; i < HASH_SIZE; i++) {
l0[i] = rand();
}
- l1 = (uint16_t *)zng_alloc(MAX_RANDOM_INTS * sizeof(uint16_t));
+ l1 = (uint16_t *)zng_alloc_aligned(MAX_RANDOM_INTS * sizeof(uint16_t), 64);
for (int32_t i = 0; i < MAX_RANDOM_INTS; i++) {
l1[i] = rand();
}
void TearDown(const ::benchmark::State& state) {
- zng_free(l0);
- zng_free(l1);
+ zng_free_aligned(l0);
+ zng_free_aligned(l1);
free(s_g);
}
};
} \
Bench(state, fptr); \
} \
- BENCHMARK_REGISTER_F(slide_hash, name)->RangeMultiplier(2)->Range(1024, MAX_RANDOM_INTS);
+ BENCHMARK_REGISTER_F(slide_hash, name)->RangeMultiplier(2)->Range(512, MAX_RANDOM_INTS);
BENCHMARK_SLIDEHASH(c, slide_hash_c, 1);