-/* benchmark_crc32_fold_copy.cc -- benchmark for crc32 implementations doing folded copying
+/* benchmark_crc32_copy.cc -- benchmark for crc32 implementations with copying
* Copyright (C) 2025 Hans Kristian Rosbach
* For conditions of distribution and use, see copyright notice in zlib.h
*/
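+// Largest benchmarked length (32 KiB) plus headroom for the 0..14 byte misalignment offsets used in Bench()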
#define BUFSIZE (32768 + 16 + 16)
-// We have no function that gives us direct access to these, so we have a local implementation for benchmarks
-static void crc32_fold_copy_braid(crc32_fold *crc, uint8_t *dst, const uint8_t *src, size_t len) {
- crc->value = crc32_braid(crc->value, src, len);
- memcpy(dst, src, len);
-}
-#ifndef WITHOUT_CHORBA
-static void crc32_fold_copy_chorba(crc32_fold *crc, uint8_t *dst, const uint8_t *src, size_t len) {
- crc->value = crc32_chorba(crc->value, src, len);
- memcpy(dst, src, len);
-}
-#endif
-#ifndef WITHOUT_CHORBA_SSE
-# ifdef X86_SSE2
- static void crc32_fold_copy_chorba_sse2(crc32_fold *crc, uint8_t *dst, const uint8_t *src, size_t len) {
- crc->value = crc32_chorba_sse2(crc->value, src, len);
- memcpy(dst, src, len);
- }
-# endif
-# ifdef X86_SSE41
- static void crc32_fold_copy_chorba_sse41(crc32_fold *crc, uint8_t *dst, const uint8_t *src, size_t len) {
- crc->value = crc32_chorba_sse41(crc->value, src, len);
- memcpy(dst, src, len);
- }
-# endif
-#endif
-
-class crc32_fc: public benchmark::Fixture {
+class crc32_copy: public benchmark::Fixture {
protected:
uint32_t *testdata;
uint8_t *dstbuf;
- uint32_t crc;
public:
void SetUp(const ::benchmark::State&) {
}
}
- void Bench(benchmark::State& state, crc32_fold_reset_func fold_reset, crc32_fold_copy_func fold_copy,
- crc32_fold_final_func fold_final) {
- ALIGNED_(16) crc32_fold crc_st;
+ void Bench(benchmark::State& state, crc32_copy_func copyfunc) {
int misalign = 0;
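+ // Cycle the offset through 0..14 below so both aligned and misaligned src/dst paths are exercised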
- // Prepare an initial crc state
- fold_reset(&crc_st);
- crc = 0;
+ uint32_t crc = 0;
- // Benchmark the CRC32 fold copy operation
+ // Benchmark the CRC32 copy operation
for (auto _ : state) {
- fold_copy(&crc_st, dstbuf + misalign, (const unsigned char*)testdata + misalign, (size_t)state.range(0));
+ crc = copyfunc(crc, dstbuf + misalign, (const uint8_t *)testdata + misalign, (size_t)state.range(0));
misalign++;
if (misalign > 14)
misalign = 0;
}
- // Finalize the CRC32 calculation
- crc = fold_final(&crc_st);
-
// Prevent the result from being optimized away
benchmark::DoNotOptimize(crc);
}
}
};
-#define BENCHMARK_CRC32_FOLD(name, resfunc, copyfunc, finfunc, support_flag) \
- BENCHMARK_DEFINE_F(crc32_fc, name)(benchmark::State& state) { \
+#define BENCHMARK_CRC32_COPY(name, copyfunc, support_flag) \
+ BENCHMARK_DEFINE_F(crc32_copy, name)(benchmark::State& state) { \
if (!(support_flag)) { \
state.SkipWithError("CPU does not support " #name); \
} \
- Bench(state, resfunc, copyfunc, finfunc); \
+ Bench(state, copyfunc); \
} \
- BENCHMARK_REGISTER_F(crc32_fc, name)->Arg(16)->Arg(48)->Arg(192)->Arg(512)->Arg(4<<10)->Arg(16<<10)->Arg(32<<10);
+ BENCHMARK_REGISTER_F(crc32_copy, name)->Arg(16)->Arg(48)->Arg(192)->Arg(512)->Arg(4<<10)->Arg(16<<10)->Arg(32<<10);
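+// Benchmarked lengths range from 16 bytes up to the full 32 KiB buffer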
-// Generic
-BENCHMARK_CRC32_FOLD(braid_c, crc32_fold_reset_c, crc32_fold_copy_braid, crc32_fold_final_c, 1)
+// Base test
+BENCHMARK_CRC32_COPY(braid, crc32_copy_braid, 1)
#ifdef DISABLE_RUNTIME_CPU_DETECTION
// Native
- BENCHMARK_CRC32_FOLD(native, native_crc32_fold_reset, native_crc32_fold_copy, native_crc32_fold_final, 1)
+ BENCHMARK_CRC32_COPY(native, native_crc32_copy, 1)
#else
-
// Optimized functions
# ifndef WITHOUT_CHORBA
- BENCHMARK_CRC32_FOLD(chorba_c, crc32_fold_reset_c, crc32_fold_copy_chorba, crc32_fold_final_c, 1)
-# endif
-# ifdef ARM_CRC32
- BENCHMARK_CRC32_FOLD(armv8, crc32_fold_reset_c, crc32_fold_copy_armv8, crc32_fold_final_c, test_cpu_features.arm.has_crc32)
+ BENCHMARK_CRC32_COPY(chorba, crc32_copy_chorba, 1)
# endif
# ifndef WITHOUT_CHORBA_SSE
# ifdef X86_SSE2
- BENCHMARK_CRC32_FOLD(chorba_sse2, crc32_fold_reset_c, crc32_fold_copy_chorba_sse2, crc32_fold_final_c, test_cpu_features.x86.has_sse2)
+ BENCHMARK_CRC32_COPY(chorba_sse2, crc32_copy_chorba_sse2, test_cpu_features.x86.has_sse2)
# endif
# ifdef X86_SSE41
- BENCHMARK_CRC32_FOLD(chorba_sse41, crc32_fold_reset_c, crc32_fold_copy_chorba_sse41, crc32_fold_final_c, test_cpu_features.x86.has_sse41)
-# endif
+ BENCHMARK_CRC32_COPY(chorba_sse41, crc32_copy_chorba_sse41, test_cpu_features.x86.has_sse41)
+# endif
+# endif
+# ifdef ARM_CRC32
+ BENCHMARK_CRC32_COPY(armv8, crc32_copy_armv8, test_cpu_features.arm.has_crc32)
+# endif
+# ifdef LOONGARCH_CRC
+ BENCHMARK_CRC32_COPY(loongarch, crc32_copy_loongarch64, test_cpu_features.loongarch.has_crc)
+# endif
+# ifdef POWER8_VSX_CRC32
+ BENCHMARK_CRC32_COPY(power8, crc32_copy_power8, test_cpu_features.power.has_arch_2_07)
+# endif
+# ifdef RISCV_CRC32_ZBC
+ BENCHMARK_CRC32_COPY(riscv, crc32_copy_riscv64_zbc, test_cpu_features.riscv.has_zbc)
+# endif
+# ifdef S390_CRC32_VX
+ BENCHMARK_CRC32_COPY(vx, crc32_copy_s390_vx, test_cpu_features.s390.has_vx)
# endif
# ifdef X86_PCLMULQDQ_CRC
- BENCHMARK_CRC32_FOLD(pclmulqdq, crc32_fold_pclmulqdq_reset, crc32_fold_pclmulqdq_copy, crc32_fold_pclmulqdq_final, test_cpu_features.x86.has_pclmulqdq)
+ BENCHMARK_CRC32_COPY(pclmulqdq, crc32_copy_pclmulqdq, test_cpu_features.x86.has_pclmulqdq)
# endif
# ifdef X86_VPCLMULQDQ_CRC
- BENCHMARK_CRC32_FOLD(vpclmulqdq, crc32_fold_pclmulqdq_reset, crc32_fold_vpclmulqdq_copy, crc32_fold_pclmulqdq_final, (test_cpu_features.x86.has_pclmulqdq && test_cpu_features.x86.has_avx512_common && test_cpu_features.x86.has_vpclmulqdq))
-# endif
-# ifdef LOONGARCH_CRC
- BENCHMARK_CRC32_FOLD(loongarch64, crc32_fold_reset_c, crc32_fold_copy_loongarch64, crc32_fold_final_c, test_cpu_features.loongarch.has_crc)
+ BENCHMARK_CRC32_COPY(vpclmulqdq, crc32_copy_vpclmulqdq, (test_cpu_features.x86.has_pclmulqdq && test_cpu_features.x86.has_avx512_common && test_cpu_features.x86.has_vpclmulqdq))
# endif
#endif
-/* test_crc32_fold_copy.cc -- test for crc32 implementations doing folded copying
+/* test_crc32_copy.cc -- test for crc32 implementations with copying
* Copyright (C) 2025 Hans Kristian Rosbach
* For conditions of distribution and use, see copyright notice in zlib.h
*/
#define BUFSIZE 615336U
-class crc32_fc_variant : public ::testing::TestWithParam<crc32_test> {
+class crc32_copy_variant : public ::testing::TestWithParam<crc32_test> {
protected:
uint8_t dstbuf[BUFSIZE];
public:
- /* Ensure that crc32 fold copy functions returns the correct crc and copies the data */
- void crc32_fold_test(size_t minlen, int onlyzero, crc32_fold_reset_func fold_reset, crc32_fold_copy_func fold_copy,
- crc32_fold_final_func fold_final, crc32_test params) {
- ALIGNED_(16) crc32_fold crc_st;
- uint32_t crc;
+ /* Ensure that crc32 copy functions return the correct crc and copy the data */
+ void crc32_copy_test(crc32_copy_func copyfunc, crc32_test params) {
+ uint32_t crc = 0;
ASSERT_LE(params.len, BUFSIZE);
- // Some optimized functions cannot take a crc value as start point
- // and some have minimum length requirements
- if (params.buf == NULL || params.len < minlen || (onlyzero && params.crc != 0)) {
+ if (params.buf == NULL) {
GTEST_SKIP();
}
- fold_reset(&crc_st);
- crc_st.value = params.crc;
-
- fold_copy(&crc_st, dstbuf, params.buf, params.len);
- crc = fold_final(&crc_st);
+ crc = copyfunc(params.crc, dstbuf, params.buf, params.len);
EXPECT_EQ(crc, params.expect);
EXPECT_EQ(0, memcmp(params.buf, dstbuf, params.len));
}
};
-INSTANTIATE_TEST_SUITE_P(crc32_fc, crc32_fc_variant, testing::ValuesIn(crc32_tests));
+INSTANTIATE_TEST_SUITE_P(crc32_copy, crc32_copy_variant, testing::ValuesIn(crc32_tests));
-#define TEST_CRC32_FOLD(name, minlen, onlyzero, resfunc, copyfunc, finfunc, support_flag) \
- TEST_P(crc32_fc_variant, name) { \
+#define TEST_CRC32_COPY(name, copyfunc, support_flag) \
+ TEST_P(crc32_copy_variant, name) { \
if (!(support_flag)) { \
GTEST_SKIP(); \
return; \
} \
- crc32_fold_test(minlen, onlyzero, resfunc, copyfunc, finfunc, GetParam()); \
+ crc32_copy_test(copyfunc, GetParam()); \
}
-// Generic test
-TEST_CRC32_FOLD(generic, 0, 0, crc32_fold_reset_c, crc32_fold_copy_c, crc32_fold_final_c, 1)
+// Base test
+TEST_CRC32_COPY(braid, crc32_copy_braid, 1)
#ifdef DISABLE_RUNTIME_CPU_DETECTION
// Native test
- TEST_CRC32_FOLD(native, 16, 1, native_crc32_fold_reset, native_crc32_fold_copy, native_crc32_fold_final, 1)
+ TEST_CRC32_COPY(native, native_crc32_copy, 1)
#else
-
- // Tests of optimized functions
+ // Optimized functions
+# ifndef WITHOUT_CHORBA
+ TEST_CRC32_COPY(chorba, crc32_copy_chorba, 1)
+# endif
+# ifndef WITHOUT_CHORBA_SSE
+# ifdef X86_SSE2
+ TEST_CRC32_COPY(chorba_sse2, crc32_copy_chorba_sse2, test_cpu_features.x86.has_sse2)
+# endif
+# ifdef X86_SSE41
+ TEST_CRC32_COPY(chorba_sse41, crc32_copy_chorba_sse41, test_cpu_features.x86.has_sse41)
+# endif
+# endif
# ifdef ARM_CRC32
- TEST_CRC32_FOLD(armv8, 0, 0, crc32_fold_reset_c, crc32_fold_copy_armv8, crc32_fold_final_c, test_cpu_features.arm.has_crc32)
+ TEST_CRC32_COPY(armv8, crc32_copy_armv8, test_cpu_features.arm.has_crc32)
+# endif
+# ifdef LOONGARCH_CRC
+ TEST_CRC32_COPY(loongarch, crc32_copy_loongarch64, test_cpu_features.loongarch.has_crc)
+# endif
+# ifdef RISCV_CRC32_ZBC
+ TEST_CRC32_COPY(riscv, crc32_copy_riscv64_zbc, test_cpu_features.riscv.has_zbc)
# endif
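+ // POWER8 and S390 VX copy variants, mirroring the benchmark registrations in
+ // benchmark_crc32_copy.cc; assumes the same function names and feature flags apply here
+# ifdef POWER8_VSX_CRC32
+ TEST_CRC32_COPY(power8, crc32_copy_power8, test_cpu_features.power.has_arch_2_07)
+# endif
+# ifdef S390_CRC32_VX
+ TEST_CRC32_COPY(vx, crc32_copy_s390_vx, test_cpu_features.s390.has_vx)
+# endif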
# ifdef X86_PCLMULQDQ_CRC
- // Is 16 bytes len the minimum for pclmul functions?
- TEST_CRC32_FOLD(pclmulqdq, 16, 1, crc32_fold_pclmulqdq_reset, crc32_fold_pclmulqdq_copy, crc32_fold_pclmulqdq_final, test_cpu_features.x86.has_pclmulqdq)
+ TEST_CRC32_COPY(pclmulqdq, crc32_copy_pclmulqdq, test_cpu_features.x86.has_pclmulqdq)
# endif
# ifdef X86_VPCLMULQDQ_CRC
- TEST_CRC32_FOLD(vpclmulqdq, 16, 1, crc32_fold_pclmulqdq_reset, crc32_fold_vpclmulqdq_copy, crc32_fold_pclmulqdq_final, (test_cpu_features.x86.has_pclmulqdq && test_cpu_features.x86.has_avx512_common && test_cpu_features.x86.has_vpclmulqdq))
-# endif
-# ifdef LOONGARCH_CRC
- TEST_CRC32_FOLD(loongarch64, 0, 0, crc32_fold_reset_c, crc32_fold_copy_loongarch64, crc32_fold_final_c, test_cpu_features.loongarch.has_crc)
+ TEST_CRC32_COPY(vpclmulqdq, crc32_copy_vpclmulqdq, (test_cpu_features.x86.has_pclmulqdq && test_cpu_features.x86.has_avx512_common && test_cpu_features.x86.has_vpclmulqdq))
# endif
#endif