As it turns out, the copying CRC32 variant _is_ slower when compiled
with generic flags. The reason is mainly the extra stack spills and the
lack of operations we can overlap with the moves. However, when
compiling for a target with AVX512, which doubles the number of vector
registers from 16 to 32, we no longer have to eat all of these costly
stack spills, and we can overlap the moves with a three-operand XOR
(vpternlog). Guarding this conditionally means that if a Linux
distribution wants to compile with -march=x86-64-v4, they get all the
upsides of this.
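
For illustration, a minimal sketch of the three-operand XOR in question
(not the code in this patch): with plain SSE, a three-way XOR takes two
dependent two-operand instructions, while with AVX512VL it folds into a
single vpternlog, whose 0x96 immediate encodes a ^ b ^ c:

    #include <immintrin.h>

    /* Plain SSE: a chain of two two-operand XORs, which keeps more
     * values live and forces spills once the 16 xmm registers run out. */
    static inline __m128i xor3_sse(__m128i a, __m128i b, __m128i c) {
        return _mm_xor_si128(_mm_xor_si128(a, b), c);
    }

    #if defined(__AVX512VL__)
    /* AVX512VL: a single 128-bit vpternlogq; the 0x96 immediate is the
     * truth table for a ^ b ^ c across the three inputs. */
    static inline __m128i xor3_vl(__m128i a, __m128i b, __m128i c) {
        return _mm_ternarylogic_epi64(a, b, c, 0x96);
    }
    #endif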
Notably, this code is not actually used if you happen to have a CPU
that supports 512-bit-wide clmul (VPCLMULQDQ), so this helps a somewhat
narrow range of targets: mostly the earlier AVX512 implementations that
predate Ice Lake.
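
Sketched as a dispatcher (all names below are hypothetical; zlib-ng's
real functable differs), the precedence described above would look
roughly like:

    #include <stddef.h>
    #include <stdint.h>

    typedef uint32_t (*crc32_fn)(uint32_t crc, const uint8_t *buf, size_t len);

    /* Hypothetical feature flags and implementations, for illustration. */
    extern int cpu_has_vpclmulqdq, cpu_has_sse41;
    extern uint32_t crc32_vpclmulqdq(uint32_t, const uint8_t *, size_t);
    extern uint32_t crc32_chorba_sse(uint32_t, const uint8_t *, size_t);
    extern uint32_t crc32_portable(uint32_t, const uint8_t *, size_t);

    static crc32_fn crc32_select(void) {
        if (cpu_has_vpclmulqdq)
            return crc32_vpclmulqdq;  /* 512-bit clmul wins when present */
        if (cpu_has_sse41)
            return crc32_chorba_sse;  /* the path this patch touches */
        return crc32_portable;        /* generic fallback */
    }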
We also must guard on AVX512VL, as specifying just AVX512F makes GCC
generate vpternlog instructions only at 512-bit width, so a bunch of
packing and unpacking between 512-bit and the narrower registers has to
occur, absolutely killing runtime. It's only with AVX512VL that there's
a 128-bit-wide vpternlog.
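
Concretely, with the standard Intel intrinsics (the helpers below are
illustrative, not from this patch):

    #include <immintrin.h>

    #if defined(__AVX512F__) && !defined(__AVX512VL__)
    /* Baseline AVX512F: only the 512-bit form exists, so 128-bit data
     * has to be moved into and out of zmm registers around each use. */
    static inline __m512i xor3_zmm(__m512i a, __m512i b, __m512i c) {
        return _mm512_ternarylogic_epi64(a, b, c, 0x96);
    }
    #endif

    #if defined(__AVX512VL__)
    /* AVX512VL adds the 128-bit (and 256-bit) forms, so the xmm-sized
     * chorba data never has to leave xmm registers. */
    static inline __m128i xor3_xmm(__m128i a, __m128i b, __m128i c) {
        return _mm_ternarylogic_epi64(a, b, c, 0x96);
    }
    #endif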
* the stream at the following offsets: 6, 9, 10, 16, 20, 22,
* 24, 25, 27, 28, 30, 31, 32 - this is detailed in the paper
* as "generator_64_bits_unrolled_8" */
+#if !defined(COPY) || defined(__AVX512VL__)
while (len >= 512 + 64 + 16*8) {
__m128i chorba8 = _mm_load_si128((__m128i *)src);
__m128i chorba7 = _mm_load_si128((__m128i *)src + 1);
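/* ... remaining loads and folding XORs of the 512-byte chorba
 * block elided in this excerpt ... */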
len -= 512;
src += 512;
}
+#endif
while (len >= 64) {
len -= 64;