#endif /* BRAID_W */
/* ========================================================================= */
-Z_INTERNAL uint32_t crc32_braid_internal(uint32_t c, const uint8_t *buf, size_t len) {
+Z_INTERNAL uint32_t crc32_braid(uint32_t crc, const uint8_t *buf, size_t len) {
+ crc = ~crc;
#ifdef BRAID_W
/* If provided enough bytes, do a braided CRC calculation. */
/* Compute the CRC up to a z_word_t boundary. */
size_t align_diff = (size_t)MIN(ALIGN_DIFF(buf, BRAID_W), len);
if (align_diff) {
- c = crc32_copy_small(c, NULL, buf, align_diff, BRAID_W - 1, 0);
+ crc = crc32_copy_small(crc, NULL, buf, align_diff, BRAID_W - 1, 0);
len -= align_diff;
buf += align_diff;
}
#endif
#endif
/* Initialize the CRC for each braid. */
- crc0 = Z_WORD_FROM_LE(c);
+ crc0 = Z_WORD_FROM_LE(crc);
#if BRAID_N > 1
crc1 = 0;
#if BRAID_N > 2
#endif
words += BRAID_N;
Assert(comb <= UINT32_MAX, "comb should fit in uint32_t");
- c = (uint32_t)Z_WORD_FROM_LE(comb);
+ crc = (uint32_t)Z_WORD_FROM_LE(comb);
/* Update the pointer to the remaining bytes to process. */
buf = (const unsigned char *)words;
#endif /* BRAID_W */
/* Complete the computation of the CRC on any remaining bytes. */
- return crc32_copy_small(c, NULL, buf, len, (BRAID_N * BRAID_W) - 1, 0);
-}
-
-Z_INTERNAL uint32_t crc32_braid(uint32_t crc, const uint8_t *buf, size_t len) {
- return ~crc32_braid_internal(~crc, buf, len);
+ return ~crc32_copy_small(crc, NULL, buf, len, (BRAID_N * BRAID_W) - 1, 0);
}
Z_INTERNAL uint32_t crc32_copy_braid(uint32_t crc, uint8_t *dst, const uint8_t *src, size_t len) {
size_t i = 0;
- z_word_t next1 = Z_WORD_FROM_LE(crc);
+ z_word_t next1 = Z_WORD_FROM_LE(~crc);
z_word_t next2 = 0;
z_word_t next3 = 0;
#if defined(__EMSCRIPTEN__)
zng_free(bitbuffer);
#endif
- return crc;
+ return ~crc;
}
# if OPTIMAL_CMP == 64
/* Implement Chorba algorithm from https://arxiv.org/abs/2412.16398 */
-Z_INTERNAL uint32_t crc32_chorba_32768_nondestructive (uint32_t crc, const uint64_t* input, size_t len) {
+Z_INTERNAL uint32_t crc32_chorba_32768_nondestructive(uint32_t crc, const uint64_t* input, size_t len) {
uint64_t bitbuffer[32768 / sizeof(uint64_t)];
const uint8_t* bitbufferbytes = (const uint8_t*) bitbuffer;
memset(bitbuffer, 0, 32768);
- bitbuffer[0] = Z_U64_TO_LE(crc);
+ bitbuffer[0] = Z_U64_TO_LE(~crc);
crc = 0;
crc = crc_table[(crc ^ final_bytes[j] ^ bitbufferbytes[(j+i)]) & 0xff] ^ (crc >> 8);
}
- return crc;
+ return ~crc;
}
/* Implement Chorba algorithm from https://arxiv.org/abs/2412.16398 */
-Z_INTERNAL uint32_t crc32_chorba_small_nondestructive (uint32_t crc, const uint64_t* input, size_t len) {
+Z_INTERNAL uint32_t crc32_chorba_small_nondestructive(uint32_t crc, const uint64_t* input, size_t len) {
uint64_t final[9] = {0};
- uint64_t next1 = crc;
+ uint64_t next1 = ~crc;
crc = 0;
uint64_t next2 = 0;
uint64_t next3 = 0;
final[3] ^= Z_U64_TO_LE(next4);
final[4] ^= Z_U64_TO_LE(next5);
- crc = crc32_braid_internal(crc, (uint8_t*) final, len-i);
-
- return crc;
+ return crc32_braid(~crc, (uint8_t*)final, len-i);
}
#else // OPTIMAL_CMP == 64
Z_INTERNAL uint32_t crc32_chorba_small_nondestructive_32bit (uint32_t crc, const uint32_t* input, size_t len) {
uint32_t final[20] = {0};
- uint32_t next1 = crc;
+ uint32_t next1 = ~crc;
crc = 0;
uint32_t next2 = 0;
uint32_t next3 = 0;
final[8] ^= Z_U32_TO_LE(next9);
final[9] ^= Z_U32_TO_LE(next10);
- crc = crc32_braid_internal(crc, (uint8_t*) final, len-i);
-
- return crc;
+ return crc32_braid(~crc, (uint8_t*)final, len-i);
}
#endif // OPTIMAL_CMP == 64
Z_INTERNAL uint32_t crc32_chorba(uint32_t crc, const uint8_t *buf, size_t len) {
- uint64_t *aligned_buf;
- uint32_t c = ~crc;
uintptr_t align_diff = ALIGN_DIFF(buf, 8);
+ if (len <= align_diff + CHORBA_SMALL_THRESHOLD) /* Too short for Chorba once the unaligned head is excluded: braid the whole buffer. */
+ return crc32_braid(crc, buf, len);
- if (len > align_diff + CHORBA_SMALL_THRESHOLD) {
- if (align_diff) {
- c = crc32_braid_internal(c, buf, align_diff);
- len -= align_diff;
- }
- aligned_buf = (uint64_t*)(buf + align_diff);
- if(len > CHORBA_LARGE_THRESHOLD) {
- c = crc32_chorba_118960_nondestructive(c, (z_word_t*)aligned_buf, len);
-# if OPTIMAL_CMP == 64
- } else if (len > CHORBA_MEDIUM_LOWER_THRESHOLD && len <= CHORBA_MEDIUM_UPPER_THRESHOLD) {
- c = crc32_chorba_32768_nondestructive(c, (uint64_t*)aligned_buf, len);
-# endif
- } else {
-# if OPTIMAL_CMP == 64
- c = crc32_chorba_small_nondestructive(c, (uint64_t*)aligned_buf, len);
-# else
- c = crc32_chorba_small_nondestructive_32bit(c, (uint32_t*)aligned_buf, len);
-# endif
- }
- } else {
- // Process too short lengths using crc32_braid
- c = crc32_braid_internal(c, buf, len);
+ if (align_diff) { /* Checksum the unaligned head with crc32_braid, then advance past it. */
+ crc = crc32_braid(crc, buf, align_diff);
+ len -= align_diff;
+ buf += align_diff; /* buf itself is advanced; the old separate aligned_buf pointer is gone. */
}
-
- /* Return the CRC, post-conditioned. */
- return ~c;
+ if (len > CHORBA_LARGE_THRESHOLD) /* Largest inputs go to the 118960 variant. */
+ return crc32_chorba_118960_nondestructive(crc, (const z_word_t*)buf, len);
+#if OPTIMAL_CMP == 64
+ if (len > CHORBA_MEDIUM_LOWER_THRESHOLD && len <= CHORBA_MEDIUM_UPPER_THRESHOLD) /* Medium window: 32768 variant (64-bit builds only). */
+ return crc32_chorba_32768_nondestructive(crc, (const uint64_t*)buf, len);
+ return crc32_chorba_small_nondestructive(crc, (const uint64_t*)buf, len); /* Everything else above the small threshold. */
+#else
+ return crc32_chorba_small_nondestructive_32bit(crc, (const uint32_t*)buf, len); /* Non-64-bit builds only have the small 32-bit variant below the large threshold. */
+#endif
}
uint32_t crc32_copy_chorba(uint32_t crc, uint8_t *dst, const uint8_t *src, size_t len) {
uint32_t compare256_c(const uint8_t *src0, const uint8_t *src1);
uint32_t crc32_braid(uint32_t crc, const uint8_t *buf, size_t len);
-uint32_t crc32_braid_internal(uint32_t c, const uint8_t *buf, size_t len);
uint32_t crc32_copy_braid(uint32_t crc, uint8_t *dst, const uint8_t *src, size_t len);
#ifndef WITHOUT_CHORBA
d = _mm_srli_epi64(invec, 20); \
} while (0);
-Z_INTERNAL uint32_t chorba_small_nondestructive_sse2(uint32_t crc, const uint64_t* buf, size_t len) {
- const uint64_t* input = buf;
+Z_INTERNAL uint32_t chorba_small_nondestructive_sse2(uint32_t crc, const uint64_t* input, size_t len) {
ALIGNED_(16) uint64_t final[9] = {0};
- uint64_t next1 = crc;
+ uint64_t next1 = ~crc;
crc = 0;
uint64_t next2 = 0;
uint64_t next3 = 0;
crc = crc_table[(crc ^ *final_bytes++) & 0xff] ^ (crc >> 8);
}
- return crc;
+ return ~crc;
}
Z_INTERNAL uint32_t crc32_chorba_sse2(uint32_t crc, const uint8_t *buf, size_t len) {
- uint64_t *aligned_buf;
- uint32_t c = ~crc;
uintptr_t align_diff = ALIGN_DIFF(buf, 16);
+ if (len <= align_diff + CHORBA_SMALL_THRESHOLD_64BIT) /* Too short for Chorba once the unaligned head is excluded: braid the whole buffer. */
+ return crc32_braid(crc, buf, len);
- if (len > align_diff + CHORBA_SMALL_THRESHOLD_64BIT) {
- if (align_diff) {
- c = crc32_braid_internal(c, buf, align_diff);
- len -= align_diff;
- }
- aligned_buf = (uint64_t*)(buf + align_diff);
+ if (align_diff) { /* Checksum the unaligned head with crc32_braid, then advance past it. */
+ crc = crc32_braid(crc, buf, align_diff);
+ len -= align_diff;
+ buf += align_diff; /* buf itself is advanced; the old separate aligned_buf pointer is gone. */
+ }
#if !defined(WITHOUT_CHORBA)
- if (len > CHORBA_LARGE_THRESHOLD) {
- c = crc32_chorba_118960_nondestructive(c, (z_word_t*)aligned_buf, len);
- } else
+ if (len > CHORBA_LARGE_THRESHOLD) /* Largest inputs go to the generic 118960 variant when it is compiled in. */
+ return crc32_chorba_118960_nondestructive(crc, (const z_word_t*)buf, len);
#endif
- {
- c = chorba_small_nondestructive_sse2(c, aligned_buf, len);
- }
- } else {
- // Process too short lengths using crc32_braid
- c = crc32_braid_internal(c, buf, len);
- }
-
- /* Return the CRC, post-conditioned. */
- return ~c;
+ return chorba_small_nondestructive_sse2(crc, (const uint64_t*)buf, len); /* All remaining lengths use the SSE2 small variant. */
}
Z_INTERNAL uint32_t crc32_copy_chorba_sse2(uint32_t crc, uint8_t *dst, const uint8_t *src, size_t len) {
out3 = _mm_xor_si128(in[4], xor3); \
} while (0)
-Z_FORCEINLINE static uint32_t crc32_chorba_32768_nondestructive_sse41(uint32_t crc, const uint64_t* buf, size_t len) {
- const uint64_t* input = buf;
+Z_FORCEINLINE static uint32_t crc32_chorba_32768_nondestructive_sse41(uint32_t crc, const uint64_t* input, size_t len) {
ALIGNED_(16) uint64_t bitbuffer[32768 / sizeof(uint64_t)];
__m128i *bitbuffer_v = (__m128i*)bitbuffer;
const uint8_t* bitbufferbytes = (const uint8_t*) bitbuffer;
}
/* We need to mix this in */
- __m128i init_crc = _mm_cvtsi64_si128(crc);
+ __m128i init_crc = _mm_cvtsi64_si128(~crc);
crc = 0;
size_t i = 0;
for(size_t j = 0; j < (len-i); j++) {
crc = crc_table[(crc ^ final_bytes[j] ^ bitbufferbytes[(j+i)]) & 0xff] ^ (crc >> 8);
}
- return crc;
+ return ~crc;
}
Z_INTERNAL uint32_t crc32_chorba_sse41(uint32_t crc, const uint8_t *buf, size_t len) {
- uint64_t *aligned_buf;
- uint32_t c = ~crc;
uintptr_t align_diff = ALIGN_DIFF(buf, 16);
+ if (len <= align_diff + CHORBA_SMALL_THRESHOLD_64BIT) /* Too short for Chorba once the unaligned head is excluded: braid the whole buffer. */
+ return crc32_braid(crc, buf, len);
- if (len > align_diff + CHORBA_SMALL_THRESHOLD_64BIT) {
- if (align_diff) {
- c = crc32_braid_internal(c, buf, align_diff);
- len -= align_diff;
- }
- aligned_buf = (uint64_t*)(buf + align_diff);
+ if (align_diff) { /* Checksum the unaligned head with crc32_braid, then advance past it. */
+ crc = crc32_braid(crc, buf, align_diff);
+ len -= align_diff;
+ buf += align_diff; /* buf itself is advanced; the old separate aligned_buf pointer is gone. */
+ }
#if !defined(WITHOUT_CHORBA)
- if (len > CHORBA_LARGE_THRESHOLD) {
- c = crc32_chorba_118960_nondestructive(c, (z_word_t*)aligned_buf, len);
- } else
+ if (len > CHORBA_LARGE_THRESHOLD) /* Largest inputs go to the generic 118960 variant when it is compiled in. */
+ return crc32_chorba_118960_nondestructive(crc, (const z_word_t*)buf, len); /* Keep buf's const qualifier, matching the other call sites of this helper. */
#endif
- if (len > CHORBA_MEDIUM_LOWER_THRESHOLD && len <= CHORBA_MEDIUM_UPPER_THRESHOLD) {
- c = crc32_chorba_32768_nondestructive_sse41(c, aligned_buf, len);
- } else {
- c = chorba_small_nondestructive_sse2(c, aligned_buf, len);
- }
- } else {
- // Process too short lengths using crc32_braid
- c = crc32_braid_internal(c, buf, len);
- }
-
- /* Return the CRC, post-conditioned. */
- return ~c;
+ if (len > CHORBA_MEDIUM_LOWER_THRESHOLD && len <= CHORBA_MEDIUM_UPPER_THRESHOLD) /* Medium window: SSE4.1 32768 variant. */
+ return crc32_chorba_32768_nondestructive_sse41(crc, (const uint64_t*)buf, len);
+ return chorba_small_nondestructive_sse2(crc, (const uint64_t*)buf, len); /* All remaining lengths fall back to the SSE2 small variant. */
}
Z_INTERNAL uint32_t crc32_copy_chorba_sse41(uint32_t crc, uint8_t *dst, const uint8_t *src, size_t len) {