- Fixed the alignment-offset calculation in crc32_chorba: it aligns to 8 bytes but masked with 0xF; the mask is now 7.
- Moved the length check to the start of each function, so inputs that are too short skip the extra branches.
This also allows removing one call to crc32_braid_internal that previously handled those inputs.
Gbench shows ~0.15-0.25 ns saved per call for lengths shorter than CHORBA_SMALL_THRESHOLD.
- Avoid calculating the aligned length when the buffer is already aligned.
#endif // OPTIMAL_CMP == 64
Z_INTERNAL uint32_t crc32_chorba(uint32_t crc, const uint8_t *buf, size_t len) {
+ uint64_t* aligned_buf;
uint32_t c = (~crc) & 0xffffffff;
+ uintptr_t algn_diff = ((uintptr_t)8 - ((uintptr_t)buf & 7)) & 7;
- uint64_t* aligned_buf;
- size_t aligned_len;
- unsigned long algn_diff = ((uintptr_t)8 - ((uintptr_t)buf & 0xF)) & 0xF;
- if (algn_diff < len) {
+ if (len > algn_diff + CHORBA_SMALL_THRESHOLD) {
if (algn_diff) {
c = crc32_braid_internal(c, buf, algn_diff);
+ len -= algn_diff;
}
aligned_buf = (uint64_t*) (buf + algn_diff);
- aligned_len = len - algn_diff;
- if(aligned_len > CHORBA_LARGE_THRESHOLD)
- c = crc32_chorba_118960_nondestructive(c, (z_word_t*) aligned_buf, aligned_len);
+ if(len > CHORBA_LARGE_THRESHOLD) {
+ c = crc32_chorba_118960_nondestructive(c, (z_word_t*) aligned_buf, len);
+# if OPTIMAL_CMP == 64
+ } else if (len > CHORBA_MEDIUM_LOWER_THRESHOLD && len <= CHORBA_MEDIUM_UPPER_THRESHOLD) {
+ c = crc32_chorba_32768_nondestructive(c, (uint64_t*) aligned_buf, len);
+# endif
+ } else {
# if OPTIMAL_CMP == 64
- else if (aligned_len > CHORBA_MEDIUM_LOWER_THRESHOLD && aligned_len <= CHORBA_MEDIUM_UPPER_THRESHOLD)
- c = crc32_chorba_32768_nondestructive(c, (uint64_t*) aligned_buf, aligned_len);
- else if (aligned_len > CHORBA_SMALL_THRESHOLD_64BIT)
- c = crc32_chorba_small_nondestructive(c, (uint64_t*) aligned_buf, aligned_len);
+ c = crc32_chorba_small_nondestructive(c, (uint64_t*) aligned_buf, len);
# else
- else if (aligned_len > CHORBA_SMALL_THRESHOLD_32BIT)
- c = crc32_chorba_small_nondestructive_32bit(c, (uint32_t*) aligned_buf, aligned_len);
+ c = crc32_chorba_small_nondestructive_32bit(c, (uint32_t*) aligned_buf, len);
# endif
- else
- c = crc32_braid_internal(c, (uint8_t*) aligned_buf, aligned_len);
- }
- else {
+ }
+ } else {
+ // Process too short lengths using crc32_braid
c = crc32_braid_internal(c, buf, len);
}
}
Z_INTERNAL uint32_t crc32_chorba_sse2(uint32_t crc, const uint8_t *buf, size_t len) {
- uint32_t c;
uint64_t* aligned_buf;
- size_t aligned_len;
+ uint32_t c = (~crc) & 0xffffffff;
+ uintptr_t algn_diff = ((uintptr_t)16 - ((uintptr_t)buf & 15)) & 15;
- c = (~crc) & 0xffffffff;
- unsigned long algn_diff = ((uintptr_t)16 - ((uintptr_t)buf & 15)) & 15;
- if (algn_diff < len) {
+ if (len > algn_diff + CHORBA_SMALL_THRESHOLD_64BIT) {
if (algn_diff) {
c = crc32_braid_internal(c, buf, algn_diff);
+ len -= algn_diff;
}
aligned_buf = (uint64_t*) (buf + algn_diff);
- aligned_len = len - algn_diff;
#if !defined(WITHOUT_CHORBA)
- if(aligned_len > CHORBA_LARGE_THRESHOLD) {
- c = crc32_chorba_118960_nondestructive(c, (z_word_t*) aligned_buf, aligned_len);
+ if(len > CHORBA_LARGE_THRESHOLD) {
+ c = crc32_chorba_118960_nondestructive(c, (z_word_t*) aligned_buf, len);
} else
#endif
- if (aligned_len > CHORBA_SMALL_THRESHOLD_64BIT) {
- c = chorba_small_nondestructive_sse2(c, aligned_buf, aligned_len);
- } else {
- c = crc32_braid_internal(c, (uint8_t*) aligned_buf, aligned_len);
+ {
+ c = chorba_small_nondestructive_sse2(c, aligned_buf, len);
}
- }
- else {
+ } else {
+ // Process too short lengths using crc32_braid
c = crc32_braid_internal(c, buf, len);
}
}
Z_INTERNAL uint32_t crc32_chorba_sse41(uint32_t crc, const uint8_t *buf, size_t len) {
- uint32_t c;
uint64_t* aligned_buf;
- size_t aligned_len;
-
- c = (~crc) & 0xffffffff;
+ uint32_t c = (~crc) & 0xffffffff;
uintptr_t algn_diff = ((uintptr_t)16 - ((uintptr_t)buf & 15)) & 15;
- if (algn_diff < len) {
+
+ if (len > algn_diff + CHORBA_SMALL_THRESHOLD_64BIT) {
if (algn_diff) {
c = crc32_braid_internal(c, buf, algn_diff);
+ len -= algn_diff;
}
aligned_buf = (uint64_t*) (buf + algn_diff);
- aligned_len = len - algn_diff;
#if !defined(WITHOUT_CHORBA)
- if(aligned_len > CHORBA_LARGE_THRESHOLD) {
- c = crc32_chorba_118960_nondestructive(c, (z_word_t*) aligned_buf, aligned_len);
+ if(len > CHORBA_LARGE_THRESHOLD) {
+ c = crc32_chorba_118960_nondestructive(c, (z_word_t*) aligned_buf, len);
} else
#endif
- if (aligned_len > CHORBA_MEDIUM_LOWER_THRESHOLD &&
- aligned_len <= CHORBA_MEDIUM_UPPER_THRESHOLD) {
- c = crc32_chorba_32768_nondestructive_sse41(c, aligned_buf, aligned_len);
- } else if (aligned_len > CHORBA_SMALL_THRESHOLD_64BIT) {
- c = chorba_small_nondestructive_sse2(c, aligned_buf, aligned_len);
+ if (len > CHORBA_MEDIUM_LOWER_THRESHOLD && len <= CHORBA_MEDIUM_UPPER_THRESHOLD) {
+ c = crc32_chorba_32768_nondestructive_sse41(c, aligned_buf, len);
} else {
- c = crc32_braid_internal(c, (uint8_t*) aligned_buf, aligned_len);
+ c = chorba_small_nondestructive_sse2(c, aligned_buf, len);
}
- }
- else {
+ } else {
+ // Process too short lengths using crc32_braid
c = crc32_braid_internal(c, buf, len);
}
#define CHORBA_MEDIUM_UPPER_THRESHOLD 32768
#define CHORBA_MEDIUM_LOWER_THRESHOLD 8192
#define CHORBA_SMALL_THRESHOLD_64BIT 72
-#define CHORBA_SMALL_THRESHOLD_32BIT 80
+#if OPTIMAL_CMP == 64
+# define CHORBA_SMALL_THRESHOLD 72
+#else
+# define CHORBA_SMALL_THRESHOLD 80
+#endif
typedef struct crc32_fold_s {
uint8_t fold[CRC32_FOLD_BUFFER_SIZE];