git.ipfire.org Git - thirdparty/zlib-ng.git/commitdiff
- Unify crc32_chorba, chorba_sse2 and chorba_sse41 dispatch functions.
author Hans Kristian Rosbach <hk-git@circlestorm.org>
Tue, 11 Nov 2025 21:47:52 +0000 (22:47 +0100)
committer Hans Kristian Rosbach <hk-github@circlestorm.org>
Fri, 14 Nov 2025 23:46:38 +0000 (00:46 +0100)
- Fixed alignment diff calculation in crc32_chorba.
- Fixed length check to happen early, avoiding extra branches for lengths that are too short.
This also allows removing the one function call to crc32_braid_internal that handled those.
Gbench shows ~0.15-0.25ns saved per call for lengths shorter than CHORBA_SMALL_THRESHOLD.
- Avoid calculating aligned len if buffer is already aligned

arch/generic/crc32_chorba_c.c
arch/x86/chorba_sse2.c
arch/x86/chorba_sse41.c
crc32.h

index 4041abd46e4462bdeb0dccc9f2798dd775f4b689..6f90d3c09fcf2eb4f8ffe7933e0b3d6919a46861 100644 (file)
@@ -1448,32 +1448,31 @@ Z_INTERNAL uint32_t crc32_chorba_small_nondestructive_32bit (uint32_t crc, const
 #endif // OPTIMAL_CMP == 64
 
 Z_INTERNAL uint32_t crc32_chorba(uint32_t crc, const uint8_t *buf, size_t len) {
+    uint64_t* aligned_buf;
     uint32_t c = (~crc) & 0xffffffff;
+    uintptr_t algn_diff = ((uintptr_t)8 - ((uintptr_t)buf & 7)) & 7;
 
-    uint64_t* aligned_buf;
-    size_t aligned_len;
-    unsigned long algn_diff = ((uintptr_t)8 - ((uintptr_t)buf & 0xF)) & 0xF;
-    if (algn_diff < len) {
+    if (len > algn_diff + CHORBA_SMALL_THRESHOLD) {
         if (algn_diff) {
             c = crc32_braid_internal(c, buf, algn_diff);
+            len -= algn_diff;
         }
         aligned_buf = (uint64_t*) (buf + algn_diff);
-        aligned_len = len - algn_diff;
-        if(aligned_len > CHORBA_LARGE_THRESHOLD)
-            c = crc32_chorba_118960_nondestructive(c, (z_word_t*) aligned_buf, aligned_len);
+        if(len > CHORBA_LARGE_THRESHOLD) {
+            c = crc32_chorba_118960_nondestructive(c, (z_word_t*) aligned_buf, len);
+#  if OPTIMAL_CMP == 64
+        } else if (len > CHORBA_MEDIUM_LOWER_THRESHOLD && len <= CHORBA_MEDIUM_UPPER_THRESHOLD) {
+            c = crc32_chorba_32768_nondestructive(c, (uint64_t*) aligned_buf, len);
+#  endif
+        } else {
 #  if OPTIMAL_CMP == 64
-        else if (aligned_len > CHORBA_MEDIUM_LOWER_THRESHOLD && aligned_len <= CHORBA_MEDIUM_UPPER_THRESHOLD)
-            c = crc32_chorba_32768_nondestructive(c, (uint64_t*) aligned_buf, aligned_len);
-        else if (aligned_len > CHORBA_SMALL_THRESHOLD_64BIT)
-            c = crc32_chorba_small_nondestructive(c, (uint64_t*) aligned_buf, aligned_len);
+            c = crc32_chorba_small_nondestructive(c, (uint64_t*) aligned_buf, len);
 #  else
-        else if (aligned_len > CHORBA_SMALL_THRESHOLD_32BIT)
-            c = crc32_chorba_small_nondestructive_32bit(c, (uint32_t*) aligned_buf, aligned_len);
+            c = crc32_chorba_small_nondestructive_32bit(c, (uint32_t*) aligned_buf, len);
 #  endif
-        else
-            c = crc32_braid_internal(c, (uint8_t*) aligned_buf, aligned_len);
-    }
-    else {
+        }
+    } else {
+        // Process too short lengths using crc32_braid
         c = crc32_braid_internal(c, buf, len);
     }
 
index 3e25d7586b29e3ffee9da6f9fda0adc4034d4a27..f79a5ac00d456c2319c29a9d3f46b9f800857491 100644 (file)
@@ -847,30 +847,26 @@ Z_INTERNAL uint32_t chorba_small_nondestructive_sse2(uint32_t crc, const uint64_
 }
 
 Z_INTERNAL uint32_t crc32_chorba_sse2(uint32_t crc, const uint8_t *buf, size_t len) {
-    uint32_t c;
     uint64_t* aligned_buf;
-    size_t aligned_len;
+    uint32_t c = (~crc) & 0xffffffff;
+    uintptr_t algn_diff = ((uintptr_t)16 - ((uintptr_t)buf & 15)) & 15;
 
-    c = (~crc) & 0xffffffff;
-    unsigned long algn_diff = ((uintptr_t)16 - ((uintptr_t)buf & 15)) & 15;
-    if (algn_diff < len) {
+    if (len > algn_diff + CHORBA_SMALL_THRESHOLD_64BIT) {
         if (algn_diff) {
             c = crc32_braid_internal(c, buf, algn_diff);
+            len -= algn_diff;
         }
         aligned_buf = (uint64_t*) (buf + algn_diff);
-        aligned_len = len - algn_diff;
 #if !defined(WITHOUT_CHORBA)
-        if(aligned_len > CHORBA_LARGE_THRESHOLD) {
-            c = crc32_chorba_118960_nondestructive(c, (z_word_t*) aligned_buf, aligned_len);
+        if(len > CHORBA_LARGE_THRESHOLD) {
+            c = crc32_chorba_118960_nondestructive(c, (z_word_t*) aligned_buf, len);
         } else
 #endif
-        if (aligned_len > CHORBA_SMALL_THRESHOLD_64BIT) {
-            c = chorba_small_nondestructive_sse2(c, aligned_buf, aligned_len);
-        } else {
-            c = crc32_braid_internal(c, (uint8_t*) aligned_buf, aligned_len);
+        {
+            c = chorba_small_nondestructive_sse2(c, aligned_buf, len);
         }
-    }
-    else {
+    } else {
+        // Process too short lengths using crc32_braid
         c = crc32_braid_internal(c, buf, len);
     }
 
index aebede45e2de7c8ffa2658d4bb8c27e8f3e1cd67..a7568a28006bf8f921c972f439f4da5fd5cbd89c 100644 (file)
@@ -305,33 +305,28 @@ static Z_FORCEINLINE uint32_t crc32_chorba_32768_nondestructive_sse41(uint32_t c
 }
 
 Z_INTERNAL uint32_t crc32_chorba_sse41(uint32_t crc, const uint8_t *buf, size_t len) {
-    uint32_t c;
     uint64_t* aligned_buf;
-    size_t aligned_len;
-
-    c = (~crc) & 0xffffffff;
+    uint32_t c = (~crc) & 0xffffffff;
     uintptr_t algn_diff = ((uintptr_t)16 - ((uintptr_t)buf & 15)) & 15;
-    if (algn_diff < len) {
+
+    if (len > algn_diff + CHORBA_SMALL_THRESHOLD_64BIT) {
         if (algn_diff) {
             c = crc32_braid_internal(c, buf, algn_diff);
+            len -= algn_diff;
         }
         aligned_buf = (uint64_t*) (buf + algn_diff);
-        aligned_len = len - algn_diff;
 #if !defined(WITHOUT_CHORBA)
-        if(aligned_len > CHORBA_LARGE_THRESHOLD) {
-            c = crc32_chorba_118960_nondestructive(c, (z_word_t*) aligned_buf, aligned_len);
+        if(len > CHORBA_LARGE_THRESHOLD) {
+            c = crc32_chorba_118960_nondestructive(c, (z_word_t*) aligned_buf, len);
         } else
 #endif
-        if (aligned_len > CHORBA_MEDIUM_LOWER_THRESHOLD &&
-                   aligned_len <= CHORBA_MEDIUM_UPPER_THRESHOLD) {
-            c = crc32_chorba_32768_nondestructive_sse41(c, aligned_buf, aligned_len);
-        } else if (aligned_len > CHORBA_SMALL_THRESHOLD_64BIT) {
-            c = chorba_small_nondestructive_sse2(c, aligned_buf, aligned_len);
+        if (len > CHORBA_MEDIUM_LOWER_THRESHOLD && len <= CHORBA_MEDIUM_UPPER_THRESHOLD) {
+            c = crc32_chorba_32768_nondestructive_sse41(c, aligned_buf, len);
         } else {
-            c = crc32_braid_internal(c, (uint8_t*) aligned_buf, aligned_len);
+            c = chorba_small_nondestructive_sse2(c, aligned_buf, len);
         }
-    }
-    else {
+    } else {
+        // Process too short lengths using crc32_braid
         c = crc32_braid_internal(c, buf, len);
     }
 
diff --git a/crc32.h b/crc32.h
index e26b59e520c1c2b311a3dd3fddd70284ea708476..d41af8f01f6fcdd11f9466441bbb3d86fa24d5ef 100644 (file)
--- a/crc32.h
+++ b/crc32.h
 #define CHORBA_MEDIUM_UPPER_THRESHOLD 32768
 #define CHORBA_MEDIUM_LOWER_THRESHOLD 8192
 #define CHORBA_SMALL_THRESHOLD_64BIT 72
-#define CHORBA_SMALL_THRESHOLD_32BIT 80
+#if OPTIMAL_CMP == 64
+#  define CHORBA_SMALL_THRESHOLD 72
+#else
+#  define CHORBA_SMALL_THRESHOLD 80
+#endif
 
 typedef struct crc32_fold_s {
     uint8_t fold[CRC32_FOLD_BUFFER_SIZE];