git.ipfire.org Git - thirdparty/zlib-ng.git/commitdiff
Add ACCUM_ROUND macro to crc32_chorba_c
author Nathan Moin Vaziri <nathan@nathanm.com>
Thu, 2 Apr 2026 04:38:16 +0000 (21:38 -0700)
committer Hans Kristian Rosbach <hk-github@circlestorm.org>
Tue, 14 Apr 2026 12:57:51 +0000 (14:57 +0200)
arch/generic/crc32_chorba_c.c

index 2b12677d5b86b92052476a72aed4166a648cbf4c..9f8427bd1cdad40c5fe9b21e8aa5c5e0cb3fff92 100644 (file)
         d = (invec) >> 20; \
     } while (0)
 
+#define ACCUM_ROUND(n1, n2, n3, n4, n5) /* fold the a/b/c/d round words into n1..n5 */ \
+    do { /* a3..a4, b2..b4, c1..c4, d1..d4 are taken from the caller's scope, not passed in */ \
+        n1 = n5 ^ (a3 ^ b2 ^ c1); /* reads the old n5, so this must precede the n5 store below */ \
+        n2 = a4 ^ b3 ^ c2 ^ d1; \
+        n3 = b4 ^ c3 ^ d2; \
+        n4 = c4 ^ d3; \
+        n5 = d4; /* overwritten last; a caller wanting n1 without the old n5 must zero n5 first */ \
+    } while (0)
+
 /**
  * Implements the Chorba algorithm for CRC32 computation (https://arxiv.org/abs/2412.16398).
  *
@@ -427,11 +436,6 @@ Z_INTERNAL uint32_t crc32_chorba_118960_nondestructive(uint32_t crc, const uint8
         uint64_t c1, c2, c3, c4;
         uint64_t d1, d2, d3, d4;
 
-        uint64_t out1;
-        uint64_t out2;
-        uint64_t out3;
-        uint64_t out4;
-        uint64_t out5;
 
         in1 = input_qwords[i / sizeof(uint64_t)] ^ bitbuffer_qwords[(i / sizeof(uint64_t)) % bitbuffer_size_qwords];
         in2 = input_qwords[i / sizeof(uint64_t) + 1] ^ bitbuffer_qwords[(i / sizeof(uint64_t) + 1) % bitbuffer_size_qwords];
@@ -451,17 +455,7 @@ Z_INTERNAL uint32_t crc32_chorba_118960_nondestructive(uint32_t crc, const uint8
 
         NEXT_ROUND(in4, d1, d2, d3, d4);
 
-        out1 = a3 ^ b2 ^ c1;
-        out2 = a4 ^ b3 ^ c2 ^ d1;
-        out3 = b4 ^ c3 ^ d2;
-        out4 = c4 ^ d3;
-        out5 = d4;
-
-        next1_64 = next5_64 ^ out1;
-        next2_64 = out2;
-        next3_64 = out3;
-        next4_64 = out4;
-        next5_64 = out5;
+        ACCUM_ROUND(next1_64, next2_64, next3_64, next4_64, next5_64);
     }
 
     memcpy(final, input_qwords + (i / sizeof(uint64_t)), len-i);
@@ -567,11 +561,6 @@ Z_INTERNAL uint32_t crc32_chorba_32768_nondestructive(uint32_t crc, const uint8_
         uint64_t c1, c2, c3, c4;
         uint64_t d1, d2, d3, d4;
 
-        uint64_t out1;
-        uint64_t out2;
-        uint64_t out3;
-        uint64_t out4;
-        uint64_t out5;
 
         in1 = input[i / sizeof(uint64_t)] ^ bitbuffer[(i / sizeof(uint64_t))];
         in2 = input[(i + 8) / sizeof(uint64_t)] ^ bitbuffer[(i / sizeof(uint64_t) + 1)];
@@ -591,17 +580,7 @@ Z_INTERNAL uint32_t crc32_chorba_32768_nondestructive(uint32_t crc, const uint8_
 
         NEXT_ROUND(in4, d1, d2, d3, d4);
 
-        out1 = a3 ^ b2 ^ c1;
-        out2 = a4 ^ b3 ^ c2 ^ d1;
-        out3 = b4 ^ c3 ^ d2;
-        out4 = c4 ^ d3;
-        out5 = d4;
-
-        next1_64 = next5_64 ^ out1;
-        next2_64 = out2;
-        next3_64 = out3;
-        next4_64 = out4;
-        next5_64 = out5;
+        ACCUM_ROUND(next1_64, next2_64, next3_64, next4_64, next5_64);
 
     }
 
@@ -646,11 +625,6 @@ Z_INTERNAL uint32_t crc32_chorba_small_nondestructive(uint32_t crc, const uint8_
         uint64_t c1, c2, c3, c4;
         uint64_t d1, d2, d3, d4;
 
-        uint64_t out1;
-        uint64_t out2;
-        uint64_t out3;
-        uint64_t out4;
-        uint64_t out5;
 
         uint64_t chorba1 = Z_U64_FROM_LE(input[i / sizeof(uint64_t)]) ^ next1;
         uint64_t chorba2 = Z_U64_FROM_LE(input[i / sizeof(uint64_t) + 1]) ^ next2;
@@ -678,17 +652,10 @@ Z_INTERNAL uint32_t crc32_chorba_small_nondestructive(uint32_t crc, const uint8_
 
         NEXT_ROUND(in4, d1, d2, d3, d4);
 
-        out1 = a3 ^ b2 ^ c1;
-        out2 = a4 ^ b3 ^ c2 ^ d1;
-        out3 = b4 ^ c3 ^ d2;
-        out4 = c4 ^ d3;
-        out5 = d4;
-
-        next1 = out1;
-        next2 = out2;
-        next3 = out3;
-        next4 = out4;
-        next5 = out5;
+        /* chorba5 already consumed next5, clear it so ACCUM_ROUND
+           does not xor the stale value into next1 */
+        next5 = 0;
+        ACCUM_ROUND(next1, next2, next3, next4, next5);
 
         i += 32;
 
@@ -707,17 +674,7 @@ Z_INTERNAL uint32_t crc32_chorba_small_nondestructive(uint32_t crc, const uint8_
 
         NEXT_ROUND(in4, d1, d2, d3, d4);
 
-        out1 = a3 ^ b2 ^ c1;
-        out2 = a4 ^ b3 ^ c2 ^ d1;
-        out3 = b4 ^ c3 ^ d2;
-        out4 = c4 ^ d3;
-        out5 = d4;
-
-        next1 = next5 ^ out1;
-        next2 = out2;
-        next3 = out3;
-        next4 = out4;
-        next5 = out5;
+        ACCUM_ROUND(next1, next2, next3, next4, next5);
 
         i += 32;
 
@@ -736,17 +693,7 @@ Z_INTERNAL uint32_t crc32_chorba_small_nondestructive(uint32_t crc, const uint8_
 
         NEXT_ROUND(in4, d1, d2, d3, d4);
 
-        out1 = a3 ^ b2 ^ c1;
-        out2 = a4 ^ b3 ^ c2 ^ d1;
-        out3 = b4 ^ c3 ^ d2;
-        out4 = c4 ^ d3;
-        out5 = d4;
-
-        next1 = next5 ^ out1;
-        next2 = out2;
-        next3 = out3;
-        next4 = out4;
-        next5 = out5;
+        ACCUM_ROUND(next1, next2, next3, next4, next5);
 
         i += 32;
 
@@ -765,17 +712,7 @@ Z_INTERNAL uint32_t crc32_chorba_small_nondestructive(uint32_t crc, const uint8_
 
         NEXT_ROUND(in4, d1, d2, d3, d4);
 
-        out1 = a3 ^ b2 ^ c1;
-        out2 = a4 ^ b3 ^ c2 ^ d1;
-        out3 = b4 ^ c3 ^ d2;
-        out4 = c4 ^ d3;
-        out5 = d4;
-
-        next1 = next5 ^ out1;
-        next2 = out2;
-        next3 = out3;
-        next4 = out4;
-        next5 = out5;
+        ACCUM_ROUND(next1, next2, next3, next4, next5);
 
         i += 32;
 
@@ -794,17 +731,7 @@ Z_INTERNAL uint32_t crc32_chorba_small_nondestructive(uint32_t crc, const uint8_
 
         NEXT_ROUND(in4, d1, d2, d3, d4);
 
-        out1 = a3 ^ b2 ^ c1;
-        out2 = a4 ^ b3 ^ c2 ^ d1;
-        out3 = b4 ^ c3 ^ d2;
-        out4 = c4 ^ d3;
-        out5 = d4;
-
-        next1 = next5 ^ out1;
-        next2 = out2;
-        next3 = out3;
-        next4 = out4;
-        next5 = out5;
+        ACCUM_ROUND(next1, next2, next3, next4, next5);
 
         i += 32;
 
@@ -823,17 +750,7 @@ Z_INTERNAL uint32_t crc32_chorba_small_nondestructive(uint32_t crc, const uint8_
 
         NEXT_ROUND(in4, d1, d2, d3, d4);
 
-        out1 = a3 ^ b2 ^ c1;
-        out2 = a4 ^ b3 ^ c2 ^ d1;
-        out3 = b4 ^ c3 ^ d2;
-        out4 = c4 ^ d3;
-        out5 = d4;
-
-        next1 = next5 ^ out1;
-        next2 = out2;
-        next3 = out3;
-        next4 = out4;
-        next5 = out5;
+        ACCUM_ROUND(next1, next2, next3, next4, next5);
 
         i += 32;
 
@@ -852,17 +769,7 @@ Z_INTERNAL uint32_t crc32_chorba_small_nondestructive(uint32_t crc, const uint8_
 
         NEXT_ROUND(in4, d1, d2, d3, d4);
 
-        out1 = a3 ^ b2 ^ c1;
-        out2 = a4 ^ b3 ^ c2 ^ d1;
-        out3 = b4 ^ c3 ^ d2;
-        out4 = c4 ^ d3;
-        out5 = d4;
-
-        next1 = next5 ^ out1;
-        next2 = out2;
-        next3 = out3;
-        next4 = out4;
-        next5 = out5;
+        ACCUM_ROUND(next1, next2, next3, next4, next5);
 
         i += 32;
 
@@ -881,17 +788,7 @@ Z_INTERNAL uint32_t crc32_chorba_small_nondestructive(uint32_t crc, const uint8_
 
         NEXT_ROUND(in4, d1, d2, d3, d4);
 
-        out1 = a3 ^ b2 ^ c1;
-        out2 = a4 ^ b3 ^ c2 ^ d1;
-        out3 = b4 ^ c3 ^ d2;
-        out4 = c4 ^ d3;
-        out5 = d4;
-
-        next1 = next5 ^ out1;
-        next2 = out2;
-        next3 = out3;
-        next4 = out4;
-        next5 = out5;
+        ACCUM_ROUND(next1, next2, next3, next4, next5);
     }
 
     for (; (i + 40 + 32) < len; i += 32) {
@@ -904,11 +801,6 @@ Z_INTERNAL uint32_t crc32_chorba_small_nondestructive(uint32_t crc, const uint8_
         uint64_t c1, c2, c3, c4;
         uint64_t d1, d2, d3, d4;
 
-        uint64_t out1;
-        uint64_t out2;
-        uint64_t out3;
-        uint64_t out4;
-        uint64_t out5;
 
         in1 = Z_U64_FROM_LE(input[i / sizeof(uint64_t)]) ^ next1;
         in2 = Z_U64_FROM_LE(input[i / sizeof(uint64_t) + 1]) ^ next2;
@@ -924,17 +816,7 @@ Z_INTERNAL uint32_t crc32_chorba_small_nondestructive(uint32_t crc, const uint8_
 
         NEXT_ROUND(in4, d1, d2, d3, d4);
 
-        out1 = a3 ^ b2 ^ c1;
-        out2 = a4 ^ b3 ^ c2 ^ d1;
-        out3 = b4 ^ c3 ^ d2;
-        out4 = c4 ^ d3;
-        out5 = d4;
-
-        next1 = next5 ^ out1;
-        next2 = out2;
-        next3 = out3;
-        next4 = out4;
-        next5 = out5;
+        ACCUM_ROUND(next1, next2, next3, next4, next5);
     }
 
     memcpy(final, input+(i / sizeof(uint64_t)), len-i);