git.ipfire.org Git - thirdparty/zlib-ng.git/commitdiff
Add shared align/tail helpers for CRC32 ARMv8.
author Nathan Moinvaziri <nathan@nathanm.com>
Fri, 6 Mar 2026 03:00:54 +0000 (19:00 -0800)
committer Hans Kristian Rosbach <hk-github@circlestorm.org>
Sat, 14 Mar 2026 01:14:21 +0000 (02:14 +0100)
arch/arm/crc32_armv8.c
arch/arm/crc32_armv8_p.h [new file with mode: 0644]
arch/arm/crc32_armv8_pmull_eor3.c

diff --git a/arch/arm/crc32_armv8.c b/arch/arm/crc32_armv8.c
index 55dac2a56420bce4bb450f49d0b1c131fbd974a3..08043f7b02809248edd17b8a3e1fb206defe82e8 100644 (file)
@@ -8,58 +8,22 @@
 
 #include "zbuild.h"
 #include "acle_intrins.h"
+#include "crc32_armv8_p.h"
 
 Z_INTERNAL Z_TARGET_CRC uint32_t crc32_armv8(uint32_t crc, const uint8_t *buf, size_t len) {
     uint32_t c = ~crc;
 
     if (UNLIKELY(len == 1)) {
         c = __crc32b(c, *buf);
-        c = ~c;
-        return c;
+        return ~c;
     }
 
+    /* Align to 8-byte boundary for tail processing */
     uintptr_t align_diff = ALIGN_DIFF(buf, 8);
-    if (align_diff) {
-        if (len && (align_diff & 1)) {
-            c = __crc32b(c, *buf++);
-            len--;
-        }
+    if (align_diff)
+        c = crc32_armv8_align(c, &buf, &len, align_diff);
 
-        if (len >= 2 && (align_diff & 2)) {
-            c = __crc32h(c, *((uint16_t*)buf));
-            buf += 2;
-            len -= 2;
-        }
-
-        if (len >= 4 && (align_diff & 4)) {
-            c = __crc32w(c, *((uint32_t*)buf));
-            len -= 4;
-            buf += 4;
-        }
-    }
-
-    while (len >= 8) {
-        c = __crc32d(c, *((uint64_t*)buf));
-        len -= 8;
-        buf += 8;
-    }
-
-    if (len & 4) {
-        c = __crc32w(c, *((uint32_t*)buf));
-        buf += 4;
-    }
-
-    if (len & 2) {
-        c = __crc32h(c, *((uint16_t*)buf));
-        buf += 2;
-    }
-
-    if (len & 1) {
-        c = __crc32b(c, *buf);
-    }
-
-    c = ~c;
-    return c;
+    return crc32_armv8_tail(c, buf, len);
 }
 
 Z_INTERNAL Z_TARGET_CRC uint32_t crc32_copy_armv8(uint32_t crc, uint8_t *dst, const uint8_t *src, size_t len) {
diff --git a/arch/arm/crc32_armv8_p.h b/arch/arm/crc32_armv8_p.h
new file mode 100644 (file)
index 0000000..83543d6
--- /dev/null
@@ -0,0 +1,72 @@
+/* crc32_armv8_p.h -- Private shared inline ARMv8 CRC32 functions
+ * For conditions of distribution and use, see copyright notice in zlib.h
+ */
+
+#ifndef CRC32_ARMV8_P_H
+#define CRC32_ARMV8_P_H
+
+#include "zbuild.h"
+#include "acle_intrins.h"
+
+Z_FORCEINLINE static Z_TARGET_CRC uint32_t crc32_armv8_align(uint32_t crc, const uint8_t **buf,
+                                                             size_t *len, uintptr_t align_diff) { /* in/out: *buf is advanced and *len reduced in place; returns the updated crc (no inversion here) */
+    if (*len && (align_diff & 1)) { /* bit 0 of align_diff set and data remains: fold one byte */
+        uint8_t val = **buf;
+        crc = __crc32b(crc, val);
+        *buf += 1;
+        *len -= 1;
+    }
+
+    if (*len >= 2 && (align_diff & 2)) { /* bit 1: fold a 16-bit value when at least 2 bytes remain */
+        uint16_t val = *((uint16_t*)*buf); /* NOTE(review): cast discards the const qualifier of *buf; same pattern in the 32/64-bit loads below */
+        crc = __crc32h(crc, val);
+        *buf += 2;
+        *len -= 2;
+    }
+
+    if (*len >= 4 && (align_diff & 4)) { /* bit 2: fold a 32-bit value when at least 4 bytes remain */
+        uint32_t val = *((uint32_t*)*buf);
+        crc = __crc32w(crc, val);
+        *buf += 4;
+        *len -= 4;
+    }
+
+    if (*len >= 8 && (align_diff & 8)) { /* bit 3: 64-bit step; presumably only set when the caller aligns to 16 bytes - confirm against ALIGN_DIFF */
+        uint64_t val = *((uint64_t*)*buf);
+        crc = __crc32d(crc, val);
+        *buf += 8;
+        *len -= 8;
+    }
+
+    return crc; /* a step skipped for lack of data leaves *buf short of the boundary; callers must handle the leftover *len */
+}
+
+Z_FORCEINLINE static Z_TARGET_CRC uint32_t crc32_armv8_tail(uint32_t crc, const uint8_t *buf, size_t len) { /* folds all len remaining bytes into crc and returns the bit-inverted result */
+    while (len >= 8) { /* bulk loop: 8 bytes per iteration */
+        uint64_t val = *((uint64_t*)buf); /* NOTE(review): cast discards const; alignment of buf is the caller's responsibility */
+        crc = __crc32d(crc, val);
+        buf += 8;
+        len -= 8;
+    }
+
+    if (len & 4) { /* len < 8 from here on, so its low three bits describe the remainder; len itself is not updated */
+        uint32_t val = *((uint32_t*)buf);
+        crc = __crc32w(crc, val);
+        buf += 4;
+    }
+
+    if (len & 2) { /* 2-byte remainder */
+        uint16_t val = *((uint16_t*)buf);
+        crc = __crc32h(crc, val);
+        buf += 2;
+    }
+
+    if (len & 1) { /* final odd byte */
+        uint8_t val = *buf;
+        crc = __crc32b(crc, val);
+    }
+
+    return ~crc; /* final inversion is applied here, so callers return this value directly */
+}
+
+#endif /* CRC32_ARMV8_P_H */
diff --git a/arch/arm/crc32_armv8_pmull_eor3.c b/arch/arm/crc32_armv8_pmull_eor3.c
index 5b491be4ab1a15496f67472cf067ad39643a70b0..40260533ea3a49ed1d140587499842b78e0ffa27 100644 (file)
@@ -13,6 +13,7 @@
 #include "zutil.h"
 #include "acle_intrins.h"
 #include "neon_intrins.h"
+#include "crc32_armv8_p.h"
 
 /* Carryless multiply low 64 bits: a[0] * b[0] */
 static inline uint64x2_t clmul_lo(uint64x2_t a, uint64x2_t b) {
@@ -77,30 +78,8 @@ Z_INTERNAL Z_TARGET_PMULL_EOR3 uint32_t crc32_armv8_pmull_eor3(uint32_t crc, con
 
     /* Align to 16-byte boundary for vector path */
     uintptr_t align_diff = ALIGN_DIFF(buf, 16);
-    if (align_diff) {
-        if (len && (align_diff & 1)) {
-            crc0 = __crc32b(crc0, *buf++);
-            len--;
-        }
-
-        if (len >= 2 && (align_diff & 2)) {
-            crc0 = __crc32h(crc0, *((uint16_t*)buf));
-            buf += 2;
-            len -= 2;
-        }
-
-        if (len >= 4 && (align_diff & 4)) {
-            crc0 = __crc32w(crc0, *((uint32_t*)buf));
-            len -= 4;
-            buf += 4;
-        }
-
-        if (len >= 8 && (align_diff & 8)) {
-            crc0 = __crc32d(crc0, *((uint64_t*)buf));
-            buf += 8;
-            len -= 8;
-        }
-    }
+    if (align_diff)
+        crc0 = crc32_armv8_align(crc0, &buf, &len, align_diff);
 
     /* 3-way scalar CRC + 9-way PMULL folding (192 bytes/iter) */
     if (len >= 192) {
@@ -246,27 +225,7 @@ Z_INTERNAL Z_TARGET_PMULL_EOR3 uint32_t crc32_armv8_pmull_eor3(uint32_t crc, con
     }
 
     /* Process remaining bytes */
-    while (len >= 8) {
-        crc0 = __crc32d(crc0, *((uint64_t*)buf));
-        len -= 8;
-        buf += 8;
-    }
-
-    if (len & 4) {
-        crc0 = __crc32w(crc0, *((uint32_t*)buf));
-        buf += 4;
-    }
-
-    if (len & 2) {
-        crc0 = __crc32h(crc0, *((uint16_t*)buf));
-        buf += 2;
-    }
-
-    if (len & 1) {
-        crc0 = __crc32b(crc0, *buf);
-    }
-
-    return ~crc0;
+    return crc32_armv8_tail(crc0, buf, len);
 }
 
 Z_INTERNAL Z_TARGET_PMULL_EOR3 uint32_t crc32_copy_armv8_pmull_eor3(uint32_t crc, uint8_t *dst, const uint8_t *src, size_t len) {