#include "acle_intrins.h"
#include "crc32_armv8_p.h"
-Z_FORCEINLINE static Z_TARGET_CRC uint32_t crc32_copy_impl(uint32_t crc, uint8_t *dst, const uint8_t *src, size_t len,
- const int COPY) {
- uint32_t c = ~crc;
+#include "arch/shared/crc32_hw_copy_impl_tpl.h"
- if (UNLIKELY(len == 1)) {
- if (COPY)
- *dst = *src;
- c = __crc32b(c, *src);
- return ~c;
- }
-
- /* Align to 8-byte boundary for tail processing */
- uintptr_t align_diff = ALIGN_DIFF(src, 8);
- if (align_diff)
- c = crc32_armv8_align(c, &dst, &src, &len, align_diff, COPY);
-
- while (len >= 64) {
- uint64_t d0 = *(const uint64_t *)src;
- uint64_t d1 = *(const uint64_t *)(src + 8);
- uint64_t d2 = *(const uint64_t *)(src + 16);
- uint64_t d3 = *(const uint64_t *)(src + 24);
- uint64_t d4 = *(const uint64_t *)(src + 32);
- uint64_t d5 = *(const uint64_t *)(src + 40);
- uint64_t d6 = *(const uint64_t *)(src + 48);
- uint64_t d7 = *(const uint64_t *)(src + 56);
-
- if (COPY) {
- memcpy(dst, &d0, 8);
- memcpy(dst + 8, &d1, 8);
- memcpy(dst + 16, &d2, 8);
- memcpy(dst + 24, &d3, 8);
- memcpy(dst + 32, &d4, 8);
- memcpy(dst + 40, &d5, 8);
- memcpy(dst + 48, &d6, 8);
- memcpy(dst + 56, &d7, 8);
- dst += 64;
- }
-
- c = __crc32d(c, d0);
- c = __crc32d(c, d1);
- c = __crc32d(c, d2);
- c = __crc32d(c, d3);
- c = __crc32d(c, d4);
- c = __crc32d(c, d5);
- c = __crc32d(c, d6);
- c = __crc32d(c, d7);
-
- src += 64;
- len -= 64;
- }
-
- return crc32_armv8_tail(c, dst, src, len, COPY);
-}
Z_INTERNAL Z_TARGET_CRC uint32_t crc32_armv8(uint32_t crc, const uint8_t *buf, size_t len) {
- return crc32_copy_impl(crc, NULL, buf, len, 0);
+ return crc32_hw_copy_impl(crc, NULL, buf, len, 0); /* COPY == 0: checksum only; every dst access in the shared code is guarded by COPY, so NULL is never dereferenced */
}
Z_INTERNAL Z_TARGET_CRC uint32_t crc32_copy_armv8(uint32_t crc, uint8_t *dst, const uint8_t *src, size_t len) {
#if OPTIMAL_CMP >= 32
- return crc32_copy_impl(crc, dst, src, len, 1);
+ return crc32_hw_copy_impl(crc, dst, src, len, 1);
#else
/* Without unaligned access, interleaved stores get decomposed into byte ops */
crc = crc32_armv8(crc, src, len);
#include "zbuild.h"
#include "acle_intrins.h"
-Z_FORCEINLINE static Z_TARGET_CRC uint32_t crc32_armv8_align(uint32_t crc, uint8_t **dst, const uint8_t **buf,
- size_t *len, uintptr_t align_diff, const int COPY) {
- if (*len && (align_diff & 1)) {
- uint8_t val = **buf;
- if (COPY) {
- **dst = val;
- *dst += 1;
- }
- crc = __crc32b(crc, val);
- *buf += 1;
- *len -= 1;
- }
+#define CRC32B(crc, val) __crc32b((crc), (val)) /* step applied to uint8_t values by the shared crc32_hw_* code */
+#define CRC32H(crc, val) __crc32h((crc), (val)) /* step applied to uint16_t values */
+#define CRC32W(crc, val) __crc32w((crc), (val)) /* step applied to uint32_t values */
+#define CRC32D(crc, val) __crc32d((crc), (val)) /* step applied to uint64_t values; must be defined before the shared template is included */
- if (*len >= 2 && (align_diff & 2)) {
- uint16_t val = *((uint16_t*)*buf);
- if (COPY) {
- memcpy(*dst, &val, 2);
- *dst += 2;
- }
- crc = __crc32h(crc, val);
- *buf += 2;
- *len -= 2;
- }
-
- if (*len >= 4 && (align_diff & 4)) {
- uint32_t val = *((uint32_t*)*buf);
- if (COPY) {
- memcpy(*dst, &val, 4);
- *dst += 4;
- }
- crc = __crc32w(crc, val);
- *buf += 4;
- *len -= 4;
- }
-
- if (*len >= 8 && (align_diff & 8)) {
- uint64_t val = *((uint64_t*)*buf);
- if (COPY) {
- memcpy(*dst, &val, 8);
- *dst += 8;
- }
- crc = __crc32d(crc, val);
- *buf += 8;
- *len -= 8;
- }
-
- return crc;
-}
-
-Z_FORCEINLINE static Z_TARGET_CRC uint32_t crc32_armv8_tail(uint32_t crc, uint8_t *dst, const uint8_t *buf,
- size_t len, const int COPY) {
- while (len >= 8) {
- uint64_t val = *((uint64_t*)buf);
- if (COPY) {
- memcpy(dst, &val, 8);
- dst += 8;
- }
- crc = __crc32d(crc, val);
- buf += 8;
- len -= 8;
- }
-
- if (len & 4) {
- uint32_t val = *((uint32_t*)buf);
- if (COPY) {
- memcpy(dst, &val, 4);
- dst += 4;
- }
- crc = __crc32w(crc, val);
- buf += 4;
- }
-
- if (len & 2) {
- uint16_t val = *((uint16_t*)buf);
- if (COPY) {
- memcpy(dst, &val, 2);
- dst += 2;
- }
- crc = __crc32h(crc, val);
- buf += 2;
- }
-
- if (len & 1) {
- uint8_t val = *buf;
- if (COPY)
- *dst = val;
- crc = __crc32b(crc, val);
- }
-
- return ~crc;
-}
+#include "arch/shared/crc32_hw_common_tpl.h"
#endif /* CRC32_ARMV8_P_H */
/* Align to 16-byte boundary for vector path */
uintptr_t align_diff = ALIGN_DIFF(src, 16);
if (align_diff)
- crc0 = crc32_armv8_align(crc0, &dst, &src, &len, align_diff, COPY);
+ crc0 = crc32_hw_align(crc0, &dst, &src, &len, align_diff, COPY);
/* 3-way scalar CRC + 9-way PMULL folding (192 bytes/iter) */
if (len >= 192) {
}
/* Process remaining bytes */
- return crc32_armv8_tail(crc0, dst, src, len, COPY);
+ return crc32_hw_tail(crc0, dst, src, len, COPY);
}
Z_INTERNAL Z_TARGET_PMULL_EOR3 uint32_t crc32_armv8_pmull_eor3(uint32_t crc, const uint8_t *buf, size_t len) {
--- /dev/null
+/* crc32_hw_common_tpl.h -- Private shared inline CRC32 functions for CPUs with native CRC instructions
+ * For conditions of distribution and use, see copyright notice in zlib.h
+ */
+
+#include "zbuild.h"
+
+/*
+ * crc32_hw_align: consume leading bytes of *buf in 1-, 2-, 4- and 8-byte
+ * steps, one step per set bit of align_diff (bits 0 to 3), folding each piece
+ * into crc with the CRC32B, CRC32H, CRC32W and CRC32D macros that the
+ * including file must define before including this header.
+ *
+ *   crc         running CRC value; it is neither inverted on entry nor on exit
+ *   dst         in/out destination cursor; only written and advanced when COPY is non-zero
+ *   buf         in/out source cursor; advanced past every byte consumed
+ *   len         in/out remaining byte count; reduced by every byte consumed
+ *   align_diff  value the callers obtain from ALIGN_DIFF(src, 8) or ALIGN_DIFF(src, 16);
+ *               presumably the byte distance to that boundary -- confirm against ALIGN_DIFF
+ *   COPY        non-zero to also store the consumed bytes through *dst
+ *
+ * Returns the updated CRC. A step is skipped when fewer bytes remain in *len
+ * than the step needs, so short inputs may leave *buf short of the boundary.
+ */
+Z_FORCEINLINE static Z_TARGET_CRC uint32_t crc32_hw_align(uint32_t crc, uint8_t **dst, const uint8_t **buf,
+ size_t *len, uintptr_t align_diff, const int COPY) {
+ if (*len && (align_diff & 1)) { /* bit 0 set: take a single byte */
+ uint8_t val = **buf;
+ if (COPY) {
+ **dst = val;
+ *dst += 1;
+ }
+ crc = CRC32B(crc, val);
+ *buf += 1;
+ *len -= 1;
+ }
+
+ if (*len >= 2 && (align_diff & 2)) { /* bit 1 set: take a 16-bit piece */
+ uint16_t val = *((uint16_t*)*buf); /* direct load through a cast pointer; NOTE(review): assumes *buf is suitably aligned at this point -- confirm */
+ if (COPY) {
+ memcpy(*dst, &val, 2); /* stores go through memcpy, presumably because dst alignment is unknown */
+ *dst += 2;
+ }
+ crc = CRC32H(crc, val);
+ *buf += 2;
+ *len -= 2;
+ }
+
+ if (*len >= 4 && (align_diff & 4)) { /* bit 2 set: take a 32-bit piece */
+ uint32_t val = *((uint32_t*)*buf);
+ if (COPY) {
+ memcpy(*dst, &val, 4);
+ *dst += 4;
+ }
+ crc = CRC32W(crc, val);
+ *buf += 4;
+ *len -= 4;
+ }
+
+ if (*len >= 8 && (align_diff & 8)) { /* bit 3 set: take a 64-bit piece (only reachable when aligning to 16) */
+ uint64_t val = *((uint64_t*)*buf);
+ if (COPY) {
+ memcpy(*dst, &val, 8);
+ *dst += 8;
+ }
+ crc = CRC32D(crc, val);
+ *buf += 8;
+ *len -= 8;
+ }
+
+ return crc;
+}
+/*
+ * crc32_hw_tail: fold the remaining len bytes at buf into crc and return the
+ * finished checksum. Whole 8-byte words are consumed first, then at most one
+ * 4-byte, one 2-byte and one 1-byte piece, selected by bits 2, 1 and 0 of the
+ * leftover length. When COPY is non-zero each piece is also stored to dst.
+ *
+ * The return value is the bitwise complement of the accumulated CRC: the
+ * visible callers pass in their still-inverted working value and return the
+ * result of this function unchanged.
+ *
+ * Uses the CRC32B, CRC32H, CRC32W and CRC32D macros that the including file
+ * must define before including this header.
+ */
+Z_FORCEINLINE static Z_TARGET_CRC uint32_t crc32_hw_tail(uint32_t crc, uint8_t *dst, const uint8_t *buf,
+ size_t len, const int COPY) {
+ while (len >= 8) { /* bulk of the tail, one 64-bit word per iteration */
+ uint64_t val = *((uint64_t*)buf); /* direct load through a cast pointer; NOTE(review): assumes the caller left buf suitably aligned -- confirm */
+ if (COPY) {
+ memcpy(dst, &val, 8);
+ dst += 8;
+ }
+ crc = CRC32D(crc, val);
+ buf += 8;
+ len -= 8;
+ }
+
+ if (len & 4) { /* len is below 8 here, so its low three bits name the leftover pieces */
+ uint32_t val = *((uint32_t*)buf);
+ if (COPY) {
+ memcpy(dst, &val, 4);
+ dst += 4;
+ }
+ crc = CRC32W(crc, val);
+ buf += 4;
+ }
+
+ if (len & 2) {
+ uint16_t val = *((uint16_t*)buf);
+ if (COPY) {
+ memcpy(dst, &val, 2);
+ dst += 2;
+ }
+ crc = CRC32H(crc, val);
+ buf += 2;
+ }
+
+ if (len & 1) { /* last byte: no cursor update needed afterwards */
+ uint8_t val = *buf;
+ if (COPY)
+ *dst = val;
+ crc = CRC32B(crc, val);
+ }
+
+ return ~crc; /* final inversion happens here, not in the caller */
+}
--- /dev/null
+/* crc32_hw_copy_impl_tpl.h -- compute the CRC-32 of a data stream on CPUs with native CRC instructions
+ * Copyright (C) 1995-2006, 2010, 2011, 2012 Mark Adler
+ * Copyright (C) 2016 Yang Zhang
+ * For conditions of distribution and use, see copyright notice in zlib.h
+ */
+
+#include "zbuild.h"
+
+/*
+ * crc32_hw_copy_impl: compute the CRC-32 of len bytes at src, continuing from
+ * crc, and when COPY is non-zero also copy those bytes to dst.
+ *
+ *   crc   starting CRC value; it is inverted on entry and the result is
+ *         inverted again before being returned
+ *   dst   destination buffer; only accessed when COPY is non-zero, so the
+ *         checksum-only caller passes NULL
+ *   src   input bytes
+ *   len   number of input bytes
+ *   COPY  constant flag selecting checksum-and-copy (non-zero) or checksum only (0)
+ *
+ * Processing order: a one-byte fast path, then crc32_hw_align up to an 8-byte
+ * boundary, then 64 bytes per loop iteration, then crc32_hw_tail for whatever
+ * is left. Requires CRC32B and CRC32D, plus crc32_hw_align and crc32_hw_tail,
+ * to be provided by the including file.
+ */
+Z_FORCEINLINE static Z_TARGET_CRC uint32_t crc32_hw_copy_impl(uint32_t crc, uint8_t *dst, const uint8_t *src, size_t len,
+ const int COPY) {
+ uint32_t c = ~crc; /* work on the inverted value; every return path inverts it back */
+
+ if (UNLIKELY(len == 1)) { /* single byte: skip the alignment step and the loop entirely */
+ if (COPY)
+ *dst = *src;
+ c = CRC32B(c, *src);
+ return ~c;
+ }
+
+ /* Align to 8-byte boundary for tail processing */
+ uintptr_t align_diff = ALIGN_DIFF(src, 8);
+ if (align_diff)
+ c = crc32_hw_align(c, &dst, &src, &len, align_diff, COPY); /* advances dst and src and shrinks len in place */
+
+ while (len >= 64) { /* main loop: eight 64-bit words per iteration */
+ uint64_t d0 = *(const uint64_t *)src; /* all eight words are loaded before any store or CRC step */
+ uint64_t d1 = *(const uint64_t *)(src + 8);
+ uint64_t d2 = *(const uint64_t *)(src + 16);
+ uint64_t d3 = *(const uint64_t *)(src + 24);
+ uint64_t d4 = *(const uint64_t *)(src + 32);
+ uint64_t d5 = *(const uint64_t *)(src + 40);
+ uint64_t d6 = *(const uint64_t *)(src + 48);
+ uint64_t d7 = *(const uint64_t *)(src + 56);
+
+ if (COPY) { /* stores use memcpy from the already-loaded words, so src is read only once */
+ memcpy(dst, &d0, 8);
+ memcpy(dst + 8, &d1, 8);
+ memcpy(dst + 16, &d2, 8);
+ memcpy(dst + 24, &d3, 8);
+ memcpy(dst + 32, &d4, 8);
+ memcpy(dst + 40, &d5, 8);
+ memcpy(dst + 48, &d6, 8);
+ memcpy(dst + 56, &d7, 8);
+ dst += 64;
+ }
+
+ c = CRC32D(c, d0); /* each step consumes the previous result, so the words are folded in input order */
+ c = CRC32D(c, d1);
+ c = CRC32D(c, d2);
+ c = CRC32D(c, d3);
+ c = CRC32D(c, d4);
+ c = CRC32D(c, d5);
+ c = CRC32D(c, d6);
+ c = CRC32D(c, d7);
+
+ src += 64;
+ len -= 64;
+ }
+
+ return crc32_hw_tail(c, dst, src, len, COPY); /* folds the last len < 64 bytes and applies the final inversion */
+}