#include "acle_intrins.h"
#include "crc32_armv8_p.h"
-Z_INTERNAL Z_TARGET_CRC uint32_t crc32_armv8(uint32_t crc, const uint8_t *buf, size_t len) {
/* crc32_copy_impl: shared body used by crc32_armv8() and crc32_copy_armv8().
 *
 *   crc  - running CRC value; it is bit-inverted on entry.
 *   dst  - destination for the copy; only dereferenced when COPY is non-zero.
 *   src  - input bytes fed to the CRC instructions.
 *   len  - number of input bytes.
 *   COPY - non-zero to also store every chunk read from src into dst.
 *
 * The single-byte path returns the bit-inverted accumulator; every other
 * length returns whatever crc32_armv8_tail() yields.
 */
+Z_FORCEINLINE static Z_TARGET_CRC uint32_t crc32_copy_impl(uint32_t crc, uint8_t *dst, const uint8_t *src, size_t len,
+ const int COPY) {
uint32_t c = ~crc;
/* Single-byte input is handled here, without the align/tail helpers. */
if (UNLIKELY(len == 1)) {
- c = __crc32b(c, *buf);
+ if (COPY)
+ *dst = *src;
+ c = __crc32b(c, *src);
return ~c;
}
/* Align to 8-byte boundary for tail processing */
- uintptr_t align_diff = ALIGN_DIFF(buf, 8);
+ uintptr_t align_diff = ALIGN_DIFF(src, 8);
/* The helper receives dst, src and len by address so that it can advance the
 * pointers and reduce the remaining length in place. */
if (align_diff)
- c = crc32_armv8_align(c, &buf, &len, align_diff);
+ c = crc32_armv8_align(c, &dst, &src, &len, align_diff, COPY);
+
+ return crc32_armv8_tail(c, dst, src, len, COPY);
+}
- return crc32_armv8_tail(c, buf, len);
/* crc32_armv8: CRC-only entry point. Delegates to crc32_copy_impl() with a
 * NULL destination and COPY = 0, so no store through dst is requested. */
+Z_INTERNAL Z_TARGET_CRC uint32_t crc32_armv8(uint32_t crc, const uint8_t *buf, size_t len) {
+ return crc32_copy_impl(crc, NULL, buf, len, 0);
}
/* crc32_copy_armv8: computes the CRC of src[0..len) and copies those bytes to
 * dst, returning the CRC. The removed lines did this as a CRC call followed by
 * a separate memcpy(); the added line hands both jobs to crc32_copy_impl()
 * with COPY = 1. */
Z_INTERNAL Z_TARGET_CRC uint32_t crc32_copy_armv8(uint32_t crc, uint8_t *dst, const uint8_t *src, size_t len) {
- crc = crc32_armv8(crc, src, len);
- memcpy(dst, src, len);
- return crc;
+ return crc32_copy_impl(crc, dst, src, len, 1);
}
#endif
#include "zbuild.h"
#include "acle_intrins.h"
/* crc32_armv8_align: consumes leading input in 1-, 2-, 4- and 8-byte steps.
 *
 * A step of size S runs when bit S of align_diff is set and enough input is
 * left (the 1-byte step needs *len != 0, the others need *len >= S). Each step
 * feeds the loaded value to the matching __crc32b/h/w/d instruction, advances
 * *buf by S and subtracts S from *len. When COPY is non-zero the same value is
 * also stored to *dst and *dst is advanced by S.
 *
 * Returns the updated crc; *buf, *len and (when copying) *dst are updated in
 * place for the caller.
 *
 * NOTE(review): the closing braces of the step conditionals are not part of
 * this excerpt; the steps are presumably sequential rather than nested -
 * confirm against the full file.
 */
-Z_FORCEINLINE static Z_TARGET_CRC uint32_t crc32_armv8_align(uint32_t crc, const uint8_t **buf,
- size_t *len, uintptr_t align_diff) {
+Z_FORCEINLINE static Z_TARGET_CRC uint32_t crc32_armv8_align(uint32_t crc, uint8_t **dst, const uint8_t **buf,
+ size_t *len, uintptr_t align_diff, const int COPY) {
/* 1-byte step */
if (*len && (align_diff & 1)) {
uint8_t val = **buf;
+ if (COPY) {
+ **dst = val;
+ *dst += 1;
+ }
crc = __crc32b(crc, val);
*buf += 1;
*len -= 1;
/* 2-byte step: the source is read through a uint16_t pointer, the destination
 * is written with memcpy() */
if (*len >= 2 && (align_diff & 2)) {
uint16_t val = *((uint16_t*)*buf);
+ if (COPY) {
+ memcpy(*dst, &val, 2);
+ *dst += 2;
+ }
crc = __crc32h(crc, val);
*buf += 2;
*len -= 2;
/* 4-byte step */
if (*len >= 4 && (align_diff & 4)) {
uint32_t val = *((uint32_t*)*buf);
+ if (COPY) {
+ memcpy(*dst, &val, 4);
+ *dst += 4;
+ }
crc = __crc32w(crc, val);
*buf += 4;
*len -= 4;
/* 8-byte step: bit 8 can only be set when the caller aligns to more than
 * 8 bytes */
if (*len >= 8 && (align_diff & 8)) {
uint64_t val = *((uint64_t*)*buf);
+ if (COPY) {
+ memcpy(*dst, &val, 8);
+ *dst += 8;
+ }
crc = __crc32d(crc, val);
*buf += 8;
*len -= 8;
return crc;
}
/* crc32_armv8_tail: processes the remaining len bytes of buf.
 *
 * Input is consumed 8 bytes at a time while at least 8 bytes remain. The
 * leftover is then handled by testing bits 4, 2 and 1 of len, using one 4-byte,
 * one 2-byte and one 1-byte CRC instruction as needed. When COPY is non-zero
 * every value loaded from buf is also stored to dst, which advances in step
 * with buf.
 *
 * NOTE(review): the end of the while body, the return statement and the
 * function's closing brace are not part of this excerpt.
 */
-Z_FORCEINLINE static Z_TARGET_CRC uint32_t crc32_armv8_tail(uint32_t crc, const uint8_t *buf, size_t len) {
+Z_FORCEINLINE static Z_TARGET_CRC uint32_t crc32_armv8_tail(uint32_t crc, uint8_t *dst, const uint8_t *buf,
+ size_t len, const int COPY) {
/* Bulk loop: one 8-byte word per iteration */
while (len >= 8) {
uint64_t val = *((uint64_t*)buf);
+ if (COPY) {
+ memcpy(dst, &val, 8);
+ dst += 8;
+ }
crc = __crc32d(crc, val);
buf += 8;
len -= 8;
/* len is not decremented below; each size is selected by its bit in len */
if (len & 4) {
uint32_t val = *((uint32_t*)buf);
+ if (COPY) {
+ memcpy(dst, &val, 4);
+ dst += 4;
+ }
crc = __crc32w(crc, val);
buf += 4;
}
if (len & 2) {
uint16_t val = *((uint16_t*)buf);
+ if (COPY) {
+ memcpy(dst, &val, 2);
+ dst += 2;
+ }
crc = __crc32h(crc, val);
buf += 2;
}
/* Final byte: the pointers are not advanced afterwards */
if (len & 1) {
uint8_t val = *buf;
+ if (COPY)
+ *dst = val;
crc = __crc32b(crc, val);
}
/* crc32_armv8_pmull_eor3: CRC-only entry point for the PMULL/EOR3 variant.
 *
 * The input is aligned to 16 bytes with crc32_armv8_align(), inputs of at
 * least 192 bytes go through the folding path, and whatever is left is handed
 * to crc32_armv8_tail(). Both helpers are called with COPY = 0.
 *
 * NOTE(review): the end of the len == 1 branch and the body of the folding
 * path are not part of this excerpt.
 */
Z_INTERNAL Z_TARGET_PMULL_EOR3 uint32_t crc32_armv8_pmull_eor3(uint32_t crc, const uint8_t *buf, size_t len) {
uint32_t crc0 = ~crc;
/* Placeholder: crc32_armv8_align() takes a uint8_t ** destination argument.
 * COPY is 0 in this function, so the helper is never asked to store through it. */
+ uint8_t *dst = NULL;
if (UNLIKELY(len == 1)) {
crc0 = __crc32b(crc0, *buf);
/* Align to 16-byte boundary for vector path */
uintptr_t align_diff = ALIGN_DIFF(buf, 16);
if (align_diff)
- crc0 = crc32_armv8_align(crc0, &buf, &len, align_diff);
+ crc0 = crc32_armv8_align(crc0, &dst, &buf, &len, align_diff, 0);
/* 3-way scalar CRC + 9-way PMULL folding (192 bytes/iter) */
if (len >= 192) {
}
/* Process remaining bytes */
- return crc32_armv8_tail(crc0, buf, len);
+ return crc32_armv8_tail(crc0, NULL, buf, len, 0);
}
Z_INTERNAL Z_TARGET_PMULL_EOR3 uint32_t crc32_copy_armv8_pmull_eor3(uint32_t crc, uint8_t *dst, const uint8_t *src, size_t len) {