#endif
#include "../../zbuild.h"
-uint32_t crc32_acle(uint32_t crc, const uint8_t *buf, uint64_t len) {
+uint32_t crc32_acle(uint32_t crc, const uint8_t *buf, size_t len) {
Z_REGISTER uint32_t c;
Z_REGISTER const uint16_t *buf2;
Z_REGISTER const uint32_t *buf4;
static unsigned int ALIGNED_(32) __crc32_vpmsum(unsigned int crc, const void* p, unsigned long len);
-Z_INTERNAL uint32_t crc32_power8(uint32_t crc, const unsigned char *p, uint64_t _len) {
+Z_INTERNAL uint32_t crc32_power8(uint32_t crc, const unsigned char *p, size_t _len) {
unsigned int prealign;
unsigned int tail;
typedef unsigned int uv4si __attribute__((vector_size(16)));
typedef unsigned long long uv2di __attribute__((vector_size(16)));
-static uint32_t crc32_le_vgfm_16(uint32_t crc, const uint8_t *buf, uint64_t len) {
+static uint32_t crc32_le_vgfm_16(uint32_t crc, const uint8_t *buf, size_t len) {
/*
* The CRC-32 constant block contains reduction constants to fold and
* process particular chunks of the input data stream in parallel.
#define VX_ALIGNMENT 16L
#define VX_ALIGN_MASK (VX_ALIGNMENT - 1)
-uint32_t Z_INTERNAL PREFIX(s390_crc32_vx)(uint32_t crc, const unsigned char *buf, uint64_t len) {
- uint64_t prealign, aligned, remaining;
+uint32_t Z_INTERNAL PREFIX(s390_crc32_vx)(uint32_t crc, const unsigned char *buf, size_t len) {
+ size_t prealign, aligned, remaining;
if (len < VX_MIN_LEN + VX_ALIGN_MASK)
return PREFIX(crc32_braid)(crc, buf, len);
aligned = len & ~VX_ALIGN_MASK;
remaining = len & VX_ALIGN_MASK;
- crc = crc32_le_vgfm_16(crc ^ 0xffffffff, buf, (size_t)aligned) ^ 0xffffffff;
+ crc = crc32_le_vgfm_16(crc ^ 0xffffffff, buf, aligned) ^ 0xffffffff;
if (remaining)
crc = PREFIX(crc32_braid)(crc, buf + aligned, remaining);
#include <assert.h>
#ifdef X86_VPCLMULQDQ_CRC
-extern uint64_t fold_16_vpclmulqdq(__m128i *xmm_crc0, __m128i *xmm_crc1,
- __m128i *xmm_crc2, __m128i *xmm_crc3, const uint8_t *src, uint64_t len, __m128i init_crc,
+extern size_t fold_16_vpclmulqdq(__m128i *xmm_crc0, __m128i *xmm_crc1,
+ __m128i *xmm_crc2, __m128i *xmm_crc3, const uint8_t *src, size_t len, __m128i init_crc,
int32_t first);
-extern uint64_t fold_16_vpclmulqdq_copy(__m128i *xmm_crc0, __m128i *xmm_crc1,
- __m128i *xmm_crc2, __m128i *xmm_crc3, uint8_t *dst, const uint8_t *src, uint64_t len);
+extern size_t fold_16_vpclmulqdq_copy(__m128i *xmm_crc0, __m128i *xmm_crc1,
+ __m128i *xmm_crc2, __m128i *xmm_crc3, uint8_t *dst, const uint8_t *src, size_t len);
#endif
static void fold_1(__m128i *xmm_crc0, __m128i *xmm_crc1, __m128i *xmm_crc2, __m128i *xmm_crc3) {
0x0201008f, 0x06050403, 0x0a090807, 0x0e0d0c0b /* shl 1 (16 -15)/shr15*/
};
-static void partial_fold(const uint64_t len, __m128i *xmm_crc0, __m128i *xmm_crc1, __m128i *xmm_crc2,
+static void partial_fold(const size_t len, __m128i *xmm_crc0, __m128i *xmm_crc1, __m128i *xmm_crc2,
__m128i *xmm_crc3, __m128i *xmm_crc_part) {
const __m128i xmm_fold4 = _mm_set_epi32( 0x00000001, 0x54442bd4,
return crc->value;
}
-Z_INTERNAL uint32_t crc32_pclmulqdq(uint32_t crc32, const uint8_t *buf, uint64_t len) {
+Z_INTERNAL uint32_t crc32_pclmulqdq(uint32_t crc32, const uint8_t *buf, size_t len) {
/* For lens < 64, crc32_braid method is faster. The CRC32 instruction for
* these short lengths might also prove to be effective */
if (len < 64)
#ifdef X86_PCLMULQDQ_CRC
#ifdef COPY
-Z_INTERNAL void crc32_fold_pclmulqdq_copy(crc32_fold *crc, uint8_t *dst, const uint8_t *src, uint64_t len) {
+Z_INTERNAL void crc32_fold_pclmulqdq_copy(crc32_fold *crc, uint8_t *dst, const uint8_t *src, size_t len) {
#else
-Z_INTERNAL void crc32_fold_pclmulqdq(crc32_fold *crc, const uint8_t *src, uint64_t len, uint32_t init_crc) {
+Z_INTERNAL void crc32_fold_pclmulqdq(crc32_fold *crc, const uint8_t *src, size_t len, uint32_t init_crc) {
#endif
unsigned long algn_diff;
__m128i xmm_t0, xmm_t1, xmm_t2, xmm_t3;
if (len == 0)
return;
- memcpy(partial_buf, src, (size_t)len);
+ memcpy(partial_buf, src, len);
xmm_crc_part = _mm_load_si128((const __m128i *)partial_buf);
- memcpy(dst, partial_buf, (size_t)len);
+ memcpy(dst, partial_buf, len);
#endif
goto partial;
}
#ifdef X86_VPCLMULQDQ_CRC
if (x86_cpu_has_vpclmulqdq && x86_cpu_has_avx512 && (len >= 256)) {
#ifdef COPY
- uint64_t n = fold_16_vpclmulqdq_copy(&xmm_crc0, &xmm_crc1, &xmm_crc2, &xmm_crc3, dst, src, len);
+ size_t n = fold_16_vpclmulqdq_copy(&xmm_crc0, &xmm_crc1, &xmm_crc2, &xmm_crc3, dst, src, len);
dst += n;
#else
- uint64_t n = fold_16_vpclmulqdq(&xmm_crc0, &xmm_crc1, &xmm_crc2, &xmm_crc3, src, len,
+ size_t n = fold_16_vpclmulqdq(&xmm_crc0, &xmm_crc1, &xmm_crc2, &xmm_crc3, src, len,
xmm_initial, first);
first = 0;
#endif
partial:
if (len) {
- memcpy(&xmm_crc_part, src, (size_t)len);
+ memcpy(&xmm_crc_part, src, len);
#ifdef COPY
_mm_storeu_si128((__m128i *)partial_buf, xmm_crc_part);
- memcpy(dst, partial_buf, (size_t)len);
+ memcpy(dst, partial_buf, len);
#endif
- partial_fold((size_t)len, &xmm_crc0, &xmm_crc1, &xmm_crc2, &xmm_crc3, &xmm_crc_part);
+ partial_fold(len, &xmm_crc0, &xmm_crc1, &xmm_crc2, &xmm_crc3, &xmm_crc_part);
}
crc32_fold_save((__m128i *)crc->fold, &xmm_crc0, &xmm_crc1, &xmm_crc2, &xmm_crc3);
*/
#ifdef COPY
-Z_INTERNAL uint64_t fold_16_vpclmulqdq_copy(__m128i *xmm_crc0, __m128i *xmm_crc1,
- __m128i *xmm_crc2, __m128i *xmm_crc3, uint8_t *dst, const uint8_t *src, uint64_t len) {
+Z_INTERNAL size_t fold_16_vpclmulqdq_copy(__m128i *xmm_crc0, __m128i *xmm_crc1,
+ __m128i *xmm_crc2, __m128i *xmm_crc3, uint8_t *dst, const uint8_t *src, size_t len) {
#else
-Z_INTERNAL uint64_t fold_16_vpclmulqdq(__m128i *xmm_crc0, __m128i *xmm_crc1,
- __m128i *xmm_crc2, __m128i *xmm_crc3, const uint8_t *src, uint64_t len,
+Z_INTERNAL size_t fold_16_vpclmulqdq(__m128i *xmm_crc0, __m128i *xmm_crc1,
+ __m128i *xmm_crc2, __m128i *xmm_crc3, const uint8_t *src, size_t len,
__m128i init_crc, int32_t first) {
__m512i zmm_initial = _mm512_zextsi128_si512(init_crc);
#endif
__m512i zmm_t0, zmm_t1, zmm_t2, zmm_t3;
__m512i zmm_crc0, zmm_crc1, zmm_crc2, zmm_crc3;
__m512i z0, z1, z2, z3;
- uint64_t len_tmp = len;
+ size_t len_tmp = len;
const __m512i zmm_fold4 = _mm512_set4_epi32(
0x00000001, 0x54442bd4, 0x00000001, 0xc6e41596);
const __m512i zmm_fold16 = _mm512_set4_epi32(
/* CRC32 folding */
#ifdef X86_PCLMULQDQ_CRC
extern uint32_t crc32_fold_pclmulqdq_reset(crc32_fold *crc);
-extern void crc32_fold_pclmulqdq_copy(crc32_fold *crc, uint8_t *dst, const uint8_t *src, uint64_t len);
-extern void crc32_fold_pclmulqdq(crc32_fold *crc, const uint8_t *src, uint64_t len, uint32_t init_crc);
+extern void crc32_fold_pclmulqdq_copy(crc32_fold *crc, uint8_t *dst, const uint8_t *src, size_t len);
+extern void crc32_fold_pclmulqdq(crc32_fold *crc, const uint8_t *src, size_t len, uint32_t init_crc);
extern uint32_t crc32_fold_pclmulqdq_final(crc32_fold *crc);
-extern uint32_t crc32_pclmulqdq(uint32_t crc32, const uint8_t *buf, uint64_t len);
+extern uint32_t crc32_pclmulqdq(uint32_t crc32, const uint8_t *buf, size_t len);
#endif
/* memory chunking */
#endif
/* CRC32 */
-typedef uint32_t (*crc32_func)(uint32_t crc32, const uint8_t *buf, uint64_t len);
+typedef uint32_t (*crc32_func)(uint32_t crc32, const uint8_t *buf, size_t len);
-extern uint32_t PREFIX(crc32_braid)(uint32_t crc, const uint8_t *buf, uint64_t len);
+extern uint32_t PREFIX(crc32_braid)(uint32_t crc, const uint8_t *buf, size_t len);
#ifdef ARM_ACLE_CRC_HASH
-extern uint32_t crc32_acle(uint32_t crc, const uint8_t *buf, uint64_t len);
+extern uint32_t crc32_acle(uint32_t crc, const uint8_t *buf, size_t len);
#elif defined(POWER8_VSX_CRC32)
-extern uint32_t crc32_power8(uint32_t crc, const uint8_t *buf, uint64_t len);
+extern uint32_t crc32_power8(uint32_t crc, const uint8_t *buf, size_t len);
#elif defined(S390_CRC32_VX)
-extern uint32_t PREFIX(s390_crc32_vx)(uint32_t crc, const uint8_t *buf, uint64_t len);
+extern uint32_t PREFIX(s390_crc32_vx)(uint32_t crc, const uint8_t *buf, size_t len);
#endif
/* compare256 */
#endif /* W */
/* ========================================================================= */
-Z_INTERNAL uint32_t PREFIX(crc32_braid)(uint32_t crc, const uint8_t *buf, uint64_t len) {
+Z_INTERNAL uint32_t PREFIX(crc32_braid)(uint32_t crc, const uint8_t *buf, size_t len) {
Z_REGISTER uint32_t c;
/* Pre-condition the CRC */
#ifdef W
/* If provided enough bytes, do a braided CRC calculation. */
if (len >= N * W + W - 1) {
- uint64_t blks;
+ size_t blks;
z_word_t const *words;
int k;
/* CRC polynomial. */
#define POLY 0xedb88320 /* p(x) reflected, with x^32 implied */
-extern uint32_t PREFIX(crc32_braid)(uint32_t crc, const uint8_t *buf, uint64_t len);
+extern uint32_t PREFIX(crc32_braid)(uint32_t crc, const uint8_t *buf, size_t len);
#endif /* CRC32_BRAID_P_H_ */
return crc->value;
}
-Z_INTERNAL void crc32_fold_copy_c(crc32_fold *crc, uint8_t *dst, const uint8_t *src, uint64_t len) {
+Z_INTERNAL void crc32_fold_copy_c(crc32_fold *crc, uint8_t *dst, const uint8_t *src, size_t len) {
crc->value = functable.crc32(crc->value, src, len);
-/* Test that we don't try to copy more than actually fits in available address space */
-#if INTPTR_MAX > SSIZE_MAX
- while (len > SSIZE_MAX) {
- memcpy(dst, src, SSIZE_MAX);
- dst += SSIZE_MAX;
- src += SSIZE_MAX;
- len -= SSIZE_MAX;
- }
-#endif
- if (len) {
- memcpy(dst, src, (size_t)len);
- }
+ memcpy(dst, src, len);
}
-Z_INTERNAL void crc32_fold_c(crc32_fold *crc, const uint8_t *src, uint64_t len, uint32_t init_crc) {
+Z_INTERNAL void crc32_fold_c(crc32_fold *crc, const uint8_t *src, size_t len, uint32_t init_crc) {
/* Note: while this is basically the same thing as the vanilla CRC function, we still need
* a functable entry for it so that we can generically dispatch to this function with the
* same arguments for the versions that _do_ do a folding CRC but we don't want a copy. The
} crc32_fold;
Z_INTERNAL uint32_t crc32_fold_reset_c(crc32_fold *crc);
-Z_INTERNAL void crc32_fold_copy_c(crc32_fold *crc, uint8_t *dst, const uint8_t *src, uint64_t len);
-Z_INTERNAL void crc32_fold_c(crc32_fold *crc, const uint8_t *src, uint64_t len, uint32_t init_crc);
+Z_INTERNAL void crc32_fold_copy_c(crc32_fold *crc, uint8_t *dst, const uint8_t *src, size_t len);
+Z_INTERNAL void crc32_fold_c(crc32_fold *crc, const uint8_t *src, size_t len, uint32_t init_crc);
Z_INTERNAL uint32_t crc32_fold_final_c(crc32_fold *crc);
#endif
return functable.crc32_fold_reset(crc);
}
-Z_INTERNAL void crc32_fold_copy_stub(crc32_fold *crc, uint8_t *dst, const uint8_t *src, uint64_t len) {
+Z_INTERNAL void crc32_fold_copy_stub(crc32_fold *crc, uint8_t *dst, const uint8_t *src, size_t len) {
functable.crc32_fold_copy = &crc32_fold_copy_c;
cpu_check_features();
#ifdef X86_PCLMULQDQ_CRC
functable.crc32_fold_copy(crc, dst, src, len);
}
-Z_INTERNAL void crc32_fold_stub(crc32_fold *crc, const uint8_t *src, uint64_t len, uint32_t init_crc) {
+Z_INTERNAL void crc32_fold_stub(crc32_fold *crc, const uint8_t *src, size_t len, uint32_t init_crc) {
functable.crc32_fold = &crc32_fold_c;
cpu_check_features();
#ifdef X86_PCLMULQDQ_CRC
return functable.chunkmemset_safe(out, dist, len, left);
}
-Z_INTERNAL uint32_t crc32_stub(uint32_t crc, const uint8_t *buf, uint64_t len) {
- Assert(sizeof(uint64_t) >= sizeof(size_t),
- "crc32_z takes size_t but internally we have a uint64_t len");
-
+Z_INTERNAL uint32_t crc32_stub(uint32_t crc, const uint8_t *buf, size_t len) {
functable.crc32 = &PREFIX(crc32_braid);
cpu_check_features();
#ifdef ARM_ACLE_CRC_HASH
struct functable_s {
uint32_t (* adler32) (uint32_t adler, const uint8_t *buf, uint64_t len);
uint32_t (* adler32_fold_copy) (uint32_t adler, uint8_t *dst, const uint8_t *src, uint64_t len);
- uint32_t (* crc32) (uint32_t crc, const uint8_t *buf, uint64_t len);
+ uint32_t (* crc32) (uint32_t crc, const uint8_t *buf, size_t len);
uint32_t (* crc32_fold_reset) (struct crc32_fold_s *crc);
- void (* crc32_fold_copy) (struct crc32_fold_s *crc, uint8_t *dst, const uint8_t *src, uint64_t len);
- void (* crc32_fold) (struct crc32_fold_s *crc, const uint8_t *src, uint64_t len, uint32_t init_crc);
+ void (* crc32_fold_copy) (struct crc32_fold_s *crc, uint8_t *dst, const uint8_t *src, size_t len);
+ void (* crc32_fold) (struct crc32_fold_s *crc, const uint8_t *src, size_t len, uint32_t init_crc);
uint32_t (* crc32_fold_final) (struct crc32_fold_s *crc);
uint32_t (* compare256) (const uint8_t *src0, const uint8_t *src1);
uint32_t (* chunksize) (void);