typedef unsigned int uv4si __attribute__((vector_size(16)));
typedef unsigned long long uv2di __attribute__((vector_size(16)));
-static uint32_t crc32_le_vgfm_16(uint32_t crc, const unsigned char *buf, size_t len) {
+static uint32_t crc32_le_vgfm_16(uint32_t crc, const unsigned char *buf, uint64_t len) {
/*
* The CRC-32 constant block contains reduction constants to fold and
* process particular chunks of the input data stream in parallel.
#include <assert.h>
#ifdef X86_VPCLMULQDQ_CRC
-extern size_t fold_16_vpclmulqdq(__m128i *xmm_crc0, __m128i *xmm_crc1,
- __m128i *xmm_crc2, __m128i *xmm_crc3, const uint8_t *src, size_t len, __m128i init_crc,
+extern uint64_t fold_16_vpclmulqdq(__m128i *xmm_crc0, __m128i *xmm_crc1,
+ __m128i *xmm_crc2, __m128i *xmm_crc3, const uint8_t *src, uint64_t len, __m128i init_crc,
int32_t first);
-extern size_t fold_16_vpclmulqdq_copy(__m128i *xmm_crc0, __m128i *xmm_crc1,
- __m128i *xmm_crc2, __m128i *xmm_crc3, uint8_t *dst, const uint8_t *src, size_t len);
+extern uint64_t fold_16_vpclmulqdq_copy(__m128i *xmm_crc0, __m128i *xmm_crc1,
+ __m128i *xmm_crc2, __m128i *xmm_crc3, uint8_t *dst, const uint8_t *src, uint64_t len);
#endif
static void fold_1(__m128i *xmm_crc0, __m128i *xmm_crc1, __m128i *xmm_crc2, __m128i *xmm_crc3) {
0x0201008f, 0x06050403, 0x0a090807, 0x0e0d0c0b /* shl 1 (16 -15)/shr15*/
};
-static void partial_fold(const size_t len, __m128i *xmm_crc0, __m128i *xmm_crc1, __m128i *xmm_crc2,
+static void partial_fold(const uint64_t len, __m128i *xmm_crc0, __m128i *xmm_crc1, __m128i *xmm_crc2,
__m128i *xmm_crc3, __m128i *xmm_crc_part) {
const __m128i xmm_fold4 = _mm_set_epi32( 0x00000001, 0x54442bd4,
#ifdef X86_PCLMULQDQ_CRC
#ifdef COPY
-Z_INTERNAL void crc32_fold_pclmulqdq_copy(crc32_fold *crc, uint8_t *dst, const uint8_t *src, size_t len) {
+Z_INTERNAL void crc32_fold_pclmulqdq_copy(crc32_fold *crc, uint8_t *dst, const uint8_t *src, uint64_t len) {
#else
-Z_INTERNAL void crc32_fold_pclmulqdq(crc32_fold *crc, const uint8_t *src, size_t len, uint32_t init_crc) {
+Z_INTERNAL void crc32_fold_pclmulqdq(crc32_fold *crc, const uint8_t *src, uint64_t len, uint32_t init_crc) {
#endif
unsigned long algn_diff;
__m128i xmm_t0, xmm_t1, xmm_t2, xmm_t3;
if (len == 0)
return;
- memcpy(partial_buf, src, len);
+ memcpy(partial_buf, src, (size_t)len);
xmm_crc_part = _mm_load_si128((const __m128i *)partial_buf);
- memcpy(dst, partial_buf, len);
+ memcpy(dst, partial_buf, (size_t)len);
#endif
goto partial;
}
#ifdef X86_VPCLMULQDQ_CRC
if (x86_cpu_has_vpclmulqdq && x86_cpu_has_avx512 && (len >= 256)) {
#ifdef COPY
- size_t n = fold_16_vpclmulqdq_copy(&xmm_crc0, &xmm_crc1, &xmm_crc2, &xmm_crc3, dst, src, len);
+ uint64_t n = fold_16_vpclmulqdq_copy(&xmm_crc0, &xmm_crc1, &xmm_crc2, &xmm_crc3, dst, src, len);
dst += n;
#else
- size_t n = fold_16_vpclmulqdq(&xmm_crc0, &xmm_crc1, &xmm_crc2, &xmm_crc3, src, len,
+ uint64_t n = fold_16_vpclmulqdq(&xmm_crc0, &xmm_crc1, &xmm_crc2, &xmm_crc3, src, len,
xmm_initial, first);
first = 0;
#endif
partial:
if (len) {
- memcpy(&xmm_crc_part, src, len);
+ memcpy(&xmm_crc_part, src, (size_t)len);
#ifdef COPY
_mm_storeu_si128((__m128i *)partial_buf, xmm_crc_part);
- memcpy(dst, partial_buf, len);
+ memcpy(dst, partial_buf, (size_t)len);
#endif
partial_fold((size_t)len, &xmm_crc0, &xmm_crc1, &xmm_crc2, &xmm_crc3, &xmm_crc_part);
}
*/
#ifdef COPY
-size_t fold_16_vpclmulqdq_copy(__m128i *xmm_crc0, __m128i *xmm_crc1,
- __m128i *xmm_crc2, __m128i *xmm_crc3, uint8_t *dst, const uint8_t *src, size_t len) {
+uint64_t fold_16_vpclmulqdq_copy(__m128i *xmm_crc0, __m128i *xmm_crc1,
+ __m128i *xmm_crc2, __m128i *xmm_crc3, uint8_t *dst, const uint8_t *src, uint64_t len) {
#else
-size_t fold_16_vpclmulqdq(__m128i *xmm_crc0, __m128i *xmm_crc1,
- __m128i *xmm_crc2, __m128i *xmm_crc3, const uint8_t *src, size_t len,
+uint64_t fold_16_vpclmulqdq(__m128i *xmm_crc0, __m128i *xmm_crc1,
+ __m128i *xmm_crc2, __m128i *xmm_crc3, const uint8_t *src, uint64_t len,
__m128i init_crc, int32_t first) {
__m512i zmm_initial = _mm512_zextsi128_si512(init_crc);
#endif
__m512i zmm_t0, zmm_t1, zmm_t2, zmm_t3;
__m512i zmm_crc0, zmm_crc1, zmm_crc2, zmm_crc3;
__m512i z0, z1, z2, z3;
- size_t len_tmp = len;
+ uint64_t len_tmp = len;
const __m512i zmm_fold4 = _mm512_set4_epi32(
0x00000001, 0x54442bd4, 0x00000001, 0xc6e41596);
const __m512i zmm_fold16 = _mm512_set4_epi32(
/* CRC32 folding */
#ifdef X86_PCLMULQDQ_CRC
extern uint32_t crc32_fold_pclmulqdq_reset(crc32_fold *crc);
-extern void crc32_fold_pclmulqdq_copy(crc32_fold *crc, uint8_t *dst, const uint8_t *src, size_t len);
-extern void crc32_fold_pclmulqdq(crc32_fold *crc, const uint8_t *src, size_t len, uint32_t init_crc);
+extern void crc32_fold_pclmulqdq_copy(crc32_fold *crc, uint8_t *dst, const uint8_t *src, uint64_t len);
+extern void crc32_fold_pclmulqdq(crc32_fold *crc, const uint8_t *src, uint64_t len, uint32_t init_crc);
extern uint32_t crc32_fold_pclmulqdq_final(crc32_fold *crc);
extern uint32_t crc32_pclmulqdq(uint32_t crc32, const unsigned char* buf, uint64_t len);
#endif
return crc->value;
}
-Z_INTERNAL void crc32_fold_copy_c(crc32_fold *crc, uint8_t *dst, const uint8_t *src, size_t len) {
+Z_INTERNAL void crc32_fold_copy_c(crc32_fold *crc, uint8_t *dst, const uint8_t *src, uint64_t len) {
crc->value = functable.crc32(crc->value, src, len);
- memcpy(dst, src, len);
+ while (len > SIZE_MAX) {
+ memcpy(dst, src, SIZE_MAX);
+ dst += SIZE_MAX;
+ src += SIZE_MAX;
+ len -= SIZE_MAX;
+ }
+ if (len) {
+ memcpy(dst, src, (size_t)len);
+ }
}
-Z_INTERNAL void crc32_fold_c(crc32_fold *crc, const uint8_t *src, size_t len, uint32_t init_crc) {
+Z_INTERNAL void crc32_fold_c(crc32_fold *crc, const uint8_t *src, uint64_t len, uint32_t init_crc) {
/* Note: while this is basically the same thing as the vanilla CRC function, we still need
* a functable entry for it so that we can generically dispatch to this function with the
* same arguments for the versions that _do_ do a folding CRC but we don't want a copy. The
} crc32_fold;
Z_INTERNAL uint32_t crc32_fold_reset_c(crc32_fold *crc);
-Z_INTERNAL void crc32_fold_copy_c(crc32_fold *crc, uint8_t *dst, const uint8_t *src, size_t len);
-Z_INTERNAL void crc32_fold_c(crc32_fold *crc, const uint8_t *src, size_t len, uint32_t init_crc);
+Z_INTERNAL void crc32_fold_copy_c(crc32_fold *crc, uint8_t *dst, const uint8_t *src, uint64_t len);
+Z_INTERNAL void crc32_fold_c(crc32_fold *crc, const uint8_t *src, uint64_t len, uint32_t init_crc);
Z_INTERNAL uint32_t crc32_fold_final_c(crc32_fold *crc);
#endif
return functable.crc32_fold_reset(crc);
}
-Z_INTERNAL void crc32_fold_copy_stub(crc32_fold *crc, uint8_t *dst, const uint8_t *src, size_t len) {
+Z_INTERNAL void crc32_fold_copy_stub(crc32_fold *crc, uint8_t *dst, const uint8_t *src, uint64_t len) {
functable.crc32_fold_copy = &crc32_fold_copy_c;
cpu_check_features();
#ifdef X86_PCLMULQDQ_CRC
functable.crc32_fold_copy(crc, dst, src, len);
}
-Z_INTERNAL void crc32_fold_stub(crc32_fold *crc, const uint8_t *src, size_t len, uint32_t init_crc) {
+Z_INTERNAL void crc32_fold_stub(crc32_fold *crc, const uint8_t *src, uint64_t len, uint32_t init_crc) {
functable.crc32_fold = &crc32_fold_c;
cpu_check_features();
#ifdef X86_PCLMULQDQ_CRC
uint32_t (* adler32_fold_copy) (uint32_t adler, uint8_t *dst, const uint8_t *src, size_t len);
uint32_t (* crc32) (uint32_t crc, const unsigned char *buf, uint64_t len);
uint32_t (* crc32_fold_reset) (crc32_fold *crc);
- void (* crc32_fold_copy) (crc32_fold *crc, uint8_t *dst, const uint8_t *src, size_t len);
- void (* crc32_fold) (crc32_fold *crc, const uint8_t *src, size_t len, uint32_t init_crc);
+ void (* crc32_fold_copy) (crc32_fold *crc, uint8_t *dst, const uint8_t *src, uint64_t len);
+ void (* crc32_fold) (crc32_fold *crc, const uint8_t *src, uint64_t len, uint32_t init_crc);
uint32_t (* crc32_fold_final) (crc32_fold *crc);
uint32_t (* compare256) (const uint8_t *src0, const uint8_t *src1);
uint32_t (* chunksize) (void);