From: Cameron Cawley Date: Fri, 30 Sep 2022 15:06:42 +0000 (+0100) Subject: Use size_t instead of uint64_t for len in all crc32 functions X-Git-Tag: 2.1.0-beta1~99 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=23e43059328b0ae99f86a61aba6d8652d08e3ccc;p=thirdparty%2Fzlib-ng.git Use size_t instead of uint64_t for len in all crc32 functions --- diff --git a/arch/arm/crc32_acle.c b/arch/arm/crc32_acle.c index ef18ae81..701c9925 100644 --- a/arch/arm/crc32_acle.c +++ b/arch/arm/crc32_acle.c @@ -13,7 +13,7 @@ #endif #include "../../zbuild.h" -uint32_t crc32_acle(uint32_t crc, const uint8_t *buf, uint64_t len) { +uint32_t crc32_acle(uint32_t crc, const uint8_t *buf, size_t len) { Z_REGISTER uint32_t c; Z_REGISTER const uint16_t *buf2; Z_REGISTER const uint32_t *buf4; diff --git a/arch/power/crc32_power8.c b/arch/power/crc32_power8.c index fbb64721..1cb5f299 100644 --- a/arch/power/crc32_power8.c +++ b/arch/power/crc32_power8.c @@ -48,7 +48,7 @@ static unsigned int crc32_align(unsigned int crc, const unsigned char *p, unsign static unsigned int ALIGNED_(32) __crc32_vpmsum(unsigned int crc, const void* p, unsigned long len); -Z_INTERNAL uint32_t crc32_power8(uint32_t crc, const unsigned char *p, uint64_t _len) { +Z_INTERNAL uint32_t crc32_power8(uint32_t crc, const unsigned char *p, size_t _len) { unsigned int prealign; unsigned int tail; diff --git a/arch/s390/crc32-vx.c b/arch/s390/crc32-vx.c index a64d6291..78c0be5c 100644 --- a/arch/s390/crc32-vx.c +++ b/arch/s390/crc32-vx.c @@ -21,7 +21,7 @@ typedef unsigned char uv16qi __attribute__((vector_size(16))); typedef unsigned int uv4si __attribute__((vector_size(16))); typedef unsigned long long uv2di __attribute__((vector_size(16))); -static uint32_t crc32_le_vgfm_16(uint32_t crc, const uint8_t *buf, uint64_t len) { +static uint32_t crc32_le_vgfm_16(uint32_t crc, const uint8_t *buf, size_t len) { /* * The CRC-32 constant block contains reduction constants to fold and * process particular chunks of the input data stream in parallel. @@ -198,8 +198,8 @@ static uint32_t crc32_le_vgfm_16(uint32_t crc, const uint8_t *buf, uint64_t len) #define VX_ALIGNMENT 16L #define VX_ALIGN_MASK (VX_ALIGNMENT - 1) -uint32_t Z_INTERNAL PREFIX(s390_crc32_vx)(uint32_t crc, const unsigned char *buf, uint64_t len) { - uint64_t prealign, aligned, remaining; +uint32_t Z_INTERNAL PREFIX(s390_crc32_vx)(uint32_t crc, const unsigned char *buf, size_t len) { + size_t prealign, aligned, remaining; if (len < VX_MIN_LEN + VX_ALIGN_MASK) return PREFIX(crc32_braid)(crc, buf, len); @@ -213,7 +213,7 @@ uint32_t Z_INTERNAL PREFIX(s390_crc32_vx)(uint32_t crc, const unsigned char *buf aligned = len & ~VX_ALIGN_MASK; remaining = len & VX_ALIGN_MASK; - crc = crc32_le_vgfm_16(crc ^ 0xffffffff, buf, (size_t)aligned) ^ 0xffffffff; + crc = crc32_le_vgfm_16(crc ^ 0xffffffff, buf, aligned) ^ 0xffffffff; if (remaining) crc = PREFIX(crc32_braid)(crc, buf + aligned, remaining); diff --git a/arch/x86/crc32_fold_pclmulqdq.c b/arch/x86/crc32_fold_pclmulqdq.c index c6f04842..ecee0c57 100644 --- a/arch/x86/crc32_fold_pclmulqdq.c +++ b/arch/x86/crc32_fold_pclmulqdq.c @@ -32,11 +32,11 @@ #include #ifdef X86_VPCLMULQDQ_CRC -extern uint64_t fold_16_vpclmulqdq(__m128i *xmm_crc0, __m128i *xmm_crc1, - __m128i *xmm_crc2, __m128i *xmm_crc3, const uint8_t *src, uint64_t len, __m128i init_crc, +extern size_t fold_16_vpclmulqdq(__m128i *xmm_crc0, __m128i *xmm_crc1, + __m128i *xmm_crc2, __m128i *xmm_crc3, const uint8_t *src, size_t len, __m128i init_crc, int32_t first); -extern uint64_t fold_16_vpclmulqdq_copy(__m128i *xmm_crc0, __m128i *xmm_crc1, - __m128i *xmm_crc2, __m128i *xmm_crc3, uint8_t *dst, const uint8_t *src, uint64_t len); +extern size_t fold_16_vpclmulqdq_copy(__m128i *xmm_crc0, __m128i *xmm_crc1, + __m128i *xmm_crc2, __m128i *xmm_crc3, uint8_t *dst, const uint8_t *src, size_t len); #endif static void fold_1(__m128i *xmm_crc0, __m128i *xmm_crc1, __m128i *xmm_crc2, __m128i *xmm_crc3) { @@ -185,7 +185,7 @@ static const unsigned ALIGNED_(32) pshufb_shf_table[60] = { 0x0201008f, 0x06050403, 0x0a090807, 0x0e0d0c0b /* shl 1 (16 -15)/shr15*/ }; -static void partial_fold(const uint64_t len, __m128i *xmm_crc0, __m128i *xmm_crc1, __m128i *xmm_crc2, +static void partial_fold(const size_t len, __m128i *xmm_crc0, __m128i *xmm_crc1, __m128i *xmm_crc2, __m128i *xmm_crc3, __m128i *xmm_crc_part) { const __m128i xmm_fold4 = _mm_set_epi32( 0x00000001, 0x54442bd4, @@ -342,7 +342,7 @@ Z_INTERNAL uint32_t crc32_fold_pclmulqdq_final(crc32_fold *crc) { return crc->value; } -Z_INTERNAL uint32_t crc32_pclmulqdq(uint32_t crc32, const uint8_t *buf, uint64_t len) { +Z_INTERNAL uint32_t crc32_pclmulqdq(uint32_t crc32, const uint8_t *buf, size_t len) { /* For lens < 64, crc32_braid method is faster. The CRC32 instruction for * these short lengths might also prove to be effective */ if (len < 64) diff --git a/arch/x86/crc32_fold_pclmulqdq_tpl.h b/arch/x86/crc32_fold_pclmulqdq_tpl.h index 0799638e..47bbc011 100644 --- a/arch/x86/crc32_fold_pclmulqdq_tpl.h +++ b/arch/x86/crc32_fold_pclmulqdq_tpl.h @@ -20,9 +20,9 @@ #ifdef X86_PCLMULQDQ_CRC #ifdef COPY -Z_INTERNAL void crc32_fold_pclmulqdq_copy(crc32_fold *crc, uint8_t *dst, const uint8_t *src, uint64_t len) { +Z_INTERNAL void crc32_fold_pclmulqdq_copy(crc32_fold *crc, uint8_t *dst, const uint8_t *src, size_t len) { #else -Z_INTERNAL void crc32_fold_pclmulqdq(crc32_fold *crc, const uint8_t *src, uint64_t len, uint32_t init_crc) { +Z_INTERNAL void crc32_fold_pclmulqdq(crc32_fold *crc, const uint8_t *src, size_t len, uint32_t init_crc) { #endif unsigned long algn_diff; __m128i xmm_t0, xmm_t1, xmm_t2, xmm_t3; @@ -47,9 +47,9 @@ Z_INTERNAL void crc32_fold_pclmulqdq(crc32_fold *crc, const uint8_t *src, uint64 if (len == 0) return; - memcpy(partial_buf, src, (size_t)len); + memcpy(partial_buf, src, len); xmm_crc_part = _mm_load_si128((const __m128i *)partial_buf); - memcpy(dst, partial_buf, (size_t)len); + memcpy(dst, partial_buf, len); #endif goto partial; } @@ -82,10 +82,10 @@ Z_INTERNAL void crc32_fold_pclmulqdq(crc32_fold *crc, const uint8_t *src, uint64 #ifdef X86_VPCLMULQDQ_CRC if (x86_cpu_has_vpclmulqdq && x86_cpu_has_avx512 && (len >= 256)) { #ifdef COPY - uint64_t n = fold_16_vpclmulqdq_copy(&xmm_crc0, &xmm_crc1, &xmm_crc2, &xmm_crc3, dst, src, len); + size_t n = fold_16_vpclmulqdq_copy(&xmm_crc0, &xmm_crc1, &xmm_crc2, &xmm_crc3, dst, src, len); dst += n; #else - uint64_t n = fold_16_vpclmulqdq(&xmm_crc0, &xmm_crc1, &xmm_crc2, &xmm_crc3, src, len, + size_t n = fold_16_vpclmulqdq(&xmm_crc0, &xmm_crc1, &xmm_crc2, &xmm_crc3, src, len, xmm_initial, first); first = 0; #endif @@ -176,12 +176,12 @@ Z_INTERNAL void crc32_fold_pclmulqdq(crc32_fold *crc, const uint8_t *src, uint64 partial: if (len) { - memcpy(&xmm_crc_part, src, (size_t)len); + memcpy(&xmm_crc_part, src, len); #ifdef COPY _mm_storeu_si128((__m128i *)partial_buf, xmm_crc_part); - memcpy(dst, partial_buf, (size_t)len); + memcpy(dst, partial_buf, len); #endif - partial_fold((size_t)len, &xmm_crc0, &xmm_crc1, &xmm_crc2, &xmm_crc3, &xmm_crc_part); + partial_fold(len, &xmm_crc0, &xmm_crc1, &xmm_crc2, &xmm_crc3, &xmm_crc_part); } crc32_fold_save((__m128i *)crc->fold, &xmm_crc0, &xmm_crc1, &xmm_crc2, &xmm_crc3); diff --git a/arch/x86/crc32_fold_vpclmulqdq_tpl.h b/arch/x86/crc32_fold_vpclmulqdq_tpl.h index 421b64da..3d27cb3d 100644 --- a/arch/x86/crc32_fold_vpclmulqdq_tpl.h +++ b/arch/x86/crc32_fold_vpclmulqdq_tpl.h @@ -4,18 +4,18 @@ */ #ifdef COPY -Z_INTERNAL uint64_t fold_16_vpclmulqdq_copy(__m128i *xmm_crc0, __m128i *xmm_crc1, - __m128i *xmm_crc2, __m128i *xmm_crc3, uint8_t *dst, const uint8_t *src, uint64_t len) { +Z_INTERNAL size_t fold_16_vpclmulqdq_copy(__m128i *xmm_crc0, __m128i *xmm_crc1, + __m128i *xmm_crc2, __m128i *xmm_crc3, uint8_t *dst, const uint8_t *src, size_t len) { #else -Z_INTERNAL uint64_t fold_16_vpclmulqdq(__m128i *xmm_crc0, __m128i *xmm_crc1, - __m128i *xmm_crc2, __m128i *xmm_crc3, const uint8_t *src, uint64_t len, +Z_INTERNAL size_t fold_16_vpclmulqdq(__m128i *xmm_crc0, __m128i *xmm_crc1, + __m128i *xmm_crc2, __m128i *xmm_crc3, const uint8_t *src, size_t len, __m128i init_crc, int32_t first) { __m512i zmm_initial = _mm512_zextsi128_si512(init_crc); #endif __m512i zmm_t0, zmm_t1, zmm_t2, zmm_t3; __m512i zmm_crc0, zmm_crc1, zmm_crc2, zmm_crc3; __m512i z0, z1, z2, z3; - uint64_t len_tmp = len; + size_t len_tmp = len; const __m512i zmm_fold4 = _mm512_set4_epi32( 0x00000001, 0x54442bd4, 0x00000001, 0xc6e41596); const __m512i zmm_fold16 = _mm512_set4_epi32( diff --git a/cpu_features.h b/cpu_features.h index b8de4b71..9d10ce7d 100644 --- a/cpu_features.h +++ b/cpu_features.h @@ -65,10 +65,10 @@ extern uint32_t adler32_fold_copy_avx512_vnni(uint32_t adler, uint8_t *dst, cons /* CRC32 folding */ #ifdef X86_PCLMULQDQ_CRC extern uint32_t crc32_fold_pclmulqdq_reset(crc32_fold *crc); -extern void crc32_fold_pclmulqdq_copy(crc32_fold *crc, uint8_t *dst, const uint8_t *src, uint64_t len); -extern void crc32_fold_pclmulqdq(crc32_fold *crc, const uint8_t *src, uint64_t len, uint32_t init_crc); +extern void crc32_fold_pclmulqdq_copy(crc32_fold *crc, uint8_t *dst, const uint8_t *src, size_t len); +extern void crc32_fold_pclmulqdq(crc32_fold *crc, const uint8_t *src, size_t len, uint32_t init_crc); extern uint32_t crc32_fold_pclmulqdq_final(crc32_fold *crc); -extern uint32_t crc32_pclmulqdq(uint32_t crc32, const uint8_t *buf, uint64_t len); +extern uint32_t crc32_pclmulqdq(uint32_t crc32, const uint8_t *buf, size_t len); #endif /* memory chunking */ @@ -111,15 +111,15 @@ extern uint8_t* chunkmemset_safe_power8(uint8_t *out, unsigned dist, unsigned le #endif /* CRC32 */ -typedef uint32_t (*crc32_func)(uint32_t crc32, const uint8_t *buf, uint64_t len); +typedef uint32_t (*crc32_func)(uint32_t crc32, const uint8_t *buf, size_t len); -extern uint32_t PREFIX(crc32_braid)(uint32_t crc, const uint8_t *buf, uint64_t len); +extern uint32_t PREFIX(crc32_braid)(uint32_t crc, const uint8_t *buf, size_t len); #ifdef ARM_ACLE_CRC_HASH -extern uint32_t crc32_acle(uint32_t crc, const uint8_t *buf, uint64_t len); +extern uint32_t crc32_acle(uint32_t crc, const uint8_t *buf, size_t len); #elif defined(POWER8_VSX_CRC32) -extern uint32_t crc32_power8(uint32_t crc, const uint8_t *buf, uint64_t len); +extern uint32_t crc32_power8(uint32_t crc, const uint8_t *buf, size_t len); #elif defined(S390_CRC32_VX) -extern uint32_t PREFIX(s390_crc32_vx)(uint32_t crc, const uint8_t *buf, uint64_t len); +extern uint32_t PREFIX(s390_crc32_vx)(uint32_t crc, const uint8_t *buf, size_t len); #endif /* compare256 */ diff --git a/crc32_braid.c b/crc32_braid.c index a7b9b7eb..96754b53 100644 --- a/crc32_braid.c +++ b/crc32_braid.c @@ -111,7 +111,7 @@ static z_word_t crc_word(z_word_t data) { #endif /* W */ /* ========================================================================= */ -Z_INTERNAL uint32_t PREFIX(crc32_braid)(uint32_t crc, const uint8_t *buf, uint64_t len) { +Z_INTERNAL uint32_t PREFIX(crc32_braid)(uint32_t crc, const uint8_t *buf, size_t len) { Z_REGISTER uint32_t c; /* Pre-condition the CRC */ @@ -120,7 +120,7 @@ Z_INTERNAL uint32_t PREFIX(crc32_braid)(uint32_t crc, const uint8_t *buf, uint64 #ifdef W /* If provided enough bytes, do a braided CRC calculation. */ if (len >= N * W + W - 1) { - uint64_t blks; + size_t blks; z_word_t const *words; int k; diff --git a/crc32_braid_p.h b/crc32_braid_p.h index 26906b64..1d8a0706 100644 --- a/crc32_braid_p.h +++ b/crc32_braid_p.h @@ -45,6 +45,6 @@ /* CRC polynomial. */ #define POLY 0xedb88320 /* p(x) reflected, with x^32 implied */ -extern uint32_t PREFIX(crc32_braid)(uint32_t crc, const uint8_t *buf, uint64_t len); +extern uint32_t PREFIX(crc32_braid)(uint32_t crc, const uint8_t *buf, size_t len); #endif /* CRC32_BRAID_P_H_ */ diff --git a/crc32_fold.c b/crc32_fold.c index b1011deb..5b3c7c45 100644 --- a/crc32_fold.c +++ b/crc32_fold.c @@ -14,23 +14,12 @@ Z_INTERNAL uint32_t crc32_fold_reset_c(crc32_fold *crc) { return crc->value; } -Z_INTERNAL void crc32_fold_copy_c(crc32_fold *crc, uint8_t *dst, const uint8_t *src, uint64_t len) { +Z_INTERNAL void crc32_fold_copy_c(crc32_fold *crc, uint8_t *dst, const uint8_t *src, size_t len) { crc->value = functable.crc32(crc->value, src, len); -/* Test that we don't try to copy more than actually fits in available address space */ -#if INTPTR_MAX > SSIZE_MAX - while (len > SSIZE_MAX) { - memcpy(dst, src, SSIZE_MAX); - dst += SSIZE_MAX; - src += SSIZE_MAX; - len -= SSIZE_MAX; - } -#endif - if (len) { - memcpy(dst, src, (size_t)len); - } + memcpy(dst, src, len); } -Z_INTERNAL void crc32_fold_c(crc32_fold *crc, const uint8_t *src, uint64_t len, uint32_t init_crc) { +Z_INTERNAL void crc32_fold_c(crc32_fold *crc, const uint8_t *src, size_t len, uint32_t init_crc) { /* Note: while this is basically the same thing as the vanilla CRC function, we still need * a functable entry for it so that we can generically dispatch to this function with the * same arguments for the versions that _do_ do a folding CRC but we don't want a copy. The diff --git a/crc32_fold.h b/crc32_fold.h index 6aef5467..0d2ff669 100644 --- a/crc32_fold.h +++ b/crc32_fold.h @@ -14,8 +14,8 @@ typedef struct crc32_fold_s { } crc32_fold; Z_INTERNAL uint32_t crc32_fold_reset_c(crc32_fold *crc); -Z_INTERNAL void crc32_fold_copy_c(crc32_fold *crc, uint8_t *dst, const uint8_t *src, uint64_t len); -Z_INTERNAL void crc32_fold_c(crc32_fold *crc, const uint8_t *src, uint64_t len, uint32_t init_crc); +Z_INTERNAL void crc32_fold_copy_c(crc32_fold *crc, uint8_t *dst, const uint8_t *src, size_t len); +Z_INTERNAL void crc32_fold_c(crc32_fold *crc, const uint8_t *src, size_t len, uint32_t init_crc); Z_INTERNAL uint32_t crc32_fold_final_c(crc32_fold *crc); #endif diff --git a/functable.c b/functable.c index 4af8a8a7..5dd5e54d 100644 --- a/functable.c +++ b/functable.c @@ -233,7 +233,7 @@ Z_INTERNAL uint32_t crc32_fold_reset_stub(crc32_fold *crc) { return functable.crc32_fold_reset(crc); } -Z_INTERNAL void crc32_fold_copy_stub(crc32_fold *crc, uint8_t *dst, const uint8_t *src, uint64_t len) { +Z_INTERNAL void crc32_fold_copy_stub(crc32_fold *crc, uint8_t *dst, const uint8_t *src, size_t len) { functable.crc32_fold_copy = &crc32_fold_copy_c; cpu_check_features(); #ifdef X86_PCLMULQDQ_CRC @@ -243,7 +243,7 @@ Z_INTERNAL void crc32_fold_copy_stub(crc32_fold *crc, uint8_t *dst, const uint8_ functable.crc32_fold_copy(crc, dst, src, len); } -Z_INTERNAL void crc32_fold_stub(crc32_fold *crc, const uint8_t *src, uint64_t len, uint32_t init_crc) { +Z_INTERNAL void crc32_fold_stub(crc32_fold *crc, const uint8_t *src, size_t len, uint32_t init_crc) { functable.crc32_fold = &crc32_fold_c; cpu_check_features(); #ifdef X86_PCLMULQDQ_CRC @@ -403,10 +403,7 @@ Z_INTERNAL uint8_t* chunkmemset_safe_stub(uint8_t *out, unsigned dist, unsigned return functable.chunkmemset_safe(out, dist, len, left); } -Z_INTERNAL uint32_t crc32_stub(uint32_t crc, const uint8_t *buf, uint64_t len) { - Assert(sizeof(uint64_t) >= sizeof(size_t), - "crc32_z takes size_t but internally we have a uint64_t len"); - +Z_INTERNAL uint32_t crc32_stub(uint32_t crc, const uint8_t *buf, size_t len) { functable.crc32 = &PREFIX(crc32_braid); cpu_check_features(); #ifdef ARM_ACLE_CRC_HASH diff --git a/functable.h b/functable.h index a5690871..9b6ec3b5 100644 --- a/functable.h +++ b/functable.h @@ -13,10 +13,10 @@ struct functable_s { uint32_t (* adler32) (uint32_t adler, const uint8_t *buf, uint64_t len); uint32_t (* adler32_fold_copy) (uint32_t adler, uint8_t *dst, const uint8_t *src, uint64_t len); - uint32_t (* crc32) (uint32_t crc, const uint8_t *buf, uint64_t len); + uint32_t (* crc32) (uint32_t crc, const uint8_t *buf, size_t len); uint32_t (* crc32_fold_reset) (struct crc32_fold_s *crc); - void (* crc32_fold_copy) (struct crc32_fold_s *crc, uint8_t *dst, const uint8_t *src, uint64_t len); - void (* crc32_fold) (struct crc32_fold_s *crc, const uint8_t *src, uint64_t len, uint32_t init_crc); + void (* crc32_fold_copy) (struct crc32_fold_s *crc, uint8_t *dst, const uint8_t *src, size_t len); + void (* crc32_fold) (struct crc32_fold_s *crc, const uint8_t *src, size_t len, uint32_t init_crc); uint32_t (* crc32_fold_final) (struct crc32_fold_s *crc); uint32_t (* compare256) (const uint8_t *src0, const uint8_t *src1); uint32_t (* chunksize) (void);