#include "adler32_p.h"
/* ========================================================================= */
-Z_INTERNAL uint32_t adler32_c(uint32_t adler, const unsigned char *buf, uint64_t len) {
+Z_INTERNAL uint32_t adler32_c(uint32_t adler, const uint8_t *buf, uint64_t len) {
uint32_t sum2;
unsigned n;
#define DO8(sum1, sum2, buf, i) {DO4(sum1, sum2, buf, i); DO4(sum1, sum2, buf, i+4);}
#define DO16(sum1, sum2, buf) {DO8(sum1, sum2, buf, 0); DO8(sum1, sum2, buf, 8);}
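/* For reference only (not part of this change): DO8/DO16 build on a DO4 macro
 * defined earlier in the file. A definition consistent with their use would be
 * the classic Adler-32 step, adding each byte to sum1 and folding the running
 * sum1 into sum2, roughly: */
#define DO1_SKETCH(sum1, sum2, buf, i) {(sum1) += (buf)[(i)]; (sum2) += (sum1);}
#define DO2_SKETCH(sum1, sum2, buf, i) {DO1_SKETCH(sum1, sum2, buf, i); DO1_SKETCH(sum1, sum2, buf, i+1);}
#define DO4_SKETCH(sum1, sum2, buf, i) {DO2_SKETCH(sum1, sum2, buf, i); DO2_SKETCH(sum1, sum2, buf, i+2);}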
-static inline uint32_t adler32_len_1(uint32_t adler, const unsigned char *buf, uint32_t sum2) {
+static inline uint32_t adler32_len_1(uint32_t adler, const uint8_t *buf, uint32_t sum2) {
adler += buf[0];
adler %= BASE;
sum2 += adler;
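/* recombine: sum2 (s2) goes in the high 16 bits, adler (s1) in the low 16 bits */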
return adler | (sum2 << 16);
}
-static inline uint32_t adler32_len_16(uint32_t adler, const unsigned char *buf, uint64_t len, uint32_t sum2) {
+static inline uint32_t adler32_len_16(uint32_t adler, const uint8_t *buf, uint64_t len, uint32_t sum2) {
while (len) {
--len;
adler += *buf++;
return adler | (sum2 << 16);
}
-static inline uint32_t adler32_copy_len_16(uint32_t adler, const unsigned char *buf, uint8_t *dst, uint64_t len, uint32_t sum2) {
+static inline uint32_t adler32_copy_len_16(uint32_t adler, const uint8_t *buf, uint8_t *dst, uint64_t len, uint32_t sum2) {
while (len--) {
*dst = *buf++;
adler += *dst++;
return adler | (sum2 << 16);
}
-static inline uint32_t adler32_len_64(uint32_t adler, const unsigned char *buf, uint64_t len, uint32_t sum2) {
+static inline uint32_t adler32_len_64(uint32_t adler, const uint8_t *buf, uint64_t len, uint32_t sum2) {
#ifdef UNROLL_MORE
while (len >= 16) {
len -= 16;
#include "../../adler32_p.h"
#include "../../fallback_builtins.h"
-static void NEON_accum32(uint32_t *s, const unsigned char *buf, uint64_t len) {
+static void NEON_accum32(uint32_t *s, const uint8_t *buf, uint64_t len) {
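/* Within a 64-byte block, buf[i] is added into s2 a total of (64 - i) times,
 * so the taps below hold those per-byte weights counting down from 64; this
 * lets the weighted sum be formed with vector multiplies instead of a serial loop. */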
static const uint16_t ALIGNED_(16) taps[64] = {
64, 63, 62, 61, 60, 59, 58, 57,
56, 55, 54, 53, 52, 51, 50, 49,
s[1] = vget_lane_u32(as, 1);
}
-static void NEON_handle_tail(uint32_t *pair, const unsigned char *buf, uint64_t len) {
+static void NEON_handle_tail(uint32_t *pair, const uint8_t *buf, uint64_t len) {
unsigned int i;
for (i = 0; i < len; ++i) {
pair[0] += buf[i];
}
}
-uint32_t adler32_neon(uint32_t adler, const unsigned char *buf, uint64_t len) {
+uint32_t adler32_neon(uint32_t adler, const uint8_t *buf, uint64_t len) {
/* split Adler-32 into component sums */
uint32_t sum2 = (adler >> 16) & 0xffff;
adler &= 0xffff;
#endif
#include "../../zbuild.h"
-uint32_t crc32_acle(uint32_t crc, const unsigned char *buf, uint64_t len) {
+uint32_t crc32_acle(uint32_t crc, const uint8_t *buf, uint64_t len) {
Z_REGISTER uint32_t c;
Z_REGISTER const uint16_t *buf2;
Z_REGISTER const uint32_t *buf4;
return __a;
}
-uint32_t adler32_power8(uint32_t adler, const unsigned char* buf, uint64_t len) {
+uint32_t adler32_power8(uint32_t adler, const uint8_t *buf, uint64_t len) {
uint32_t s1 = adler & 0xffff;
uint32_t s2 = (adler >> 16) & 0xffff;
#define vmx_zero() (vec_splat_u32(0))
-static inline void vmx_handle_head_or_tail(uint32_t *pair, const unsigned char *buf, uint64_t len) {
+static inline void vmx_handle_head_or_tail(uint32_t *pair, const uint8_t *buf, uint64_t len) {
unsigned int i;
for (i = 0; i < len; ++i) {
pair[0] += buf[i];
}
}
-static void vmx_accum32(uint32_t *s, const unsigned char *buf, uint64_t len) {
+static void vmx_accum32(uint32_t *s, const uint8_t *buf, uint64_t len) {
/* Different taps for the separable components of sums */
const vector unsigned char t0 = {64, 63, 62, 61, 60, 59, 58, 57, 56, 55, 54, 53, 52, 51, 50, 49};
const vector unsigned char t1 = {48, 47, 46, 45, 44, 43, 42, 41, 40, 39, 38, 37, 36, 35, 34, 33};
vec_ste(s2acc, 0, s+1);
}
-uint32_t adler32_vmx(uint32_t adler, const unsigned char *buf, uint64_t len) {
+uint32_t adler32_vmx(uint32_t adler, const uint8_t *buf, uint64_t len) {
uint32_t sum2;
uint32_t pair[16] ALIGNED_(16);
memset(&pair[2], 0, 14);
typedef unsigned int uv4si __attribute__((vector_size(16)));
typedef unsigned long long uv2di __attribute__((vector_size(16)));
-static uint32_t crc32_le_vgfm_16(uint32_t crc, const unsigned char *buf, uint64_t len) {
+static uint32_t crc32_le_vgfm_16(uint32_t crc, const uint8_t *buf, uint64_t len) {
/*
* The CRC-32 constant block contains reduction constants to fold and
* process particular chunks of the input data stream in parallel.
#include <immintrin.h>
-Z_INTERNAL uint32_t adler32_ssse3(uint32_t adler, const unsigned char *buf, uint64_t len) {
+Z_INTERNAL uint32_t adler32_ssse3(uint32_t adler, const uint8_t *buf, uint64_t len) {
uint32_t sum2;
/* split Adler-32 into component sums */
return crc->value;
}
-uint32_t crc32_pclmulqdq(uint32_t crc32, const unsigned char* buf, uint64_t len) {
+uint32_t crc32_pclmulqdq(uint32_t crc32, const uint8_t *buf, uint64_t len) {
/* For lengths < 64, the crc32_braid method is faster. The CRC32 instruction
 * might also prove effective for these short lengths */
if (len < 64)
extern void cpu_check_features(void);
/* adler32 */
-typedef uint32_t (*adler32_func)(uint32_t adler, const unsigned char *buf, uint64_t len);
+typedef uint32_t (*adler32_func)(uint32_t adler, const uint8_t *buf, uint64_t len);
-extern uint32_t adler32_c(uint32_t adler, const unsigned char *buf, uint64_t len);
+extern uint32_t adler32_c(uint32_t adler, const uint8_t *buf, uint64_t len);
#ifdef ARM_NEON_ADLER32
-extern uint32_t adler32_neon(uint32_t adler, const unsigned char *buf, uint64_t len);
+extern uint32_t adler32_neon(uint32_t adler, const uint8_t *buf, uint64_t len);
#endif
#ifdef PPC_VMX_ADLER32
-extern uint32_t adler32_vmx(uint32_t adler, const unsigned char *buf, uint64_t len);
+extern uint32_t adler32_vmx(uint32_t adler, const uint8_t *buf, uint64_t len);
#endif
#ifdef X86_SSSE3_ADLER32
-extern uint32_t adler32_ssse3(uint32_t adler, const unsigned char *buf, uint64_t len);
+extern uint32_t adler32_ssse3(uint32_t adler, const uint8_t *buf, uint64_t len);
#endif
#ifdef X86_AVX2_ADLER32
-extern uint32_t adler32_avx2(uint32_t adler, const unsigned char *buf, uint64_t len);
+extern uint32_t adler32_avx2(uint32_t adler, const uint8_t *buf, uint64_t len);
#endif
#ifdef X86_AVX512_ADLER32
-extern uint32_t adler32_avx512(uint32_t adler, const unsigned char *buf, uint64_t len);
+extern uint32_t adler32_avx512(uint32_t adler, const uint8_t *buf, uint64_t len);
#endif
#ifdef X86_AVX512VNNI_ADLER32
-extern uint32_t adler32_avx512_vnni(uint32_t adler, const unsigned char *buf, uint64_t len);
+extern uint32_t adler32_avx512_vnni(uint32_t adler, const uint8_t *buf, uint64_t len);
#endif
#ifdef POWER8_VSX_ADLER32
-extern uint32_t adler32_power8(uint32_t adler, const unsigned char* buf, uint64_t len);
+extern uint32_t adler32_power8(uint32_t adler, const uint8_t *buf, uint64_t len);
#endif
/* adler32 folding */
extern void crc32_fold_pclmulqdq_copy(crc32_fold *crc, uint8_t *dst, const uint8_t *src, uint64_t len);
extern void crc32_fold_pclmulqdq(crc32_fold *crc, const uint8_t *src, uint64_t len, uint32_t init_crc);
extern uint32_t crc32_fold_pclmulqdq_final(crc32_fold *crc);
-extern uint32_t crc32_pclmulqdq(uint32_t crc32, const unsigned char* buf, uint64_t len);
+extern uint32_t crc32_pclmulqdq(uint32_t crc32, const uint8_t *buf, uint64_t len);
#endif
/* memory chunking */
#endif
/* CRC32 */
-typedef uint32_t (*crc32_func)(uint32_t crc32, const unsigned char * buf, uint64_t len);
+typedef uint32_t (*crc32_func)(uint32_t crc32, const uint8_t *buf, uint64_t len);
-extern uint32_t crc32_braid(uint32_t crc, const unsigned char *buf, uint64_t len);
+extern uint32_t crc32_braid(uint32_t crc, const uint8_t *buf, uint64_t len);
#ifdef ARM_ACLE_CRC_HASH
-extern uint32_t crc32_acle(uint32_t crc, const unsigned char *buf, uint64_t len);
+extern uint32_t crc32_acle(uint32_t crc, const uint8_t *buf, uint64_t len);
#elif defined(POWER8_VSX_CRC32)
-extern uint32_t crc32_power8(uint32_t crc, const unsigned char *buf, uint64_t len);
+extern uint32_t crc32_power8(uint32_t crc, const uint8_t *buf, uint64_t len);
#elif defined(S390_CRC32_VX)
-extern uint32_t PREFIX(s390_crc32_vx)(uint32_t crc, const unsigned char *buf, uint64_t len);
+extern uint32_t PREFIX(s390_crc32_vx)(uint32_t crc, const uint8_t *buf, uint64_t len);
#endif
/* compare256 */
#endif /* W */
/* ========================================================================= */
-Z_INTERNAL uint32_t crc32_braid(uint32_t crc, const unsigned char *buf, uint64_t len) {
+Z_INTERNAL uint32_t crc32_braid(uint32_t crc, const uint8_t *buf, uint64_t len) {
Z_REGISTER uint32_t c;
/* Pre-condition the CRC */
/* CRC polynomial. */
#define POLY 0xedb88320 /* p(x) reflected, with x^32 implied */
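/* Illustrative reference only (not part of this header): a minimal bit-at-a-time
 * CRC-32 over the reflected POLY above; the braid code computes the same value,
 * just many bytes per iteration. */
static inline uint32_t crc32_bitwise_sketch(uint32_t crc, const uint8_t *buf, uint64_t len) {
    crc = ~crc;                         /* pre-condition the CRC */
    while (len--) {
        crc ^= *buf++;
        for (int k = 0; k < 8; k++)     /* consume one bit per step, LSB first */
            crc = (crc >> 1) ^ (POLY & (0 - (crc & 1)));
    }
    return ~crc;                        /* post-condition the CRC */
}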
-extern uint32_t crc32_braid(uint32_t crc, const unsigned char *buf, uint64_t len);
+extern uint32_t crc32_braid(uint32_t crc, const uint8_t *buf, uint64_t len);
#endif /* CRC32_BRAID_P_H_ */
return functable.longest_match_slow(s, cur_match);
}
-Z_INTERNAL uint32_t adler32_stub(uint32_t adler, const unsigned char *buf, uint64_t len) {
+Z_INTERNAL uint32_t adler32_stub(uint32_t adler, const uint8_t *buf, uint64_t len) {
// Initialize default
functable.adler32 = &adler32_c;
cpu_check_features();
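/* (Dispatch pattern, for context: after installing the portable adler32_c default
 * and querying CPU features, the elided code below overrides functable.adler32 with
 * the fastest variant available, then forwards this call through the functable entry,
 * so subsequent calls bypass the stub entirely.) */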
return functable.chunkmemset_safe(out, dist, len, left);
}
-Z_INTERNAL uint32_t crc32_stub(uint32_t crc, const unsigned char *buf, uint64_t len) {
+Z_INTERNAL uint32_t crc32_stub(uint32_t crc, const uint8_t *buf, uint64_t len) {
Assert(sizeof(uint64_t) >= sizeof(size_t),
"crc32_z takes size_t but internally we have a uint64_t len");
#include "adler32_fold.h"
struct functable_s {
- uint32_t (* adler32) (uint32_t adler, const unsigned char *buf, uint64_t len);
+ uint32_t (* adler32) (uint32_t adler, const uint8_t *buf, uint64_t len);
uint32_t (* adler32_fold_copy) (uint32_t adler, uint8_t *dst, const uint8_t *src, uint64_t len);
- uint32_t (* crc32) (uint32_t crc, const unsigned char *buf, uint64_t len);
+ uint32_t (* crc32) (uint32_t crc, const uint8_t *buf, uint64_t len);
uint32_t (* crc32_fold_reset) (crc32_fold *crc);
void (* crc32_fold_copy) (crc32_fold *crc, uint8_t *dst, const uint8_t *src, uint64_t len);
void (* crc32_fold) (crc32_fold *crc, const uint8_t *src, uint64_t len, uint32_t init_crc);
#define MAX_RANDOM_INTS (1024 * 1024)
#define MAX_RANDOM_INTS_SIZE (MAX_RANDOM_INTS * sizeof(uint32_t))
-typedef uint32_t (*adler32_cpy_func)(uint32_t adler, unsigned char *dst, const unsigned char *buf, uint64_t len);
+typedef uint32_t (*adler32_cpy_func)(uint32_t adler, unsigned char *dst, const uint8_t *buf, uint64_t len);
class adler32_copy: public benchmark::Fixture {
private:
state.SkipWithError("CPU does not support " #name); \
} \
Bench(state, [](uint32_t init_sum, unsigned char *dst, \
- const unsigned char *buf, uint64_t len) -> uint32_t { \
+ const uint8_t *buf, uint64_t len) -> uint32_t { \
memcpy(dst, buf, len); \
return fptr(init_sum, buf, len); \
}); \