#include "adler32_p.h"
/* ========================================================================= */
-Z_INTERNAL uint32_t adler32_c(uint32_t adler, const unsigned char *buf, size_t len) {
+Z_INTERNAL uint32_t adler32_c(uint32_t adler, const unsigned char *buf, uint64_t len) {
uint32_t sum2;
unsigned n;
/* adler32_fold.c -- adler32 folding interface
- * Copyright (C) 2022 Adam Stylinski
+ * Copyright (C) 2022 Adam Stylinski
* For conditions of distribution and use, see copyright notice in zlib.h
*/
#include "functable.h"
#include "adler32_fold.h"
-Z_INTERNAL uint32_t adler32_fold_copy_c(uint32_t adler, uint8_t *dst, const uint8_t *src, size_t len) {
- memcpy(dst, src, len);
- return functable.adler32(adler, src, len);
+Z_INTERNAL uint32_t adler32_fold_copy_c(uint32_t adler, uint8_t *dst, const uint8_t *src, uint64_t len) {
+ adler = functable.adler32(adler, src, len);
+ while (len > SIZE_MAX) {
+ memcpy(dst, src, SIZE_MAX);
+ dst += SIZE_MAX;
+ src += SIZE_MAX;
+ len -= SIZE_MAX;
+ }
+ if (len) {
+ memcpy(dst, src, (size_t)len);
+ }
+ return adler;
}
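/* A minimal standalone sketch of the chunked copy above (illustrative, not part of
 * this patch): memcpy takes a size_t, so a uint64_t length has to be split into
 * SIZE_MAX-sized pieces on targets where size_t is narrower than 64 bits. The
 * helper name chunked_memcpy is hypothetical. */
#include <stdint.h>
#include <string.h>

static void chunked_memcpy(uint8_t *dst, const uint8_t *src, uint64_t len) {
    while (len > SIZE_MAX) {              /* only reachable when sizeof(size_t) < 8 */
        memcpy(dst, src, SIZE_MAX);
        dst += SIZE_MAX;
        src += SIZE_MAX;
        len -= SIZE_MAX;
    }
    if (len)
        memcpy(dst, src, (size_t)len);    /* remaining tail fits in one memcpy */
}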
#ifndef ADLER32_FOLD_H_
#define ADLER32_FOLD_H_
-Z_INTERNAL uint32_t adler32_fold_copy_c(uint32_t adler, uint8_t *dst, const uint8_t *src, size_t len);
+Z_INTERNAL uint32_t adler32_fold_copy_c(uint32_t adler, uint8_t *dst, const uint8_t *src, uint64_t len);
#endif
return adler | (sum2 << 16);
}
-static inline uint32_t adler32_len_16(uint32_t adler, const unsigned char *buf, size_t len, uint32_t sum2) {
+static inline uint32_t adler32_len_16(uint32_t adler, const unsigned char *buf, uint64_t len, uint32_t sum2) {
while (len) {
--len;
adler += *buf++;
return adler | (sum2 << 16);
}
-static inline uint32_t adler32_copy_len_16(uint32_t adler, const unsigned char *buf, uint8_t *dst, size_t len, uint32_t sum2) {
+static inline uint32_t adler32_copy_len_16(uint32_t adler, const unsigned char *buf, uint8_t *dst, uint64_t len, uint32_t sum2) {
while (len--) {
- *dst = *buf++;
+ *dst = *buf++;
adler += *dst++;
sum2 += adler;
}
return adler | (sum2 << 16);
}
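/* For reference, a self-contained scalar Adler-32 (illustrative, not part of this
 * patch) showing what the helpers above and the SIMD paths below compute: s1
 * accumulates the bytes (starting from 1 for a fresh checksum), s2 accumulates the
 * running s1 values, both reduced mod 65521, recombined as s1 | (s2 << 16).
 * ADLER_BASE and ADLER_NMAX stand in for the BASE/NMAX constants from adler32_p.h. */
#include <stdint.h>

#define ADLER_BASE 65521U   /* largest prime smaller than 65536 */
#define ADLER_NMAX 5552U    /* largest block for which the deferred 32-bit sums cannot overflow */

static uint32_t adler32_reference(uint32_t adler, const unsigned char *buf, uint64_t len) {
    uint32_t s1 = adler & 0xffff;
    uint32_t s2 = (adler >> 16) & 0xffff;
    while (len > 0) {
        uint64_t n = len < ADLER_NMAX ? len : ADLER_NMAX;
        len -= n;
        while (n--) {
            s1 += *buf++;     /* byte sum */
            s2 += s1;         /* sum of running s1 values */
        }
        s1 %= ADLER_BASE;     /* deferred modulus, safe because of ADLER_NMAX */
        s2 %= ADLER_BASE;
    }
    return s1 | (s2 << 16);   /* same recombination as the helpers above */
}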
-static inline uint32_t adler32_len_64(uint32_t adler, const unsigned char *buf, size_t len, uint32_t sum2) {
+static inline uint32_t adler32_len_64(uint32_t adler, const unsigned char *buf, uint64_t len, uint32_t sum2) {
#ifdef UNROLL_MORE
while (len >= 16) {
len -= 16;
#include "../../adler32_p.h"
#include "../../fallback_builtins.h"
-static void NEON_accum32(uint32_t *s, const unsigned char *buf, size_t len) {
+static void NEON_accum32(uint32_t *s, const unsigned char *buf, uint64_t len) {
static const uint16_t ALIGNED_(16) taps[64] = {
64, 63, 62, 61, 60, 59, 58, 57,
56, 55, 54, 53, 52, 51, 50, 49,
s[1] = vget_lane_u32(as, 1);
}
-static void NEON_handle_tail(uint32_t *pair, const unsigned char *buf, size_t len) {
+static void NEON_handle_tail(uint32_t *pair, const unsigned char *buf, uint64_t len) {
unsigned int i;
for (i = 0; i < len; ++i) {
pair[0] += buf[i];
}
}
-uint32_t adler32_neon(uint32_t adler, const unsigned char *buf, size_t len) {
+uint32_t adler32_neon(uint32_t adler, const unsigned char *buf, uint64_t len) {
/* split Adler-32 into component sums */
uint32_t sum2 = (adler >> 16) & 0xffff;
adler &= 0xffff;
return __a;
}
-uint32_t adler32_power8(uint32_t adler, const unsigned char* buf, size_t len) {
+uint32_t adler32_power8(uint32_t adler, const unsigned char* buf, uint64_t len) {
uint32_t s1 = adler & 0xffff;
uint32_t s2 = (adler >> 16) & 0xffff;
#define vmx_zero() (vec_splat_u32(0))
-static inline void vmx_handle_head_or_tail(uint32_t *pair, const unsigned char *buf, size_t len) {
+static inline void vmx_handle_head_or_tail(uint32_t *pair, const unsigned char *buf, uint64_t len) {
unsigned int i;
for (i = 0; i < len; ++i) {
pair[0] += buf[i];
}
}
-static void vmx_accum32(uint32_t *s, const unsigned char *buf, size_t len) {
+static void vmx_accum32(uint32_t *s, const unsigned char *buf, uint64_t len) {
/* Different taps for the separable components of sums */
const vector unsigned char t0 = {64, 63, 62, 61, 60, 59, 58, 57, 56, 55, 54, 53, 52, 51, 50, 49};
const vector unsigned char t1 = {48, 47, 46, 45, 44, 43, 42, 41, 40, 39, 38, 37, 36, 35, 34, 33};
vec_ste(s2acc, 0, s+1);
}
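/* The descending taps above (and the NEON taps table earlier) come from unrolling
 * the s2 recurrence over one block: processing n bytes from sums (s1, s2) gives
 * s1' = s1 + sum(buf[i]) and s2' = s2 + n*s1 + sum((n - i) * buf[i]), so each byte
 * is weighted by a tap of n, n-1, ..., 1. A scalar model of that identity follows
 * (illustrative only, names are mine); in the vector kernels the n*s1 term appears
 * to be what the separately accumulated copy of vs1 accounts for. */
#include <stdint.h>

static void adler_block_update(uint32_t *s1, uint32_t *s2,
                               const unsigned char *buf, unsigned n) {
    uint32_t byte_sum = 0, weighted = 0;
    unsigned i;
    for (i = 0; i < n; ++i) {
        byte_sum += buf[i];
        weighted += (n - i) * buf[i];   /* taps[i] * buf[i] */
    }
    *s2 += n * *s1 + weighted;
    *s1 += byte_sum;
}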
-uint32_t adler32_vmx(uint32_t adler, const unsigned char *buf, size_t len) {
+uint32_t adler32_vmx(uint32_t adler, const unsigned char *buf, uint64_t len) {
uint32_t sum2;
uint32_t pair[16] ALIGNED_(16);
memset(&pair[2], 0, 14);
#include "adler32_avx2_p.h"
#ifdef X86_SSE42_ADLER32
-extern uint32_t adler32_fold_copy_sse42(uint32_t adler, uint8_t *dst, const uint8_t *src, size_t len);
-extern uint32_t adler32_ssse3(uint32_t adler, const uint8_t *src, size_t len);
+extern uint32_t adler32_fold_copy_sse42(uint32_t adler, uint8_t *dst, const uint8_t *src, uint64_t len);
+extern uint32_t adler32_ssse3(uint32_t adler, const uint8_t *src, uint64_t len);
+
#define copy_sub32(a, b, c, d) adler32_fold_copy_sse42(a, b, c, d)
#define sub32(a, b, c) adler32_ssse3(a, b, c)
#else
#endif
#ifdef COPY
-Z_INTERNAL uint32_t adler32_fold_copy_avx2(uint32_t adler, uint8_t *dst, const uint8_t *src, size_t len) {
+Z_INTERNAL uint32_t adler32_fold_copy_avx2(uint32_t adler, uint8_t *dst, const uint8_t *src, uint64_t len) {
#else
-Z_INTERNAL uint32_t adler32_avx2(uint32_t adler, const uint8_t *src, size_t len) {
+Z_INTERNAL uint32_t adler32_avx2(uint32_t adler, const uint8_t *src, uint64_t len) {
#endif
if (src == NULL) return 1L;
if (len == 0) return adler;
uint32_t adler0, adler1;
adler1 = (adler >> 16) & 0xffff;
- adler0 = adler & 0xffff;
+ adler0 = adler & 0xffff;
rem_peel:
if (len < 16) {
__m256i vs1_0 = vs1;
__m256i vs3 = _mm256_setzero_si256();
- size_t k = MIN(len, NMAX);
+ uint64_t k = MIN(len, NMAX);
k -= k % 32;
len -= k;
/* The compiler is generating the following sequence for this integer modulus
* when done the scalar way, in GPRs:
-
+
adler = (s1_unpack[0] % BASE) + (s1_unpack[1] % BASE) + (s1_unpack[2] % BASE) + (s1_unpack[3] % BASE) +
(s1_unpack[4] % BASE) + (s1_unpack[5] % BASE) + (s1_unpack[6] % BASE) + (s1_unpack[7] % BASE);
...
vmovd %xmm1,%esi // move vector lane 0 to 32 bit register %esi
mov %rsi,%rax // zero-extend this value to 64 bit precision in %rax
- imul %rdi,%rsi // do a signed multiplication with magic constant and vector element
+ imul %rdi,%rsi // do a signed multiplication with magic constant and vector element
shr $0x2f,%rsi // shift right by 47
- imul $0xfff1,%esi,%esi // do a signed multiplication with value truncated to 32 bits with 0xfff1
+ imul $0xfff1,%esi,%esi // do a signed multiplication with value truncated to 32 bits with 0xfff1
sub %esi,%eax // subtract lower 32 bits of original vector value from modified one above
...
// repeats for each element with vpextract instructions
This is tricky with AVX2 for a number of reasons:
1.) There's no 64 bit multiplication instruction, but there is a sequence to get there
2.) There are ways to extend vectors to 64 bit precision, but no simple way to truncate
- back down to 32 bit precision later (there is in AVX512)
+ back down to 32 bit precision later (there is in AVX512)
3.) Full width integer multiplications aren't cheap
- We can, however, use a relatively cheap sequence for horizontal sums.
+ We can, however, use a relatively cheap sequence for horizontal sums.
Then, we simply do the integer modulus on the resulting 64 bit GPR, on a scalar value. It was
previously thought that casting to 64 bit precision was needed prior to the horizontal sum, but
that is simply not the case, as NMAX is defined as the maximum number of scalar sums that can be
performed on the maximum possible inputs before overflow
*/
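/* A quick check of the overflow argument above (illustrative, not part of the
 * patch): with worst-case inputs, i.e. both sums starting at BASE-1 and every byte
 * equal to 0xff, the running 32-bit accumulators stay below UINT32_MAX for up to
 * NMAX = 5552 bytes, which is why a single scalar modulus after the horizontal sum
 * is sufficient. */
#include <stdint.h>
#include <stdio.h>

int main(void) {
    const uint32_t base = 65521, nmax = 5552;
    uint64_t s1 = base - 1, s2 = base - 1;   /* worst-case starting sums */
    for (uint32_t n = 0; n < nmax; ++n) {
        s1 += 0xff;                          /* worst-case byte value */
        s2 += s1;
    }
    /* s2 ends at 4294690200, just below UINT32_MAX = 4294967295 */
    printf("s1 = %llu, s2 = %llu\n", (unsigned long long)s1, (unsigned long long)s2);
    return 0;
}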
-
+
/* In AVX2-land, this trip through GPRs will probably be unavoidable, as there's no cheap and easy
* conversion from 64 bit integer to 32 bit (needed for the inexpensive modulus with a constant).
* This casting to 32 bit is cheap through GPRs (just register aliasing). See above for exactly
#ifdef X86_AVX512_ADLER32
#ifdef COPY
-Z_INTERNAL uint32_t adler32_fold_copy_avx512(uint32_t adler, uint8_t *dst, const uint8_t *src, size_t len) {
+Z_INTERNAL uint32_t adler32_fold_copy_avx512(uint32_t adler, uint8_t *dst, const uint8_t *src, uint64_t len) {
#else
-Z_INTERNAL uint32_t adler32_avx512(uint32_t adler, const uint8_t *src, size_t len) {
+Z_INTERNAL uint32_t adler32_avx512(uint32_t adler, const uint8_t *src, uint64_t len) {
#endif
if (src == NULL) return 1L;
uint32_t adler0, adler1;
adler1 = (adler >> 16) & 0xffff;
- adler0 = adler & 0xffff;
+ adler0 = adler & 0xffff;
rem_peel:
if (len < 64) {
#elif defined(X86_SSSE3_ADLER32)
return adler32_ssse3(adler, src, len);
#else
- return adler32_len_16(adler0, src, len, adler1);
+ return adler32_len_16(adler0, src, len, adler1);
#endif
}
56, 57, 58, 59, 60, 61, 62, 63, 64);
const __m512i dot3v = _mm512_set1_epi16(1);
const __m512i zero = _mm512_setzero_si512();
- size_t k;
+ uint64_t k;
while (len >= 64) {
__m512i vs1 = _mm512_zextsi128_si512(_mm_cvtsi32_si128(adler0));
goto rem_peel;
}
- return adler;
+ return adler;
}
#endif
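/* The rem_peel label and the goto back to it implement remainder peeling: the wide
 * loop only consumes whole vector blocks, and whatever is left afterwards is sent
 * back through the small-length entry, which dispatches to a narrower kernel or the
 * scalar helper. An illustrative scalar-only sketch of that control flow follows
 * (hypothetical function, stand-in arithmetic in place of the SIMD block body). */
#include <stdint.h>

static uint32_t adler32_wide_sketch(uint32_t adler, const unsigned char *src, uint64_t len) {
    uint32_t adler0 = adler & 0xffff;
    uint32_t adler1 = (adler >> 16) & 0xffff;

rem_peel:
    if (len < 64) {
        /* small input or leftover tail: scalar loop stands in for the
         * adler32_len_16 / SSSE3 fallbacks used by the real code */
        while (len--) {
            adler0 += *src++;
            adler1 += adler0;
        }
        return (adler0 % 65521U) | ((adler1 % 65521U) << 16);
    }

    while (len >= 64) {
        unsigned i;
        /* stand-in for one vectorized 64-byte block */
        for (i = 0; i < 64; ++i) {
            adler0 += src[i];
            adler1 += adler0;
        }
        adler0 %= 65521U;
        adler1 %= 65521U;
        src += 64;
        len -= 64;
    }

    adler = adler0 | (adler1 << 16);
    if (len) {
        goto rem_peel;   /* peel the sub-block remainder */
    }
    return adler;
}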
#include "adler32_avx512_p.h"
#include "adler32_avx2_p.h"
-Z_INTERNAL uint32_t adler32_avx512_vnni(uint32_t adler, const uint8_t *src, size_t len) {
+Z_INTERNAL uint32_t adler32_avx512_vnni(uint32_t adler, const uint8_t *src, uint64_t len) {
if (src == NULL) return 1L;
if (len == 0) return adler;
uint32_t adler0, adler1;
adler1 = (adler >> 16) & 0xffff;
- adler0 = adler & 0xffff;
+ adler0 = adler & 0xffff;
rem_peel:
if (len < 32)
#if defined(X86_SSSE3_ADLER32)
return adler32_ssse3(adler, src, len);
#else
- return adler32_len_16(adler0, src, len, adler1);
+ return adler32_len_16(adler0, src, len, adler1);
#endif
if (len < 64)
#elif defined(X86_SSSE3_ADLER32)
return adler32_ssse3(adler, src, len);
#else
- return adler32_len_16(adler0, src, len, adler1);
+ return adler32_len_16(adler0, src, len, adler1);
#endif
const __m512i dot2v = _mm512_set_epi8(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
while (len >= 64) {
vs1 = _mm512_zextsi128_si512(_mm_cvtsi32_si128(adler0));
vs2 = _mm512_zextsi128_si512(_mm_cvtsi32_si128(adler1));
- size_t k = MIN(len, NMAX);
+ uint64_t k = MIN(len, NMAX);
k -= k % 64;
len -= k;
__m512i vs1_0 = vs1;
goto rem_peel;
}
- return adler;
+ return adler;
}
-Z_INTERNAL uint32_t adler32_fold_copy_avx512_vnni(uint32_t adler, uint8_t *dst, const uint8_t *src, size_t len) {
+Z_INTERNAL uint32_t adler32_fold_copy_avx512_vnni(uint32_t adler, uint8_t *dst, const uint8_t *src, uint64_t len) {
if (src == NULL) return 1L;
if (len == 0) return adler;
uint32_t adler0, adler1;
adler1 = (adler >> 16) & 0xffff;
- adler0 = adler & 0xffff;
+ adler0 = adler & 0xffff;
rem_peel_copy:
if (len < 32) {
#if defined(X86_SSSE3_ADLER32)
return adler32_ssse3(adler, src, len);
#else
- return adler32_len_16(adler0, src, len, adler1);
+ return adler32_len_16(adler0, src, len, adler1);
#endif
}
while (len >= 32) {
vs1 = _mm256_zextsi128_si256(_mm_cvtsi32_si128(adler0));
vs2 = _mm256_zextsi128_si256(_mm_cvtsi32_si128(adler1));
- size_t k = MIN(len, NMAX);
+ uint64_t k = MIN(len, NMAX);
k -= k % 32;
len -= k;
__m256i vs1_0 = vs1;
goto rem_peel_copy;
}
- return adler;
+ return adler;
}
#endif
#ifdef X86_SSE42_ADLER32
-Z_INTERNAL uint32_t adler32_fold_copy_sse42(uint32_t adler, uint8_t *dst, const uint8_t *src, size_t len) {
+Z_INTERNAL uint32_t adler32_fold_copy_sse42(uint32_t adler, uint8_t *dst, const uint8_t *src, uint64_t len) {
uint32_t adler0, adler1;
adler1 = (adler >> 16) & 0xffff;
- adler0 = adler & 0xffff;
+ adler0 = adler & 0xffff;
rem_peel:
if (len < 16) {
const __m128i dot2v = _mm_setr_epi8(32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17);
const __m128i dot2v_0 = _mm_setr_epi8(16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1);
const __m128i dot3v = _mm_set1_epi16(1);
- size_t k;
+ uint64_t k;
while (len >= 16) {
#include <immintrin.h>
-Z_INTERNAL uint32_t adler32_ssse3(uint32_t adler, const unsigned char *buf, size_t len) {
+Z_INTERNAL uint32_t adler32_ssse3(uint32_t adler, const unsigned char *buf, uint64_t len) {
uint32_t sum2;
/* split Adler-32 into component sums */
* additions worthwhile or if it's worth it to just eat the cost of an unaligned
* load. This is a pretty simple test, just test if 16 - the remainder + len is
* < 16 */
- size_t max_iters = NMAX;
- size_t rem = (uintptr_t)buf & 15;
- size_t align_offset = 16 - rem;
- size_t k = 0;
+ uint64_t max_iters = NMAX;
+ uint64_t rem = (uintptr_t)buf & 15;
+ uint64_t align_offset = 16 - rem;
+ uint64_t k = 0;
if (rem) {
if (len < 16 + align_offset) {
/* Let's eat the cost of this one unaligned load so that
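/* The decision above, in scalar terms (illustrative, names are mine): the comment's
 * "16 - the remainder + len" test corresponds to checking whether, after spending
 * align_offset = 16 - rem bytes to reach a 16-byte boundary, at least one full
 * 16-byte block would remain; if not, it is cheaper to just issue the single
 * unaligned load. */
#include <stdbool.h>
#include <stdint.h>

static bool worth_aligning(const unsigned char *buf, uint64_t len) {
    uint64_t rem = (uintptr_t)buf & 15;   /* bytes past the previous 16-byte boundary */
    uint64_t align_offset = 16 - rem;     /* scalar bytes needed to re-align */
    if (rem == 0)
        return false;                     /* already aligned, nothing to peel off */
    return len >= 16 + align_offset;      /* a full aligned block is still left */
}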
extern void cpu_check_features(void);
/* adler32 */
-typedef uint32_t (*adler32_func)(uint32_t adler, const unsigned char *buf, size_t len);
+typedef uint32_t (*adler32_func)(uint32_t adler, const unsigned char *buf, uint64_t len);
-extern uint32_t adler32_c(uint32_t adler, const unsigned char *buf, size_t len);
+extern uint32_t adler32_c(uint32_t adler, const unsigned char *buf, uint64_t len);
#ifdef ARM_NEON_ADLER32
-extern uint32_t adler32_neon(uint32_t adler, const unsigned char *buf, size_t len);
+extern uint32_t adler32_neon(uint32_t adler, const unsigned char *buf, uint64_t len);
#endif
#ifdef PPC_VMX_ADLER32
-extern uint32_t adler32_vmx(uint32_t adler, const unsigned char *buf, size_t len);
+extern uint32_t adler32_vmx(uint32_t adler, const unsigned char *buf, uint64_t len);
#endif
#ifdef X86_SSSE3_ADLER32
-extern uint32_t adler32_ssse3(uint32_t adler, const unsigned char *buf, size_t len);
+extern uint32_t adler32_ssse3(uint32_t adler, const unsigned char *buf, uint64_t len);
#endif
#ifdef X86_AVX2_ADLER32
-extern uint32_t adler32_avx2(uint32_t adler, const unsigned char *buf, size_t len);
+extern uint32_t adler32_avx2(uint32_t adler, const unsigned char *buf, uint64_t len);
#endif
#ifdef X86_AVX512_ADLER32
-extern uint32_t adler32_avx512(uint32_t adler, const unsigned char *buf, size_t len);
+extern uint32_t adler32_avx512(uint32_t adler, const unsigned char *buf, uint64_t len);
#endif
#ifdef X86_AVX512VNNI_ADLER32
-extern uint32_t adler32_avx512_vnni(uint32_t adler, const unsigned char *buf, size_t len);
+extern uint32_t adler32_avx512_vnni(uint32_t adler, const unsigned char *buf, uint64_t len);
#endif
#ifdef POWER8_VSX_ADLER32
-extern uint32_t adler32_power8(uint32_t adler, const unsigned char* buf, size_t len);
+extern uint32_t adler32_power8(uint32_t adler, const unsigned char* buf, uint64_t len);
#endif
/* adler32 folding */
#ifdef X86_SSE42_ADLER32
-extern uint32_t adler32_fold_copy_sse42(uint32_t adler, uint8_t *dst, const uint8_t *src, size_t len);
+extern uint32_t adler32_fold_copy_sse42(uint32_t adler, uint8_t *dst, const uint8_t *src, uint64_t len);
#endif
#ifdef X86_AVX2_ADLER32
-extern uint32_t adler32_fold_copy_avx2(uint32_t adler, uint8_t *dst, const uint8_t *src, size_t len);
+extern uint32_t adler32_fold_copy_avx2(uint32_t adler, uint8_t *dst, const uint8_t *src, uint64_t len);
#endif
#ifdef X86_AVX512_ADLER32
-extern uint32_t adler32_fold_copy_avx512(uint32_t adler, uint8_t *dst, const uint8_t *src, size_t len);
+extern uint32_t adler32_fold_copy_avx512(uint32_t adler, uint8_t *dst, const uint8_t *src, uint64_t len);
#endif
#ifdef X86_AVX512VNNI_ADLER32
-extern uint32_t adler32_fold_copy_avx512_vnni(uint32_t adler, uint8_t *dst, const uint8_t *src, size_t len);
+extern uint32_t adler32_fold_copy_avx512_vnni(uint32_t adler, uint8_t *dst, const uint8_t *src, uint64_t len);
#endif
/* CRC32 folding */
return functable.longest_match_slow(s, cur_match);
}
-Z_INTERNAL uint32_t adler32_stub(uint32_t adler, const unsigned char *buf, size_t len) {
+Z_INTERNAL uint32_t adler32_stub(uint32_t adler, const unsigned char *buf, uint64_t len) {
// Initialize default
functable.adler32 = &adler32_c;
cpu_check_features();
return functable.adler32(adler, buf, len);
}
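/* The stub functions here implement lazy dispatch: the first call runs the CPU
 * feature checks, overwrites the functable entry with the best available kernel,
 * and then forwards the call through the table, so subsequent calls skip the
 * selection entirely. A generic sketch of that pattern (hypothetical names, a
 * single entry, no thread-safety handling): */
#include <stdint.h>

typedef uint32_t (*checksum_fn)(uint32_t seed, const unsigned char *buf, uint64_t len);

static uint32_t checksum_generic(uint32_t seed, const unsigned char *buf, uint64_t len) {
    while (len--)
        seed = seed * 31 + *buf++;        /* placeholder kernel, not Adler-32 */
    return seed;
}

static uint32_t checksum_stub(uint32_t seed, const unsigned char *buf, uint64_t len);

static struct { checksum_fn checksum; } table = { checksum_stub };

static uint32_t checksum_stub(uint32_t seed, const unsigned char *buf, uint64_t len) {
    /* first call: select an implementation, patch the table, forward the call */
    table.checksum = checksum_generic;    /* a real table would pick per CPU features */
    return table.checksum(seed, buf, len);
}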
-Z_INTERNAL uint32_t adler32_fold_copy_stub(uint32_t adler, uint8_t *dst, const uint8_t *src, size_t len) {
+Z_INTERNAL uint32_t adler32_fold_copy_stub(uint32_t adler, uint8_t *dst, const uint8_t *src, uint64_t len) {
functable.adler32_fold_copy = &adler32_fold_copy_c;
#if (defined X86_SSE42_ADLER32)
if (x86_cpu_has_sse42)
#include "adler32_fold.h"
struct functable_s {
- uint32_t (* adler32) (uint32_t adler, const unsigned char *buf, size_t len);
- uint32_t (* adler32_fold_copy) (uint32_t adler, uint8_t *dst, const uint8_t *src, size_t len);
+ uint32_t (* adler32) (uint32_t adler, const unsigned char *buf, uint64_t len);
+ uint32_t (* adler32_fold_copy) (uint32_t adler, uint8_t *dst, const uint8_t *src, uint64_t len);
uint32_t (* crc32) (uint32_t crc, const unsigned char *buf, uint64_t len);
uint32_t (* crc32_fold_reset) (crc32_fold *crc);
void (* crc32_fold_copy) (crc32_fold *crc, uint8_t *dst, const uint8_t *src, uint64_t len);
#define MAX_RANDOM_INTS (1024 * 1024)
#define MAX_RANDOM_INTS_SIZE (MAX_RANDOM_INTS * sizeof(uint32_t))
-typedef uint32_t (*adler32_cpy_func)(uint32_t adler, unsigned char *dst, const unsigned char *buf, size_t len);
+typedef uint32_t (*adler32_cpy_func)(uint32_t adler, unsigned char *dst, const unsigned char *buf, uint64_t len);
class adler32_copy: public benchmark::Fixture {
private:
state.SkipWithError("CPU does not support " #name); \
} \
Bench(state, [](uint32_t init_sum, unsigned char *dst, \
- const unsigned char *buf, size_t len) -> uint32_t { \
+ const unsigned char *buf, uint64_t len) -> uint32_t { \
memcpy(dst, buf, len); \
return fptr(init_sum, buf, len); \
}); \