Zero the entire s1 and s2 arrays with memset (all 8 elements, not just the first 7) for possibly better performance.

author Nathan Moinvaziri <nathan@nathanm.com>

Mon, 25 May 2020 13:28:38 +0000 (06:28 -0700)

committer Hans Kristian Rosbach <hk-github@circlestorm.org>

Mon, 8 Jun 2020 19:17:18 +0000 (21:17 +0200)
author Nathan Moinvaziri <nathan@nathanm.com>
Mon, 25 May 2020 13:28:38 +0000 (06:28 -0700)
committer Hans Kristian Rosbach <hk-github@circlestorm.org>
Mon, 8 Jun 2020 19:17:18 +0000 (21:17 +0200)
diff --git a/arch/x86/adler32_avx.c b/arch/x86/adler32_avx.c

index d316f19525b786ef483f7d019539b17f5b7f2197..8907cdd3b987a550d4b310725bbb651ff278147a 100644 (file)
--- a/arch/x86/adler32_avx.c
+++ b/arch/x86/adler32_avx.c
@@ -37,8 +37,8 @@ uint32_t adler32_avx2(uint32_t adler, const unsigned char *buf, size_t len) {
          return adler32_len_16(adler, buf, len, sum2);
  
      uint32_t ALIGNED_(32) s1[8], s2[8];
-    memset(s1, '\0', sizeof(uint32_t)*7); s1[7] = adler; // TODO: would a masked load be faster?
-    memset(s2, '\0', sizeof(uint32_t)*7); s2[7] = sum2;
+    memset(s1, 0, sizeof(s1)); s1[7] = adler; // TODO: would a masked load be faster?
+    memset(s2, 0, sizeof(s2)); s2[7] = sum2;
      char ALIGNED_(32) dot1[32] = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1};
      __m256i dot1v = _mm256_load_si256((__m256i*)dot1);
      char ALIGNED_(32) dot2[32] = {32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1};
author	Nathan Moinvaziri <nathan@nathanm.com>
	Mon, 25 May 2020 13:28:38 +0000 (06:28 -0700)
committer	Hans Kristian Rosbach <hk-github@circlestorm.org>
	Mon, 8 Jun 2020 19:17:18 +0000 (21:17 +0200)