From: Nathan Moinvaziri Date: Sun, 16 Aug 2020 03:06:26 +0000 (-0700) Subject: Remove NO_DIVIDE from adler32. X-Git-Tag: 1.9.9-b1~105 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=193d8fd7dfb7927facab7a3034daa27ad5b9df1c;p=thirdparty%2Fzlib-ng.git Remove NO_DIVIDE from adler32. --- diff --git a/adler32.c b/adler32.c index 247c45be..989d7b4e 100644 --- a/adler32.c +++ b/adler32.c @@ -46,8 +46,8 @@ ZLIB_INTERNAL uint32_t adler32_c(uint32_t adler, const unsigned char *buf, size_ buf += 8; #endif } while (--n); - MOD(adler); - MOD(sum2); + adler %= BASE; + sum2 %= BASE; } /* do remaining bytes (less than NMAX, still just one modulo) */ @@ -69,8 +69,8 @@ ZLIB_INTERNAL uint32_t adler32_c(uint32_t adler, const unsigned char *buf, size_ adler += *buf++; sum2 += adler; } - MOD(adler); - MOD(sum2); + adler %= BASE; + sum2 %= BASE; } /* return recombined sums */ @@ -109,11 +109,11 @@ static uint32_t adler32_combine_(uint32_t adler1, uint32_t adler2, z_off64_t len return 0xffffffff; /* the derivation of this formula is left as an exercise for the reader */ - MOD63(len2); /* assumes len2 >= 0 */ + len2 %= BASE; /* assumes len2 >= 0 */ rem = (unsigned)len2; sum1 = adler1 & 0xffff; sum2 = rem * sum1; - MOD(sum2); + sum2 %= BASE; sum1 += (adler2 & 0xffff) + BASE - 1; sum2 += ((adler1 >> 16) & 0xffff) + ((adler2 >> 16) & 0xffff) + BASE - rem; if (sum1 >= BASE) sum1 -= BASE; diff --git a/adler32_p.h b/adler32_p.h index c1967320..7f75c71e 100644 --- a/adler32_p.h +++ b/adler32_p.h @@ -18,46 +18,6 @@ #define DO8(sum1, sum2, buf, i) {DO4(sum1, sum2, buf, i); DO4(sum1, sum2, buf, i+4);} #define DO16(sum1, sum2, buf) {DO8(sum1, sum2, buf, 0); DO8(sum1, sum2, buf, 8);} -/* use NO_DIVIDE if your processor does not do division in hardware -- - try it both ways to see which is faster */ -#ifdef NO_DIVIDE -/* note that this assumes BASE is 65521, where 65536 % 65521 == 15 - (thank you to John Reiser for pointing this out) */ -# define CHOP(a) \ - do { \ - uint32_t tmp = a >> 16; \ - a &= 0xffff; \ - a += (tmp << 4) - tmp; \ - } while (0) -# define MOD28(a) \ - do { \ - CHOP(a); \ - if (a >= BASE) a -= BASE; \ - } while (0) -# define MOD(a) \ - do { \ - CHOP(a); \ - MOD28(a); \ - } while (0) -# define MOD63(a) \ - do { /* this assumes a is not negative */ \ - z_off64_t tmp = a >> 32; \ - a &= 0xffffffffL; \ - a += (tmp << 8) - (tmp << 5) + tmp; \ - tmp = a >> 16; \ - a &= 0xffffL; \ - a += (tmp << 4) - tmp; \ - tmp = a >> 16; \ - a &= 0xffffL; \ - a += (tmp << 4) - tmp; \ - if (a >= BASE) a -= BASE; \ - } while (0) -#else -# define MOD(a) a %= BASE -# define MOD28(a) a %= BASE -# define MOD63(a) a %= BASE -#endif - static inline uint32_t adler32_len_1(uint32_t adler, const unsigned char *buf, uint32_t sum2) { adler += buf[0]; if (adler >= BASE) @@ -76,7 +36,7 @@ static inline uint32_t adler32_len_16(uint32_t adler, const unsigned char *buf, } if (adler >= BASE) adler -= BASE; - MOD28(sum2); /* only added so many BASE's */ + sum2 %= BASE; /* only added so many BASE's */ return adler | (sum2 << 16); } diff --git a/arch/x86/adler32_avx.c b/arch/x86/adler32_avx.c index cd5823f8..60d3da4b 100644 --- a/arch/x86/adler32_avx.c +++ b/arch/x86/adler32_avx.c @@ -93,12 +93,12 @@ ZLIB_INTERNAL uint32_t adler32_avx2(uint32_t adler, const unsigned char *buf, si adler = (s1_unpack[0] % BASE) + (s1_unpack[1] % BASE) + (s1_unpack[2] % BASE) + (s1_unpack[3] % BASE) + (s1_unpack[4] % BASE) + (s1_unpack[5] % BASE) + (s1_unpack[6] % BASE) + (s1_unpack[7] % BASE); - MOD(adler); + adler %= BASE; s1[7] = adler; sum2 = (s2_unpack[0] % BASE) + (s2_unpack[1] % BASE) + (s2_unpack[2] % BASE) + (s2_unpack[3] % BASE) + (s2_unpack[4] % BASE) + (s2_unpack[5] % BASE) + (s2_unpack[6] % BASE) + (s2_unpack[7] % BASE); - MOD(sum2); + sum2 %= BASE; s2[7] = sum2; } @@ -107,8 +107,8 @@ ZLIB_INTERNAL uint32_t adler32_avx2(uint32_t adler, const unsigned char *buf, si adler += *buf++; sum2 += adler; } - MOD(adler); - MOD(sum2); + adler %= BASE; + sum2 %= BASE; /* return recombined sums */ return adler | (sum2 << 16); diff --git a/arch/x86/adler32_ssse3.c b/arch/x86/adler32_ssse3.c index 931ea90c..c1a48e98 100644 --- a/arch/x86/adler32_ssse3.c +++ b/arch/x86/adler32_ssse3.c @@ -95,11 +95,11 @@ ZLIB_INTERNAL uint32_t adler32_ssse3(uint32_t adler, const unsigned char *buf, s _mm_store_si128((__m128i*)s2_unpack, vs2); adler = (s1_unpack[0] % BASE) + (s1_unpack[1] % BASE) + (s1_unpack[2] % BASE) + (s1_unpack[3] % BASE); - MOD(adler); + adler %= BASE; s1[3] = adler; sum2 = (s2_unpack[0] % BASE) + (s2_unpack[1] % BASE) + (s2_unpack[2] % BASE) + (s2_unpack[3] % BASE); - MOD(sum2); + sum2 %= BASE; s2[3] = sum2; } @@ -108,8 +108,8 @@ ZLIB_INTERNAL uint32_t adler32_ssse3(uint32_t adler, const unsigned char *buf, s adler += *buf++; sum2 += adler; } - MOD(adler); - MOD(sum2); + adler %= BASE; + sum2 %= BASE; /* return recombined sums */ return adler | (sum2 << 16);