From: Jim Kukunas
Date: Wed, 17 Jul 2013 17:34:56 +0000 (-0700)
Subject: Add preprocessor define to tune Adler32 loop unrolling.
X-Git-Tag: v1.2.8-jtkv4~11
X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=fad00ea21a41690416232dd7ed93f1553901a432;p=thirdparty%2Fzlib-ng.git

Add preprocessor define to tune Adler32 loop unrolling.

Excessive loop unrolling is detrimental to performance. This patch adds
a preprocessor define, ADLER32_UNROLL_LESS, to reduce the unrolling
factor from 16 to 8.

Updates configure script to set it as default on x86.
---

diff --git a/adler32.c b/adler32.c
index a868f073d..1007e38af 100644
--- a/adler32.c
+++ b/adler32.c
@@ -104,10 +104,19 @@ uLong ZEXPORT adler32(adler, buf, len)
     /* do length NMAX blocks -- requires just one modulo operation */
     while (len >= NMAX) {
         len -= NMAX;
+#ifndef ADLER32_UNROLL_LESS
         n = NMAX / 16;          /* NMAX is divisible by 16 */
+#else
+        n = NMAX / 8;           /* NMAX is divisible by 8 */
+#endif
         do {
+#ifndef ADLER32_UNROLL_LESS
             DO16(buf);          /* 16 sums unrolled */
             buf += 16;
+#else
+            DO8(buf,0);         /* 8 sums unrolled */
+            buf += 8;
+#endif
         } while (--n);
         MOD(adler);
         MOD(sum2);
@@ -115,10 +124,17 @@ uLong ZEXPORT adler32(adler, buf, len)
 
     /* do remaining bytes (less than NMAX, still just one modulo) */
     if (len) {                  /* avoid modulos if none remaining */
+#ifndef ADLER32_UNROLL_LESS
         while (len >= 16) {
             len -= 16;
             DO16(buf);
             buf += 16;
+#else
+        while (len >= 8) {
+            len -= 8;
+            DO8(buf, 0);
+            buf += 8;
+#endif
         }
         while (len--) {
             adler += *buf++;
diff --git a/configure b/configure
index b1c5a7eb6..d6f790bae 100755
--- a/configure
+++ b/configure
@@ -771,6 +771,9 @@ case "${ARCH}" in
 
         CFLAGS="${CFLAGS} -DUNALIGNED_OK"
         SFLAGS="${SFLAGS} -DUNALIGNED_OK"
+
+        CFLAGS="${CFLAGS} -DADLER32_UNROLL_LESS"
+        SFLAGS="${SFLAGS} -DADLER32_UNROLL_LESS"
     ;;
     i386 | i486 | i586 | i686)
         OBJC="${OBJC} x86.o"
@@ -781,6 +784,9 @@ case "${ARCH}" in
 
         CFLAGS="${CFLAGS} -DUNALIGNED_OK"
         SFLAGS="${SFLAGS} -DUNALIGNED_OK"
+
+        CFLAGS="${CFLAGS} -DADLER32_UNROLL_LESS"
+        SFLAGS="${SFLAGS} -DADLER32_UNROLL_LESS"
     ;;
 esac
 