From: Shuxin Yang Date: Tue, 18 Mar 2014 01:17:23 +0000 (-0700) Subject: Restructure the loop, and see about 3% speedup in run time. I believe the X-Git-Tag: 1.9.9-b1~935 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=34bf824bf755fdf4979a9ed26e86028a12fdedc8;p=thirdparty%2Fzlib-ng.git Restructure the loop, and see about 3% speedup in run time. I believe the speedup arises from: o. Remove the conditional branch in the loop o. Remove some indirect memory accesses: The memory accesses to "s->prev_length" and "s->strstart" cannot be promoted to registers because the compiler is not able to disambiguate them from the store operations in INSERT_STRING() o. Convert non-countable loop to countable loop. I'm not sure whether this change really contributes; in general, a countable loop is much easier to optimize than a non-countable loop. Conflicts: deflate.h --- diff --git a/deflate.c b/deflate.c index 73f8f83a6..ba7235b5a 100644 --- a/deflate.c +++ b/deflate.c @@ -236,6 +236,17 @@ local inline Pos insert_string(deflate_state *z_const s, z_const Pos str) } +#ifndef NOT_TWEAK_COMPILER +__attribute__ ((always_inline)) local void +bulk_insert_str(deflate_state *s, Pos startpos, uInt count) { + uInt idx; + for (idx = 0; idx < count; idx++) { + Posf dummy; + INSERT_STRING(s, startpos + idx, dummy); + } +} +#endif + /* =========================================================================== * Initialize the hash table (avoiding 64K overflow for 16 bit systems). * prev[] will be initialized on the fly. @@ -1729,6 +1740,8 @@ local block_state deflate_slow(s, flush) * the hash table. 
*/ s->lookahead -= s->prev_length-1; + +#ifdef NOT_TWEAK_COMPILER s->prev_length -= 2; do { if (++s->strstart <= max_insert) { @@ -1738,6 +1751,20 @@ local block_state deflate_slow(s, flush) s->match_available = 0; s->match_length = MIN_MATCH-1; s->strstart++; +#else + { + uInt mov_fwd = s->prev_length - 2; + uInt insert_cnt = mov_fwd; + if (unlikely(insert_cnt > max_insert - s->strstart)) + insert_cnt = max_insert - s->strstart; + + bulk_insert_str(s, s->strstart + 1, insert_cnt); + s->prev_length = 0; + s->match_available = 0; + s->match_length = MIN_MATCH-1; + s->strstart += mov_fwd + 1; + } +#endif /*NOT_TWEAK_COMPILER*/ if (bflush) FLUSH_BLOCK(s, 0); diff --git a/deflate.h b/deflate.h index 199040955..5cf2ce126 100644 --- a/deflate.h +++ b/deflate.h @@ -445,4 +445,7 @@ local void send_bits(s, value, length) #endif +#define likely(x) __builtin_expect((x),1) +#define unlikely(x) __builtin_expect((x),0) + #endif /* DEFLATE_H */