git.ipfire.org Git - thirdparty/zlib-ng.git/commitdiff
Restructure the loop, and see about a 3% speedup in run time.

author     Shuxin Yang <shuxinyang2006@gmail.com>
           Tue, 18 Mar 2014 01:17:23 +0000 (18:17 -0700)
committer  hansr <hk-git@circlestorm.org>
           Wed, 8 Oct 2014 12:15:37 +0000 (14:15 +0200)

I believe the speedup arises from:
    o. Removing the conditional branch in the loop.
    o. Removing some indirect memory accesses:
       The memory accesses to "s->prev_length" and "s->strstart" cannot be
       promoted to registers because the compiler is not able to disambiguate
       them from the stores performed by INSERT_STRING().
    o. Converting the non-countable loop into a countable loop.
       I'm not sure how much this change contributes by itself; in general, a
       countable loop is much easier to optimize than a non-countable one.
       (A minimal sketch of the last two points follows this list.)
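
The last two points are easiest to see on a stripped-down example. The sketch
below is illustrative only and not zlib-ng code: "struct state", walk_slow()
and walk_fast() are made-up names, s->pos/s->len stand in for
s->strstart/s->prev_length, and the store into s->table[] stands in for
INSERT_STRING(). In walk_slow() the stores go through the same pointer s, so
the compiler cannot prove they leave s->pos and s->len untouched and must
reload both from memory on every iteration; walk_fast() works on plain locals
with a trip count computed up front, which is the same shape bulk_insert_str()
has in the diff below.

    struct state {
        unsigned pos;            /* stand-in for s->strstart       */
        unsigned len;            /* stand-in for s->prev_length    */
        unsigned short *table;   /* stand-in for s->head / s->prev */
    };

    /* Non-countable loop: the position and the exit test are re-read from
     * memory on every iteration because the stores may alias them.
     * Assumes s->len >= 1. */
    void walk_slow(struct state *s) {
        do {
            s->pos++;
            s->table[s->pos & 0xffu] = (unsigned short)s->pos;
        } while (--s->len != 0);
    }

    /* Countable loop: position and trip count live in registers and the
     * loop bound is known before the first iteration. */
    void walk_fast(struct state *s) {
        unsigned pos = s->pos;
        unsigned n   = s->len;
        unsigned idx;

        for (idx = 1; idx <= n; idx++)
            s->table[(pos + idx) & 0xffu] = (unsigned short)(pos + idx);

        s->pos = pos + n;
        s->len = 0;
    }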

Conflicts:
	deflate.h

Files changed:
deflate.c
deflate.h

index 73f8f83a6f9f4139a2f397d3a83f1cbd5f192514..ba7235b5a436c35adf2aeb4725e521418e2ad641 100644
--- a/deflate.c
+++ b/deflate.c
@@ -236,6 +236,17 @@ local inline Pos insert_string(deflate_state *z_const s, z_const Pos str)
 }
 
 
+#ifndef NOT_TWEAK_COMPILER
+__attribute__ ((always_inline)) local void 
+bulk_insert_str(deflate_state *s, Pos startpos, uInt count) {
+    uInt idx;
+    for (idx = 0; idx < count; idx++) {
+        Posf dummy;
+        INSERT_STRING(s, startpos + idx, dummy);
+    }
+}
+#endif
+
 /* ===========================================================================
  * Initialize the hash table (avoiding 64K overflow for 16 bit systems).
  * prev[] will be initialized on the fly.
@@ -1729,6 +1740,8 @@ local block_state deflate_slow(s, flush)
              * the hash table.
              */
             s->lookahead -= s->prev_length-1;
+
+#ifdef NOT_TWEAK_COMPILER
             s->prev_length -= 2;
             do {
                 if (++s->strstart <= max_insert) {
@@ -1738,6 +1751,20 @@ local block_state deflate_slow(s, flush)
             s->match_available = 0;
             s->match_length = MIN_MATCH-1;
             s->strstart++;
+#else
+            {
+                uInt mov_fwd = s->prev_length - 2;
+                uInt insert_cnt = mov_fwd;
+                if (unlikely(insert_cnt > max_insert - s->strstart))
+                    insert_cnt = max_insert - s->strstart;
+
+                bulk_insert_str(s, s->strstart + 1, insert_cnt);
+                s->prev_length = 0;
+                s->match_available = 0;
+                s->match_length = MIN_MATCH-1;
+                s->strstart += mov_fwd + 1;
+            }
+#endif /*NOT_TWEAK_COMPILER*/
 
             if (bflush) FLUSH_BLOCK(s, 0);
 
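For reference, the old and the new path above end up in the same state; only
the way the max_insert bound is applied changes. The comparison below is an
illustrative sketch, not zlib-ng code (old_path/new_path and the inserts
out-parameter are made up): both hash positions strstart+1 through
strstart+insert_cnt, and both leave strstart advanced by prev_length-1. It
assumes prev_length >= 3 (MIN_MATCH) and max_insert >= strstart, which the
rewritten block also relies on since the clamp uses an unsigned subtraction.

    /* Old path: strstart advances on every iteration, but positions past
     * max_insert are simply not hashed.  Returns the final strstart and
     * reports how many positions were hashed. */
    unsigned old_path(unsigned strstart, unsigned prev_length,
                      unsigned max_insert, unsigned *inserts) {
        unsigned n = prev_length - 2;
        *inserts = 0;
        do {
            if (++strstart <= max_insert)
                (*inserts)++;
        } while (--n != 0);
        return strstart + 1;            /* the s->strstart++ after the loop */
    }

    /* New path: the number of hashed positions is clamped once, while the
     * window position still moves forward by the full mov_fwd + 1. */
    unsigned new_path(unsigned strstart, unsigned prev_length,
                      unsigned max_insert, unsigned *inserts) {
        unsigned mov_fwd = prev_length - 2;
        unsigned insert_cnt = mov_fwd;
        if (insert_cnt > max_insert - strstart)
            insert_cnt = max_insert - strstart;
        *inserts = insert_cnt;
        return strstart + mov_fwd + 1;
    }
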
index 19904095586efea00d97270c30304ad691c2509d..5cf2ce1268dda0a76510ad93dfa7a365260ff6ee 100644
--- a/deflate.h
+++ b/deflate.h
@@ -445,4 +445,7 @@ local void send_bits(s, value, length)
 #endif
 
 
+#define likely(x)       __builtin_expect((x),1)
+#define unlikely(x)     __builtin_expect((x),0)
+
 #endif /* DEFLATE_H */
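
__builtin_expect() is a GCC/Clang builtin, so the likely()/unlikely() macros
added here assume such a compiler. A more defensive form that is often used
elsewhere (shown only as a sketch, not something this commit adds) guards the
builtin and normalizes the condition to 0/1 with !!:

    /* Sketch of a guarded variant -- not part of this commit. */
    #if defined(__GNUC__) || defined(__clang__)
    #  define likely(x)     __builtin_expect(!!(x), 1)
    #  define unlikely(x)   __builtin_expect(!!(x), 0)
    #else
    #  define likely(x)     (x)
    #  define unlikely(x)   (x)
    #endif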