]> git.ipfire.org Git - thirdparty/zlib-ng.git/commitdiff
Use unaligned 32-bit and 64-bit compare based on best match length when searching...
authorNathan Moinvaziri <nathan@solidstatenetworks.com>
Sun, 26 Jul 2020 01:24:48 +0000 (21:24 -0400)
committerHans Kristian Rosbach <hk-github@circlestorm.org>
Sun, 23 Aug 2020 07:56:11 +0000 (09:56 +0200)
Move TRIGGER_LEVEL to match_tpl.h since it is only used in longest match.
Use early return inside match loops instead of cont variable.
Added back two variable check for platforms that don't supported unaligned access.

deflate.h
match_tpl.h

index ce6106cffa312ffe107499b2cf93d75cd399515d..93f7b5b70a5b0e1fb0933a1327c77b36181e0280 100644 (file)
--- a/deflate.h
+++ b/deflate.h
@@ -404,8 +404,6 @@ void ZLIB_INTERNAL flush_pending(PREFIX3(streamp) strm);
  * used.
  */
 
-#define TRIGGER_LEVEL 5
-
 /* Bit buffer and compress bits calculation debugging */
 #ifdef ZLIB_DEBUG
 #  define cmpr_bits_add(s, len)     s->compressed_len += (len)
index 24bf01de79d12493651e1fbc26cabaa4dceccb1e..44bd04da333003f61254bc54baef288d25943fd4 100644 (file)
@@ -7,13 +7,11 @@
 #define BESTCMP_TYPE
 
 #ifdef UNALIGNED_OK
-#if MIN_MATCH >= 4
+#  ifdef UNALIGNED64_OK
+typedef uint64_t        bestcmp_t;
+#  else
 typedef uint32_t        bestcmp_t;
-#elif MIN_MATCH >= 2
-typedef uint16_t        bestcmp_t;
-#else
-typedef uint8_t         bestcmp_t;
-#endif
+#  endif
 #else
 typedef uint8_t         bestcmp_t;
 #endif
@@ -35,10 +33,26 @@ ZLIB_INTERNAL int32_t LONGEST_MATCH(deflate_state *const s, Pos cur_match) {
     ZLIB_REGISTER unsigned char *window = s->window;
     ZLIB_REGISTER unsigned char *scan = window + strstart;
     const Pos *prev = s->prev;
-    unsigned chain_length;
     Pos limit;
-    unsigned int best_len, nice_match;
-    bestcmp_t scan_end, scan_start;
+    uint32_t chain_length, nice_match, best_len, offset;
+    bestcmp_t scan_end;
+#ifndef UNALIGNED_OK
+    bestcmp_t scan_end0;
+#else
+    bestcmp_t scan_start;
+#endif
+
+#define EARLY_EXIT_TRIGGER_LEVEL 5
+
+#define RETURN_BEST_LEN \
+    if (best_len <= s->lookahead) \
+        return best_len; \
+    return s->lookahead;
+
+#define GOTO_NEXT_CHAIN \
+    if (--chain_length && (cur_match = prev[cur_match & wmask]) > limit) \
+        continue; \
+    RETURN_BEST_LEN;
 
     /*
      * The code is optimized for HASH_BITS >= 8 and MAX_MATCH-2 multiple
@@ -46,12 +60,33 @@ ZLIB_INTERNAL int32_t LONGEST_MATCH(deflate_state *const s, Pos cur_match) {
      */
     Assert(s->hash_bits >= 8 && MAX_MATCH == 258, "Code too clever");
 
+    best_len = s->prev_length ? s->prev_length : 1;
+
+    /* 
+     * Calculate read offset which should only extend an extra byte 
+     * to find the next best match length.
+     */
+    offset = best_len-1;
+#ifdef UNALIGNED_OK
+    if (best_len >= sizeof(uint32_t)) {
+        offset -= 2;
+#ifdef UNALIGNED64_OK
+        if (best_len >= sizeof(uint64_t))
+            offset -= 4;
+#endif
+    }
+#endif
+
+    scan_end   = *(bestcmp_t *)(scan+offset);
+#ifndef UNALIGNED_OK
+    scan_end0  = *(bestcmp_t *)(scan+offset+1);
+#else
+    scan_start = *(bestcmp_t *)(scan);
+#endif
+
     /*
      * Do not waste too much time if we already have a good match
      */
-    best_len = s->prev_length;
-    if (best_len == 0)
-        best_len = 1;
     chain_length = s->max_chain_length;
     if (best_len >= s->good_match)
         chain_length >>= 2;
@@ -60,7 +95,7 @@ ZLIB_INTERNAL int32_t LONGEST_MATCH(deflate_state *const s, Pos cur_match) {
      * Do not look for matches beyond the end of the input. This is
      * necessary to make deflate deterministic
      */
-    nice_match = (unsigned int)s->nice_match > s->lookahead ? s->lookahead : (unsigned int)s->nice_match;
+    nice_match = (uint32_t)s->nice_match > s->lookahead ? s->lookahead : (uint32_t)s->nice_match;
 
     /*
      * Stop when cur_match becomes <= limit. To simplify the code,
@@ -68,14 +103,10 @@ ZLIB_INTERNAL int32_t LONGEST_MATCH(deflate_state *const s, Pos cur_match) {
      */
     limit = strstart > MAX_DIST(s) ? (Pos)(strstart - MAX_DIST(s)) : 0;
 
-    scan_start = *(bestcmp_t *)(scan);
-    scan_end = *(bestcmp_t *)(scan+best_len-1);
-
     Assert((unsigned long)strstart <= s->window_size - MIN_LOOKAHEAD, "need lookahead");
-    do {
+    for (;;) {
         ZLIB_REGISTER unsigned char *match;
         ZLIB_REGISTER unsigned int len;
-        int cont;
         if (cur_match >= strstart)
             break;
 
@@ -89,50 +120,78 @@ ZLIB_INTERNAL int32_t LONGEST_MATCH(deflate_state *const s, Pos cur_match) {
          * is limited to the lookahead, so the output of deflate is not
          * affected by the uninitialized values.
          */
-        cont = 1;
-        do {
-            match = window + cur_match;
-            if (LIKELY(*(bestcmp_t *)(match+best_len-1) != scan_end ||
-                       *(bestcmp_t *)(match) != scan_start)) {
-                if ((cur_match = prev[cur_match & wmask]) > limit && --chain_length != 0) {
-                    continue;
-                }
-                cont = 0;
+#ifdef UNALIGNED_OK
+        if (best_len < sizeof(uint32_t)) {
+            for (;;) {
+                match = window + cur_match;
+                if (UNLIKELY(*(uint16_t *)(match+offset) == (uint16_t)scan_end &&
+                             *(uint16_t *)(match) == (uint16_t)scan_start))
+                    break;
+                GOTO_NEXT_CHAIN;
             }
-            break;
-        } while (1);
-
-        if (!cont)
-            break;
-
-#if MIN_MATCH >= 2 && defined(UNALIGNED_OK)
-        len = COMPARE256(scan+2, match+2) + 2;
+#  ifdef UNALIGNED64_OK
+        } else if (best_len >= sizeof(uint64_t)) {
+            for (;;) {
+                match = window + cur_match;
+                if (UNLIKELY(*(uint64_t *)(match+offset) == (uint64_t)scan_end &&
+                             *(uint64_t *)(match) == (uint64_t)scan_start))
+                    break;
+                GOTO_NEXT_CHAIN;
+            }
+#  endif
+        } else {
+            for (;;) {
+                match = window + cur_match;
+                if (UNLIKELY(*(uint32_t *)(match+offset) == (uint32_t)scan_end &&
+                             *(uint32_t *)(match) == (uint32_t)scan_start))
+                    break;
+                GOTO_NEXT_CHAIN;
+            }
+        }
 #else
-        len = COMPARE258(scan, match);
+        for (;;) {
+            match = window + cur_match;
+            if (UNLIKELY(match[offset] == scan_end && match[offset+1] == scan_end0 &&
+                         match[0] == scan[0] && match[1] == scan[1]))
+                break;
+            GOTO_NEXT_CHAIN;
+        }
 #endif
-
+        len = COMPARE256(scan+2, match+2) + 2;
         Assert(scan+len <= window+(unsigned)(s->window_size-1), "wild scan");
 
         if (len > best_len) {
             s->match_start = cur_match;
             best_len = len;
-            if (len >= nice_match)
+            if (best_len >= nice_match)
                 break;
-            scan_end = *(bestcmp_t *)(scan+best_len-1);
+            offset = best_len-1;
+#ifdef UNALIGNED_OK
+            if (best_len >= sizeof(uint32_t)) {
+                offset -= 2;
+#ifdef UNALIGNED64_OK
+                if (best_len >= sizeof(uint64_t))
+                    offset -= 4;
+#endif
+            }
+#endif
+            scan_end = *(bestcmp_t *)(scan+offset);
+#ifndef UNALIGNED_OK
+            scan_end0 = *(bestcmp_t *)(scan+offset+1);
+#endif
         } else {
             /*
              * The probability of finding a match later if we here
              * is pretty low, so for performance it's best to
              * outright stop here for the lower compression levels
              */
-            if (s->level < TRIGGER_LEVEL)
+            if (s->level < EARLY_EXIT_TRIGGER_LEVEL)
                 break;
         }
-    } while ((cur_match = prev[cur_match & wmask]) > limit && --chain_length != 0);
+        GOTO_NEXT_CHAIN;
+    }
 
-    if (best_len <= s->lookahead)
-        return best_len;
-    return s->lookahead;
+    RETURN_BEST_LEN;
 }
 
 #undef LONGEST_MATCH