]> git.ipfire.org Git - thirdparty/zlib-ng.git/commitdiff
Separate fast-zlib matching algorithm into its own longest_match variant.
author Nathan Moinvaziri <nathan@nathanm.com>
Tue, 22 Jun 2021 03:38:51 +0000 (20:38 -0700)
committer Hans Kristian Rosbach <hk-github@circlestorm.org>
Fri, 25 Jun 2021 18:09:14 +0000 (20:09 +0200)
arch/x86/compare258_avx.c
arch/x86/compare258_sse.c
compare258.c
functable.c
functable.h
match_tpl.h

index d9108fdeb0e0707fe5ae9ebc57a3a8026be90736..3452127f5a3cfb073f8ff591178c554e672500c1 100644 (file)
@@ -58,9 +58,16 @@ Z_INTERNAL uint32_t compare258_unaligned_avx2(const unsigned char *src0, const u
     return compare258_unaligned_avx2_static(src0, src1);
 }
 
-#define LONGEST_MATCH   longest_match_unaligned_avx2
-#define COMPARE256      compare256_unaligned_avx2_static
-#define COMPARE258      compare258_unaligned_avx2_static
+#define LONGEST_MATCH       longest_match_unaligned_avx2
+#define COMPARE256          compare256_unaligned_avx2_static
+#define COMPARE258          compare258_unaligned_avx2_static
+
+#include "match_tpl.h"
+
+#define LONGEST_MATCH_SLOW
+#define LONGEST_MATCH       longest_match_slow_unaligned_avx2
+#define COMPARE256          compare256_unaligned_avx2_static
+#define COMPARE258          compare258_unaligned_avx2_static
 
 #include "match_tpl.h"
 
index 17534c051936cbfe9e9ab60f269800b5bee3b19e..1bea2e95bdb057e0310d1afb5955e716e3874388 100644 (file)
@@ -65,9 +65,16 @@ Z_INTERNAL uint32_t compare258_unaligned_sse4(const unsigned char *src0, const u
     return compare258_unaligned_sse4_static(src0, src1);
 }
 
-#define LONGEST_MATCH   longest_match_unaligned_sse4
-#define COMPARE256      compare256_unaligned_sse4_static
-#define COMPARE258      compare258_unaligned_sse4_static
+#define LONGEST_MATCH       longest_match_unaligned_sse4
+#define COMPARE256          compare256_unaligned_sse4_static
+#define COMPARE258          compare258_unaligned_sse4_static
+
+#include "match_tpl.h"
+
+#define LONGEST_MATCH_SLOW
+#define LONGEST_MATCH       longest_match_slow_unaligned_sse4
+#define COMPARE256          compare256_unaligned_sse4_static
+#define COMPARE258          compare258_unaligned_sse4_static
 
 #include "match_tpl.h"
 
index 6b452b89cc9159454ad8752deae56407fbaadabc..f4f1936c45710faecaad39d509a7b10254731797 100644 (file)
@@ -57,9 +57,16 @@ Z_INTERNAL uint32_t compare258_c(const unsigned char *src0, const unsigned char
     return compare258_c_static(src0, src1);
 }
 
-#define LONGEST_MATCH   longest_match_c
-#define COMPARE256      compare256_c_static
-#define COMPARE258      compare258_c_static
+#define LONGEST_MATCH       longest_match_c
+#define COMPARE256          compare256_c_static
+#define COMPARE258          compare258_c_static
+
+#include "match_tpl.h"
+
+#define LONGEST_MATCH_SLOW  /* build the slow variant of the match_tpl.h template included below */
+#define LONGEST_MATCH       longest_match_slow_c
+#define COMPARE256          compare256_c_static
+#define COMPARE258          compare258_c_static
 
 #include "match_tpl.h"
 
@@ -97,9 +104,16 @@ Z_INTERNAL uint32_t compare258_unaligned_16(const unsigned char *src0, const uns
     return compare258_unaligned_16_static(src0, src1);
 }
 
-#define LONGEST_MATCH   longest_match_unaligned_16
-#define COMPARE256      compare256_unaligned_16_static
-#define COMPARE258      compare258_unaligned_16_static
+#define LONGEST_MATCH       longest_match_unaligned_16
+#define COMPARE256          compare256_unaligned_16_static
+#define COMPARE258          compare258_unaligned_16_static
+
+#include "match_tpl.h"
+
+#define LONGEST_MATCH_SLOW
+#define LONGEST_MATCH       longest_match_slow_unaligned_16
+#define COMPARE256          compare256_unaligned_16_static
+#define COMPARE258          compare258_unaligned_16_static
 
 #include "match_tpl.h"
 
@@ -135,9 +149,16 @@ Z_INTERNAL uint32_t compare258_unaligned_32(const unsigned char *src0, const uns
     return compare258_unaligned_32_static(src0, src1);
 }
 
-#define LONGEST_MATCH   longest_match_unaligned_32
-#define COMPARE256      compare256_unaligned_32_static
-#define COMPARE258      compare258_unaligned_32_static
+#define LONGEST_MATCH       longest_match_unaligned_32
+#define COMPARE256          compare256_unaligned_32_static
+#define COMPARE258          compare258_unaligned_32_static
+
+#include "match_tpl.h"
+
+#define LONGEST_MATCH_SLOW
+#define LONGEST_MATCH       longest_match_slow_unaligned_32
+#define COMPARE256          compare256_unaligned_32_static
+#define COMPARE258          compare258_unaligned_32_static
 
 #include "match_tpl.h"
 
@@ -175,9 +196,16 @@ Z_INTERNAL uint32_t compare258_unaligned_64(const unsigned char *src0, const uns
     return compare258_unaligned_64_static(src0, src1);
 }
 
-#define LONGEST_MATCH   longest_match_unaligned_64
-#define COMPARE256      compare256_unaligned_64_static
-#define COMPARE258      compare258_unaligned_64_static
+#define LONGEST_MATCH       longest_match_unaligned_64
+#define COMPARE256          compare256_unaligned_64_static
+#define COMPARE258          compare258_unaligned_64_static
+
+#include "match_tpl.h"
+
+#define LONGEST_MATCH_SLOW
+#define LONGEST_MATCH       longest_match_slow_unaligned_64
+#define COMPARE256          compare256_unaligned_64_static
+#define COMPARE258          compare258_unaligned_64_static
 
 #include "match_tpl.h"
 
index 8fc94c5c11fd53491465f54b144eac09c0864b81..5ed930c1083c40b1c86685e3d85e4d7a30f614c6 100644 (file)
@@ -142,6 +142,22 @@ extern uint32_t longest_match_unaligned_avx2(deflate_state *const s, Pos cur_mat
 #endif
 #endif
 
+/* longest_match_slow: variants generated by including match_tpl.h with LONGEST_MATCH_SLOW defined */
+extern uint32_t longest_match_slow_c(deflate_state *const s, Pos cur_match);
+#ifdef UNALIGNED_OK
+extern uint32_t longest_match_slow_unaligned_16(deflate_state *const s, Pos cur_match);
+extern uint32_t longest_match_slow_unaligned_32(deflate_state *const s, Pos cur_match);
+#ifdef UNALIGNED64_OK
+extern uint32_t longest_match_slow_unaligned_64(deflate_state *const s, Pos cur_match);
+#endif
+#ifdef X86_SSE42_CMP_STR
+extern uint32_t longest_match_slow_unaligned_sse4(deflate_state *const s, Pos cur_match);
+#endif
+#if defined(X86_AVX2) && defined(HAVE_BUILTIN_CTZ)
+extern uint32_t longest_match_slow_unaligned_avx2(deflate_state *const s, Pos cur_match);
+#endif
+#endif
+
 Z_INTERNAL Z_TLS struct functable_s functable;
 
 Z_INTERNAL void cpu_check_features(void)
@@ -474,6 +490,31 @@ Z_INTERNAL uint32_t longest_match_stub(deflate_state *const s, Pos cur_match) {
     return functable.longest_match(s, cur_match);
 }
 
+Z_INTERNAL uint32_t longest_match_slow_stub(deflate_state *const s, Pos cur_match) {
+    /* Pick an implementation, store it in functable so later calls made through functable go straight to it, then forward this call. */
+    functable.longest_match_slow = &longest_match_slow_c;  /* default; overridden below when another variant is compiled in */
+
+#ifdef UNALIGNED_OK
+#  if defined(UNALIGNED64_OK) && defined(HAVE_BUILTIN_CTZLL)
+    functable.longest_match_slow = &longest_match_slow_unaligned_64;
+#  elif defined(HAVE_BUILTIN_CTZ)
+    functable.longest_match_slow = &longest_match_slow_unaligned_32;
+#  else
+    functable.longest_match_slow = &longest_match_slow_unaligned_16;
+#  endif
+#  ifdef X86_SSE42_CMP_STR
+    if (x86_cpu_has_sse42)
+        functable.longest_match_slow = &longest_match_slow_unaligned_sse4;
+#  endif
+#  if defined(X86_AVX2) && defined(HAVE_BUILTIN_CTZ)
+    if (x86_cpu_has_avx2)
+        functable.longest_match_slow = &longest_match_slow_unaligned_avx2;
+#  endif
+#endif
+
+    return functable.longest_match_slow(s, cur_match);
+}
+
 /* functable init */
 Z_INTERNAL Z_TLS struct functable_s functable = {
     update_hash_stub,
@@ -484,6 +525,7 @@ Z_INTERNAL Z_TLS struct functable_s functable = {
     slide_hash_stub,
     compare258_stub,
     longest_match_stub,
+    longest_match_slow_stub,
     chunksize_stub,
     chunkcopy_stub,
     chunkcopy_safe_stub,
index 49d2f5d569e0c56a224cd37537e6855099741c33..f4b17569ac2f7bac00fe39fc6c18c9fcf02b3b41 100644 (file)
@@ -17,6 +17,7 @@ struct functable_s {
     void     (* slide_hash)         (deflate_state *s);
     uint32_t (* compare258)         (const unsigned char *src0, const unsigned char *src1);
     uint32_t (* longest_match)      (deflate_state *const s, Pos cur_match);
+    uint32_t (* longest_match_slow) (deflate_state *const s, Pos cur_match);
     uint32_t (* chunksize)          (void);
     uint8_t* (* chunkcopy)          (uint8_t *out, uint8_t const *from, unsigned len);
     uint8_t* (* chunkcopy_safe)     (uint8_t *out, uint8_t const *from, unsigned len, uint8_t *safe);
index 3d8ac4e3c5e77fe4901b7fd29e4b73373cf62ed3..6e6665455611d97d9f76575171ba52eac59cdda1 100644 (file)
@@ -45,12 +45,16 @@ Z_INTERNAL uint32_t LONGEST_MATCH(deflate_state *const s, Pos cur_match) {
     Z_REGISTER unsigned char *mbase_start = window;
     Z_REGISTER unsigned char *mbase_end;
     const Pos *prev = s->prev;
-    Pos limit, limit_base;
+    Pos limit;
+#ifdef LONGEST_MATCH_SLOW
+    Pos limit_base;
+    int32_t rolling_hash;
+#else
     int32_t early_exit;
+#endif
     uint32_t chain_length, nice_match, best_len, offset;
     uint32_t lookahead = s->lookahead;
     Pos match_offset = 0;
-    int32_t rolling_hash;
     bestcmp_t scan_end;
 #ifndef UNALIGNED_OK
     bestcmp_t scan_end0;
@@ -92,8 +96,11 @@ Z_INTERNAL uint32_t LONGEST_MATCH(deflate_state *const s, Pos cur_match) {
 
     /* Do not waste too much time if we already have a good match */
     chain_length = s->max_chain_length;
+#ifdef LONGEST_MATCH_SLOW
     rolling_hash = chain_length > 1024;
+#else
     early_exit = s->level < EARLY_EXIT_TRIGGER_LEVEL;
+#endif
     if (best_len >= s->good_match)
         chain_length >>= 2;
     nice_match = (uint32_t)s->nice_match;
@@ -101,8 +108,9 @@ Z_INTERNAL uint32_t LONGEST_MATCH(deflate_state *const s, Pos cur_match) {
     /* Stop when cur_match becomes <= limit. To simplify the code,
      * we prevent matches with the string of window index 0
      */
-    limit = limit_base = strstart > MAX_DIST(s) ? (Pos)(strstart - MAX_DIST(s)) : 0;
-
+    limit = strstart > MAX_DIST(s) ? (Pos)(strstart - MAX_DIST(s)) : 0;
+#ifdef LONGEST_MATCH_SLOW
+    limit_base = limit;
     if (best_len >= STD_MIN_MATCH && rolling_hash) {
         /* We're continuing search (lazy evaluation). */
         uint32_t i, hash;
@@ -133,7 +141,7 @@ Z_INTERNAL uint32_t LONGEST_MATCH(deflate_state *const s, Pos cur_match) {
         mbase_start -= match_offset;
         mbase_end -= match_offset;
     }
-
+#endif
     Assert((unsigned long)strstart <= s->window_size - MIN_LOOKAHEAD, "need lookahead");
     for (;;) {
         if (cur_match >= strstart)
@@ -207,6 +215,7 @@ Z_INTERNAL uint32_t LONGEST_MATCH(deflate_state *const s, Pos cur_match) {
 #ifndef UNALIGNED_OK
             scan_end0 = *(bestcmp_t *)(scan+offset+1);
 #endif
+#ifdef LONGEST_MATCH_SLOW
             /* Look for a better string offset */
             if (len > STD_MIN_MATCH && match_start + len < strstart && rolling_hash) {
                 Pos pos, next_pos;
@@ -254,27 +263,33 @@ Z_INTERNAL uint32_t LONGEST_MATCH(deflate_state *const s, Pos cur_match) {
                 mbase_start = window-match_offset;
                 mbase_end = (mbase_start+offset);
                 continue;
-            } else {
-                mbase_end = (mbase_start+offset);
             }
-        } else if (UNLIKELY(early_exit)) {
+#endif
+            mbase_end = (mbase_start+offset);
+        }
+#ifndef LONGEST_MATCH_SLOW
+        else if (UNLIKELY(early_exit)) {
             /* The probability of finding a match later if we here is pretty low, so for
              * performance it's best to outright stop here for the lower compression levels
              */
             break;
         }
+#endif
         GOTO_NEXT_CHAIN;
     }
     return best_len;
 
+#ifdef LONGEST_MATCH_SLOW
 break_matching:
 
     if (best_len < s->lookahead)
         return best_len;
 
     return s->lookahead;
+#endif
 }
 
+#undef LONGEST_MATCH_SLOW
 #undef LONGEST_MATCH
 #undef COMPARE256
 #undef COMPARE258