return compare258_unaligned_avx2_static(src0, src1);
}
-#define LONGEST_MATCH longest_match_unaligned_avx2
-#define COMPARE256 compare256_unaligned_avx2_static
-#define COMPARE258 compare258_unaligned_avx2_static
+#define LONGEST_MATCH longest_match_unaligned_avx2
+#define COMPARE256 compare256_unaligned_avx2_static
+#define COMPARE258 compare258_unaligned_avx2_static
+
+#include "match_tpl.h"
+
+#define LONGEST_MATCH_SLOW /* second include: enables the template code guarded by #ifdef LONGEST_MATCH_SLOW */
+#define LONGEST_MATCH longest_match_slow_unaligned_avx2
+#define COMPARE256 compare256_unaligned_avx2_static
+#define COMPARE258 compare258_unaligned_avx2_static
#include "match_tpl.h"
return compare258_unaligned_sse4_static(src0, src1);
}
-#define LONGEST_MATCH longest_match_unaligned_sse4
-#define COMPARE256 compare256_unaligned_sse4_static
-#define COMPARE258 compare258_unaligned_sse4_static
+#define LONGEST_MATCH longest_match_unaligned_sse4
+#define COMPARE256 compare256_unaligned_sse4_static
+#define COMPARE258 compare258_unaligned_sse4_static
+
+#include "match_tpl.h"
+
+#define LONGEST_MATCH_SLOW /* second include: enables the template code guarded by #ifdef LONGEST_MATCH_SLOW */
+#define LONGEST_MATCH longest_match_slow_unaligned_sse4
+#define COMPARE256 compare256_unaligned_sse4_static
+#define COMPARE258 compare258_unaligned_sse4_static
#include "match_tpl.h"
return compare258_c_static(src0, src1);
}
-#define LONGEST_MATCH longest_match_c
-#define COMPARE256 compare256_c_static
-#define COMPARE258 compare258_c_static
+#define LONGEST_MATCH longest_match_c
+#define COMPARE256 compare256_c_static
+#define COMPARE258 compare258_c_static
+
+#include "match_tpl.h"
+
+/* Second instantiation of the match template for the portable C variant.
+ * LONGEST_MATCH_SLOW enables the template code guarded by that macro, and the
+ * compare macros must name the same static helpers as the first instantiation.
+ */
+#define LONGEST_MATCH_SLOW
+#define LONGEST_MATCH longest_match_slow_c
+#define COMPARE256 compare256_c_static
+#define COMPARE258 compare258_c_static
#include "match_tpl.h"
return compare258_unaligned_16_static(src0, src1);
}
-#define LONGEST_MATCH longest_match_unaligned_16
-#define COMPARE256 compare256_unaligned_16_static
-#define COMPARE258 compare258_unaligned_16_static
+#define LONGEST_MATCH longest_match_unaligned_16
+#define COMPARE256 compare256_unaligned_16_static
+#define COMPARE258 compare258_unaligned_16_static
+
+#include "match_tpl.h"
+
+#define LONGEST_MATCH_SLOW /* second include: enables the template code guarded by #ifdef LONGEST_MATCH_SLOW */
+#define LONGEST_MATCH longest_match_slow_unaligned_16
+#define COMPARE256 compare256_unaligned_16_static
+#define COMPARE258 compare258_unaligned_16_static
#include "match_tpl.h"
return compare258_unaligned_32_static(src0, src1);
}
-#define LONGEST_MATCH longest_match_unaligned_32
-#define COMPARE256 compare256_unaligned_32_static
-#define COMPARE258 compare258_unaligned_32_static
+#define LONGEST_MATCH longest_match_unaligned_32
+#define COMPARE256 compare256_unaligned_32_static
+#define COMPARE258 compare258_unaligned_32_static
+
+#include "match_tpl.h"
+
+#define LONGEST_MATCH_SLOW /* second include: enables the template code guarded by #ifdef LONGEST_MATCH_SLOW */
+#define LONGEST_MATCH longest_match_slow_unaligned_32
+#define COMPARE256 compare256_unaligned_32_static
+#define COMPARE258 compare258_unaligned_32_static
#include "match_tpl.h"
return compare258_unaligned_64_static(src0, src1);
}
-#define LONGEST_MATCH longest_match_unaligned_64
-#define COMPARE256 compare256_unaligned_64_static
-#define COMPARE258 compare258_unaligned_64_static
+#define LONGEST_MATCH longest_match_unaligned_64
+#define COMPARE256 compare256_unaligned_64_static
+#define COMPARE258 compare258_unaligned_64_static
+
+#include "match_tpl.h"
+
+#define LONGEST_MATCH_SLOW /* second include: enables the template code guarded by #ifdef LONGEST_MATCH_SLOW */
+#define LONGEST_MATCH longest_match_slow_unaligned_64
+#define COMPARE256 compare256_unaligned_64_static
+#define COMPARE258 compare258_unaligned_64_static
#include "match_tpl.h"
#endif
#endif
+/* longest_match_slow
+ * One prototype per implementation variant. The preprocessor conditions below
+ * decide which variants are declared; longest_match_slow_stub selects one of
+ * them and stores it in functable.longest_match_slow.
+ */
+extern uint32_t longest_match_slow_c(deflate_state *const s, Pos cur_match);
+#ifdef UNALIGNED_OK
+extern uint32_t longest_match_slow_unaligned_16(deflate_state *const s, Pos cur_match);
+extern uint32_t longest_match_slow_unaligned_32(deflate_state *const s, Pos cur_match);
+#ifdef UNALIGNED64_OK
+extern uint32_t longest_match_slow_unaligned_64(deflate_state *const s, Pos cur_match);
+#endif
+#ifdef X86_SSE42_CMP_STR
+extern uint32_t longest_match_slow_unaligned_sse4(deflate_state *const s, Pos cur_match);
+#endif
+#if defined(X86_AVX2) && defined(HAVE_BUILTIN_CTZ)
+extern uint32_t longest_match_slow_unaligned_avx2(deflate_state *const s, Pos cur_match);
+#endif
+#endif
+
Z_INTERNAL Z_TLS struct functable_s functable;
Z_INTERNAL void cpu_check_features(void)
return functable.longest_match(s, cur_match);
}
+Z_INTERNAL uint32_t longest_match_slow_stub(deflate_state *const s, Pos cur_match) {
+ /* Resolver for functable.longest_match_slow. It overwrites that functable
+  * entry with a concrete implementation and then forwards this call to it.
+  * The assignments below are ordered so that a later one overrides an
+  * earlier one; the portable C variant is the starting point.
+  */
+ functable.longest_match_slow = &longest_match_slow_c;
+ /* Build-time choice among the unaligned variants, then run-time x86 checks. */
+#ifdef UNALIGNED_OK
+# if defined(UNALIGNED64_OK) && defined(HAVE_BUILTIN_CTZLL)
+ functable.longest_match_slow = &longest_match_slow_unaligned_64;
+# elif defined(HAVE_BUILTIN_CTZ)
+ functable.longest_match_slow = &longest_match_slow_unaligned_32;
+# else
+ functable.longest_match_slow = &longest_match_slow_unaligned_16;
+# endif
+# ifdef X86_SSE42_CMP_STR
+ if (x86_cpu_has_sse42) /* run-time check; overrides the build-time choice above */
+ functable.longest_match_slow = &longest_match_slow_unaligned_sse4;
+# endif
+# if defined(X86_AVX2) && defined(HAVE_BUILTIN_CTZ)
+ if (x86_cpu_has_avx2) /* tested last, so it overrides the SSE4 variant when both apply */
+ functable.longest_match_slow = &longest_match_slow_unaligned_avx2;
+# endif
+#endif
+
+ return functable.longest_match_slow(s, cur_match); /* forward to the implementation just installed */
+}
+
/* functable init */
Z_INTERNAL Z_TLS struct functable_s functable = {
update_hash_stub,
slide_hash_stub,
compare258_stub,
longest_match_stub,
+ longest_match_slow_stub,
chunksize_stub,
chunkcopy_stub,
chunkcopy_safe_stub,
void (* slide_hash) (deflate_state *s);
uint32_t (* compare258) (const unsigned char *src0, const unsigned char *src1);
uint32_t (* longest_match) (deflate_state *const s, Pos cur_match);
+ uint32_t (* longest_match_slow) (deflate_state *const s, Pos cur_match);
uint32_t (* chunksize) (void);
uint8_t* (* chunkcopy) (uint8_t *out, uint8_t const *from, unsigned len);
uint8_t* (* chunkcopy_safe) (uint8_t *out, uint8_t const *from, unsigned len, uint8_t *safe);
Z_REGISTER unsigned char *mbase_start = window;
Z_REGISTER unsigned char *mbase_end;
const Pos *prev = s->prev;
- Pos limit, limit_base;
+ Pos limit;
+#ifdef LONGEST_MATCH_SLOW
+ Pos limit_base;
+ int32_t rolling_hash;
+#else
int32_t early_exit;
+#endif
uint32_t chain_length, nice_match, best_len, offset;
uint32_t lookahead = s->lookahead;
Pos match_offset = 0;
- int32_t rolling_hash;
bestcmp_t scan_end;
#ifndef UNALIGNED_OK
bestcmp_t scan_end0;
/* Do not waste too much time if we already have a good match */
chain_length = s->max_chain_length;
+#ifdef LONGEST_MATCH_SLOW
rolling_hash = chain_length > 1024;
+#else
early_exit = s->level < EARLY_EXIT_TRIGGER_LEVEL;
+#endif
if (best_len >= s->good_match)
chain_length >>= 2;
nice_match = (uint32_t)s->nice_match;
/* Stop when cur_match becomes <= limit. To simplify the code,
* we prevent matches with the string of window index 0
*/
- limit = limit_base = strstart > MAX_DIST(s) ? (Pos)(strstart - MAX_DIST(s)) : 0;
-
+ limit = strstart > MAX_DIST(s) ? (Pos)(strstart - MAX_DIST(s)) : 0;
+#ifdef LONGEST_MATCH_SLOW
+ limit_base = limit;
if (best_len >= STD_MIN_MATCH && rolling_hash) {
/* We're continuing search (lazy evaluation). */
uint32_t i, hash;
mbase_start -= match_offset;
mbase_end -= match_offset;
}
-
+#endif
Assert((unsigned long)strstart <= s->window_size - MIN_LOOKAHEAD, "need lookahead");
for (;;) {
if (cur_match >= strstart)
#ifndef UNALIGNED_OK
scan_end0 = *(bestcmp_t *)(scan+offset+1);
#endif
+#ifdef LONGEST_MATCH_SLOW
/* Look for a better string offset */
if (len > STD_MIN_MATCH && match_start + len < strstart && rolling_hash) {
Pos pos, next_pos;
mbase_start = window-match_offset;
mbase_end = (mbase_start+offset);
continue;
- } else {
- mbase_end = (mbase_start+offset);
}
- } else if (UNLIKELY(early_exit)) {
+#endif
+ mbase_end = (mbase_start+offset);
+ }
+#ifndef LONGEST_MATCH_SLOW
+ else if (UNLIKELY(early_exit)) {
/* The probability of finding a match later if we here is pretty low, so for
* performance it's best to outright stop here for the lower compression levels
*/
break;
}
+#endif
GOTO_NEXT_CHAIN;
}
return best_len;
+#ifdef LONGEST_MATCH_SLOW
break_matching:
if (best_len < s->lookahead)
return best_len;
return s->lookahead;
+#endif
}
+#undef LONGEST_MATCH_SLOW
#undef LONGEST_MATCH
#undef COMPARE256
#undef COMPARE258