From: Nathan Moinvaziri Date: Sun, 26 Jul 2020 01:24:48 +0000 (-0400) Subject: Use unaligned 32-bit and 64-bit compare based on best match length when searching... X-Git-Tag: 1.9.9-b1~79 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=b0a3461245219f6bb4885de6c4529facabcf6a44;p=thirdparty%2Fzlib-ng.git Use unaligned 32-bit and 64-bit compare based on best match length when searching for matches. Move TRIGGER_LEVEL to match_tpl.h since it is only used in longest match. Use early return inside match loops instead of cont variable. Added back two variable check for platforms that don't supported unaligned access. --- diff --git a/deflate.h b/deflate.h index ce6106cf..93f7b5b7 100644 --- a/deflate.h +++ b/deflate.h @@ -404,8 +404,6 @@ void ZLIB_INTERNAL flush_pending(PREFIX3(streamp) strm); * used. */ -#define TRIGGER_LEVEL 5 - /* Bit buffer and compress bits calculation debugging */ #ifdef ZLIB_DEBUG # define cmpr_bits_add(s, len) s->compressed_len += (len) diff --git a/match_tpl.h b/match_tpl.h index 24bf01de..44bd04da 100644 --- a/match_tpl.h +++ b/match_tpl.h @@ -7,13 +7,11 @@ #define BESTCMP_TYPE #ifdef UNALIGNED_OK -#if MIN_MATCH >= 4 +# ifdef UNALIGNED64_OK +typedef uint64_t bestcmp_t; +# else typedef uint32_t bestcmp_t; -#elif MIN_MATCH >= 2 -typedef uint16_t bestcmp_t; -#else -typedef uint8_t bestcmp_t; -#endif +# endif #else typedef uint8_t bestcmp_t; #endif @@ -35,10 +33,26 @@ ZLIB_INTERNAL int32_t LONGEST_MATCH(deflate_state *const s, Pos cur_match) { ZLIB_REGISTER unsigned char *window = s->window; ZLIB_REGISTER unsigned char *scan = window + strstart; const Pos *prev = s->prev; - unsigned chain_length; Pos limit; - unsigned int best_len, nice_match; - bestcmp_t scan_end, scan_start; + uint32_t chain_length, nice_match, best_len, offset; + bestcmp_t scan_end; +#ifndef UNALIGNED_OK + bestcmp_t scan_end0; +#else + bestcmp_t scan_start; +#endif + +#define EARLY_EXIT_TRIGGER_LEVEL 5 + +#define RETURN_BEST_LEN \ + if (best_len <= s->lookahead) \ + return best_len; \ + return s->lookahead; + +#define GOTO_NEXT_CHAIN \ + if (--chain_length && (cur_match = prev[cur_match & wmask]) > limit) \ + continue; \ + RETURN_BEST_LEN; /* * The code is optimized for HASH_BITS >= 8 and MAX_MATCH-2 multiple @@ -46,12 +60,33 @@ ZLIB_INTERNAL int32_t LONGEST_MATCH(deflate_state *const s, Pos cur_match) { */ Assert(s->hash_bits >= 8 && MAX_MATCH == 258, "Code too clever"); + best_len = s->prev_length ? s->prev_length : 1; + + /* + * Calculate read offset which should only extend an extra byte + * to find the next best match length. + */ + offset = best_len-1; +#ifdef UNALIGNED_OK + if (best_len >= sizeof(uint32_t)) { + offset -= 2; +#ifdef UNALIGNED64_OK + if (best_len >= sizeof(uint64_t)) + offset -= 4; +#endif + } +#endif + + scan_end = *(bestcmp_t *)(scan+offset); +#ifndef UNALIGNED_OK + scan_end0 = *(bestcmp_t *)(scan+offset+1); +#else + scan_start = *(bestcmp_t *)(scan); +#endif + /* * Do not waste too much time if we already have a good match */ - best_len = s->prev_length; - if (best_len == 0) - best_len = 1; chain_length = s->max_chain_length; if (best_len >= s->good_match) chain_length >>= 2; @@ -60,7 +95,7 @@ ZLIB_INTERNAL int32_t LONGEST_MATCH(deflate_state *const s, Pos cur_match) { * Do not look for matches beyond the end of the input. This is * necessary to make deflate deterministic */ - nice_match = (unsigned int)s->nice_match > s->lookahead ? s->lookahead : (unsigned int)s->nice_match; + nice_match = (uint32_t)s->nice_match > s->lookahead ? s->lookahead : (uint32_t)s->nice_match; /* * Stop when cur_match becomes <= limit. To simplify the code, @@ -68,14 +103,10 @@ ZLIB_INTERNAL int32_t LONGEST_MATCH(deflate_state *const s, Pos cur_match) { */ limit = strstart > MAX_DIST(s) ? (Pos)(strstart - MAX_DIST(s)) : 0; - scan_start = *(bestcmp_t *)(scan); - scan_end = *(bestcmp_t *)(scan+best_len-1); - Assert((unsigned long)strstart <= s->window_size - MIN_LOOKAHEAD, "need lookahead"); - do { + for (;;) { ZLIB_REGISTER unsigned char *match; ZLIB_REGISTER unsigned int len; - int cont; if (cur_match >= strstart) break; @@ -89,50 +120,78 @@ ZLIB_INTERNAL int32_t LONGEST_MATCH(deflate_state *const s, Pos cur_match) { * is limited to the lookahead, so the output of deflate is not * affected by the uninitialized values. */ - cont = 1; - do { - match = window + cur_match; - if (LIKELY(*(bestcmp_t *)(match+best_len-1) != scan_end || - *(bestcmp_t *)(match) != scan_start)) { - if ((cur_match = prev[cur_match & wmask]) > limit && --chain_length != 0) { - continue; - } - cont = 0; +#ifdef UNALIGNED_OK + if (best_len < sizeof(uint32_t)) { + for (;;) { + match = window + cur_match; + if (UNLIKELY(*(uint16_t *)(match+offset) == (uint16_t)scan_end && + *(uint16_t *)(match) == (uint16_t)scan_start)) + break; + GOTO_NEXT_CHAIN; } - break; - } while (1); - - if (!cont) - break; - -#if MIN_MATCH >= 2 && defined(UNALIGNED_OK) - len = COMPARE256(scan+2, match+2) + 2; +# ifdef UNALIGNED64_OK + } else if (best_len >= sizeof(uint64_t)) { + for (;;) { + match = window + cur_match; + if (UNLIKELY(*(uint64_t *)(match+offset) == (uint64_t)scan_end && + *(uint64_t *)(match) == (uint64_t)scan_start)) + break; + GOTO_NEXT_CHAIN; + } +# endif + } else { + for (;;) { + match = window + cur_match; + if (UNLIKELY(*(uint32_t *)(match+offset) == (uint32_t)scan_end && + *(uint32_t *)(match) == (uint32_t)scan_start)) + break; + GOTO_NEXT_CHAIN; + } + } #else - len = COMPARE258(scan, match); + for (;;) { + match = window + cur_match; + if (UNLIKELY(match[offset] == scan_end && match[offset+1] == scan_end0 && + match[0] == scan[0] && match[1] == scan[1])) + break; + GOTO_NEXT_CHAIN; + } #endif - + len = COMPARE256(scan+2, match+2) + 2; Assert(scan+len <= window+(unsigned)(s->window_size-1), "wild scan"); if (len > best_len) { s->match_start = cur_match; best_len = len; - if (len >= nice_match) + if (best_len >= nice_match) break; - scan_end = *(bestcmp_t *)(scan+best_len-1); + offset = best_len-1; +#ifdef UNALIGNED_OK + if (best_len >= sizeof(uint32_t)) { + offset -= 2; +#ifdef UNALIGNED64_OK + if (best_len >= sizeof(uint64_t)) + offset -= 4; +#endif + } +#endif + scan_end = *(bestcmp_t *)(scan+offset); +#ifndef UNALIGNED_OK + scan_end0 = *(bestcmp_t *)(scan+offset+1); +#endif } else { /* * The probability of finding a match later if we here * is pretty low, so for performance it's best to * outright stop here for the lower compression levels */ - if (s->level < TRIGGER_LEVEL) + if (s->level < EARLY_EXIT_TRIGGER_LEVEL) break; } - } while ((cur_match = prev[cur_match & wmask]) > limit && --chain_length != 0); + GOTO_NEXT_CHAIN; + } - if (best_len <= s->lookahead) - return best_len; - return s->lookahead; + RETURN_BEST_LEN; } #undef LONGEST_MATCH