From: Nathan Moinvaziri Date: Sat, 9 May 2020 01:57:07 +0000 (-0500) Subject: Converted compare258 to static and convert longest_match to template. X-Git-Tag: 1.9.9-b1~297 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=c97a965f18d98028a4b26708629088098b404c34;p=thirdparty%2Fzlib-ng.git Converted compare258 to static and convert longest_match to template. --- diff --git a/arch/x86/compare258_avx.c b/arch/x86/compare258_avx.c index 10096eab6..010c6922b 100644 --- a/arch/x86/compare258_avx.c +++ b/arch/x86/compare258_avx.c @@ -16,7 +16,7 @@ #endif /* UNALIGNED_OK, AVX2 intrinsic comparison */ -int32_t compare258_unaligned_avx2(const unsigned char *src0, const unsigned char *src1) { +static inline int32_t compare258_unaligned_avx2_static(const unsigned char *src0, const unsigned char *src1) { const unsigned char *src0start = src0; const unsigned char *src0end = src0 + 256; @@ -53,4 +53,13 @@ int32_t compare258_unaligned_avx2(const unsigned char *src0, const unsigned char return (int32_t)(src0 - src0start); } +int32_t compare258_unaligned_avx2(const unsigned char *src0, const unsigned char *src1) { + return compare258_unaligned_avx2_static(src0, src1); +} + +#define LONGEST_MATCH longest_match_unaligned_avx2 +#define COMPARE258 compare258_unaligned_avx2_static + +#include "match_p.h" + #endif diff --git a/arch/x86/compare258_sse.c b/arch/x86/compare258_sse.c index 916e383b1..b93b8a41b 100644 --- a/arch/x86/compare258_sse.c +++ b/arch/x86/compare258_sse.c @@ -26,7 +26,7 @@ #endif /* UNALIGNED_OK, SSE4.2 intrinsic comparison */ -int32_t compare258_unaligned_sse4(const unsigned char *src0, const unsigned char *src1) { +static inline int32_t compare258_unaligned_sse4_static(const unsigned char *src0, const unsigned char *src1) { #ifdef _MSC_VER const unsigned char *src0start = src0; const unsigned char *src0end = src0 + 256; @@ -112,4 +112,13 @@ int32_t compare258_unaligned_sse4(const unsigned char *src0, const unsigned char #endif } +int32_t compare258_unaligned_sse4(const unsigned char *src0, const unsigned char *src1) { + return compare258_unaligned_sse4_static(src0, src1); +} + +#define LONGEST_MATCH longest_match_unaligned_sse4 +#define COMPARE258 compare258_unaligned_sse4_static + +#include "match_p.h" + #endif diff --git a/compare258.c b/compare258.c index b538277a1..d32e1deff 100644 --- a/compare258.c +++ b/compare258.c @@ -9,7 +9,7 @@ #include "fallback_builtins.h" /* ALIGNED, byte comparison */ -int32_t compare258_c(const unsigned char *src0, const unsigned char *src1) { +static inline int32_t compare258_c_static(const unsigned char *src0, const unsigned char *src1) { const unsigned char *src0start = src0; const unsigned char *src0end = src0 + 258; @@ -37,9 +37,18 @@ int32_t compare258_c(const unsigned char *src0, const unsigned char *src1) { return (int32_t)(src0 - src0start); } +int32_t compare258_c(const unsigned char *src0, const unsigned char *src1) { + return compare258_c_static(src0, src1); +} + +#define LONGEST_MATCH longest_match_c +#define COMPARE258 compare258_c_static + +#include "match_p.h" + #ifdef UNALIGNED_OK /* UNALIGNED_OK, 16-bit integer comparison */ -int32_t compare258_unaligned_16(const unsigned char *src0, const unsigned char *src1) { +static inline int32_t compare258_unaligned_16_static(const unsigned char *src0, const unsigned char *src1) { const unsigned char *src0start = src0; const unsigned char *src0end = src0 + 258; @@ -61,9 +70,18 @@ int32_t compare258_unaligned_16(const unsigned char *src0, const unsigned char * return (int32_t)(src0 - src0start); } +int32_t compare258_unaligned_16(const unsigned char *src0, const unsigned char *src1) { + return compare258_unaligned_16_static(src0, src1); +} + +#define LONGEST_MATCH longest_match_unaligned_16 +#define COMPARE258 compare258_unaligned_16_static + +#include "match_p.h" + #ifdef HAVE_BUILTIN_CTZ /* UNALIGNED_OK, 32-bit integer comparison */ -int32_t compare258_unaligned_32(const unsigned char *src0, const unsigned char *src1) { +static inline int32_t compare258_unaligned_32_static(const unsigned char *src0, const unsigned char *src1) { const unsigned char *src0start = src0; const unsigned char *src0end = src0 + 256; @@ -88,11 +106,20 @@ int32_t compare258_unaligned_32(const unsigned char *src0, const unsigned char * return (int32_t)(src0 - src0start); } +int32_t compare258_unaligned_32(const unsigned char *src0, const unsigned char *src1) { + return compare258_unaligned_32_static(src0, src1); +} + +#define LONGEST_MATCH longest_match_unaligned_32 +#define COMPARE258 compare258_unaligned_32_static + +#include "match_p.h" + #endif #ifdef HAVE_BUILTIN_CTZLL /* UNALIGNED_OK, 64-bit integer comparison */ -int32_t compare258_unaligned_64(const unsigned char *src0, const unsigned char *src1) { +static inline int32_t compare258_unaligned_64_static(const unsigned char *src0, const unsigned char *src1) { const unsigned char *src0start = src0; const unsigned char *src0end = src0 + 256; @@ -117,6 +144,15 @@ int32_t compare258_unaligned_64(const unsigned char *src0, const unsigned char * return (int32_t)(src0 - src0start); } +int32_t compare258_unaligned_64(const unsigned char *src0, const unsigned char *src1) { + return compare258_unaligned_64_static(src0, src1); +} + +#define LONGEST_MATCH longest_match_unaligned_64 +#define COMPARE258 compare258_unaligned_64_static + +#include "match_p.h" + #endif #endif diff --git a/deflate_fast.c b/deflate_fast.c index 4fdad4fdc..12244fe73 100644 --- a/deflate_fast.c +++ b/deflate_fast.c @@ -7,7 +7,6 @@ #include "zbuild.h" #include "deflate.h" #include "deflate_p.h" -#include "match_p.h" #include "functable.h" /* =========================================================================== @@ -52,7 +51,7 @@ ZLIB_INTERNAL block_state deflate_fast(deflate_state *s, int flush) { * of window index 0 (in particular we have to avoid a match * of the string with itself at the start of the input file). */ - s->match_length = longest_match(s, hash_head); + s->match_length = functable.longest_match(s, hash_head); /* longest_match() sets match_start */ } if (s->match_length >= MIN_MATCH) { diff --git a/deflate_medium.c b/deflate_medium.c index 3a13afbd7..f58e49982 100644 --- a/deflate_medium.c +++ b/deflate_medium.c @@ -11,7 +11,6 @@ #include "zbuild.h" #include "deflate.h" #include "deflate_p.h" -#include "match_p.h" #include "functable.h" struct match { @@ -220,7 +219,7 @@ ZLIB_INTERNAL block_state deflate_medium(deflate_state *s, int flush) { * of window index 0 (in particular we have to avoid a match * of the string with itself at the start of the input file). */ - current_match.match_length = longest_match(s, hash_head); + current_match.match_length = functable.longest_match(s, hash_head); current_match.match_start = s->match_start; if (current_match.match_length < MIN_MATCH) current_match.match_length = 1; @@ -252,7 +251,7 @@ ZLIB_INTERNAL block_state deflate_medium(deflate_state *s, int flush) { * of window index 0 (in particular we have to avoid a match * of the string with itself at the start of the input file). */ - next_match.match_length = longest_match(s, hash_head); + next_match.match_length = functable.longest_match(s, hash_head); next_match.match_start = s->match_start; if (next_match.match_start >= next_match.strstart) { /* this can happen due to some restarts */ diff --git a/deflate_slow.c b/deflate_slow.c index 1e5bffc0c..76f031ec8 100644 --- a/deflate_slow.c +++ b/deflate_slow.c @@ -7,7 +7,6 @@ #include "zbuild.h" #include "deflate.h" #include "deflate_p.h" -#include "match_p.h" #include "functable.h" /* =========================================================================== @@ -62,7 +61,7 @@ ZLIB_INTERNAL block_state deflate_slow(deflate_state *s, int flush) { * of window index 0 (in particular we have to avoid a match * of the string with itself at the start of the input file). */ - s->match_length = longest_match(s, hash_head); + s->match_length = functable.longest_match(s, hash_head); /* longest_match() sets match_start */ if (s->match_length <= 5 && (s->strategy == Z_FILTERED diff --git a/functable.c b/functable.c index a95cfc550..17cc81259 100644 --- a/functable.c +++ b/functable.c @@ -77,6 +77,20 @@ extern int32_t compare258_unaligned_avx2(const unsigned char *src0, const unsign #endif #endif +/* longest_match */ +extern int32_t longest_match_c(deflate_state *const s, IPos cur_match); +#ifdef UNALIGNED_OK +extern int32_t longest_match_unaligned_16(deflate_state *const s, IPos cur_match); +extern int32_t longest_match_unaligned_32(deflate_state *const s, IPos cur_match); +extern int32_t longest_match_unaligned_64(deflate_state *const s, IPos cur_match); +#ifdef X86_SSE42_CMP_STR +extern int32_t longest_match_unaligned_sse4(deflate_state *const s, IPos cur_match); +#endif +#if defined(X86_AVX2) && defined(HAVE_BUILTIN_CTZ) +extern int32_t longest_match_unaligned_avx2(deflate_state *const s, IPos cur_match); +#endif +#endif + /* stub definitions */ ZLIB_INTERNAL Pos insert_string_stub(deflate_state *const s, const Pos str, unsigned int count); ZLIB_INTERNAL Pos quick_insert_string_stub(deflate_state *const s, const Pos str); @@ -84,6 +98,7 @@ ZLIB_INTERNAL uint32_t adler32_stub(uint32_t adler, const unsigned char *buf, si ZLIB_INTERNAL uint32_t crc32_stub(uint32_t crc, const unsigned char *buf, uint64_t len); ZLIB_INTERNAL void slide_hash_stub(deflate_state *s); ZLIB_INTERNAL int32_t compare258_stub(const unsigned char *src0, const unsigned char *src1); +ZLIB_INTERNAL int32_t longest_match_stub(deflate_state *const s, IPos cur_match); /* functable init */ ZLIB_INTERNAL __thread struct functable_s functable = { @@ -92,7 +107,8 @@ ZLIB_INTERNAL __thread struct functable_s functable = { adler32_stub, crc32_stub, slide_hash_stub, - compare258_stub + compare258_stub, + longest_match_stub }; ZLIB_INTERNAL void cpu_check_features(void) @@ -235,3 +251,28 @@ ZLIB_INTERNAL int32_t compare258_stub(const unsigned char *src0, const unsigned return functable.compare258(src0, src1); } +ZLIB_INTERNAL int32_t longest_match_stub(deflate_state *const s, IPos cur_match) { + + functable.longest_match = &longest_match_c; + +#ifdef UNALIGNED_OK +# ifdef HAVE_BUILTIN_CTZLL + functable.longest_match = &longest_match_unaligned_64; +# elif defined(HAVE_BUILTIN_CTZ) + functable.longest_match = &longest_match_unaligned_32; +# else + functable.longest_match = &longest_match_unaligned_16; +# endif +# ifdef X86_SSE42_CMP_STR + if (x86_cpu_has_sse42) + functable.longest_match = &longest_match_unaligned_sse4; +# endif +# if defined(X86_AVX2) && defined(HAVE_BUILTIN_CTZ) + if (x86_cpu_has_avx2) + functable.longest_match = &longest_match_unaligned_avx2; +# endif +#endif + + return functable.longest_match(s, cur_match); +} + diff --git a/functable.h b/functable.h index 42881d5b8..c36d85bae 100644 --- a/functable.h +++ b/functable.h @@ -15,6 +15,7 @@ struct functable_s { uint32_t (* crc32) (uint32_t crc, const unsigned char *buf, uint64_t len); void (* slide_hash) (deflate_state *s); int32_t (* compare258) (const unsigned char *src0, const unsigned char *src1); + int32_t (* longest_match) (deflate_state *const s, IPos cur_match); }; ZLIB_INTERNAL extern __thread struct functable_s functable; diff --git a/match_p.h b/match_p.h index 98b32811e..09726be44 100644 --- a/match_p.h +++ b/match_p.h @@ -29,7 +29,7 @@ typedef uint8_t bestcmp_t; * string (strstart) and its distance is <= MAX_DIST, and prev_length >=1 * OUT assertion: the match length is not greater than s->lookahead */ -static inline unsigned longest_match(deflate_state *const s, IPos cur_match) { +int32_t LONGEST_MATCH(deflate_state *const s, IPos cur_match) { unsigned int strstart = s->strstart; const unsigned wmask = s->w_mask; unsigned char *window = s->window; @@ -104,7 +104,7 @@ static inline unsigned longest_match(deflate_state *const s, IPos cur_match) { if (!cont) break; - len = functable.compare258(scan, match); + len = COMPARE258(scan, match); Assert(scan+len <= window+(unsigned)(s->window_size-1), "wild scan"); if (len > best_len) {