From: Mika Lindqvist Date: Tue, 3 Nov 2015 21:09:40 +0000 (+0200) Subject: Enable deflate_quick under MSVC. X-Git-Tag: 1.9.9-b1~793^2~2 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=9e37e38bc2e640818281bbfb2687dd9356b8aa95;p=thirdparty%2Fzlib-ng.git Enable deflate_quick under MSVC. --- diff --git a/arch/x86/deflate_quick.c b/arch/x86/deflate_quick.c index f9489822c..3088200ea 100644 --- a/arch/x86/deflate_quick.c +++ b/arch/x86/deflate_quick.c @@ -14,12 +14,51 @@ */ #include <immintrin.h> +#ifdef _MSC_VER +# include <nmmintrin.h> +#endif #include "deflate.h" extern void fill_window_sse(deflate_state *s); extern void flush_pending(z_stream *strm); local inline long compare258(const unsigned char *const src0, const unsigned char *const src1) { +#ifdef _MSC_VER + long cnt; + + cnt = 0; + do { +#define mode _SIDD_UBYTE_OPS | _SIDD_CMP_EQUAL_EACH | _SIDD_NEGATIVE_POLARITY + + int ret; + __m128i xmm_src0, xmm_src1; + + xmm_src0 = _mm_loadu_si128((__m128i *)(src0 + cnt)); + xmm_src1 = _mm_loadu_si128((__m128i *)(src1 + cnt)); + ret = _mm_cmpestri(xmm_src0, 16, xmm_src1, 16, mode); + if (_mm_cmpestrc(xmm_src0, 16, xmm_src1, 16, mode)) { + cnt += ret; + break; + } + cnt += 16; + + xmm_src0 = _mm_loadu_si128((__m128i *)(src0 + cnt)); + xmm_src1 = _mm_loadu_si128((__m128i *)(src1 + cnt)); + ret = _mm_cmpestri(xmm_src0, 16, xmm_src1, 16, mode); + if (_mm_cmpestrc(xmm_src0, 16, xmm_src1, 16, mode)) { + cnt += ret; + break; + } + cnt += 16; + } while (cnt < 256); + + if (*(unsigned short *)(src0 + cnt) == *(unsigned short *)(src1 + cnt)) { + cnt += 2; + } else if (*(src0 + cnt) == *(src1 + cnt)) { + cnt++; + } + return cnt; +#else uintptr_t ax, dx, cx; __m128i xmm_src0; @@ -69,6 +108,7 @@ local inline long compare258(const unsigned char *const src0, const unsigned cha : "cc" ); return ax - 16; +#endif } local const unsigned quick_len_codes[MAX_MATCH-MIN_MATCH+1]; @@ -136,12 +176,16 @@ local inline Pos quick_insert_string(deflate_state *const s, const Pos str) { Pos ret; unsigned 
h = 0; +#ifdef _MSC_VER + h = _mm_crc32_u32(h, *(unsigned *)(s->window + str)); +#else __asm__ __volatile__ ( "crc32l (%[window], %[str], 1), %0\n\t" : "+r" (h) : [window] "r" (s->window), [str] "r" ((uintptr_t)str) ); +#endif ret = s->head[h & s->hash_mask]; s->head[h & s->hash_mask] = str; diff --git a/win32/Makefile.msc b/win32/Makefile.msc index 10832948d..e79ce5267 100644 --- a/win32/Makefile.msc +++ b/win32/Makefile.msc @@ -23,14 +23,14 @@ AR = lib RC = rc CP = copy /y CFLAGS = -nologo -MD -W3 -O2 -Oy- -Zi -Fd"zlib" $(LOC) -WFLAGS = -D_CRT_SECURE_NO_DEPRECATE -D_CRT_NONSTDC_NO_DEPRECATE -DX86_PCLMULQDQ_CRC -DX86_SSE2_FILL_WINDOW -DX86_CPUID -DX86_SSE4_2_CRC_HASH -DUNALIGNED_OK -DUNROLL_LESS +WFLAGS = -D_CRT_SECURE_NO_DEPRECATE -D_CRT_NONSTDC_NO_DEPRECATE -DX86_PCLMULQDQ_CRC -DX86_SSE2_FILL_WINDOW -DX86_CPUID -DX86_SSE4_2_CRC_HASH -DUNALIGNED_OK -DUNROLL_LESS -DX86_QUICK_STRATEGY LDFLAGS = -nologo -debug -incremental:no -opt:ref -manifest ARFLAGS = -nologo RCFLAGS = /dWIN32 /r DEFFILE = zlib.def WITH_GZFILEOP = -OBJS = adler32.obj compress.obj crc32.obj deflate.obj deflate_fast.obj deflate_slow.obj \ +OBJS = adler32.obj compress.obj crc32.obj deflate.obj deflate_fast.obj deflate_quick.obj deflate_slow.obj \ infback.obj inflate.obj inftrees.obj inffast.obj match.obj trees.obj uncompr.obj zutil.obj x86.obj fill_window_sse.obj crc_folding.obj !if "$(WITH_GZFILEOP)" != "" WFLAGS = $(WFLAGS) -DWITH_GZFILEOP