From: Mika Lindqvist Date: Mon, 4 Apr 2016 14:01:39 +0000 (+0300) Subject: Move instruction set specific crc32 code to arch directories. X-Git-Tag: 1.9.9-b1~694^2~2 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=debd37a1c0d2ff3d0b395c067ecd8adcf51770ef;p=thirdparty%2Fzlib-ng.git Move instruction set specific crc32 code to arch directories. --- diff --git a/CMakeLists.txt b/CMakeLists.txt index 432684c2b..c840b8b62 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -312,7 +312,7 @@ set(ZLIB_ARCH_SRCS) set(ARCHDIR "arch/generic") if("${ARCH}" MATCHES "x86_64" OR "${ARCH}" MATCHES "AMD64") set(ARCHDIR "arch/x86") - add_definitions(-DX86_64 -DX86_NOCHECK_SSE2 -DUNALIGNED_OK -DUNROLL_LESS -DX86_CPUID) + add_definitions(-DX86_64 -DX86_NOCHECK_SSE2 -DUNALIGNED_OK -DUNROLL_LESS) add_feature_info(SSE2 1 "Use the SSE2 instruction set, using \"${SSE2FLAG}\"") elseif("${ARCH}" MATCHES "arm") set(ARCHDIR "arch/arm") @@ -324,6 +324,7 @@ else() endif() if("${ARCHDIR}" MATCHES "arch/x86" AND WITH_OPTIM) + add_definitions("-DX86_CPUID") set(ZLIB_ARCH_SRCS ${ZLIB_ARCH_SRCS} ${ARCHDIR}/x86.c) if(HAVE_SSE42_INTRIN) add_definitions(-DX86_SSE4_2_CRC_HASH) @@ -345,7 +346,7 @@ if("${ARCHDIR}" MATCHES "arch/x86" AND WITH_OPTIM) endif() if(HAVE_PCLMULQDQ_INTRIN) add_definitions(-DX86_PCLMULQDQ_CRC) - set(ZLIB_ARCH_SRCS ${ZLIB_ARCH_SRCS} ${ARCHDIR}/crc_folding.c) + set(ZLIB_ARCH_SRCS ${ZLIB_ARCH_SRCS} ${ARCHDIR}/crc_folding.c ${ARCHDIR}/crc_pclmulqdq.c) add_feature_info(PCLMUL_CRC 1 "Support CRC hash generation using PCLMULQDQ, using \"${SSE4FLAG} ${PCLMULFLAG}\"") add_intrinsics_option(${PCLMULFLAG}) if(NOT HAVE_SSE42_INTRIN) diff --git a/arch/x86/Makefile.in b/arch/x86/Makefile.in index 3604ba852..b327a0506 100644 --- a/arch/x86/Makefile.in +++ b/arch/x86/Makefile.in @@ -7,45 +7,50 @@ CFLAGS= SFLAGS= INCLUDES= -SRCDIR= -SRCTOP= +SRCDIR=. +SRCTOP=../.. all: x86.o x86.lo fill_window_sse.o fill_window_sse.lo deflate_quick.o deflate_quick.lo insert_string_sse.o insert_string_sse.lo crc_folding.o crc_folding.lo -x86.o: +x86.o: $(SRCDIR)/x86.c $(CC) $(CFLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/x86.c -x86.lo: +x86.lo: $(SRCDIR)/x86.c $(CC) $(SFLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/x86.c -fill_window_sse.o: +fill_window_sse.o: $(SRCDIR)/fill_window_sse.c $(CC) $(CFLAGS) -msse2 $(INCLUDES) -c -o $@ $(SRCDIR)/fill_window_sse.c -fill_window_sse.lo: +fill_window_sse.lo: $(SRCDIR)/fill_window_sse.c $(CC) $(SFLAGS) -msse2 -DPIC $(INCLUDES) -c -o $@ $(SRCDIR)/fill_window_sse.c -deflate_quick.o: +deflate_quick.o: $(SRCDIR)/deflate_quick.c $(CC) $(CFLAGS) -msse4 $(INCLUDES) -c -o $@ $(SRCDIR)/deflate_quick.c -deflate_quick.lo: +deflate_quick.lo: $(SRCDIR)/deflate_quick.c $(CC) $(SFLAGS) -msse4 -DPIC $(INCLUDES) -c -o $@ $(SRCDIR)/deflate_quick.c -insert_string_sse.o: +insert_string_sse.o: $(SRCDIR)/insert_string_sse.c $(CC) $(CFLAGS) -msse4 $(INCLUDES) -c -o $@ $(SRCDIR)/insert_string_sse.c -insert_string_sse.lo: +insert_string_sse.lo: $(SRCDIR)/insert_string_sse.c $(CC) $(SFLAGS) -msse4 -DPIC $(INCLUDES) -c -o $@ $(SRCDIR)/insert_string_sse.c -crc_folding.o: +crc_folding.o: $(SRCDIR)/crc_folding.c $(CC) $(CFLAGS) -mpclmul -msse4 $(INCLUDES) -c -o $@ $(SRCDIR)/crc_folding.c -crc_folding.lo: +crc_folding.lo: $(SRCDIR)/crc_folding.c $(CC) $(SFLAGS) -mpclmul -msse4 -DPIC $(INCLUDES) -c -o $@ $(SRCDIR)/crc_folding.c +crc_pclmulqdq.o: $(SRCDIR)/crc_pclmulqdq.c + $(CC) $(CFLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/crc_pclmulqdq.c + +crc_pclmulqdq.lo: $(SRCDIR)/crc_pclmulqdq.c + $(CC) $(SFLAGS) -DPIC $(INCLUDES) -c -o $@ $(SRCDIR)/crc_pclmulqdq.c mostlyclean: clean clean: - rm -f *.o *.lo *~ \ + rm -f *.o *.lo *~ rm -rf objs rm -f *.gcda *.gcno *.gcov diff --git a/arch/x86/crc_folding.c b/arch/x86/crc_folding.c index 957754de5..fe9c4d913 100644 --- a/arch/x86/crc_folding.c +++ b/arch/x86/crc_folding.c @@ -22,7 +22,7 @@ #include #include -#include "deflate.h" +#include "crc_folding.h" #define CRC_LOAD(s) \ diff --git a/arch/x86/crc_folding.h b/arch/x86/crc_folding.h new file mode 100644 index 000000000..22bfa9864 --- /dev/null +++ b/arch/x86/crc_folding.h @@ -0,0 +1,19 @@ +/* crc_folding.h + * + * Compute the CRC32 using a parallelized folding approach with the PCLMULQDQ + * instruction. + * + * Copyright (C) 2013 Intel Corporation Jim Kukunas + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +#ifndef CRC_FOLDING_H_ +#define CRC_FOLDING_H_ + +#include "deflate.h" + +ZLIB_INTERNAL void crc_fold_init(deflate_state *const); +ZLIB_INTERNAL uint32_t crc_fold_512to32(deflate_state *const); +ZLIB_INTERNAL void crc_fold_copy(deflate_state *const, unsigned char *, const unsigned char *, long); + +#endif diff --git a/arch/x86/crc_pclmulqdq.c b/arch/x86/crc_pclmulqdq.c new file mode 100644 index 000000000..04281ca5c --- /dev/null +++ b/arch/x86/crc_pclmulqdq.c @@ -0,0 +1,33 @@ +/* crc_pclmulqdq.c -- compute the CRC-32 of a data stream + * Copyright (C) 1995-2006, 2010, 2011, 2012 Mark Adler + * For conditions of distribution and use, see copyright notice in zlib.h + * + */ + +#include "x86.h" +#include "crc_folding.h" +#include "deflate.h" + +#ifdef X86_PCLMULQDQ_CRC +ZLIB_INTERNAL void crc_reset(deflate_state *const s) { + if (x86_cpu_has_pclmulqdq) { + crc_fold_init(s); + return; + } + s->strm->adler = crc32(0L, Z_NULL, 0); +} + +ZLIB_INTERNAL void crc_finalize(deflate_state *const s) { + if (x86_cpu_has_pclmulqdq) + s->strm->adler = crc_fold_512to32(s); +} + +ZLIB_INTERNAL void copy_with_crc(z_stream *strm, unsigned char *dst, unsigned long size) { + if (x86_cpu_has_pclmulqdq) { + crc_fold_copy(strm->state, dst, strm->next_in, size); + return; + } + memcpy(dst, strm->next_in, size); + strm->adler = crc32(strm->adler, dst, size); +} +#endif diff --git a/configure b/configure index 1f49eff5c..8d8b5bdad 100755 --- a/configure +++ b/configure @@ -756,8 +756,8 @@ case "${ARCH}" in if test ${HAVE_PCLMULQDQ_INTRIN} -eq 1; then CFLAGS="${CFLAGS} -DX86_PCLMULQDQ_CRC" SFLAGS="${SFLAGS} -DX86_PCLMULQDQ_CRC" - ARCH_STATIC_OBJS="${ARCH_STATIC_OBJS} crc_folding.o" - ARCH_SHARED_OBJS="${ARCH_SHARED_OBJS} crc_folding.lo" + ARCH_STATIC_OBJS="${ARCH_STATIC_OBJS} crc_folding.o crc_pclmulqdq.o" + ARCH_SHARED_OBJS="${ARCH_SHARED_OBJS} crc_folding.lo crc_pclmulqdq.lo" fi # Enable deflate_quick at level 1? diff --git a/crc32.c b/crc32.c index d469601c0..828459094 100644 --- a/crc32.c +++ b/crc32.c @@ -63,10 +63,12 @@ #include "deflate.h" +ZLIB_INTERNAL uint32_t crc32_generic(uint32_t, const unsigned char *, z_off64_t); + #if BYTE_ORDER == LITTLE_ENDIAN -static uint32_t crc32_little(uint32_t, const unsigned char *, size_t); +ZLIB_INTERNAL uint32_t crc32_little(uint32_t, const unsigned char *, size_t); #elif BYTE_ORDER == BIG_ENDIAN -static uint32_t crc32_big(uint32_t, const unsigned char *, size_t); +ZLIB_INTERNAL uint32_t crc32_big(uint32_t, const unsigned char *, size_t); #endif /* Local functions for crc concatenation */ @@ -205,12 +207,6 @@ const uint32_t * ZEXPORT get_crc_table(void) { return (const uint32_t *)crc_table; } -/* ========================================================================= */ -#define DO1 crc = crc_table[0][((int)crc ^ (*buf++)) & 0xff] ^ (crc >> 8) -#define DO8 DO1; DO1; DO1; DO1; DO1; DO1; DO1; DO1 -#define DO4 DO1; DO1; DO1; DO1 - -/* ========================================================================= */ uint32_t ZEXPORT crc32_z(uint32_t crc, const unsigned char *buf, size_t len) { if (buf == NULL) return 0; @@ -226,6 +222,18 @@ uint32_t ZEXPORT crc32_z(uint32_t crc, const unsigned char *buf, size_t len) { return crc32_big(crc, buf, len); #endif } + + return crc32_generic(crc, buf, len); +} + +/* ========================================================================= */ +#define DO1 crc = crc_table[0][((int)crc ^ (*buf++)) & 0xff] ^ (crc >> 8) +#define DO8 DO1; DO1; DO1; DO1; DO1; DO1; DO1; DO1 +#define DO4 DO1; DO1; DO1; DO1 + +/* ========================================================================= */ +ZLIB_INTERNAL uint32_t crc32_generic(uint32_t crc, const unsigned char *buf, z_off64_t len) +{ crc = crc ^ 0xffffffff; #ifdef UNROLL_LESS @@ -270,7 +278,7 @@ uint32_t ZEXPORT crc32(uint32_t crc, const unsigned char *buf, uint32_t len) { #define DOLIT32 DOLIT4; DOLIT4; DOLIT4; DOLIT4; DOLIT4; DOLIT4; DOLIT4; DOLIT4 /* ========================================================================= */ -static uint32_t crc32_little(uint32_t crc, const unsigned char *buf, size_t len) { +ZLIB_INTERNAL uint32_t crc32_little(uint32_t crc, const unsigned char *buf, size_t len) { register uint32_t c; register const uint32_t *buf4; @@ -312,7 +320,7 @@ static uint32_t crc32_little(uint32_t crc, const unsigned char *buf, size_t len) #define DOBIG32 DOBIG4; DOBIG4; DOBIG4; DOBIG4; DOBIG4; DOBIG4; DOBIG4; DOBIG4 /* ========================================================================= */ -static uint32_t crc32_big(uint32_t crc, const unsigned char *buf, size_t len) { +ZLIB_INTERNAL uint32_t crc32_big(uint32_t crc, const unsigned char *buf, size_t len) { register uint32_t c; register const uint32_t *buf4; @@ -432,40 +440,14 @@ uint32_t ZEXPORT crc32_combine64(uint32_t crc1, uint32_t crc2, z_off64_t len2) { return crc32_combine_(crc1, crc2, len2); } - -#ifdef X86_PCLMULQDQ_CRC -#include "arch/x86/x86.h" -extern void ZLIB_INTERNAL crc_fold_init(deflate_state *const s); -extern void ZLIB_INTERNAL crc_fold_copy(deflate_state *const s, - unsigned char *dst, const unsigned char *src, long len); -extern uint32_t ZLIB_INTERNAL crc_fold_512to32(deflate_state *const s); -#endif - +#ifndef X86_PCLMULQDQ_CRC ZLIB_INTERNAL void crc_reset(deflate_state *const s) { -#ifdef X86_PCLMULQDQ_CRC - if (x86_cpu_has_pclmulqdq) { - crc_fold_init(s); - return; - } -#endif s->strm->adler = crc32(0L, Z_NULL, 0); } -ZLIB_INTERNAL void crc_finalize(deflate_state *const s) { -#ifdef X86_PCLMULQDQ_CRC - if (x86_cpu_has_pclmulqdq) - s->strm->adler = crc_fold_512to32(s); -#endif -} - -ZLIB_INTERNAL void copy_with_crc(z_stream *strm, unsigned char *dst, long size) { -#ifdef X86_PCLMULQDQ_CRC - if (x86_cpu_has_pclmulqdq) { - crc_fold_copy(strm->state, dst, strm->next_in, size); - return; - } -#endif +ZLIB_INTERNAL void copy_with_crc(z_stream *strm, unsigned char *dst, unsigned long size) { memcpy(dst, strm->next_in, size); strm->adler = crc32(strm->adler, dst, size); } +#endif diff --git a/deflate.c b/deflate.c index 21d4f414c..8ca21c523 100644 --- a/deflate.c +++ b/deflate.c @@ -85,8 +85,10 @@ ZLIB_INTERNAL void flush_pending (z_stream *strm); ZLIB_INTERNAL unsigned read_buf (z_stream *strm, unsigned char *buf, unsigned size); extern void crc_reset(deflate_state *const s); +#ifdef X86_PCLMULQDQ_CRC extern void crc_finalize(deflate_state *const s); -extern void copy_with_crc(z_stream *strm, unsigned char *dst, long size); +#endif +extern void copy_with_crc(z_stream *strm, unsigned char *dst, unsigned long size); /* =========================================================================== * Local data @@ -991,7 +993,9 @@ int ZEXPORT deflate(z_stream *strm, int flush) { /* Write the trailer */ #ifdef GZIP if (s->wrap == 2) { +# ifdef X86_PCLMULQDQ_CRC crc_finalize(s); +# endif put_byte(s, (unsigned char)(strm->adler & 0xff)); put_byte(s, (unsigned char)((strm->adler >> 8) & 0xff)); put_byte(s, (unsigned char)((strm->adler >> 16) & 0xff)); diff --git a/win32/Makefile.msc b/win32/Makefile.msc index b6375ccb4..8a34354db 100644 --- a/win32/Makefile.msc +++ b/win32/Makefile.msc @@ -31,7 +31,7 @@ DEFFILE = zlib.def WITH_GZFILEOP = OBJS = adler32.obj compress.obj crc32.obj deflate.obj deflate_fast.obj deflate_quick.obj deflate_slow.obj \ - infback.obj inflate.obj inftrees.obj inffast.obj match.obj trees.obj uncompr.obj zutil.obj x86.obj fill_window_sse.obj insert_string_sse.obj crc_folding.obj + infback.obj inflate.obj inftrees.obj inffast.obj match.obj trees.obj uncompr.obj zutil.obj x86.obj fill_window_sse.obj insert_string_sse.obj crc_folding.obj crc_pclmulqdq.obj !if "$(WITH_GZFILEOP)" != "" WFLAGS = $(WFLAGS) -DWITH_GZFILEOP OBJS = $(OBJS) gzclose.obj gzlib.obj gzread.obj gzwrite.obj