set(ARCHDIR "arch/generic")
if("${ARCH}" MATCHES "x86_64" OR "${ARCH}" MATCHES "AMD64")
set(ARCHDIR "arch/x86")
- add_definitions(-DX86_64 -DX86_NOCHECK_SSE2 -DUNALIGNED_OK -DUNROLL_LESS -DX86_CPUID)
+ add_definitions(-DX86_64 -DX86_NOCHECK_SSE2 -DUNALIGNED_OK -DUNROLL_LESS)
add_feature_info(SSE2 1 "Use the SSE2 instruction set, using \"${SSE2FLAG}\"")
elseif("${ARCH}" MATCHES "arm")
set(ARCHDIR "arch/arm")
endif()
if("${ARCHDIR}" MATCHES "arch/x86" AND WITH_OPTIM)
+ add_definitions("-DX86_CPUID")
set(ZLIB_ARCH_SRCS ${ZLIB_ARCH_SRCS} ${ARCHDIR}/x86.c)
if(HAVE_SSE42_INTRIN)
add_definitions(-DX86_SSE4_2_CRC_HASH)
endif()
if(HAVE_PCLMULQDQ_INTRIN)
add_definitions(-DX86_PCLMULQDQ_CRC)
- set(ZLIB_ARCH_SRCS ${ZLIB_ARCH_SRCS} ${ARCHDIR}/crc_folding.c)
+ set(ZLIB_ARCH_SRCS ${ZLIB_ARCH_SRCS} ${ARCHDIR}/crc_folding.c ${ARCHDIR}/crc_pclmulqdq.c)
add_feature_info(PCLMUL_CRC 1 "Support CRC hash generation using PCLMULQDQ, using \"${SSE4FLAG} ${PCLMULFLAG}\"")
add_intrinsics_option(${PCLMULFLAG})
if(NOT HAVE_SSE42_INTRIN)
SFLAGS=
INCLUDES=
-SRCDIR=
-SRCTOP=
+SRCDIR=.
+SRCTOP=../..
all: x86.o x86.lo fill_window_sse.o fill_window_sse.lo deflate_quick.o deflate_quick.lo insert_string_sse.o insert_string_sse.lo crc_folding.o crc_folding.lo
-x86.o:
+x86.o: $(SRCDIR)/x86.c
$(CC) $(CFLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/x86.c
-x86.lo:
+x86.lo: $(SRCDIR)/x86.c
$(CC) $(SFLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/x86.c
-fill_window_sse.o:
+fill_window_sse.o: $(SRCDIR)/fill_window_sse.c
$(CC) $(CFLAGS) -msse2 $(INCLUDES) -c -o $@ $(SRCDIR)/fill_window_sse.c
-fill_window_sse.lo:
+fill_window_sse.lo: $(SRCDIR)/fill_window_sse.c
$(CC) $(SFLAGS) -msse2 -DPIC $(INCLUDES) -c -o $@ $(SRCDIR)/fill_window_sse.c
-deflate_quick.o:
+deflate_quick.o: $(SRCDIR)/deflate_quick.c
$(CC) $(CFLAGS) -msse4 $(INCLUDES) -c -o $@ $(SRCDIR)/deflate_quick.c
-deflate_quick.lo:
+deflate_quick.lo: $(SRCDIR)/deflate_quick.c
$(CC) $(SFLAGS) -msse4 -DPIC $(INCLUDES) -c -o $@ $(SRCDIR)/deflate_quick.c
-insert_string_sse.o:
+insert_string_sse.o: $(SRCDIR)/insert_string_sse.c
$(CC) $(CFLAGS) -msse4 $(INCLUDES) -c -o $@ $(SRCDIR)/insert_string_sse.c
-insert_string_sse.lo:
+insert_string_sse.lo: $(SRCDIR)/insert_string_sse.c
$(CC) $(SFLAGS) -msse4 -DPIC $(INCLUDES) -c -o $@ $(SRCDIR)/insert_string_sse.c
-crc_folding.o:
+crc_folding.o: $(SRCDIR)/crc_folding.c
$(CC) $(CFLAGS) -mpclmul -msse4 $(INCLUDES) -c -o $@ $(SRCDIR)/crc_folding.c
-crc_folding.lo:
+crc_folding.lo: $(SRCDIR)/crc_folding.c
$(CC) $(SFLAGS) -mpclmul -msse4 -DPIC $(INCLUDES) -c -o $@ $(SRCDIR)/crc_folding.c
+crc_pclmulqdq.o: $(SRCDIR)/crc_pclmulqdq.c
+ $(CC) $(CFLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/crc_pclmulqdq.c
+
+crc_pclmulqdq.lo: $(SRCDIR)/crc_pclmulqdq.c
+ $(CC) $(SFLAGS) -DPIC $(INCLUDES) -c -o $@ $(SRCDIR)/crc_pclmulqdq.c
mostlyclean: clean
clean:
- rm -f *.o *.lo *~ \
+ rm -f *.o *.lo *~
rm -rf objs
rm -f *.gcda *.gcno *.gcov
#include <immintrin.h>
#include <wmmintrin.h>
-#include "deflate.h"
+#include "crc_folding.h"
#define CRC_LOAD(s) \
--- /dev/null
+/* crc_folding.h
+ *
+ * Compute the CRC32 using a parallelized folding approach with the PCLMULQDQ
+ * instruction.
+ *
+ * Copyright (C) 2013 Intel Corporation Jim Kukunas
+ * For conditions of distribution and use, see copyright notice in zlib.h
+ */
+
+#ifndef CRC_FOLDING_H_
+#define CRC_FOLDING_H_
+
+#include "deflate.h"
+
+ZLIB_INTERNAL void crc_fold_init(deflate_state *const);
+ZLIB_INTERNAL uint32_t crc_fold_512to32(deflate_state *const);
+ZLIB_INTERNAL void crc_fold_copy(deflate_state *const, unsigned char *, const unsigned char *, long);
+
+#endif
--- /dev/null
+/* crc_pclmulqdq.c -- compute the CRC-32 of a data stream
+ * Copyright (C) 1995-2006, 2010, 2011, 2012 Mark Adler
+ * For conditions of distribution and use, see copyright notice in zlib.h
+ *
+ */
+
+#include "x86.h"
+#include "crc_folding.h"
+#include "deflate.h"
+
+#ifdef X86_PCLMULQDQ_CRC
+ZLIB_INTERNAL void crc_reset(deflate_state *const s) {
+ if (x86_cpu_has_pclmulqdq) {
+ crc_fold_init(s);
+ return;
+ }
+ s->strm->adler = crc32(0L, Z_NULL, 0);
+}
+
+ZLIB_INTERNAL void crc_finalize(deflate_state *const s) {
+ if (x86_cpu_has_pclmulqdq)
+ s->strm->adler = crc_fold_512to32(s);
+}
+
+ZLIB_INTERNAL void copy_with_crc(z_stream *strm, unsigned char *dst, unsigned long size) {
+ if (x86_cpu_has_pclmulqdq) {
+ crc_fold_copy(strm->state, dst, strm->next_in, size);
+ return;
+ }
+ memcpy(dst, strm->next_in, size);
+ strm->adler = crc32(strm->adler, dst, size);
+}
+#endif
if test ${HAVE_PCLMULQDQ_INTRIN} -eq 1; then
CFLAGS="${CFLAGS} -DX86_PCLMULQDQ_CRC"
SFLAGS="${SFLAGS} -DX86_PCLMULQDQ_CRC"
- ARCH_STATIC_OBJS="${ARCH_STATIC_OBJS} crc_folding.o"
- ARCH_SHARED_OBJS="${ARCH_SHARED_OBJS} crc_folding.lo"
+ ARCH_STATIC_OBJS="${ARCH_STATIC_OBJS} crc_folding.o crc_pclmulqdq.o"
+ ARCH_SHARED_OBJS="${ARCH_SHARED_OBJS} crc_folding.lo crc_pclmulqdq.lo"
fi
# Enable deflate_quick at level 1?
#include "deflate.h"
+ZLIB_INTERNAL uint32_t crc32_generic(uint32_t, const unsigned char *, z_off64_t);
+
#if BYTE_ORDER == LITTLE_ENDIAN
-static uint32_t crc32_little(uint32_t, const unsigned char *, size_t);
+ZLIB_INTERNAL uint32_t crc32_little(uint32_t, const unsigned char *, size_t);
#elif BYTE_ORDER == BIG_ENDIAN
-static uint32_t crc32_big(uint32_t, const unsigned char *, size_t);
+ZLIB_INTERNAL uint32_t crc32_big(uint32_t, const unsigned char *, size_t);
#endif
/* Local functions for crc concatenation */
return (const uint32_t *)crc_table;
}
-/* ========================================================================= */
-#define DO1 crc = crc_table[0][((int)crc ^ (*buf++)) & 0xff] ^ (crc >> 8)
-#define DO8 DO1; DO1; DO1; DO1; DO1; DO1; DO1; DO1
-#define DO4 DO1; DO1; DO1; DO1
-
-/* ========================================================================= */
uint32_t ZEXPORT crc32_z(uint32_t crc, const unsigned char *buf, size_t len) {
if (buf == NULL) return 0;
return crc32_big(crc, buf, len);
#endif
}
+
+ return crc32_generic(crc, buf, len);
+}
+
+/* ========================================================================= */
+#define DO1 crc = crc_table[0][((int)crc ^ (*buf++)) & 0xff] ^ (crc >> 8)
+#define DO8 DO1; DO1; DO1; DO1; DO1; DO1; DO1; DO1
+#define DO4 DO1; DO1; DO1; DO1
+
+/* ========================================================================= */
+ZLIB_INTERNAL uint32_t crc32_generic(uint32_t crc, const unsigned char *buf, z_off64_t len)
+{
crc = crc ^ 0xffffffff;
#ifdef UNROLL_LESS
#define DOLIT32 DOLIT4; DOLIT4; DOLIT4; DOLIT4; DOLIT4; DOLIT4; DOLIT4; DOLIT4
/* ========================================================================= */
-static uint32_t crc32_little(uint32_t crc, const unsigned char *buf, size_t len) {
+ZLIB_INTERNAL uint32_t crc32_little(uint32_t crc, const unsigned char *buf, size_t len) {
register uint32_t c;
register const uint32_t *buf4;
#define DOBIG32 DOBIG4; DOBIG4; DOBIG4; DOBIG4; DOBIG4; DOBIG4; DOBIG4; DOBIG4
/* ========================================================================= */
-static uint32_t crc32_big(uint32_t crc, const unsigned char *buf, size_t len) {
+ZLIB_INTERNAL uint32_t crc32_big(uint32_t crc, const unsigned char *buf, size_t len) {
register uint32_t c;
register const uint32_t *buf4;
return crc32_combine_(crc1, crc2, len2);
}
-
-#ifdef X86_PCLMULQDQ_CRC
-#include "arch/x86/x86.h"
-extern void ZLIB_INTERNAL crc_fold_init(deflate_state *const s);
-extern void ZLIB_INTERNAL crc_fold_copy(deflate_state *const s,
- unsigned char *dst, const unsigned char *src, long len);
-extern uint32_t ZLIB_INTERNAL crc_fold_512to32(deflate_state *const s);
-#endif
-
+#ifndef X86_PCLMULQDQ_CRC
ZLIB_INTERNAL void crc_reset(deflate_state *const s) {
-#ifdef X86_PCLMULQDQ_CRC
- if (x86_cpu_has_pclmulqdq) {
- crc_fold_init(s);
- return;
- }
-#endif
s->strm->adler = crc32(0L, Z_NULL, 0);
}
-ZLIB_INTERNAL void crc_finalize(deflate_state *const s) {
-#ifdef X86_PCLMULQDQ_CRC
- if (x86_cpu_has_pclmulqdq)
- s->strm->adler = crc_fold_512to32(s);
-#endif
-}
-
-ZLIB_INTERNAL void copy_with_crc(z_stream *strm, unsigned char *dst, long size) {
-#ifdef X86_PCLMULQDQ_CRC
- if (x86_cpu_has_pclmulqdq) {
- crc_fold_copy(strm->state, dst, strm->next_in, size);
- return;
- }
-#endif
+ZLIB_INTERNAL void copy_with_crc(z_stream *strm, unsigned char *dst, unsigned long size) {
memcpy(dst, strm->next_in, size);
strm->adler = crc32(strm->adler, dst, size);
}
+#endif
ZLIB_INTERNAL unsigned read_buf (z_stream *strm, unsigned char *buf, unsigned size);
extern void crc_reset(deflate_state *const s);
+#ifdef X86_PCLMULQDQ_CRC
extern void crc_finalize(deflate_state *const s);
-extern void copy_with_crc(z_stream *strm, unsigned char *dst, long size);
+#endif
+extern void copy_with_crc(z_stream *strm, unsigned char *dst, unsigned long size);
/* ===========================================================================
* Local data
/* Write the trailer */
#ifdef GZIP
if (s->wrap == 2) {
+# ifdef X86_PCLMULQDQ_CRC
crc_finalize(s);
+# endif
put_byte(s, (unsigned char)(strm->adler & 0xff));
put_byte(s, (unsigned char)((strm->adler >> 8) & 0xff));
put_byte(s, (unsigned char)((strm->adler >> 16) & 0xff));
WITH_GZFILEOP =
OBJS = adler32.obj compress.obj crc32.obj deflate.obj deflate_fast.obj deflate_quick.obj deflate_slow.obj \
- infback.obj inflate.obj inftrees.obj inffast.obj match.obj trees.obj uncompr.obj zutil.obj x86.obj fill_window_sse.obj insert_string_sse.obj crc_folding.obj
+ infback.obj inflate.obj inftrees.obj inffast.obj match.obj trees.obj uncompr.obj zutil.obj x86.obj fill_window_sse.obj insert_string_sse.obj crc_folding.obj crc_pclmulqdq.obj
!if "$(WITH_GZFILEOP)" != ""
WFLAGS = $(WFLAGS) -DWITH_GZFILEOP
OBJS = $(OBJS) gzclose.obj gzlib.obj gzread.obj gzwrite.obj