deflate.h
deflate_p.h
functable.h
- inffast.h
+ inffast_tpl.h
inffixed_tbl.h
inflate.h
inflate_p.h
deflate_stored.c
functable.c
infback.c
- inffast.c
inflate.c
inftrees.c
insert_string.c
deflate_stored.o \
functable.o \
infback.o \
- inffast.o \
inflate.o \
inftrees.o \
insert_string.o \
deflate_stored.lo \
functable.lo \
infback.lo \
- inffast.lo \
inflate.lo \
inftrees.lo \
insert_string.lo \
#include "chunkset_tpl.h"
+#define INFLATE_FAST inflate_fast_neon
+
+#include "inffast_tpl.h"
+
#endif
#include "chunkset_tpl.h"
+#define INFLATE_FAST inflate_fast_power8
+
+#include "inffast_tpl.h"
+
#endif
#include "chunkset_tpl.h"
+#define INFLATE_FAST inflate_fast_avx
+
+#include "inffast_tpl.h"
+
#endif
#include "chunkset_tpl.h"
+#define INFLATE_FAST inflate_fast_sse2
+
+#include "inffast_tpl.h"
+
#endif
#include "chunkset_tpl.h"
+#define INFLATE_FAST inflate_fast_sse41
+
+#include "inffast_tpl.h"
+
#endif
#define CHUNKMEMSET_SAFE chunkmemset_safe_c
#include "chunkset_tpl.h"
+
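+/* Instantiate the shared inflate fast loop (inffast_tpl.h) on top of this
+   file's generic chunk copy/memset helpers; the architecture-specific
+   chunkset implementations do the same with their own INFLATE_FAST name. */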
+#define INFLATE_FAST inflate_fast_c
+
+#include "inffast_tpl.h"
extern uint8_t* chunkmemset_safe_power8(uint8_t *out, unsigned dist, unsigned len, unsigned left);
#endif
+/* inflate fast loop */
+extern void inflate_fast_c(void *strm, uint32_t start);
+#ifdef X86_SSE2_CHUNKSET
+extern void inflate_fast_sse2(void *strm, uint32_t start);
+#endif
+#ifdef X86_SSE41
+extern void inflate_fast_sse41(void *strm, uint32_t start);
+#endif
+#ifdef X86_AVX_CHUNKSET
+extern void inflate_fast_avx(void *strm, uint32_t start);
+#endif
+#ifdef ARM_NEON_CHUNKSET
+extern void inflate_fast_neon(void *strm, uint32_t start);
+#endif
+#ifdef POWER8_VSX_CHUNKSET
+extern void inflate_fast_power8(void *strm, uint32_t start);
+#endif
+
/* CRC32 */
typedef uint32_t (*crc32_func)(uint32_t crc32, const uint8_t *buf, size_t len);
return functable.compare256(src0, src1);
}
+Z_INTERNAL void inflate_fast_stub(void *strm, uint32_t start) {
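+    /* Pick the most advanced inflate_fast variant the running CPU supports,
+       store it in the functable, then call it; later calls through
+       functable.inflate_fast go straight to the chosen variant. */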
+ functable.inflate_fast = &inflate_fast_c;
+
+#ifdef X86_SSE2_CHUNKSET
+# if !defined(__x86_64__) && !defined(_M_X64) && !defined(X86_NOCHECK_SSE2)
+ if (x86_cpu_has_sse2)
+# endif
+ functable.inflate_fast = &inflate_fast_sse2;
+#endif
+#if defined(X86_SSE41) && defined(X86_SSE2)
+ if (x86_cpu_has_sse41)
+ functable.inflate_fast = &inflate_fast_sse41;
+#endif
+#ifdef X86_AVX_CHUNKSET
+ if (x86_cpu_has_avx2)
+ functable.inflate_fast = &inflate_fast_avx;
+#endif
+#ifdef ARM_NEON_CHUNKSET
+ if (arm_cpu_has_neon)
+ functable.inflate_fast = &inflate_fast_neon;
+#endif
+#ifdef POWER8_VSX_CHUNKSET
+ if (power_cpu_has_arch_2_07)
+ functable.inflate_fast = &inflate_fast_power8;
+#endif
+
+ functable.inflate_fast(strm, start);
+}
+
/* functable init */
Z_INTERNAL Z_TLS struct functable_s functable = {
adler32_stub,
chunkunroll_stub,
chunkmemset_stub,
chunkmemset_safe_stub,
+ inflate_fast_stub,
insert_string_stub,
longest_match_stub,
longest_match_slow_stub,
uint8_t* (* chunkunroll) (uint8_t *out, unsigned *dist, unsigned *len);
uint8_t* (* chunkmemset) (uint8_t *out, unsigned dist, unsigned len);
uint8_t* (* chunkmemset_safe) (uint8_t *out, unsigned dist, unsigned len, unsigned left);
+ void (* inflate_fast) (void *strm, uint32_t start);
void (* insert_string) (deflate_state *const s, uint32_t str, uint32_t count);
uint32_t (* longest_match) (deflate_state *const s, Pos cur_match);
uint32_t (* longest_match_slow) (deflate_state *const s, Pos cur_match);
#include "zutil.h"
#include "inftrees.h"
#include "inflate.h"
-#include "inffast.h"
#include "inflate_p.h"
#include "functable.h"
RESTORE();
if (state->whave < state->wsize)
state->whave = state->wsize - left;
- zng_inflate_fast(strm, state->wsize);
+ functable.inflate_fast(strm, state->wsize);
LOAD();
break;
}
+++ /dev/null
-#ifndef INFFAST_H_
-#define INFFAST_H_
-/* inffast.h -- header to use inffast.c
- * Copyright (C) 1995-2003, 2010 Mark Adler
- * For conditions of distribution and use, see copyright notice in zlib.h
- */
-
-/* WARNING: this file should *not* be used by applications. It is
- part of the implementation of the compression library and is
- subject to change. Applications should only use zlib.h.
- */
-
-void Z_INTERNAL zng_inflate_fast(PREFIX3(stream) *strm, unsigned long start);
-
-#define INFLATE_FAST_MIN_HAVE 8
-#define INFLATE_FAST_MIN_LEFT 258
-
-#endif /* INFFAST_H_ */
*/
#include "zbuild.h"
+#include "zendian.h"
#include "zutil.h"
#include "inftrees.h"
#include "inflate.h"
-#include "inffast.h"
#include "inflate_p.h"
#include "functable.h"
-/* Load 64 bits from IN and place the bytes at offset BITS in the result. */
-static inline uint64_t load_64_bits(const unsigned char *in, unsigned bits) {
- uint64_t chunk;
- memcpy(&chunk, in, sizeof(chunk));
-
-#if BYTE_ORDER == LITTLE_ENDIAN
- return chunk << bits;
-#else
- return ZSWAP64(chunk) << bits;
-#endif
-}
/*
Decode literal, length, and distance codes and write out the resulting
literal and match bytes until either not enough input or output is
requires strm->avail_out >= 258 for each loop to avoid checking for
output space.
*/
-void Z_INTERNAL zng_inflate_fast(PREFIX3(stream) *strm, unsigned long start) {
+void Z_INTERNAL INFLATE_FAST(PREFIX3(stream) *strm, uint32_t start) {
/* start: inflate()'s starting value for strm->avail_out */
struct inflate_state *state;
z_const unsigned char *in; /* local strm->next_in */
if (op < len) { /* still need some from output */
len -= op;
out = chunkcopy_safe(out, from, op, safe);
- out = functable.chunkunroll(out, &dist, &len);
+ out = CHUNKUNROLL(out, &dist, &len);
out = chunkcopy_safe(out, out - dist, len, safe);
} else {
out = chunkcopy_safe(out, from, len, safe);
if (dist >= len || dist >= state->chunksize)
out = chunkcopy_safe(out, out - dist, len, safe);
else
- out = functable.chunkmemset_safe(out, dist, len, (unsigned)((safe - out) + 1));
+ out = CHUNKMEMSET_SAFE(out, dist, len, (unsigned)((safe - out) + 1));
} else {
/* Whole reference is in range of current output. No range checks are
necessary because we start with room for at least 258 bytes of output,
as they stay within 258 bytes of `out`.
*/
if (dist >= len || dist >= state->chunksize)
- out = functable.chunkcopy(out, out - dist, len);
+ out = CHUNKCOPY(out, out - dist, len);
else
- out = functable.chunkmemset(out, dist, len);
+ out = CHUNKMEMSET(out, dist, len);
}
} else if ((op & 64) == 0) { /* 2nd level distance code */
here = dcode + here->val + BITS(op);
#include "cpu_features.h"
#include "inftrees.h"
#include "inflate.h"
-#include "inffast.h"
#include "inflate_p.h"
#include "inffixed_tbl.h"
#include "functable.h"
/* use inflate_fast() if we have enough input and output */
if (have >= INFLATE_FAST_MIN_HAVE && left >= INFLATE_FAST_MIN_LEFT) {
RESTORE();
- zng_inflate_fast(strm, out);
+ functable.inflate_fast(strm, out);
LOAD();
if (state->mode == TYPE)
state->back = -1;
strm->msg = (char *)errmsg; \
} while (0)
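+/* Minimum number of input bytes available and output bytes free that the
+   callers check for before entering the fast decode loop; 258 is the longest
+   possible deflate match, so the loop can emit a whole match without
+   re-checking output space. */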
+#define INFLATE_FAST_MIN_HAVE 8
+#define INFLATE_FAST_MIN_LEFT 258
+
+/* Load 64 bits from IN and place the bytes at offset BITS in the result. */
+static inline uint64_t load_64_bits(const unsigned char *in, unsigned bits) {
+ uint64_t chunk;
+ memcpy(&chunk, in, sizeof(chunk));
+
+#if BYTE_ORDER == LITTLE_ENDIAN
+ return chunk << bits;
+#else
+ return ZSWAP64(chunk) << bits;
+#endif
+}
+
/* Behave like chunkcopy, but avoid writing beyond the legal output. */
static inline uint8_t* chunkcopy_safe(uint8_t *out, uint8_t *from, uint64_t len, uint8_t *safe) {
uint64_t safelen = (safe - out) + 1;
infback.obj \
inflate.obj \
inftrees.obj \
- inffast.obj \
insert_string.obj \
insert_string_roll.obj \
slide_hash.obj \
deflate_rle.obj: $(SRCDIR)/deflate_rle.c $(SRCDIR)/zbuild.h $(SRCDIR)/deflate.h $(SRCDIR)/deflate_p.h $(SRCDIR)/functable.h
deflate_slow.obj: $(SRCDIR)/deflate_slow.c $(SRCDIR)/zbuild.h $(SRCDIR)/deflate.h $(SRCDIR)/deflate_p.h $(SRCDIR)/functable.h
deflate_stored.obj: $(SRCDIR)/deflate_stored.c $(SRCDIR)/zbuild.h $(SRCDIR)/deflate.h $(SRCDIR)/deflate_p.h $(SRCDIR)/functable.h
-infback.obj: $(SRCDIR)/infback.c $(SRCDIR)/zbuild.h $(SRCDIR)/zutil.h $(SRCDIR)/inftrees.h $(SRCDIR)/inflate.h $(SRCDIR)/inffast.h
-inffast.obj: $(SRCDIR)/inffast.c $(SRCDIR)/zbuild.h $(SRCDIR)/zutil.h $(SRCDIR)/inftrees.h $(SRCDIR)/inflate.h $(SRCDIR)/inffast.h $(SRCDIR)/functable.h
-inflate.obj: $(SRCDIR)/inflate.c $(SRCDIR)/zbuild.h $(SRCDIR)/zutil.h $(SRCDIR)/inftrees.h $(SRCDIR)/inflate.h $(SRCDIR)/inffast.h $(SRCDIR)/functable.h $(SRCDIR)/functable.h
+infback.obj: $(SRCDIR)/infback.c $(SRCDIR)/zbuild.h $(SRCDIR)/zutil.h $(SRCDIR)/inftrees.h $(SRCDIR)/inflate.h $(SRCDIR)/inflate_p.h $(SRCDIR)/functable.h
+inflate.obj: $(SRCDIR)/inflate.c $(SRCDIR)/zbuild.h $(SRCDIR)/zutil.h $(SRCDIR)/inftrees.h $(SRCDIR)/inflate.h $(SRCDIR)/inflate_p.h $(SRCDIR)/functable.h
inftrees.obj: $(SRCDIR)/inftrees.c $(SRCDIR)/zbuild.h $(SRCDIR)/zutil.h $(SRCDIR)/inftrees.h
slide_hash.obj: $(SRCDIR)/slide_hash.c $(SRCDIR)/zbuild.h $(SRCDIR)/deflate.h
slide_hash_neon.obj: $(SRCDIR)/arch/arm/slide_hash_neon.c $(SRCDIR)/zbuild.h $(SRCDIR)/deflate.h
infback.obj \
inflate.obj \
inftrees.obj \
- inffast.obj \
insert_string.obj \
insert_string_roll.obj \
slide_hash.obj \
deflate_rle.obj: $(SRCDIR)/deflate_rle.c $(SRCDIR)/zbuild.h $(SRCDIR)/deflate.h $(SRCDIR)/deflate_p.h $(SRCDIR)/functable.h
deflate_slow.obj: $(SRCDIR)/deflate_slow.c $(SRCDIR)/zbuild.h $(SRCDIR)/deflate.h $(SRCDIR)/deflate_p.h $(SRCDIR)/functable.h
deflate_stored.obj: $(SRCDIR)/deflate_stored.c $(SRCDIR)/zbuild.h $(SRCDIR)/deflate.h $(SRCDIR)/deflate_p.h $(SRCDIR)/functable.h
-infback.obj: $(SRCDIR)/infback.c $(SRCDIR)/zbuild.h $(SRCDIR)/zutil.h $(SRCDIR)/inftrees.h $(SRCDIR)/inflate.h $(SRCDIR)/inffast.h
-inffast.obj: $(SRCDIR)/inffast.c $(SRCDIR)/zbuild.h $(SRCDIR)/zutil.h $(SRCDIR)/inftrees.h $(SRCDIR)/inflate.h $(SRCDIR)/inffast.h $(SRCDIR)/functable.h
-inflate.obj: $(SRCDIR)/inflate.c $(SRCDIR)/zbuild.h $(SRCDIR)/zutil.h $(SRCDIR)/inftrees.h $(SRCDIR)/inflate.h $(SRCDIR)/inffast.h $(SRCDIR)/functable.h $(SRCDIR)/functable.h
+infback.obj: $(SRCDIR)/infback.c $(SRCDIR)/zbuild.h $(SRCDIR)/zutil.h $(SRCDIR)/inftrees.h $(SRCDIR)/inflate.h $(SRCDIR)/inflate_p.h $(SRCDIR)/functable.h
+inflate.obj: $(SRCDIR)/inflate.c $(SRCDIR)/zbuild.h $(SRCDIR)/zutil.h $(SRCDIR)/inftrees.h $(SRCDIR)/inflate.h $(SRCDIR)/inflate_p.h $(SRCDIR)/functable.h
inftrees.obj: $(SRCDIR)/inftrees.c $(SRCDIR)/zbuild.h $(SRCDIR)/zutil.h $(SRCDIR)/inftrees.h
slide_hash.obj: $(SRCDIR)/slide_hash.c $(SRCDIR)/zbuild.h $(SRCDIR)/deflate.h
trees.obj: $(SRCDIR)/zbuild.h $(SRCDIR)/deflate.h $(SRCDIR)/trees_tbl.h
infback.obj \
inflate.obj \
inftrees.obj \
- inffast.obj \
insert_string.obj \
insert_string_roll.obj \
insert_string_sse42.obj \
deflate_rle.obj: $(SRCDIR)/deflate_rle.c $(SRCDIR)/zbuild.h $(SRCDIR)/deflate.h $(SRCDIR)/deflate_p.h $(SRCDIR)/functable.h
deflate_slow.obj: $(SRCDIR)/deflate_slow.c $(SRCDIR)/zbuild.h $(SRCDIR)/deflate.h $(SRCDIR)/deflate_p.h $(SRCDIR)/functable.h
deflate_stored.obj: $(SRCDIR)/deflate_stored.c $(SRCDIR)/zbuild.h $(SRCDIR)/deflate.h $(SRCDIR)/deflate_p.h $(SRCDIR)/functable.h
-infback.obj: $(SRCDIR)/infback.c $(SRCDIR)/zbuild.h $(SRCDIR)/zutil.h $(SRCDIR)/inftrees.h $(SRCDIR)/inflate.h $(SRCDIR)/inffast.h
-inffast.obj: $(SRCDIR)/inffast.c $(SRCDIR)/zbuild.h $(SRCDIR)/zutil.h $(SRCDIR)/inftrees.h $(SRCDIR)/inflate.h $(SRCDIR)/inffast.h $(SRCDIR)/functable.h
-inflate.obj: $(SRCDIR)/inflate.c $(SRCDIR)/zbuild.h $(SRCDIR)/zutil.h $(SRCDIR)/inftrees.h $(SRCDIR)/inflate.h $(SRCDIR)/inffast.h $(SRCDIR)/functable.h $(SRCDIR)/functable.h
+infback.obj: $(SRCDIR)/infback.c $(SRCDIR)/zbuild.h $(SRCDIR)/zutil.h $(SRCDIR)/inftrees.h $(SRCDIR)/inflate.h $(SRCDIR)/inflate_p.h $(SRCDIR)/functable.h
+inflate.obj: $(SRCDIR)/inflate.c $(SRCDIR)/zbuild.h $(SRCDIR)/zutil.h $(SRCDIR)/inftrees.h $(SRCDIR)/inflate.h $(SRCDIR)/inflate_p.h $(SRCDIR)/functable.h
inftrees.obj: $(SRCDIR)/inftrees.c $(SRCDIR)/zbuild.h $(SRCDIR)/zutil.h $(SRCDIR)/inftrees.h
slide_hash.obj: $(SRCDIR)/slide_hash.c $(SRCDIR)/zbuild.h $(SRCDIR)/deflate.h
slide_hash_avx2.obj: $(SRCDIR)/arch/x86/slide_hash_avx2.c $(SRCDIR)/zbuild.h $(SRCDIR)/deflate.h