#ifdef ARM_NEON
#include "neon_intrins.h"
#include "zbuild.h"
+#include "zmemory.h"
#include "arch/generic/chunk_permute_table.h"
typedef uint8x16_t chunk_t;
};
static inline void chunkmemset_2(uint8_t *from, chunk_t *chunk) {
- uint16_t tmp;
- memcpy(&tmp, from, sizeof(tmp));
- *chunk = vreinterpretq_u8_u16(vdupq_n_u16(tmp));
+ *chunk = vreinterpretq_u8_u16(vdupq_n_u16(zng_memread_2(from)));
}
static inline void chunkmemset_4(uint8_t *from, chunk_t *chunk) {
- uint32_t tmp;
- memcpy(&tmp, from, sizeof(tmp));
- *chunk = vreinterpretq_u8_u32(vdupq_n_u32(tmp));
+ *chunk = vreinterpretq_u8_u32(vdupq_n_u32(zng_memread_4(from)));
}
static inline void chunkmemset_8(uint8_t *from, chunk_t *chunk) {
- uint64_t tmp;
- memcpy(&tmp, from, sizeof(tmp));
- *chunk = vreinterpretq_u8_u64(vdupq_n_u64(tmp));
+ *chunk = vreinterpretq_u8_u64(vdupq_n_u64(zng_memread_8(from)));
}
#define CHUNKSIZE chunksize_neon
*/
#include "zbuild.h"
-#include "zutil_p.h"
+#include "zmemory.h"
#include "deflate.h"
#include "fallback_builtins.h"
chunkset_c.lo: $(SRCDIR)/chunkset_c.c $(SRCTOP)/zbuild.h $(SRCTOP)/chunkset_tpl.h $(SRCTOP)/inffast_tpl.h
$(CC) $(SFLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/chunkset_c.c
-compare256_c.o: $(SRCDIR)/compare256_c.c $(SRCTOP)/zbuild.h $(SRCTOP)/zutil_p.h $(SRCTOP)/deflate.h $(SRCTOP)/fallback_builtins.h
+compare256_c.o: $(SRCDIR)/compare256_c.c $(SRCTOP)/zbuild.h $(SRCTOP)/zmemory.h $(SRCTOP)/deflate.h $(SRCTOP)/fallback_builtins.h
$(CC) $(CFLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/compare256_c.c
-compare256_c.lo: $(SRCDIR)/compare256_c.c $(SRCTOP)/zbuild.h $(SRCTOP)/zutil_p.h $(SRCTOP)/deflate.h $(SRCTOP)/fallback_builtins.h
+compare256_c.lo: $(SRCDIR)/compare256_c.c $(SRCTOP)/zbuild.h $(SRCTOP)/zmemory.h $(SRCTOP)/deflate.h $(SRCTOP)/fallback_builtins.h
$(CC) $(SFLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/compare256_c.c
crc32_braid_c.o: $(SRCDIR)/crc32_braid_c.c $(SRCTOP)/zbuild.h $(SRCTOP)/crc32_braid_p.h $(SRCTOP)/crc32_braid_tbl.h
*/
#include "zbuild.h"
+#include "zmemory.h"
typedef uint64_t chunk_t;
#define HAVE_CHUNKMEMSET_8
static inline void chunkmemset_4(uint8_t *from, chunk_t *chunk) {
- uint8_t *dest = (uint8_t *)chunk;
- memcpy(dest, from, sizeof(uint32_t));
- memcpy(dest+4, from, sizeof(uint32_t));
+ uint32_t tmp = zng_memread_4(from);
+ *chunk = tmp | ((chunk_t)tmp << 32);
}
static inline void chunkmemset_8(uint8_t *from, chunk_t *chunk) {
- memcpy(chunk, from, sizeof(uint64_t));
+ *chunk = zng_memread_8(from);
}
static inline void loadchunk(uint8_t const *s, chunk_t *chunk) {
- memcpy(chunk, (uint8_t *)s, sizeof(uint64_t));
+ *chunk = zng_memread_8(s);
}
static inline void storechunk(uint8_t *out, chunk_t *chunk) {
- memcpy(out, chunk, sizeof(uint64_t));
+ zng_memwrite_8(out, *chunk);
}
#define CHUNKSIZE chunksize_c
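For reference, a minimal standalone sketch (hypothetical, not part of the patch) showing that the shift-or splat used by the new generic chunkmemset_4 above yields the same chunk bytes as the two memcpy stores it replaces, on either endianness, because both 32-bit halves hold the same value:

    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    int main(void) {
        const uint8_t from[4] = { 0xDE, 0xAD, 0xBE, 0xEF };

        /* Old approach: copy the 4-byte pattern twice into the chunk. */
        uint64_t old_chunk;
        memcpy(&old_chunk, from, 4);
        memcpy((uint8_t *)&old_chunk + 4, from, 4);

        /* New approach: one unaligned-safe read, then a shift-or splat
         * (memcpy stands in for zng_memread_4 here). */
        uint32_t tmp;
        memcpy(&tmp, from, sizeof(tmp));
        uint64_t new_chunk = tmp | ((uint64_t)tmp << 32);

        printf("%s\n", old_chunk == new_chunk ? "chunks match" : "chunks differ");
        return 0;
    }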
*/
#include "zbuild.h"
-#include "zutil_p.h"
+#include "zmemory.h"
#include "deflate.h"
#include "fallback_builtins.h"
do {
uint32_t sv, mv, diff;
- memcpy(&sv, src0, sizeof(sv));
- memcpy(&mv, src1, sizeof(mv));
+ sv = zng_memread_4(src0);
+ mv = zng_memread_4(src1);
diff = sv ^ mv;
if (diff) {
do {
uint64_t sv, mv, diff;
- memcpy(&sv, src0, sizeof(sv));
- memcpy(&mv, src1, sizeof(mv));
+ sv = zng_memread_8(src0);
+ mv = zng_memread_8(src1);
diff = sv ^ mv;
if (diff) {
#ifdef POWER8_VSX
#include <altivec.h>
#include "zbuild.h"
+#include "zmemory.h"
typedef vector unsigned char chunk_t;
#define HAVE_CHUNKMEMSET_8
static inline void chunkmemset_2(uint8_t *from, chunk_t *chunk) {
- uint16_t tmp;
- memcpy(&tmp, from, sizeof(tmp));
- *chunk = (vector unsigned char)vec_splats(tmp);
+ *chunk = (vector unsigned char)vec_splats(zng_memread_2(from));
}
static inline void chunkmemset_4(uint8_t *from, chunk_t *chunk) {
- uint32_t tmp;
- memcpy(&tmp, from, sizeof(tmp));
- *chunk = (vector unsigned char)vec_splats(tmp);
+ *chunk = (vector unsigned char)vec_splats(zng_memread_4(from));
}
static inline void chunkmemset_8(uint8_t *from, chunk_t *chunk) {
- uint64_t tmp;
- memcpy(&tmp, from, sizeof(tmp));
- *chunk = (vector unsigned char)vec_splats((unsigned long long)tmp);
+ *chunk = (vector unsigned char)vec_splats((unsigned long long)zng_memread_8(from));
}
static inline void loadchunk(uint8_t const *s, chunk_t *chunk) {
#ifdef POWER9
#include <altivec.h>
#include "zbuild.h"
-#include "zutil_p.h"
+#include "zmemory.h"
#include "deflate.h"
#include "zendian.h"
#ifdef RISCV_RVV
#include "zbuild.h"
-#include "zutil_p.h"
+#include "zmemory.h"
#include "deflate.h"
#include "fallback_builtins.h"
* For conditions of distribution and use, see copyright notice in zlib.h
*/
#include "zbuild.h"
+#include "zmemory.h"
#ifdef X86_AVX2
#include "avx2_tables.h"
#define HAVE_HALF_CHUNK
static inline void chunkmemset_2(uint8_t *from, chunk_t *chunk) {
- int16_t tmp;
- memcpy(&tmp, from, sizeof(tmp));
- *chunk = _mm256_set1_epi16(tmp);
+ *chunk = _mm256_set1_epi16(zng_memread_2(from));
}
static inline void chunkmemset_4(uint8_t *from, chunk_t *chunk) {
- int32_t tmp;
- memcpy(&tmp, from, sizeof(tmp));
- *chunk = _mm256_set1_epi32(tmp);
+ *chunk = _mm256_set1_epi32(zng_memread_4(from));
}
static inline void chunkmemset_8(uint8_t *from, chunk_t *chunk) {
- int64_t tmp;
- memcpy(&tmp, from, sizeof(tmp));
- *chunk = _mm256_set1_epi64x(tmp);
+ *chunk = _mm256_set1_epi64x(zng_memread_8(from));
}
static inline void chunkmemset_16(uint8_t *from, chunk_t *chunk) {
* For conditions of distribution and use, see copyright notice in zlib.h
*/
#include "zbuild.h"
+#include "zmemory.h"
#ifdef X86_AVX512
}
static inline void chunkmemset_2(uint8_t *from, chunk_t *chunk) {
- int16_t tmp;
- memcpy(&tmp, from, sizeof(tmp));
- *chunk = _mm256_set1_epi16(tmp);
+ *chunk = _mm256_set1_epi16(zng_memread_2(from));
}
static inline void chunkmemset_4(uint8_t *from, chunk_t *chunk) {
- int32_t tmp;
- memcpy(&tmp, from, sizeof(tmp));
- *chunk = _mm256_set1_epi32(tmp);
+ *chunk = _mm256_set1_epi32(zng_memread_4(from));
}
static inline void chunkmemset_8(uint8_t *from, chunk_t *chunk) {
- int64_t tmp;
- memcpy(&tmp, from, sizeof(tmp));
- *chunk = _mm256_set1_epi64x(tmp);
+ *chunk = _mm256_set1_epi64x(zng_memread_8(from));
}
static inline void chunkmemset_16(uint8_t *from, chunk_t *chunk) {
*/
#include "zbuild.h"
+#include "zmemory.h"
#ifdef X86_SSE2
#include <immintrin.h>
#define HAVE_CHUNKMEMSET_8
static inline void chunkmemset_2(uint8_t *from, chunk_t *chunk) {
- int16_t tmp;
- memcpy(&tmp, from, sizeof(tmp));
- *chunk = _mm_set1_epi16(tmp);
+ *chunk = _mm_set1_epi16(zng_memread_2(from));
}
static inline void chunkmemset_4(uint8_t *from, chunk_t *chunk) {
- int32_t tmp;
- memcpy(&tmp, from, sizeof(tmp));
- *chunk = _mm_set1_epi32(tmp);
+ *chunk = _mm_set1_epi32(zng_memread_4(from));
}
static inline void chunkmemset_8(uint8_t *from, chunk_t *chunk) {
- int64_t tmp;
- memcpy(&tmp, from, sizeof(tmp));
- *chunk = _mm_set1_epi64x(tmp);
+ *chunk = _mm_set1_epi64x(zng_memread_8(from));
}
static inline void loadchunk(uint8_t const *s, chunk_t *chunk) {
*/
#include "zbuild.h"
+#include "zmemory.h"
#if defined(X86_SSSE3)
#include <immintrin.h>
static inline void chunkmemset_2(uint8_t *from, chunk_t *chunk) {
- int16_t tmp;
- memcpy(&tmp, from, sizeof(tmp));
- *chunk = _mm_set1_epi16(tmp);
+ *chunk = _mm_set1_epi16(zng_memread_2(from));
}
static inline void chunkmemset_4(uint8_t *from, chunk_t *chunk) {
- int32_t tmp;
- memcpy(&tmp, from, sizeof(tmp));
- *chunk = _mm_set1_epi32(tmp);
+ *chunk = _mm_set1_epi32(zng_memread_4(from));
}
static inline void chunkmemset_8(uint8_t *from, chunk_t *chunk) {
- int64_t tmp;
- memcpy(&tmp, from, sizeof(tmp));
- *chunk = _mm_set1_epi64x(tmp);
+ *chunk = _mm_set1_epi64x(zng_memread_8(from));
}
static inline void loadchunk(uint8_t const *s, chunk_t *chunk) {
*/
#include "zbuild.h"
-#include "zutil_p.h"
+#include "zmemory.h"
#include "deflate.h"
#include "fallback_builtins.h"
*/
#include "zbuild.h"
-#include "zutil_p.h"
+#include "zmemory.h"
#include "deflate.h"
#include "fallback_builtins.h"
*/
#include "zbuild.h"
+#include "zmemory.h"
#include "fallback_builtins.h"
#include "zendian.h"
/* 16-bit unaligned integer comparison */
static inline uint32_t compare256_rle_unaligned_16(const uint8_t *src0, const uint8_t *src1) {
uint32_t len = 0;
- uint16_t src0_cmp, src1_cmp;
+ uint16_t src0_cmp;
- memcpy(&src0_cmp, src0, sizeof(src0_cmp));
+ src0_cmp = zng_memread_2(src0);
do {
- memcpy(&src1_cmp, src1, sizeof(src1_cmp));
- if (src0_cmp != src1_cmp)
+ if (src0_cmp != zng_memread_2(src1))
return len + (*src0 == *src1);
src1 += 2, len += 2;
- memcpy(&src1_cmp, src1, sizeof(src1_cmp));
- if (src0_cmp != src1_cmp)
+ if (src0_cmp != zng_memread_2(src1))
return len + (*src0 == *src1);
src1 += 2, len += 2;
- memcpy(&src1_cmp, src1, sizeof(src1_cmp));
- if (src0_cmp != src1_cmp)
+ if (src0_cmp != zng_memread_2(src1))
return len + (*src0 == *src1);
src1 += 2, len += 2;
- memcpy(&src1_cmp, src1, sizeof(src1_cmp));
- if (src0_cmp != src1_cmp)
+ if (src0_cmp != zng_memread_2(src1))
return len + (*src0 == *src1);
src1 += 2, len += 2;
} while (len < 256);
uint32_t sv, len = 0;
uint16_t src0_cmp;
- memcpy(&src0_cmp, src0, sizeof(src0_cmp));
+ src0_cmp = zng_memread_2(src0);
sv = ((uint32_t)src0_cmp << 16) | src0_cmp;
do {
uint32_t mv, diff;
- memcpy(&mv, src1, sizeof(mv));
+ mv = zng_memread_4(src1);
diff = sv ^ mv;
if (diff) {
uint16_t src0_cmp;
uint64_t sv;
- memcpy(&src0_cmp, src0, sizeof(src0_cmp));
+ src0_cmp = zng_memread_2(src0);
src0_cmp32 = ((uint32_t)src0_cmp << 16) | src0_cmp;
sv = ((uint64_t)src0_cmp32 << 32) | src0_cmp32;
do {
uint64_t mv, diff;
- memcpy(&mv, src1, sizeof(mv));
+ mv = zng_memread_8(src1);
diff = sv ^ mv;
if (diff) {
#include "zutil.h"
#include "zendian.h"
+#include "zmemory.h"
#include "crc32.h"
#ifdef S390_DFLTCC_DEFLATE
#if BYTE_ORDER == BIG_ENDIAN
w = ZSWAP16(w);
#endif
- memcpy(&s->pending_buf[s->pending], &w, sizeof(w));
+ zng_memwrite_2(&s->pending_buf[s->pending], w);
s->pending += 2;
}
#if BYTE_ORDER == LITTLE_ENDIAN
w = ZSWAP16(w);
#endif
- memcpy(&s->pending_buf[s->pending], &w, sizeof(w));
+ zng_memwrite_2(&s->pending_buf[s->pending], w);
s->pending += 2;
}
#if BYTE_ORDER == BIG_ENDIAN
dw = ZSWAP32(dw);
#endif
- memcpy(&s->pending_buf[s->pending], &dw, sizeof(dw));
+ zng_memwrite_4(&s->pending_buf[s->pending], dw);
s->pending += 4;
}
#if BYTE_ORDER == LITTLE_ENDIAN
dw = ZSWAP32(dw);
#endif
- memcpy(&s->pending_buf[s->pending], &dw, sizeof(dw));
+ zng_memwrite_4(&s->pending_buf[s->pending], dw);
s->pending += 4;
}
#if BYTE_ORDER == BIG_ENDIAN
lld = ZSWAP64(lld);
#endif
- memcpy(&s->pending_buf[s->pending], &lld, sizeof(lld));
+ zng_memwrite_8(&s->pending_buf[s->pending], lld);
s->pending += 8;
}
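A small standalone sketch (hypothetical, outside the patch; local stand-ins for ZSWAP16 and zng_memwrite_2) of the invariant the pending_buf writes above preserve: the value is byte-swapped on big-endian hosts before the raw store, so the bytes landing in the pending buffer are little-endian on any host:

    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    static uint16_t swap16(uint16_t w) { return (uint16_t)((w >> 8) | (w << 8)); }  /* ZSWAP16 stand-in */
    static void write2(uint8_t *p, uint16_t w) { memcpy(p, &w, sizeof(w)); }        /* zng_memwrite_2 stand-in */

    int main(void) {
        uint8_t pending[2];
        uint16_t w = 0x1234;

        const uint16_t probe = 1;
        if (*(const uint8_t *)&probe == 0)  /* big-endian host */
            w = swap16(w);                  /* mirrors the BYTE_ORDER == BIG_ENDIAN branch */
        write2(pending, w);                 /* raw store into the pending buffer */

        printf("%02x %02x\n", pending[0], pending[1]);  /* prints "34 12" on any host */
        return 0;
    }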
*/
#include "zbuild.h"
-#include "zutil_p.h"
+#include "zmemory.h"
#include "deflate.h"
#include "deflate_p.h"
#include "functable.h"
#define INFLATE_P_H
#include <stdlib.h>
+#include "zmemory.h"
/* Architecture-specific hooks. */
#ifdef S390_DFLTCC_INFLATE
/* Load 64 bits from IN and place the bytes at offset BITS in the result. */
static inline uint64_t load_64_bits(const unsigned char *in, unsigned bits) {
- uint64_t chunk;
- memcpy(&chunk, in, sizeof(chunk));
+ uint64_t chunk = zng_memread_8(in);
#if BYTE_ORDER == LITTLE_ENDIAN
return chunk << bits;
uint64_t non_olap_size = llabs(from - out); // llabs vs labs for compatibility with windows
/* So this doesn't give us a worst case scenario of function calls in a loop,
- * we want to instead break this down into copy blocks of fixed lengths */
+ * we want to instead break this down into copy blocks of fixed lengths
+ *
+ * TODO: The memcpy calls aren't inlined on architectures with strict memory alignment
+ */
while (len) {
tocopy = MIN(non_olap_size, len);
len -= tocopy;
*
*/
+#include "zmemory.h"
+
#ifndef HASH_CALC_OFFSET
# define HASH_CALC_OFFSET 0
#endif
#ifndef HASH_CALC_READ
# if BYTE_ORDER == LITTLE_ENDIAN
# define HASH_CALC_READ \
- memcpy(&val, strstart, sizeof(val));
+ val = zng_memread_4(strstart);
# else
# define HASH_CALC_READ \
- memcpy(&val, strstart, sizeof(val)); \
- val = ZSWAP32(val);
+ val = ZSWAP32(zng_memread_4(strstart));
# endif
#endif
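As a hypothetical standalone illustration (not part of the template) of what the two HASH_CALC_READ branches compute: on either host the hash sees the 4 input bytes assembled as a little-endian value, which is why zng_memread_4 suffices on little-endian hosts and a ZSWAP32 is added on big-endian ones:

    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    /* Reference: the 4 bytes at p interpreted as a little-endian uint32_t. */
    static uint32_t read4_le(const uint8_t *p) {
        return (uint32_t)p[0] | ((uint32_t)p[1] << 8) |
               ((uint32_t)p[2] << 16) | ((uint32_t)p[3] << 24);
    }

    /* ZSWAP32 stand-in. */
    static uint32_t swap32(uint32_t v) {
        return (v >> 24) | ((v >> 8) & 0x0000FF00u) | ((v << 8) & 0x00FF0000u) | (v << 24);
    }

    int main(void) {
        const uint8_t strstart[4] = { 0x01, 0x02, 0x03, 0x04 };

        uint32_t val;
        memcpy(&val, strstart, sizeof(val));  /* zng_memread_4 stand-in */

        const uint16_t probe = 1;
        if (*(const uint8_t *)&probe == 0)    /* big-endian host */
            val = swap32(val);

        printf("%s\n", val == read4_le(strstart) ? "same hash input" : "differs");
        return 0;
    }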
uint32_t chain_length, nice_match, best_len, offset;
uint32_t lookahead = s->lookahead;
Pos match_offset = 0;
-#if OPTIMAL_CMP >= 32
- uint8_t scan_start[8];
-#endif
+#if OPTIMAL_CMP >= 64
+ uint64_t scan_start;
+ uint64_t scan_end;
+#elif OPTIMAL_CMP >= 32
+ uint32_t scan_start;
+ uint32_t scan_end;
+#else
uint8_t scan_end[8];
+#endif
#define GOTO_NEXT_CHAIN \
if (--chain_length && (cur_match = prev[cur_match & wmask]) > limit) \
#endif
#if OPTIMAL_CMP >= 64
- memcpy(scan_start, scan, sizeof(uint64_t));
- memcpy(scan_end, scan+offset, sizeof(uint64_t));
+ scan_start = zng_memread_8(scan);
+ scan_end = zng_memread_8(scan+offset);
#elif OPTIMAL_CMP >= 32
- memcpy(scan_start, scan, sizeof(uint32_t));
- memcpy(scan_end, scan+offset, sizeof(uint32_t));
+ scan_start = zng_memread_4(scan);
+ scan_end = zng_memread_4(scan+offset);
#else
scan_end[0] = *(scan+offset);
scan_end[1] = *(scan+offset+1);
#if OPTIMAL_CMP >= 32
if (best_len < sizeof(uint32_t)) {
for (;;) {
- if (zng_memcmp_2(mbase_end+cur_match, scan_end) == 0 &&
- zng_memcmp_2(mbase_start+cur_match, scan_start) == 0)
+ if (zng_memcmp_2(mbase_end+cur_match, &scan_end) == 0 &&
+ zng_memcmp_2(mbase_start+cur_match, &scan_start) == 0)
break;
GOTO_NEXT_CHAIN;
}
# if OPTIMAL_CMP >= 64
} else if (best_len >= sizeof(uint64_t)) {
for (;;) {
- if (zng_memcmp_8(mbase_end+cur_match, scan_end) == 0 &&
- zng_memcmp_8(mbase_start+cur_match, scan_start) == 0)
+ if (zng_memcmp_8(mbase_end+cur_match, &scan_end) == 0 &&
+ zng_memcmp_8(mbase_start+cur_match, &scan_start) == 0)
break;
GOTO_NEXT_CHAIN;
}
# endif
} else {
for (;;) {
- if (zng_memcmp_4(mbase_end+cur_match, scan_end) == 0 &&
- zng_memcmp_4(mbase_start+cur_match, scan_start) == 0)
+ if (zng_memcmp_4(mbase_end+cur_match, &scan_end) == 0 &&
+ zng_memcmp_4(mbase_start+cur_match, &scan_start) == 0)
break;
GOTO_NEXT_CHAIN;
}
#endif
#if OPTIMAL_CMP >= 64
- memcpy(scan_end, scan+offset, sizeof(uint64_t));
+ scan_end = zng_memread_8(scan+offset);
#elif OPTIMAL_CMP >= 32
- memcpy(scan_end, scan+offset, sizeof(uint32_t));
+ scan_end = zng_memread_4(scan+offset);
#else
scan_end[0] = *(scan+offset);
scan_end[1] = *(scan+offset+1);
adler32_c.obj: $(TOP)/arch/generic/adler32_c.c $(TOP)/zbuild.h $(TOP)/functable.h $(TOP)/adler32_p.h
adler32_fold_c.obj: $(TOP)/arch/generic/adler32_fold_c.c $(TOP)/zbuild.h $(TOP)/functable.h
chunkset_c.obj: $(TOP)/arch/generic/chunkset_c.c $(TOP)/zbuild.h $(TOP)/chunkset_tpl.h $(TOP)/inffast_tpl.h
-compare256_c.obj: $(TOP)/arch/generic/compare256_c.c $(TOP)/zbuild.h $(TOP)/zutil_p.h $(TOP)/deflate.h $(TOP)/fallback_builtins.h $(TOP)/match_tpl.h
+compare256_c.obj: $(TOP)/arch/generic/compare256_c.c $(TOP)/zbuild.h $(TOP)/zmemory.h $(TOP)/deflate.h $(TOP)/fallback_builtins.h $(TOP)/match_tpl.h
compress.obj: $(TOP)/compress.c $(TOP)/zbuild.h $(TOP)/zutil.h
cpu_features.obj: $(TOP)/cpu_features.c $(TOP)/cpu_features.h $(TOP)/zbuild.h
crc32.obj: $(TOP)/crc32.c $(TOP)/zbuild.h $(TOP)/functable.h $(TOP)/crc32_braid_tbl.h
deflate_fast.obj: $(TOP)/deflate_fast.c $(TOP)/zbuild.h $(TOP)/deflate.h $(TOP)/deflate_p.h $(TOP)/functable.h
deflate_huff.obj: $(TOP)/deflate_huff.c $(TOP)/zbuild.h $(TOP)/deflate.h $(TOP)/deflate_p.h $(TOP)/functable.h
deflate_medium.obj: $(TOP)/deflate_medium.c $(TOP)/zbuild.h $(TOP)/deflate.h $(TOP)/deflate_p.h $(TOP)/functable.h
-deflate_quick.obj: $(TOP)/deflate_quick.c $(TOP)/zbuild.h $(TOP)/deflate.h $(TOP)/deflate_p.h $(TOP)/functable.h $(TOP)/trees_emit.h $(TOP)/zutil_p.h
+deflate_quick.obj: $(TOP)/deflate_quick.c $(TOP)/zbuild.h $(TOP)/deflate.h $(TOP)/deflate_p.h $(TOP)/functable.h $(TOP)/trees_emit.h $(TOP)/zmemory.h
deflate_rle.obj: $(TOP)/deflate_rle.c $(TOP)/zbuild.h $(TOP)/deflate.h $(TOP)/deflate_p.h $(TOP)/functable.h $(TOP)/compare256_rle.h
deflate_slow.obj: $(TOP)/deflate_slow.c $(TOP)/zbuild.h $(TOP)/deflate.h $(TOP)/deflate_p.h $(TOP)/functable.h
deflate_stored.obj: $(TOP)/deflate_stored.c $(TOP)/zbuild.h $(TOP)/deflate.h $(TOP)/deflate_p.h $(TOP)/functable.h
adler32_c.obj: $(TOP)/arch/generic/adler32_c.c $(TOP)/zbuild.h $(TOP)/functable.h $(TOP)/adler32_p.h
adler32_fold_c.obj: $(TOP)/arch/generic/adler32_fold_c.c $(TOP)/zbuild.h $(TOP)/functable.h
chunkset_c.obj: $(TOP)/arch/generic/chunkset_c.c $(TOP)/zbuild.h $(TOP)/chunkset_tpl.h $(TOP)/inffast_tpl.h
-compare256_c.obj: $(TOP)/arch/generic/compare256_c.c $(TOP)/zbuild.h $(TOP)/zutil_p.h $(TOP)/deflate.h $(TOP)/fallback_builtins.h $(TOP)/match_tpl.h
+compare256_c.obj: $(TOP)/arch/generic/compare256_c.c $(TOP)/zbuild.h $(TOP)/zmemory.h $(TOP)/deflate.h $(TOP)/fallback_builtins.h $(TOP)/match_tpl.h
compress.obj: $(TOP)/compress.c $(TOP)/zbuild.h $(TOP)/zutil.h
cpu_features.obj: $(TOP)/cpu_features.c $(TOP)/cpu_features.h $(TOP)/zbuild.h
crc32.obj: $(TOP)/crc32.c $(TOP)/zbuild.h $(TOP)/functable.h $(TOP)/crc32_braid_tbl.h
deflate_fast.obj: $(TOP)/deflate_fast.c $(TOP)/zbuild.h $(TOP)/deflate.h $(TOP)/deflate_p.h $(TOP)/functable.h
deflate_huff.obj: $(TOP)/deflate_huff.c $(TOP)/zbuild.h $(TOP)/deflate.h $(TOP)/deflate_p.h $(TOP)/functable.h
deflate_medium.obj: $(TOP)/deflate_medium.c $(TOP)/zbuild.h $(TOP)/deflate.h $(TOP)/deflate_p.h $(TOP)/functable.h
-deflate_quick.obj: $(TOP)/deflate_quick.c $(TOP)/zbuild.h $(TOP)/deflate.h $(TOP)/deflate_p.h $(TOP)/functable.h $(TOP)/trees_emit.h $(TOP)/zutil_p.h
+deflate_quick.obj: $(TOP)/deflate_quick.c $(TOP)/zbuild.h $(TOP)/deflate.h $(TOP)/deflate_p.h $(TOP)/functable.h $(TOP)/trees_emit.h $(TOP)/zmemory.h
deflate_rle.obj: $(TOP)/deflate_rle.c $(TOP)/zbuild.h $(TOP)/deflate.h $(TOP)/deflate_p.h $(TOP)/functable.h $(TOP)/compare256_rle.h
deflate_slow.obj: $(TOP)/deflate_slow.c $(TOP)/zbuild.h $(TOP)/deflate.h $(TOP)/deflate_p.h $(TOP)/functable.h
deflate_stored.obj: $(TOP)/deflate_stored.c $(TOP)/zbuild.h $(TOP)/deflate.h $(TOP)/deflate_p.h $(TOP)/functable.h
chunkset_avx2.obj: $(TOP)/arch/x86/chunkset_avx2.c $(TOP)/zbuild.h $(TOP)/chunkset_tpl.h $(TOP)/inffast_tpl.h $(TOP)/arch/generic/chunk_permute_table.h
chunkset_sse2.obj: $(TOP)/arch/x86/chunkset_sse2.c $(TOP)/zbuild.h $(TOP)/chunkset_tpl.h $(TOP)/inffast_tpl.h
chunkset_ssse3.obj: $(TOP)/arch/x86/chunkset_ssse3.c $(TOP)/zbuild.h $(TOP)/chunkset_tpl.h $(TOP)/inffast_tpl.h $(TOP)/arch/generic/chunk_permute_table.h
-compare256_c.obj: $(TOP)/arch/generic/compare256_c.c $(TOP)/zbuild.h $(TOP)/zutil_p.h $(TOP)/deflate.h $(TOP)/fallback_builtins.h $(TOP)/match_tpl.h
-compare256_avx2.obj: $(TOP)/arch/x86/compare256_avx2.c $(TOP)/zbuild.h $(TOP)/zutil_p.h $(TOP)/deflate.h $(TOP)/fallback_builtins.h $(TOP)/match_tpl.h
-compare256_sse2.obj: $(TOP)/arch/x86/compare256_sse2.c $(TOP)/zbuild.h $(TOP)/zutil_p.h $(TOP)/deflate.h $(TOP)/fallback_builtins.h $(TOP)/match_tpl.h
+compare256_c.obj: $(TOP)/arch/generic/compare256_c.c $(TOP)/zbuild.h $(TOP)/zmemory.h $(TOP)/deflate.h $(TOP)/fallback_builtins.h $(TOP)/match_tpl.h
+compare256_avx2.obj: $(TOP)/arch/x86/compare256_avx2.c $(TOP)/zbuild.h $(TOP)/zmemory.h $(TOP)/deflate.h $(TOP)/fallback_builtins.h $(TOP)/match_tpl.h
+compare256_sse2.obj: $(TOP)/arch/x86/compare256_sse2.c $(TOP)/zbuild.h $(TOP)/zmemory.h $(TOP)/deflate.h $(TOP)/fallback_builtins.h $(TOP)/match_tpl.h
compress.obj: $(TOP)/compress.c $(TOP)/zbuild.h $(TOP)/zutil.h
cpu_features.obj: $(TOP)/cpu_features.c $(TOP)/cpu_features.h $(TOP)/zbuild.h
crc32.obj: $(TOP)/crc32.c $(TOP)/zbuild.h $(TOP)/functable.h $(TOP)/crc32_braid_tbl.h
deflate_fast.obj: $(TOP)/deflate_fast.c $(TOP)/zbuild.h $(TOP)/deflate.h $(TOP)/deflate_p.h $(TOP)/functable.h
deflate_huff.obj: $(TOP)/deflate_huff.c $(TOP)/zbuild.h $(TOP)/deflate.h $(TOP)/deflate_p.h $(TOP)/functable.h
deflate_medium.obj: $(TOP)/deflate_medium.c $(TOP)/zbuild.h $(TOP)/deflate.h $(TOP)/deflate_p.h $(TOP)/functable.h
-deflate_quick.obj: $(TOP)/deflate_quick.c $(TOP)/zbuild.h $(TOP)/deflate.h $(TOP)/deflate_p.h $(TOP)/functable.h $(TOP)/trees_emit.h $(TOP)/zutil_p.h
+deflate_quick.obj: $(TOP)/deflate_quick.c $(TOP)/zbuild.h $(TOP)/deflate.h $(TOP)/deflate_p.h $(TOP)/functable.h $(TOP)/trees_emit.h $(TOP)/zmemory.h
deflate_rle.obj: $(TOP)/deflate_rle.c $(TOP)/zbuild.h $(TOP)/deflate.h $(TOP)/deflate_p.h $(TOP)/functable.h $(TOP)/compare256_rle.h
deflate_slow.obj: $(TOP)/deflate_slow.c $(TOP)/zbuild.h $(TOP)/deflate.h $(TOP)/deflate_p.h $(TOP)/functable.h
deflate_stored.obj: $(TOP)/deflate_stored.c $(TOP)/zbuild.h $(TOP)/deflate.h $(TOP)/deflate_p.h $(TOP)/functable.h
--- /dev/null
+++ b/zmemory.h
+/* zmemory.h -- Private inline functions used internally in zlib-ng
+ * For conditions of distribution and use, see copyright notice in zlib.h
+ */
+
+#ifndef _ZMEMORY_H
+#define _ZMEMORY_H
+
+#if defined(__GNUC__) && (__GNUC__ >= 4)
+# define HAVE_MAY_ALIAS
+#endif
+
+static inline uint16_t zng_memread_2(const void *ptr) {
+#if defined(HAVE_MAY_ALIAS)
+ typedef struct { uint16_t val; } __attribute__ ((__packed__, __may_alias__)) unaligned_uint16_t;
+ return ((const unaligned_uint16_t *)ptr)->val;
+#else
+ uint16_t val;
+ memcpy(&val, ptr, sizeof(val));
+ return val;
+#endif
+}
+
+static inline uint32_t zng_memread_4(const void *ptr) {
+#if defined(HAVE_MAY_ALIAS)
+ typedef struct { uint32_t val; } __attribute__ ((__packed__, __may_alias__)) unaligned_uint32_t;
+ return ((const unaligned_uint32_t *)ptr)->val;
+#else
+ uint32_t val;
+ memcpy(&val, ptr, sizeof(val));
+ return val;
+#endif
+}
+
+static inline uint64_t zng_memread_8(const void *ptr) {
+#if defined(HAVE_MAY_ALIAS)
+ typedef struct { uint64_t val; } __attribute__ ((__packed__, __may_alias__)) unaligned_uint64_t;
+ return ((const unaligned_uint64_t *)ptr)->val;
+#else
+ uint64_t val;
+ memcpy(&val, ptr, sizeof(val));
+ return val;
+#endif
+}
+
+static inline void zng_memwrite_2(void *ptr, uint16_t val) {
+#if defined(HAVE_MAY_ALIAS)
+ typedef struct { uint16_t val; } __attribute__ ((__packed__, __may_alias__)) unaligned_uint16_t;
+ ((unaligned_uint16_t *)ptr)->val = val;
+#else
+ memcpy(ptr, &val, sizeof(val));
+#endif
+}
+
+static inline void zng_memwrite_4(void *ptr, uint32_t val) {
+#if defined(HAVE_MAY_ALIAS)
+ typedef struct { uint32_t val; } __attribute__ ((__packed__, __may_alias__)) unaligned_uint32_t;
+ ((unaligned_uint32_t *)ptr)->val = val;
+#else
+ memcpy(ptr, &val, sizeof(val));
+#endif
+}
+
+static inline void zng_memwrite_8(void *ptr, uint64_t val) {
+#if defined(HAVE_MAY_ALIAS)
+ typedef struct { uint64_t val; } __attribute__ ((__packed__, __may_alias__)) unaligned_uint64_t;
+ ((unaligned_uint64_t *)ptr)->val = val;
+#else
+ memcpy(ptr, &val, sizeof(val));
+#endif
+}
+
+/* Use zng_memread_* instead of memcmp to avoid older compilers not converting memcmp
+ calls to unaligned comparisons when unaligned access is supported. Use memcmp only when
+ unaligned support is not available to avoid an extra call to memcpy. */
+static inline int32_t zng_memcmp_2(const void *src0, const void *src1) {
+#if defined(HAVE_MAY_ALIAS) || OPTIMAL_CMP >= 16
+ return zng_memread_2(src0) != zng_memread_2(src1);
+#else
+ return memcmp(src0, src1, 2);
+#endif
+}
+
+static inline int32_t zng_memcmp_4(const void *src0, const void *src1) {
+#if defined(HAVE_MAY_ALIAS) || OPTIMAL_CMP >= 32
+ return zng_memread_4(src0) != zng_memread_4(src1);
+#else
+ return memcmp(src0, src1, 4);
+#endif
+}
+
+static inline int32_t zng_memcmp_8(const void *src0, const void *src1) {
+#if defined(HAVE_MAY_ALIAS) || OPTIMAL_CMP >= 64
+ return zng_memread_8(src0) != zng_memread_8(src1);
+#else
+ return memcmp(src0, src1, 8);
+#endif
+}
+
+#endif
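A quick usage sketch for the new helpers (hypothetical test program, not part of the patch; in-tree the fixed-width types presumably come via zbuild.h, so <stdint.h> and <string.h> stand in here):

    #include <stdint.h>
    #include <string.h>
    #include <stdio.h>
    #include "zmemory.h"

    int main(void) {
        uint8_t buf[16] = {0};

        /* Store and load a 32-bit value at an odd, i.e. unaligned, offset. */
        zng_memwrite_4(buf + 1, 0x11223344u);
        uint32_t v = zng_memread_4(buf + 1);
        printf("read back 0x%08x\n", v);        /* 0x11223344 */

        /* 8-byte compare; a plain integer comparison when may_alias or a
         * suitable OPTIMAL_CMP is available, memcmp otherwise. */
        printf("equal: %d\n", zng_memcmp_8(buf + 1, buf + 1) == 0);
        return 0;
    }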
#endif
}
-/* Use memcpy instead of memcmp to avoid older compilers not converting memcmp calls to
- unaligned comparisons when unaligned access is supported. */
-static inline int32_t zng_memcmp_2(const void *src0, const void *src1) {
- uint16_t src0_cmp, src1_cmp;
-
- memcpy(&src0_cmp, src0, sizeof(src0_cmp));
- memcpy(&src1_cmp, src1, sizeof(src1_cmp));
-
- return src0_cmp != src1_cmp;
-}
-
-static inline int32_t zng_memcmp_4(const void *src0, const void *src1) {
- uint32_t src0_cmp, src1_cmp;
-
- memcpy(&src0_cmp, src0, sizeof(src0_cmp));
- memcpy(&src1_cmp, src1, sizeof(src1_cmp));
-
- return src0_cmp != src1_cmp;
-}
-
-static inline int32_t zng_memcmp_8(const void *src0, const void *src1) {
- uint64_t src0_cmp, src1_cmp;
-
- memcpy(&src0_cmp, src0, sizeof(src0_cmp));
- memcpy(&src1_cmp, src1, sizeof(src1_cmp));
-
- return src0_cmp != src1_cmp;
-}
-
#endif