git.ipfire.org Git - thirdparty/zlib-ng.git/commitdiff
Use GCC's may_alias attribute for unaligned memory access
author Cameron Cawley <ccawley2011@gmail.com>
Thu, 27 Jul 2023 20:07:29 +0000 (21:07 +0100)
committer Hans Kristian Rosbach <hk-github@circlestorm.org>
Tue, 24 Dec 2024 11:55:44 +0000 (12:55 +0100)
25 files changed:
arch/arm/chunkset_neon.c
arch/arm/compare256_neon.c
arch/generic/Makefile.in
arch/generic/chunkset_c.c
arch/generic/compare256_c.c
arch/power/chunkset_power8.c
arch/power/compare256_power9.c
arch/riscv/compare256_rvv.c
arch/x86/chunkset_avx2.c
arch/x86/chunkset_avx512.c
arch/x86/chunkset_sse2.c
arch/x86/chunkset_ssse3.c
arch/x86/compare256_avx2.c
arch/x86/compare256_sse2.c
compare256_rle.h
deflate.h
deflate_quick.c
inflate_p.h
insert_string_tpl.h
match_tpl.h
win32/Makefile.a64
win32/Makefile.arm
win32/Makefile.msc
zmemory.h [new file with mode: 0644]
zutil_p.h

index da9d7f95b0d9e561fa3fdbf9bbab74549e913c36..68c9fef699b49e46ebbb6d7387c328f9b910faa1 100644 (file)
@@ -5,6 +5,7 @@
 #ifdef ARM_NEON
 #include "neon_intrins.h"
 #include "zbuild.h"
+#include "zmemory.h"
 #include "arch/generic/chunk_permute_table.h"
 
 typedef uint8x16_t chunk_t;
@@ -31,21 +32,15 @@ static const lut_rem_pair perm_idx_lut[13] = {
 };
 
 static inline void chunkmemset_2(uint8_t *from, chunk_t *chunk) {
-    uint16_t tmp;
-    memcpy(&tmp, from, sizeof(tmp));
-    *chunk = vreinterpretq_u8_u16(vdupq_n_u16(tmp));
+    *chunk = vreinterpretq_u8_u16(vdupq_n_u16(zng_memread_2(from)));
 }
 
 static inline void chunkmemset_4(uint8_t *from, chunk_t *chunk) {
-    uint32_t tmp;
-    memcpy(&tmp, from, sizeof(tmp));
-    *chunk = vreinterpretq_u8_u32(vdupq_n_u32(tmp));
+    *chunk = vreinterpretq_u8_u32(vdupq_n_u32(zng_memread_4(from)));
 }
 
 static inline void chunkmemset_8(uint8_t *from, chunk_t *chunk) {
-    uint64_t tmp;
-    memcpy(&tmp, from, sizeof(tmp));
-    *chunk = vreinterpretq_u8_u64(vdupq_n_u64(tmp));
+    *chunk = vreinterpretq_u8_u64(vdupq_n_u64(zng_memread_8(from)));
 }
 
 #define CHUNKSIZE        chunksize_neon
index 87d14c89c09bc1fe5dcff06ae042aa737e597bad..3d05152f348c1c0232cee8b8d67780d8b0c2362c 100644 (file)
@@ -4,7 +4,7 @@
  */
 
 #include "zbuild.h"
-#include "zutil_p.h"
+#include "zmemory.h"
 #include "deflate.h"
 #include "fallback_builtins.h"
 
index 32c8242d026f1567bfcabee3ce535508b93719ba..15d51d313575a849f7d18a27798dc5eafe8db533 100644 (file)
@@ -40,10 +40,10 @@ chunkset_c.o: $(SRCDIR)/chunkset_c.c  $(SRCTOP)/zbuild.h $(SRCTOP)/chunkset_tpl.
 chunkset_c.lo: $(SRCDIR)/chunkset_c.c  $(SRCTOP)/zbuild.h $(SRCTOP)/chunkset_tpl.h $(SRCTOP)/inffast_tpl.h
        $(CC) $(SFLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/chunkset_c.c
 
-compare256_c.o: $(SRCDIR)/compare256_c.c  $(SRCTOP)/zbuild.h $(SRCTOP)/zutil_p.h $(SRCTOP)/deflate.h $(SRCTOP)/fallback_builtins.h
+compare256_c.o: $(SRCDIR)/compare256_c.c  $(SRCTOP)/zbuild.h $(SRCTOP)/zmemory.h $(SRCTOP)/deflate.h $(SRCTOP)/fallback_builtins.h
        $(CC) $(CFLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/compare256_c.c
 
-compare256_c.lo: $(SRCDIR)/compare256_c.c  $(SRCTOP)/zbuild.h $(SRCTOP)/zutil_p.h $(SRCTOP)/deflate.h $(SRCTOP)/fallback_builtins.h
+compare256_c.lo: $(SRCDIR)/compare256_c.c  $(SRCTOP)/zbuild.h $(SRCTOP)/zmemory.h $(SRCTOP)/deflate.h $(SRCTOP)/fallback_builtins.h
        $(CC) $(SFLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/compare256_c.c
 
 crc32_braid_c.o: $(SRCDIR)/crc32_braid_c.c  $(SRCTOP)/zbuild.h $(SRCTOP)/crc32_braid_p.h $(SRCTOP)/crc32_braid_tbl.h
index 7b2bb7ba3676a730d16273bd8cd80a653a7a3e0d..0a585e6caaf52be08d3ed9814fc2dc7965fb7816 100644 (file)
@@ -3,6 +3,7 @@
  */
 
 #include "zbuild.h"
+#include "zmemory.h"
 
 typedef uint64_t chunk_t;
 
@@ -12,21 +13,20 @@ typedef uint64_t chunk_t;
 #define HAVE_CHUNKMEMSET_8
 
 static inline void chunkmemset_4(uint8_t *from, chunk_t *chunk) {
-    uint8_t *dest = (uint8_t *)chunk;
-    memcpy(dest, from, sizeof(uint32_t));
-    memcpy(dest+4, from, sizeof(uint32_t));
+    uint32_t tmp = zng_memread_4(from);
+    *chunk = tmp | ((chunk_t)tmp << 32);
 }
 
 static inline void chunkmemset_8(uint8_t *from, chunk_t *chunk) {
-    memcpy(chunk, from, sizeof(uint64_t));
+    *chunk = zng_memread_8(from);
 }
 
 static inline void loadchunk(uint8_t const *s, chunk_t *chunk) {
-    memcpy(chunk, (uint8_t *)s, sizeof(uint64_t));
+    *chunk = zng_memread_8(s);
 }
 
 static inline void storechunk(uint8_t *out, chunk_t *chunk) {
-    memcpy(out, chunk, sizeof(uint64_t));
+    zng_memwrite_8(out, *chunk);
 }
 
 #define CHUNKSIZE        chunksize_c
index d20c74ce8483b3e5b925fbf233f8759657d45cd8..ae0e71796995f5ade0ec3e0eba3e2e96add885a5 100644 (file)
@@ -4,7 +4,7 @@
  */
 
 #include "zbuild.h"
-#include "zutil_p.h"
+#include "zmemory.h"
 #include "deflate.h"
 #include "fallback_builtins.h"
 
@@ -107,8 +107,8 @@ static inline uint32_t compare256_unaligned_32_static(const uint8_t *src0, const
     do {
         uint32_t sv, mv, diff;
 
-        memcpy(&sv, src0, sizeof(sv));
-        memcpy(&mv, src1, sizeof(mv));
+        sv = zng_memread_4(src0);
+        mv = zng_memread_4(src1);
 
         diff = sv ^ mv;
         if (diff) {
@@ -151,8 +151,8 @@ static inline uint32_t compare256_unaligned_64_static(const uint8_t *src0, const
     do {
         uint64_t sv, mv, diff;
 
-        memcpy(&sv, src0, sizeof(sv));
-        memcpy(&mv, src1, sizeof(mv));
+        sv = zng_memread_8(src0);
+        mv = zng_memread_8(src1);
 
         diff = sv ^ mv;
         if (diff) {
index aef19732736f57eeab01ba4141a6d1eef896b54b..673fe0e1128a8166b9aaf549662f50531ee70965 100644 (file)
@@ -5,6 +5,7 @@
 #ifdef POWER8_VSX
 #include <altivec.h>
 #include "zbuild.h"
+#include "zmemory.h"
 
 typedef vector unsigned char chunk_t;
 
@@ -15,21 +16,15 @@ typedef vector unsigned char chunk_t;
 #define HAVE_CHUNKMEMSET_8
 
 static inline void chunkmemset_2(uint8_t *from, chunk_t *chunk) {
-    uint16_t tmp;
-    memcpy(&tmp, from, sizeof(tmp));
-    *chunk = (vector unsigned char)vec_splats(tmp);
+    *chunk = (vector unsigned char)vec_splats(zng_memread_2(from));
 }
 
 static inline void chunkmemset_4(uint8_t *from, chunk_t *chunk) {
-    uint32_t tmp;
-    memcpy(&tmp, from, sizeof(tmp));
-    *chunk = (vector unsigned char)vec_splats(tmp);
+    *chunk = (vector unsigned char)vec_splats(zng_memread_4(from));
 }
 
 static inline void chunkmemset_8(uint8_t *from, chunk_t *chunk) {
-    uint64_t tmp;
-    memcpy(&tmp, from, sizeof(tmp));
-    *chunk = (vector unsigned char)vec_splats((unsigned long long)tmp);
+    *chunk = (vector unsigned char)vec_splats((unsigned long long)zng_memread_8(from));
 }
 
 static inline void loadchunk(uint8_t const *s, chunk_t *chunk) {
index c8be498e4f60b2f3b84c90b5a1d4f9f7de923e20..2875719c47c7c5cbe919f89e7ea5885fe0b05d66 100644 (file)
@@ -6,7 +6,7 @@
 #ifdef POWER9
 #include <altivec.h>
 #include "zbuild.h"
-#include "zutil_p.h"
+#include "zmemory.h"
 #include "deflate.h"
 #include "zendian.h"
 
index 3d6c3e3aa5b1e923bb864676ab3fb22ba14eba31..3ddb4db08033cb8b6b3824518c1dde901c803eeb 100644 (file)
@@ -7,7 +7,7 @@
 #ifdef RISCV_RVV
 
 #include "zbuild.h"
-#include "zutil_p.h"
+#include "zmemory.h"
 #include "deflate.h"
 #include "fallback_builtins.h"
 
index bbcdcaea930baef8d16b366ec61ae19e09ac0fc8..b9051bb90e4f7029f70066b86bf9d4b4efd49aab 100644 (file)
@@ -2,6 +2,7 @@
  * For conditions of distribution and use, see copyright notice in zlib.h
  */
 #include "zbuild.h"
+#include "zmemory.h"
 
 #ifdef X86_AVX2
 #include "avx2_tables.h"
@@ -19,21 +20,15 @@ typedef __m128i halfchunk_t;
 #define HAVE_HALF_CHUNK
 
 static inline void chunkmemset_2(uint8_t *from, chunk_t *chunk) {
-    int16_t tmp;
-    memcpy(&tmp, from, sizeof(tmp));
-    *chunk = _mm256_set1_epi16(tmp);
+    *chunk = _mm256_set1_epi16(zng_memread_2(from));
 }
 
 static inline void chunkmemset_4(uint8_t *from, chunk_t *chunk) {
-    int32_t tmp;
-    memcpy(&tmp, from, sizeof(tmp));
-    *chunk = _mm256_set1_epi32(tmp);
+    *chunk = _mm256_set1_epi32(zng_memread_4(from));
 }
 
 static inline void chunkmemset_8(uint8_t *from, chunk_t *chunk) {
-    int64_t tmp;
-    memcpy(&tmp, from, sizeof(tmp));
-    *chunk = _mm256_set1_epi64x(tmp);
+    *chunk = _mm256_set1_epi64x(zng_memread_8(from));
 }
 
 static inline void chunkmemset_16(uint8_t *from, chunk_t *chunk) {
index 3d51ad1d9b85c747c08b3f9139fa43ed573bd33c..929d04cdd9ad3b214a3a492ede7110c31ba64fb2 100644 (file)
@@ -2,6 +2,7 @@
  * For conditions of distribution and use, see copyright notice in zlib.h
  */
 #include "zbuild.h"
+#include "zmemory.h"
 
 #ifdef X86_AVX512
 
@@ -33,21 +34,15 @@ static inline mask_t gen_mask(unsigned len) {
 }
 
 static inline void chunkmemset_2(uint8_t *from, chunk_t *chunk) {
-    int16_t tmp;
-    memcpy(&tmp, from, sizeof(tmp));
-    *chunk = _mm256_set1_epi16(tmp);
+    *chunk = _mm256_set1_epi16(zng_memread_2(from));
 }
 
 static inline void chunkmemset_4(uint8_t *from, chunk_t *chunk) {
-    int32_t tmp;
-    memcpy(&tmp, from, sizeof(tmp));
-    *chunk = _mm256_set1_epi32(tmp);
+    *chunk = _mm256_set1_epi32(zng_memread_4(from));
 }
 
 static inline void chunkmemset_8(uint8_t *from, chunk_t *chunk) {
-    int64_t tmp;
-    memcpy(&tmp, from, sizeof(tmp));
-    *chunk = _mm256_set1_epi64x(tmp);
+    *chunk = _mm256_set1_epi64x(zng_memread_8(from));
 }
 
 static inline void chunkmemset_16(uint8_t *from, chunk_t *chunk) {
index 0b1593b5b50d46f1b9d393921f270944b9423af4..dcfe2c70d23f06197fd809ec7536d1dfa01dfade 100644 (file)
@@ -3,6 +3,7 @@
  */
 
 #include "zbuild.h"
+#include "zmemory.h"
 
 #ifdef X86_SSE2
 #include <immintrin.h>
@@ -14,21 +15,15 @@ typedef __m128i chunk_t;
 #define HAVE_CHUNKMEMSET_8
 
 static inline void chunkmemset_2(uint8_t *from, chunk_t *chunk) {
-    int16_t tmp;
-    memcpy(&tmp, from, sizeof(tmp));
-    *chunk = _mm_set1_epi16(tmp);
+    *chunk = _mm_set1_epi16(zng_memread_2(from));
 }
 
 static inline void chunkmemset_4(uint8_t *from, chunk_t *chunk) {
-    int32_t tmp;
-    memcpy(&tmp, from, sizeof(tmp));
-    *chunk = _mm_set1_epi32(tmp);
+    *chunk = _mm_set1_epi32(zng_memread_4(from));
 }
 
 static inline void chunkmemset_8(uint8_t *from, chunk_t *chunk) {
-    int64_t tmp;
-    memcpy(&tmp, from, sizeof(tmp));
-    *chunk = _mm_set1_epi64x(tmp);
+    *chunk = _mm_set1_epi64x(zng_memread_8(from));
 }
 
 static inline void loadchunk(uint8_t const *s, chunk_t *chunk) {
index deedb6ce3c4820288a201f1e46dcd50730302103..75b698c61b011f712c81a915e139209692019aad 100644 (file)
@@ -3,6 +3,7 @@
  */
 
 #include "zbuild.h"
+#include "zmemory.h"
 
 #if defined(X86_SSSE3)
 #include <immintrin.h>
@@ -33,21 +34,15 @@ static const lut_rem_pair perm_idx_lut[13] = {
 
 
 static inline void chunkmemset_2(uint8_t *from, chunk_t *chunk) {
-    int16_t tmp;
-    memcpy(&tmp, from, sizeof(tmp));
-    *chunk = _mm_set1_epi16(tmp);
+    *chunk = _mm_set1_epi16(zng_memread_2(from));
 }
 
 static inline void chunkmemset_4(uint8_t *from, chunk_t *chunk) {
-    int32_t tmp;
-    memcpy(&tmp, from, sizeof(tmp));
-    *chunk = _mm_set1_epi32(tmp);
+    *chunk = _mm_set1_epi32(zng_memread_4(from));
 }
 
 static inline void chunkmemset_8(uint8_t *from, chunk_t *chunk) {
-    int64_t tmp;
-    memcpy(&tmp, from, sizeof(tmp));
-    *chunk = _mm_set1_epi64x(tmp);
+    *chunk = _mm_set1_epi64x(zng_memread_8(from));
 }
 
 static inline void loadchunk(uint8_t const *s, chunk_t *chunk) {
index d2c835e4ee8ec4c99111134b7008bf2a0722f6e1..8a0213c3a62edf6e428c491ce99c4ac93239629c 100644 (file)
@@ -4,7 +4,7 @@
  */
 
 #include "zbuild.h"
-#include "zutil_p.h"
+#include "zmemory.h"
 #include "deflate.h"
 #include "fallback_builtins.h"
 
index 216bb3a705c4a4329e96d3a4912163195737595a..25b65316a8b8b8cfa2f330631e34af5efbc7fb8f 100644 (file)
@@ -4,7 +4,7 @@
  */
 
 #include "zbuild.h"
-#include "zutil_p.h"
+#include "zmemory.h"
 #include "deflate.h"
 #include "fallback_builtins.h"
 
index c7a4086af663cfbaf4d358a638f3e0fa40a77082..9940a284954832d3d955c021fd933b69ab9ebf3e 100644 (file)
@@ -4,6 +4,7 @@
  */
 
 #include "zbuild.h"
+#include "zmemory.h"
 #include "fallback_builtins.h"
 #include "zendian.h"
 
@@ -47,25 +48,21 @@ static inline uint32_t compare256_rle_c(const uint8_t *src0, const uint8_t *src1
 /* 16-bit unaligned integer comparison */
 static inline uint32_t compare256_rle_unaligned_16(const uint8_t *src0, const uint8_t *src1) {
     uint32_t len = 0;
-    uint16_t src0_cmp, src1_cmp;
+    uint16_t src0_cmp;
 
-    memcpy(&src0_cmp, src0, sizeof(src0_cmp));
+    src0_cmp = zng_memread_2(src0);
 
     do {
-        memcpy(&src1_cmp, src1, sizeof(src1_cmp));
-        if (src0_cmp != src1_cmp)
+        if (src0_cmp != zng_memread_2(src1))
             return len + (*src0 == *src1);
         src1 += 2, len += 2;
-        memcpy(&src1_cmp, src1, sizeof(src1_cmp));
-        if (src0_cmp != src1_cmp)
+        if (src0_cmp != zng_memread_2(src1))
             return len + (*src0 == *src1);
         src1 += 2, len += 2;
-        memcpy(&src1_cmp, src1, sizeof(src1_cmp));
-        if (src0_cmp != src1_cmp)
+        if (src0_cmp != zng_memread_2(src1))
             return len + (*src0 == *src1);
         src1 += 2, len += 2;
-        memcpy(&src1_cmp, src1, sizeof(src1_cmp));
-        if (src0_cmp != src1_cmp)
+        if (src0_cmp != zng_memread_2(src1))
             return len + (*src0 == *src1);
         src1 += 2, len += 2;
     } while (len < 256);
@@ -79,13 +76,13 @@ static inline uint32_t compare256_rle_unaligned_32(const uint8_t *src0, const ui
     uint32_t sv, len = 0;
     uint16_t src0_cmp;
 
-    memcpy(&src0_cmp, src0, sizeof(src0_cmp));
+    src0_cmp = zng_memread_2(src0);
     sv = ((uint32_t)src0_cmp << 16) | src0_cmp;
 
     do {
         uint32_t mv, diff;
 
-        memcpy(&mv, src1, sizeof(mv));
+        mv = zng_memread_4(src1);
 
         diff = sv ^ mv;
         if (diff) {
@@ -112,14 +109,14 @@ static inline uint32_t compare256_rle_unaligned_64(const uint8_t *src0, const ui
     uint16_t src0_cmp;
     uint64_t sv;
 
-    memcpy(&src0_cmp, src0, sizeof(src0_cmp));
+    src0_cmp = zng_memread_2(src0);
     src0_cmp32 = ((uint32_t)src0_cmp << 16) | src0_cmp;
     sv = ((uint64_t)src0_cmp32 << 32) | src0_cmp32;
 
     do {
         uint64_t mv, diff;
 
-        memcpy(&mv, src1, sizeof(mv));
+        mv = zng_memread_8(src1);
 
         diff = sv ^ mv;
         if (diff) {
index 2077f068e7a988098fbfc623dc8c84d7dda1af15..4b79f8f43b2c83b09578981d0c2316cb8522b4b5 100644 (file)
--- a/deflate.h
+++ b/deflate.h
@@ -12,6 +12,7 @@
 
 #include "zutil.h"
 #include "zendian.h"
+#include "zmemory.h"
 #include "crc32.h"
 
 #ifdef S390_DFLTCC_DEFLATE
@@ -355,7 +356,7 @@ static inline void put_short(deflate_state *s, uint16_t w) {
 #if BYTE_ORDER == BIG_ENDIAN
     w = ZSWAP16(w);
 #endif
-    memcpy(&s->pending_buf[s->pending], &w, sizeof(w));
+    zng_memwrite_2(&s->pending_buf[s->pending], w);
     s->pending += 2;
 }
 
@@ -367,7 +368,7 @@ static inline void put_short_msb(deflate_state *s, uint16_t w) {
 #if BYTE_ORDER == LITTLE_ENDIAN
     w = ZSWAP16(w);
 #endif
-    memcpy(&s->pending_buf[s->pending], &w, sizeof(w));
+    zng_memwrite_2(&s->pending_buf[s->pending], w);
     s->pending += 2;
 }
 
@@ -379,7 +380,7 @@ static inline void put_uint32(deflate_state *s, uint32_t dw) {
 #if BYTE_ORDER == BIG_ENDIAN
     dw = ZSWAP32(dw);
 #endif
-    memcpy(&s->pending_buf[s->pending], &dw, sizeof(dw));
+    zng_memwrite_4(&s->pending_buf[s->pending], dw);
     s->pending += 4;
 }
 
@@ -391,7 +392,7 @@ static inline void put_uint32_msb(deflate_state *s, uint32_t dw) {
 #if BYTE_ORDER == LITTLE_ENDIAN
     dw = ZSWAP32(dw);
 #endif
-    memcpy(&s->pending_buf[s->pending], &dw, sizeof(dw));
+    zng_memwrite_4(&s->pending_buf[s->pending], dw);
     s->pending += 4;
 }
 
@@ -403,7 +404,7 @@ static inline void put_uint64(deflate_state *s, uint64_t lld) {
 #if BYTE_ORDER == BIG_ENDIAN
     lld = ZSWAP64(lld);
 #endif
-    memcpy(&s->pending_buf[s->pending], &lld, sizeof(lld));
+    zng_memwrite_8(&s->pending_buf[s->pending], lld);
     s->pending += 8;
 }
 
index 91c96ac52ed32d07f088d422cae6fe1f766f42b3..d5fd986d7ac6d58dee3efa062c9ee3117d891da2 100644 (file)
@@ -18,7 +18,7 @@
  */
 
 #include "zbuild.h"
-#include "zutil_p.h"
+#include "zmemory.h"
 #include "deflate.h"
 #include "deflate_p.h"
 #include "functable.h"
index 54c8dec90052cb9662201f0dffbc9fd831395030..179f76f7340c6e11dfe2ad4d3236ea2f8fb14d26 100644 (file)
@@ -6,6 +6,7 @@
 #define INFLATE_P_H
 
 #include <stdlib.h>
+#include "zmemory.h"
 
 /* Architecture-specific hooks. */
 #ifdef S390_DFLTCC_INFLATE
 
 /* Load 64 bits from IN and place the bytes at offset BITS in the result. */
 static inline uint64_t load_64_bits(const unsigned char *in, unsigned bits) {
-    uint64_t chunk;
-    memcpy(&chunk, in, sizeof(chunk));
+    uint64_t chunk = zng_memread_8(in);
 
 #if BYTE_ORDER == LITTLE_ENDIAN
     return chunk << bits;
@@ -175,7 +175,10 @@ static inline uint8_t* chunkcopy_safe(uint8_t *out, uint8_t *from, uint64_t len,
     uint64_t non_olap_size = llabs(from - out); // llabs vs labs for compatibility with windows
 
     /* So this doesn't give use a worst case scenario of function calls in a loop,
-     * we want to instead break this down into copy blocks of fixed lengths */
+     * we want to instead break this down into copy blocks of fixed lengths
+     *
+     * TODO: The memcpy calls aren't inlined on architectures with strict memory alignment
+     */
     while (len) {
         tocopy = MIN(non_olap_size, len);
         len -= tocopy;
index e7037c04e69b9a713d6b9b0ce07a84f3e2da0929..1548ca741ad19b5ac8346e39cebc1b80016f9a03 100644 (file)
@@ -22,6 +22,8 @@
  *
  */
 
+#include "zmemory.h"
+
 #ifndef HASH_CALC_OFFSET
 #  define HASH_CALC_OFFSET 0
 #endif
 #ifndef HASH_CALC_READ
 #  if BYTE_ORDER == LITTLE_ENDIAN
 #    define HASH_CALC_READ \
-        memcpy(&val, strstart, sizeof(val));
+        val = zng_memread_4(strstart);
 #  else
 #    define HASH_CALC_READ \
-        memcpy(&val, strstart, sizeof(val)); \
-        val = ZSWAP32(val);
+        val = ZSWAP32(zng_memread_4(strstart));
 #  endif
 #endif
 
index f44da750fb355ccbae1fe0c015943115878f1a10..5d00bd013987f11797ba8ed31b10280cd82b19b8 100644 (file)
@@ -40,10 +40,15 @@ Z_INTERNAL uint32_t LONGEST_MATCH(deflate_state *const s, Pos cur_match) {
     uint32_t chain_length, nice_match, best_len, offset;
     uint32_t lookahead = s->lookahead;
     Pos match_offset = 0;
-#if OPTIMAL_CMP >= 32
-    uint8_t scan_start[8];
-#endif
+#if OPTIMAL_CMP >= 64
+    uint64_t scan_start;
+    uint64_t scan_end;
+#elif OPTIMAL_CMP >= 32
+    uint32_t scan_start;
+    uint32_t scan_end;
+#else
     uint8_t scan_end[8];
+#endif
 
 #define GOTO_NEXT_CHAIN \
     if (--chain_length && (cur_match = prev[cur_match & wmask]) > limit) \
@@ -70,11 +75,11 @@ Z_INTERNAL uint32_t LONGEST_MATCH(deflate_state *const s, Pos cur_match) {
 #endif
 
 #if OPTIMAL_CMP >= 64
-    memcpy(scan_start, scan, sizeof(uint64_t));
-    memcpy(scan_end, scan+offset, sizeof(uint64_t));
+    scan_start = zng_memread_8(scan);
+    scan_end = zng_memread_8(scan+offset);
 #elif OPTIMAL_CMP >= 32
-    memcpy(scan_start, scan, sizeof(uint32_t));
-    memcpy(scan_end, scan+offset, sizeof(uint32_t));
+    scan_start = zng_memread_4(scan);
+    scan_end = zng_memread_4(scan+offset);
 #else
     scan_end[0] = *(scan+offset);
     scan_end[1] = *(scan+offset+1);
@@ -141,24 +146,24 @@ Z_INTERNAL uint32_t LONGEST_MATCH(deflate_state *const s, Pos cur_match) {
 #if OPTIMAL_CMP >= 32
         if (best_len < sizeof(uint32_t)) {
             for (;;) {
-                if (zng_memcmp_2(mbase_end+cur_match, scan_end) == 0 &&
-                    zng_memcmp_2(mbase_start+cur_match, scan_start) == 0)
+                if (zng_memcmp_2(mbase_end+cur_match, &scan_end) == 0 &&
+                    zng_memcmp_2(mbase_start+cur_match, &scan_start) == 0)
                     break;
                 GOTO_NEXT_CHAIN;
             }
 #  if OPTIMAL_CMP >= 64
         } else if (best_len >= sizeof(uint64_t)) {
             for (;;) {
-                if (zng_memcmp_8(mbase_end+cur_match, scan_end) == 0 &&
-                    zng_memcmp_8(mbase_start+cur_match, scan_start) == 0)
+                if (zng_memcmp_8(mbase_end+cur_match, &scan_end) == 0 &&
+                    zng_memcmp_8(mbase_start+cur_match, &scan_start) == 0)
                     break;
                 GOTO_NEXT_CHAIN;
             }
 #  endif
         } else {
             for (;;) {
-                if (zng_memcmp_4(mbase_end+cur_match, scan_end) == 0 &&
-                    zng_memcmp_4(mbase_start+cur_match, scan_start) == 0)
+                if (zng_memcmp_4(mbase_end+cur_match, &scan_end) == 0 &&
+                    zng_memcmp_4(mbase_start+cur_match, &scan_start) == 0)
                     break;
                 GOTO_NEXT_CHAIN;
             }
@@ -197,9 +202,9 @@ Z_INTERNAL uint32_t LONGEST_MATCH(deflate_state *const s, Pos cur_match) {
 #endif
 
 #if OPTIMAL_CMP >= 64
-            memcpy(scan_end, scan+offset, sizeof(uint64_t));
+            scan_end = zng_memread_8(scan+offset);
 #elif OPTIMAL_CMP >= 32
-            memcpy(scan_end, scan+offset, sizeof(uint32_t));
+            scan_end = zng_memread_4(scan+offset);
 #else
             scan_end[0] = *(scan+offset);
             scan_end[1] = *(scan+offset+1);
index 9f8d6fb7facca3512bf25c48136d0ef09df944e3..3209f6a30511e29688b31448e99d0b04159f3067 100644 (file)
@@ -183,7 +183,7 @@ adler32.obj: $(TOP)/adler32.c $(TOP)/zbuild.h $(TOP)/functable.h $(TOP)/adler32_
 adler32_c.obj: $(TOP)/arch/generic/adler32_c.c $(TOP)/zbuild.h $(TOP)/functable.h $(TOP)/adler32_p.h
 adler32_fold_c.obj: $(TOP)/arch/generic/adler32_fold_c.c $(TOP)/zbuild.h $(TOP)/functable.h
 chunkset_c.obj: $(TOP)/arch/generic/chunkset_c.c $(TOP)/zbuild.h $(TOP)/chunkset_tpl.h $(TOP)/inffast_tpl.h
-compare256_c.obj: $(TOP)/arch/generic/compare256_c.c $(TOP)/zbuild.h $(TOP)/zutil_p.h $(TOP)/deflate.h $(TOP)/fallback_builtins.h $(TOP)/match_tpl.h
+compare256_c.obj: $(TOP)/arch/generic/compare256_c.c $(TOP)/zbuild.h $(TOP)/zmemory.h $(TOP)/deflate.h $(TOP)/fallback_builtins.h $(TOP)/match_tpl.h
 compress.obj: $(TOP)/compress.c $(TOP)/zbuild.h $(TOP)/zutil.h
 cpu_features.obj: $(TOP)/cpu_features.c $(TOP)/cpu_features.h $(TOP)/zbuild.h
 crc32.obj: $(TOP)/crc32.c $(TOP)/zbuild.h $(TOP)/functable.h $(TOP)/crc32_braid_tbl.h
@@ -194,7 +194,7 @@ deflate.obj: $(TOP)/deflate.c $(TOP)/zbuild.h $(TOP)/deflate.h $(TOP)/deflate_p.
 deflate_fast.obj: $(TOP)/deflate_fast.c $(TOP)/zbuild.h $(TOP)/deflate.h $(TOP)/deflate_p.h $(TOP)/functable.h
 deflate_huff.obj: $(TOP)/deflate_huff.c $(TOP)/zbuild.h $(TOP)/deflate.h $(TOP)/deflate_p.h $(TOP)/functable.h
 deflate_medium.obj: $(TOP)/deflate_medium.c $(TOP)/zbuild.h $(TOP)/deflate.h $(TOP)/deflate_p.h $(TOP)/functable.h
-deflate_quick.obj: $(TOP)/deflate_quick.c $(TOP)/zbuild.h $(TOP)/deflate.h $(TOP)/deflate_p.h $(TOP)/functable.h $(TOP)/trees_emit.h $(TOP)/zutil_p.h
+deflate_quick.obj: $(TOP)/deflate_quick.c $(TOP)/zbuild.h $(TOP)/deflate.h $(TOP)/deflate_p.h $(TOP)/functable.h $(TOP)/trees_emit.h $(TOP)/zmemory.h
 deflate_rle.obj: $(TOP)/deflate_rle.c $(TOP)/zbuild.h $(TOP)/deflate.h $(TOP)/deflate_p.h $(TOP)/functable.h $(TOP)/compare256_rle.h
 deflate_slow.obj: $(TOP)/deflate_slow.c $(TOP)/zbuild.h $(TOP)/deflate.h $(TOP)/deflate_p.h $(TOP)/functable.h
 deflate_stored.obj: $(TOP)/deflate_stored.c $(TOP)/zbuild.h $(TOP)/deflate.h $(TOP)/deflate_p.h $(TOP)/functable.h
index cab999dfe036d873c52fd3088003014a51b729de..54da045ffddc8e231cb45584bd47e7145fbeb6ea 100644 (file)
@@ -204,7 +204,7 @@ adler32.obj: $(TOP)/adler32.c $(TOP)/zbuild.h $(TOP)/functable.h $(TOP)/adler32_
 adler32_c.obj: $(TOP)/arch/generic/adler32_c.c $(TOP)/zbuild.h $(TOP)/functable.h $(TOP)/adler32_p.h
 adler32_fold_c.obj: $(TOP)/arch/generic/adler32_fold_c.c $(TOP)/zbuild.h $(TOP)/functable.h
 chunkset_c.obj: $(TOP)/arch/generic/chunkset_c.c $(TOP)/zbuild.h $(TOP)/chunkset_tpl.h $(TOP)/inffast_tpl.h
-compare256_c.obj: $(TOP)/arch/generic/compare256_c.c $(TOP)/zbuild.h $(TOP)/zutil_p.h $(TOP)/deflate.h $(TOP)/fallback_builtins.h $(TOP)/match_tpl.h
+compare256_c.obj: $(TOP)/arch/generic/compare256_c.c $(TOP)/zbuild.h $(TOP)/zmemory.h $(TOP)/deflate.h $(TOP)/fallback_builtins.h $(TOP)/match_tpl.h
 compress.obj: $(TOP)/compress.c $(TOP)/zbuild.h $(TOP)/zutil.h
 cpu_features.obj: $(TOP)/cpu_features.c $(TOP)/cpu_features.h $(TOP)/zbuild.h
 crc32.obj: $(TOP)/crc32.c $(TOP)/zbuild.h $(TOP)/functable.h $(TOP)/crc32_braid_tbl.h
@@ -215,7 +215,7 @@ deflate.obj: $(TOP)/deflate.c $(TOP)/zbuild.h $(TOP)/deflate.h $(TOP)/deflate_p.
 deflate_fast.obj: $(TOP)/deflate_fast.c $(TOP)/zbuild.h $(TOP)/deflate.h $(TOP)/deflate_p.h $(TOP)/functable.h
 deflate_huff.obj: $(TOP)/deflate_huff.c $(TOP)/zbuild.h $(TOP)/deflate.h $(TOP)/deflate_p.h $(TOP)/functable.h
 deflate_medium.obj: $(TOP)/deflate_medium.c $(TOP)/zbuild.h $(TOP)/deflate.h $(TOP)/deflate_p.h $(TOP)/functable.h
-deflate_quick.obj: $(TOP)/deflate_quick.c $(TOP)/zbuild.h $(TOP)/deflate.h $(TOP)/deflate_p.h $(TOP)/functable.h $(TOP)/trees_emit.h $(TOP)/zutil_p.h
+deflate_quick.obj: $(TOP)/deflate_quick.c $(TOP)/zbuild.h $(TOP)/deflate.h $(TOP)/deflate_p.h $(TOP)/functable.h $(TOP)/trees_emit.h $(TOP)/zmemory.h
 deflate_rle.obj: $(TOP)/deflate_rle.c $(TOP)/zbuild.h $(TOP)/deflate.h $(TOP)/deflate_p.h $(TOP)/functable.h $(TOP)/compare256_rle.h
 deflate_slow.obj: $(TOP)/deflate_slow.c $(TOP)/zbuild.h $(TOP)/deflate.h $(TOP)/deflate_p.h $(TOP)/functable.h
 deflate_stored.obj: $(TOP)/deflate_stored.c $(TOP)/zbuild.h $(TOP)/deflate.h $(TOP)/deflate_p.h $(TOP)/functable.h
index 8392fe46e7e89dced4dab4a20c6e86a0900b9d37..62ca621aef71b5b8e00450d4f15cc7aa977fdeb2 100644 (file)
@@ -212,9 +212,9 @@ chunkset_c.obj: $(TOP)/arch/generic/chunkset_c.c $(TOP)/zbuild.h $(TOP)/chunkset
 chunkset_avx2.obj: $(TOP)/arch/x86/chunkset_avx2.c $(TOP)/zbuild.h $(TOP)/chunkset_tpl.h $(TOP)/inffast_tpl.h $(TOP)/arch/generic/chunk_permute_table.h
 chunkset_sse2.obj: $(TOP)/arch/x86/chunkset_sse2.c $(TOP)/zbuild.h $(TOP)/chunkset_tpl.h $(TOP)/inffast_tpl.h
 chunkset_ssse3.obj: $(TOP)/arch/x86/chunkset_ssse3.c $(TOP)/zbuild.h $(TOP)/chunkset_tpl.h $(TOP)/inffast_tpl.h $(TOP)/arch/generic/chunk_permute_table.h
-compare256_c.obj: $(TOP)/arch/generic/compare256_c.c $(TOP)/zbuild.h $(TOP)/zutil_p.h $(TOP)/deflate.h $(TOP)/fallback_builtins.h $(TOP)/match_tpl.h
-compare256_avx2.obj: $(TOP)/arch/x86/compare256_avx2.c $(TOP)/zbuild.h $(TOP)/zutil_p.h $(TOP)/deflate.h $(TOP)/fallback_builtins.h $(TOP)/match_tpl.h
-compare256_sse2.obj: $(TOP)/arch/x86/compare256_sse2.c $(TOP)/zbuild.h $(TOP)/zutil_p.h $(TOP)/deflate.h $(TOP)/fallback_builtins.h $(TOP)/match_tpl.h
+compare256_c.obj: $(TOP)/arch/generic/compare256_c.c $(TOP)/zbuild.h $(TOP)/zmemory.h $(TOP)/deflate.h $(TOP)/fallback_builtins.h $(TOP)/match_tpl.h
+compare256_avx2.obj: $(TOP)/arch/x86/compare256_avx2.c $(TOP)/zbuild.h $(TOP)/zmemory.h $(TOP)/deflate.h $(TOP)/fallback_builtins.h $(TOP)/match_tpl.h
+compare256_sse2.obj: $(TOP)/arch/x86/compare256_sse2.c $(TOP)/zbuild.h $(TOP)/zmemory.h $(TOP)/deflate.h $(TOP)/fallback_builtins.h $(TOP)/match_tpl.h
 compress.obj: $(TOP)/compress.c $(TOP)/zbuild.h $(TOP)/zutil.h
 cpu_features.obj: $(TOP)/cpu_features.c $(TOP)/cpu_features.h $(TOP)/zbuild.h
 crc32.obj: $(TOP)/crc32.c $(TOP)/zbuild.h $(TOP)/functable.h $(TOP)/crc32_braid_tbl.h
@@ -226,7 +226,7 @@ deflate.obj: $(TOP)/deflate.c $(TOP)/zbuild.h $(TOP)/deflate.h $(TOP)/deflate_p.
 deflate_fast.obj: $(TOP)/deflate_fast.c $(TOP)/zbuild.h $(TOP)/deflate.h $(TOP)/deflate_p.h $(TOP)/functable.h
 deflate_huff.obj: $(TOP)/deflate_huff.c $(TOP)/zbuild.h $(TOP)/deflate.h $(TOP)/deflate_p.h $(TOP)/functable.h
 deflate_medium.obj: $(TOP)/deflate_medium.c $(TOP)/zbuild.h $(TOP)/deflate.h $(TOP)/deflate_p.h $(TOP)/functable.h
-deflate_quick.obj: $(TOP)/deflate_quick.c $(TOP)/zbuild.h $(TOP)/deflate.h $(TOP)/deflate_p.h $(TOP)/functable.h $(TOP)/trees_emit.h $(TOP)/zutil_p.h
+deflate_quick.obj: $(TOP)/deflate_quick.c $(TOP)/zbuild.h $(TOP)/deflate.h $(TOP)/deflate_p.h $(TOP)/functable.h $(TOP)/trees_emit.h $(TOP)/zmemory.h
 deflate_rle.obj: $(TOP)/deflate_rle.c $(TOP)/zbuild.h $(TOP)/deflate.h $(TOP)/deflate_p.h $(TOP)/functable.h $(TOP)/compare256_rle.h
 deflate_slow.obj: $(TOP)/deflate_slow.c $(TOP)/zbuild.h $(TOP)/deflate.h $(TOP)/deflate_p.h $(TOP)/functable.h
 deflate_stored.obj: $(TOP)/deflate_stored.c $(TOP)/zbuild.h $(TOP)/deflate.h $(TOP)/deflate_p.h $(TOP)/functable.h
diff --git a/zmemory.h b/zmemory.h
new file mode 100644 (file)
index 0000000..99ffd9e
--- /dev/null
+++ b/zmemory.h
@@ -0,0 +1,99 @@
+/* zmemory.h -- Private inline functions used internally in zlib-ng
+ * For conditions of distribution and use, see copyright notice in zlib.h
+ */
+
+#ifndef _ZMEMORY_H
+#define _ZMEMORY_H
+
+#if defined(__GNUC__) && (__GNUC__ >= 4)
+#  define HAVE_MAY_ALIAS
+#endif
+
+static inline uint16_t zng_memread_2(const void *ptr) { /* Read a uint16_t from ptr, which need not be 2-byte aligned; no byte swapping is done here. */
+#if defined(HAVE_MAY_ALIAS)
+    typedef struct { uint16_t val; } __attribute__ ((__packed__, __may_alias__)) unaligned_uint16_t; /* packed: no alignment assumed; may_alias: access is exempt from type-based alias analysis */
+    return ((const unaligned_uint16_t *)ptr)->val;
+#else
+    uint16_t val;
+    memcpy(&val, ptr, sizeof(val)); /* fallback used when HAVE_MAY_ALIAS is not defined */
+    return val;
+#endif
+}
+
+static inline uint32_t zng_memread_4(const void *ptr) { /* Read a uint32_t from ptr, which need not be 4-byte aligned; no byte swapping is done here. */
+#if defined(HAVE_MAY_ALIAS)
+    typedef struct { uint32_t val; } __attribute__ ((__packed__, __may_alias__)) unaligned_uint32_t; /* packed: no alignment assumed; may_alias: access is exempt from type-based alias analysis */
+    return ((const unaligned_uint32_t *)ptr)->val;
+#else
+    uint32_t val;
+    memcpy(&val, ptr, sizeof(val)); /* fallback used when HAVE_MAY_ALIAS is not defined */
+    return val;
+#endif
+}
+
+static inline uint64_t zng_memread_8(const void *ptr) { /* Read a uint64_t from ptr, which need not be 8-byte aligned; no byte swapping is done here. */
+#if defined(HAVE_MAY_ALIAS)
+    typedef struct { uint64_t val; } __attribute__ ((__packed__, __may_alias__)) unaligned_uint64_t; /* packed: no alignment assumed; may_alias: access is exempt from type-based alias analysis */
+    return ((const unaligned_uint64_t *)ptr)->val;
+#else
+    uint64_t val;
+    memcpy(&val, ptr, sizeof(val)); /* fallback used when HAVE_MAY_ALIAS is not defined */
+    return val;
+#endif
+}
+
+static inline void zng_memwrite_2(void *ptr, uint16_t val) { /* Store val as 2 bytes at ptr, which need not be 2-byte aligned; no byte swapping is done here. */
+#if defined(HAVE_MAY_ALIAS)
+    typedef struct { uint16_t val; } __attribute__ ((__packed__, __may_alias__)) unaligned_uint16_t; /* packed: no alignment assumed; may_alias: access is exempt from type-based alias analysis */
+    ((unaligned_uint16_t *)ptr)->val = val;
+#else
+    memcpy(ptr, &val, sizeof(val)); /* fallback used when HAVE_MAY_ALIAS is not defined */
+#endif
+}
+
+static inline void zng_memwrite_4(void *ptr, uint32_t val) { /* Store val as 4 bytes at ptr, which need not be 4-byte aligned; no byte swapping is done here. */
+#if defined(HAVE_MAY_ALIAS)
+    typedef struct { uint32_t val; } __attribute__ ((__packed__, __may_alias__)) unaligned_uint32_t; /* packed: no alignment assumed; may_alias: access is exempt from type-based alias analysis */
+    ((unaligned_uint32_t *)ptr)->val = val;
+#else
+    memcpy(ptr, &val, sizeof(val)); /* fallback used when HAVE_MAY_ALIAS is not defined */
+#endif
+}
+
+static inline void zng_memwrite_8(void *ptr, uint64_t val) { /* Store val as 8 bytes at ptr, which need not be 8-byte aligned; no byte swapping is done here. */
+#if defined(HAVE_MAY_ALIAS)
+    typedef struct { uint64_t val; } __attribute__ ((__packed__, __may_alias__)) unaligned_uint64_t; /* packed: no alignment assumed; may_alias: access is exempt from type-based alias analysis */
+    ((unaligned_uint64_t *)ptr)->val = val;
+#else
+    memcpy(ptr, &val, sizeof(val)); /* fallback used when HAVE_MAY_ALIAS is not defined */
+#endif
+}
+
+/* Use zng_memread_* instead of memcmp to avoid older compilers not converting memcmp
+   calls to unaligned comparisons when unaligned access is supported. Use memcmp only when
+   unaligned support is not available to avoid an extra call to memcpy. */
+static inline int32_t zng_memcmp_2(const void *src0, const void *src1) { /* Equality test only: returns 0 when the 2 bytes at src0 and src1 match, 1 otherwise. */
+#if defined(HAVE_MAY_ALIAS) || OPTIMAL_CMP >= 16
+    return zng_memread_2(src0) != zng_memread_2(src1);
+#else
+    return memcmp(src0, src1, 2) != 0; /* normalized to 0/1 so both branches share one return contract instead of leaking memcmp's signed ordering value */
+#endif
+}
+
+static inline int32_t zng_memcmp_4(const void *src0, const void *src1) { /* Equality test only: returns 0 when the 4 bytes at src0 and src1 match, 1 otherwise. */
+#if defined(HAVE_MAY_ALIAS) || OPTIMAL_CMP >= 32
+    return zng_memread_4(src0) != zng_memread_4(src1);
+#else
+    return memcmp(src0, src1, 4) != 0; /* normalized to 0/1 so both branches share one return contract instead of leaking memcmp's signed ordering value */
+#endif
+}
+
+static inline int32_t zng_memcmp_8(const void *src0, const void *src1) { /* Equality test only: returns 0 when the 8 bytes at src0 and src1 match, 1 otherwise. */
+#if defined(HAVE_MAY_ALIAS) || OPTIMAL_CMP >= 64
+    return zng_memread_8(src0) != zng_memread_8(src1);
+#else
+    return memcmp(src0, src1, 8) != 0; /* normalized to 0/1 so both branches share one return contract instead of leaking memcmp's signed ordering value */
+#endif
+}
+
+#endif
index 97799f0ce31ef232e508a2f5ed85b25c10e9f1a3..835e12f4de5472804737ccc65b0c7c653c3f61ca 100644 (file)
--- a/zutil_p.h
+++ b/zutil_p.h
@@ -43,33 +43,4 @@ static inline void zng_free(void *ptr) {
 #endif
 }
 
-/* Use memcpy instead of memcmp to avoid older compilers not converting memcmp calls to
-   unaligned comparisons when unaligned access is supported. */
-static inline int32_t zng_memcmp_2(const void *src0, const void *src1) {
-    uint16_t src0_cmp, src1_cmp;
-
-    memcpy(&src0_cmp, src0, sizeof(src0_cmp));
-    memcpy(&src1_cmp, src1, sizeof(src1_cmp));
-
-    return src0_cmp != src1_cmp;
-}
-
-static inline int32_t zng_memcmp_4(const void *src0, const void *src1) {
-    uint32_t src0_cmp, src1_cmp;
-
-    memcpy(&src0_cmp, src0, sizeof(src0_cmp));
-    memcpy(&src1_cmp, src1, sizeof(src1_cmp));
-
-    return src0_cmp != src1_cmp;
-}
-
-static inline int32_t zng_memcmp_8(const void *src0, const void *src1) {
-    uint64_t src0_cmp, src1_cmp;
-
-    memcpy(&src0_cmp, src0, sizeof(src0_cmp));
-    memcpy(&src1_cmp, src1, sizeof(src1_cmp));
-
-    return src0_cmp != src1_cmp;
-}
-
 #endif