From: Nathan Moinvaziri
Date: Sun, 9 Jan 2022 23:01:23 +0000 (-0800)
Subject: Introduce zmemcpy to use unaligned access for architectures we know support unaligned...
X-Git-Tag: 2.1.0-beta1~379
X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=363a95fb9b8c4bef7225164fa1deb5481a35c3c0;p=thirdparty%2Fzlib-ng.git

Introduce zmemcpy to use unaligned access for architectures we know support
unaligned access, otherwise use memcpy.
---

diff --git a/arch/arm/chunkset_neon.c b/arch/arm/chunkset_neon.c
index 1e173b907..2ca8ce07e 100644
--- a/arch/arm/chunkset_neon.c
+++ b/arch/arm/chunkset_neon.c
@@ -25,19 +25,19 @@ static inline void chunkmemset_1(uint8_t *from, chunk_t *chunk) {
 
 static inline void chunkmemset_2(uint8_t *from, chunk_t *chunk) {
     uint16_t tmp;
-    memcpy(&tmp, from, 2);
+    zmemcpy_2(&tmp, from);
     *chunk = vreinterpretq_u8_u16(vdupq_n_u16(tmp));
 }
 
 static inline void chunkmemset_4(uint8_t *from, chunk_t *chunk) {
     uint32_t tmp;
-    memcpy(&tmp, from, 4);
+    zmemcpy_4(&tmp, from);
     *chunk = vreinterpretq_u8_u32(vdupq_n_u32(tmp));
 }
 
 static inline void chunkmemset_8(uint8_t *from, chunk_t *chunk) {
     uint64_t tmp;
-    memcpy(&tmp, from, 8);
+    zmemcpy_8(&tmp, from);
     *chunk = vreinterpretq_u8_u64(vdupq_n_u64(tmp));
 }

diff --git a/arch/power/chunkset_power8.c b/arch/power/chunkset_power8.c
index e66de9ed2..52734cb2d 100644
--- a/arch/power/chunkset_power8.c
+++ b/arch/power/chunkset_power8.c
@@ -21,19 +21,19 @@ static inline void chunkmemset_1(uint8_t *from, chunk_t *chunk) {
 
 static inline void chunkmemset_2(uint8_t *from, chunk_t *chunk) {
     uint16_t tmp;
-    memcpy(&tmp, from, 2);
+    zmemcpy_2(&tmp, from);
     *chunk = (vector unsigned char)vec_splats(tmp);
 }
 
 static inline void chunkmemset_4(uint8_t *from, chunk_t *chunk) {
     uint32_t tmp;
-    memcpy(&tmp, from, 4);
+    zmemcpy_4(&tmp, from);
     *chunk = (vector unsigned char)vec_splats(tmp);
 }
 
 static inline void chunkmemset_8(uint8_t *from, chunk_t *chunk) {
     uint64_t tmp;
-    memcpy(&tmp, from, 8);
+    zmemcpy_8(&tmp, from);
     *chunk = (vector unsigned char)vec_splats(tmp);
 }

diff --git a/chunkset.c b/chunkset.c
index cfc6e87f8..14c913ee6 100644
--- a/chunkset.c
+++ b/chunkset.c
@@ -18,20 +18,20 @@ static inline void chunkmemset_1(uint8_t *from, chunk_t *chunk) {
 
 static inline void chunkmemset_4(uint8_t *from, chunk_t *chunk) {
     uint8_t *dest = (uint8_t *)chunk;
-    memcpy(dest, from, sizeof(uint32_t));
-    memcpy(dest+4, from, sizeof(uint32_t));
+    zmemcpy_4(dest, from);
+    zmemcpy_4(dest+4, from);
 }
 
 static inline void chunkmemset_8(uint8_t *from, chunk_t *chunk) {
-    memcpy(chunk, from, sizeof(uint64_t));
+    zmemcpy_8(chunk, from);
 }
 
 static inline void loadchunk(uint8_t const *s, chunk_t *chunk) {
-    chunkmemset_8((uint8_t *)s, chunk);
+    zmemcpy_8(chunk, (uint8_t *)s);
 }
 
 static inline void storechunk(uint8_t *out, chunk_t *chunk) {
-    memcpy(out, chunk, sizeof(uint64_t));
+    zmemcpy_8(out, chunk);
 }
 
 #define CHUNKSIZE chunksize_c

diff --git a/chunkset_tpl.h b/chunkset_tpl.h
index dced2895d..8e6f56644 100644
--- a/chunkset_tpl.h
+++ b/chunkset_tpl.h
@@ -60,20 +60,20 @@ Z_INTERNAL uint8_t* CHUNKCOPY_SAFE(uint8_t *out, uint8_t const *from, unsigned l
 #endif
 #if CHUNK_SIZE >= 8
     while (len >= 8) {
-        memcpy(out, from, 8);
+        zmemcpy_8(out, from);
         out += 8;
         from += 8;
         len -= 8;
     }
 #endif
     if (len >= 4) {
-        memcpy(out, from, 4);
+        zmemcpy_4(out, from);
         out += 4;
         from += 4;
         len -= 4;
     }
     if (len >= 2) {
-        memcpy(out, from, 2);
+        zmemcpy_2(out, from);
         out += 2;
         from += 2;
         len -= 2;
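Aside (illustration, not part of the patch): the CHUNKCOPY_SAFE tail above drains the remaining length in fixed 8/4/2-byte steps so that every copy has a compile-time size, which zmemcpy_* can then turn into a single unaligned load/store. A minimal standalone sketch of the same descent; the helper name copy_tail and the trailing one-byte step are assumptions for completeness, not code from the patch:

#include <stddef.h>
#include <stdint.h>
#include <string.h>

/* Drain len bytes in fixed 8/4/2/1-byte steps, mirroring the tail of
   CHUNKCOPY_SAFE. Fixed sizes let the compiler expand each memcpy into a
   single (possibly unaligned) load/store pair instead of a libc call. */
static uint8_t *copy_tail(uint8_t *out, const uint8_t *from, size_t len) {
    while (len >= 8) {
        memcpy(out, from, 8);
        out += 8; from += 8; len -= 8;
    }
    if (len >= 4) {
        memcpy(out, from, 4);
        out += 4; from += 4; len -= 4;
    }
    if (len >= 2) {
        memcpy(out, from, 2);
        out += 2; from += 2; len -= 2;
    }
    if (len)   /* assumed final step; the hunk above ends before it */
        *out++ = *from;
    return out;
}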
diff --git a/compare256.c b/compare256.c
index 68c0cf543..1b693f274 100644
--- a/compare256.c
+++ b/compare256.c
@@ -101,8 +101,8 @@ static inline uint32_t compare256_unaligned_32_static(const uint8_t *src0, const
     do {
         uint32_t sv, mv, diff;
 
-        memcpy(&sv, src0, sizeof(sv));
-        memcpy(&mv, src1, sizeof(mv));
+        zmemcpy_4(&sv, src0);
+        zmemcpy_4(&mv, src1);
 
         diff = sv ^ mv;
         if (diff) {
@@ -141,8 +141,8 @@ static inline uint32_t compare256_unaligned_64_static(const uint8_t *src0, const
     do {
         uint64_t sv, mv, diff;
 
-        memcpy(&sv, src0, sizeof(sv));
-        memcpy(&mv, src1, sizeof(mv));
+        zmemcpy_8(&sv, src0);
+        zmemcpy_8(&mv, src1);
 
         diff = sv ^ mv;
         if (diff) {

diff --git a/deflate.h b/deflate.h
index 7f4eb43f9..87108914e 100644
--- a/deflate.h
+++ b/deflate.h
@@ -305,7 +305,7 @@ static inline void put_short(deflate_state *s, uint16_t w) {
 #if BYTE_ORDER == BIG_ENDIAN
     w = ZSWAP16(w);
 #endif
-    memcpy(&s->pending_buf[s->pending], &w, sizeof(w));
+    zmemcpy_2(&s->pending_buf[s->pending], &w);
     s->pending += 2;
 }
 
@@ -317,7 +317,7 @@ static inline void put_short_msb(deflate_state *s, uint16_t w) {
 #if BYTE_ORDER == LITTLE_ENDIAN
     w = ZSWAP16(w);
 #endif
-    memcpy(&s->pending_buf[s->pending], &w, sizeof(w));
+    zmemcpy_2(&s->pending_buf[s->pending], &w);
     s->pending += 2;
 }
 
@@ -329,7 +329,7 @@ static inline void put_uint32(deflate_state *s, uint32_t dw) {
 #if BYTE_ORDER == BIG_ENDIAN
     dw = ZSWAP32(dw);
 #endif
-    memcpy(&s->pending_buf[s->pending], &dw, sizeof(dw));
+    zmemcpy_4(&s->pending_buf[s->pending], &dw);
     s->pending += 4;
 }
 
@@ -341,7 +341,7 @@ static inline void put_uint32_msb(deflate_state *s, uint32_t dw) {
 #if BYTE_ORDER == LITTLE_ENDIAN
     dw = ZSWAP32(dw);
 #endif
-    memcpy(&s->pending_buf[s->pending], &dw, sizeof(dw));
+    zmemcpy_4(&s->pending_buf[s->pending], &dw);
     s->pending += 4;
 }
 
@@ -353,7 +353,7 @@ static inline void put_uint64(deflate_state *s, uint64_t lld) {
 #if BYTE_ORDER == BIG_ENDIAN
     lld = ZSWAP64(lld);
 #endif
-    memcpy(&s->pending_buf[s->pending], &lld, sizeof(lld));
+    zmemcpy_8(&s->pending_buf[s->pending], &lld);
     s->pending += 8;
 }
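Aside (illustration, not part of the patch): the put_* helpers in deflate.h normalize byte order before a fixed-size copy, so the emitted stream has the same layout on little- and big-endian hosts while zmemcpy_* keeps the store itself a single instruction where possible. A standalone sketch of the same pattern; put_u32_le is a hypothetical name, and the byte-order probe assumes a GCC/Clang-style compiler:

#include <stdint.h>
#include <string.h>

/* Mirrors put_uint32(): swap on big-endian hosts, then a fixed 4-byte copy,
   which zmemcpy_4() specializes into one unaligned store where supported. */
static void put_u32_le(uint8_t *buf, uint32_t dw) {
#if defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
    dw = __builtin_bswap32(dw);   /* stands in for ZSWAP32() */
#endif
    memcpy(buf, &dw, sizeof(dw));
}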
diff --git a/inffast.c b/inffast.c
index 899534f0d..58cbad7ef 100644
--- a/inffast.c
+++ b/inffast.c
@@ -15,7 +15,7 @@
 /* Load 64 bits from IN and place the bytes at offset BITS in the result. */
 static inline uint64_t load_64_bits(const unsigned char *in, unsigned bits) {
     uint64_t chunk;
-    memcpy(&chunk, in, sizeof(chunk));
+    zmemcpy_8(&chunk, in);
 
 #if BYTE_ORDER == LITTLE_ENDIAN
     return chunk << bits;

diff --git a/insert_string_tpl.h b/insert_string_tpl.h
index 7d3e46c89..643a5e0e3 100644
--- a/insert_string_tpl.h
+++ b/insert_string_tpl.h
@@ -29,9 +29,9 @@
 #  define HASH_CALC_MASK HASH_MASK
 #endif
 #ifndef HASH_CALC_READ
-#  ifdef UNALIGNED_OK
+#  if BYTE_ORDER == LITTLE_ENDIAN
 #    define HASH_CALC_READ \
-        memcpy(&val, strstart, sizeof(val));
+        zmemcpy_4(&val, strstart);
 #  else
 #    define HASH_CALC_READ \
         val = ((uint32_t)(strstart[0])); \

diff --git a/match_tpl.h b/match_tpl.h
index 853a4ad32..49b9e724a 100644
--- a/match_tpl.h
+++ b/match_tpl.h
@@ -74,11 +74,11 @@ Z_INTERNAL uint32_t LONGEST_MATCH(deflate_state *const s, Pos cur_match) {
 #endif
 
 #ifdef UNALIGNED64_OK
-    memcpy(scan_start, scan, sizeof(uint64_t));
-    memcpy(scan_end, scan+offset, sizeof(uint64_t));
+    zmemcpy_8(scan_start, scan);
+    zmemcpy_8(scan_end, scan+offset);
 #elif defined(UNALIGNED_OK)
-    memcpy(scan_start, scan, sizeof(uint32_t));
-    memcpy(scan_end, scan+offset, sizeof(uint32_t));
+    zmemcpy_4(scan_start, scan);
+    zmemcpy_4(scan_end, scan+offset);
 #else
     scan_end[0] = *(scan+offset);
     scan_end[1] = *(scan+offset+1);
@@ -201,9 +201,9 @@ Z_INTERNAL uint32_t LONGEST_MATCH(deflate_state *const s, Pos cur_match) {
 #endif
 
 #ifdef UNALIGNED64_OK
-        memcpy(scan_end, scan+offset, sizeof(uint64_t));
+        zmemcpy_8(scan_end, scan+offset);
 #elif defined(UNALIGNED_OK)
-        memcpy(scan_end, scan+offset, sizeof(uint32_t));
+        zmemcpy_4(scan_end, scan+offset);
 #else
         scan_end[0] = *(scan+offset);
         scan_end[1] = *(scan+offset+1);

diff --git a/zbuild.h b/zbuild.h
index d2b6a922d..5789eb06d 100644
--- a/zbuild.h
+++ b/zbuild.h
@@ -194,4 +194,19 @@
 #  define Tracecv(c, x)
 #endif
 
+#ifdef UNALIGNED_OK
+#  define zmemcpy_2(dest, src) *((uint16_t *)dest) = *((uint16_t *)src)
+#  define zmemcpy_4(dest, src) *((uint32_t *)dest) = *((uint32_t *)src)
+#  if UINTPTR_MAX == UINT64_MAX
+#    define zmemcpy_8(dest, src) *((uint64_t *)dest) = *((uint64_t *)src)
+#  else
+#    define zmemcpy_8(dest, src) (((uint32_t *)dest)[0] = ((uint32_t *)src)[0], \
+                                  ((uint32_t *)dest)[1] = ((uint32_t *)src)[1])
+#  endif
+#else
+#  define zmemcpy_2(dest, src) memcpy(dest, src, 2)
+#  define zmemcpy_4(dest, src) memcpy(dest, src, 4)
+#  define zmemcpy_8(dest, src) memcpy(dest, src, 8)
+#endif
+
 #endif
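Note on the new zbuild.h macros (commentary, not part of the patch): with UNALIGNED_OK defined, the macros expand to direct pointer-cast accesses, trading memcpy's strict-aliasing safety for a guaranteed single unaligned load/store; without it they fall back to fixed-size memcpy, which compilers typically fold into the same instruction where the target allows. The 32-bit fallback for zmemcpy_8 uses the comma operator to chain the two halves into one statement. A standalone sketch comparing the two shapes of zmemcpy_4 on a deliberately misaligned address; everything outside the two macro bodies is illustrative only:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Fallback shape: portable, aliasing-safe, usually folds to one load/store. */
#define zmemcpy_4_portable(dest, src)   memcpy(dest, src, 4)

/* UNALIGNED_OK shape from the patch: a direct cast-and-dereference access.
   Only valid on targets that tolerate unaligned pointers, and it sidesteps
   strict aliasing, which is why it is opt-in per architecture. */
#define zmemcpy_4_unaligned(dest, src)  (*((uint32_t *)(dest)) = *((uint32_t *)(src)))

int main(void) {
    uint8_t buf[8] = {0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77, 0x88};
    uint32_t a = 0, b = 0;

    zmemcpy_4_portable(&a, buf + 1);    /* 4-byte read at an odd offset */
    zmemcpy_4_unaligned(&b, buf + 1);

    printf("portable=0x%08x unaligned=0x%08x\n", a, b);
    return a == b ? 0 : 1;
}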