From: Nathan Moinvaziri Date: Sun, 5 Feb 2023 20:14:18 +0000 (-0800) Subject: Use memcpy instead of memcmp for unaligned memory comparisons. X-Git-Tag: 2.1.0-beta1~53 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=596f6c2f37eca68db8a9ee7c658dfdf8f15d2a81;p=thirdparty%2Fzlib-ng.git Use memcpy instead of memcmp for unaligned memory comparisons. Use memcpy because, among compilers that support unaligned access, it is better supported than memcmp. --- diff --git a/compare256.c b/compare256.c index b11ac24e..94b88e75 100644 --- a/compare256.c +++ b/compare256.c @@ -4,6 +4,7 @@ */ #include "zbuild.h" +#include "zutil_p.h" #include "fallback_builtins.h" /* ALIGNED, byte comparison */ @@ -61,16 +62,19 @@ static inline uint32_t compare256_unaligned_16_static(const uint8_t *src0, const uint32_t len = 0; do { - if (zmemcmp_2(src0, src1) != 0) + if (zng_memcmp_2(src0, src1) != 0) return len + (*src0 == *src1); src0 += 2, src1 += 2, len += 2; - if (zmemcmp_2(src0, src1) != 0) + + if (zng_memcmp_2(src0, src1) != 0) return len + (*src0 == *src1); src0 += 2, src1 += 2, len += 2; - if (zmemcmp_2(src0, src1) != 0) + + if (zng_memcmp_2(src0, src1) != 0) return len + (*src0 == *src1); src0 += 2, src1 += 2, len += 2; - if (zmemcmp_2(src0, src1) != 0) + + if (zng_memcmp_2(src0, src1) != 0) return len + (*src0 == *src1); src0 += 2, src1 += 2, len += 2; } while (len < 256); diff --git a/deflate_quick.c b/deflate_quick.c index 4d8013e7..d616677c 100644 --- a/deflate_quick.c +++ b/deflate_quick.c @@ -18,6 +18,7 @@ */ #include "zbuild.h" +#include "zutil_p.h" #include "deflate.h" #include "deflate_p.h" #include "functable.h" @@ -92,7 +93,7 @@ Z_INTERNAL block_state deflate_quick(deflate_state *s, int flush) { const uint8_t *str_start = s->window + s->strstart; const uint8_t *match_start = s->window + hash_head; - if (zmemcmp_2(str_start, match_start) == 0) { + if (zng_memcmp_2(str_start, match_start) == 0) { match_len = functable.compare256(str_start+2, match_start+2) + 2; if (match_len 
>= WANT_MIN_MATCH) { diff --git a/match_tpl.h b/match_tpl.h index fbd34e58..d0767985 100644 --- a/match_tpl.h +++ b/match_tpl.h @@ -9,6 +9,7 @@ */ #include "zbuild.h" +#include "zutil_p.h" #include "deflate.h" #include "functable.h" @@ -145,24 +146,24 @@ Z_INTERNAL uint32_t LONGEST_MATCH(deflate_state *const s, Pos cur_match) { #ifdef UNALIGNED_OK if (best_len < sizeof(uint32_t)) { for (;;) { - if (zmemcmp_2(mbase_end+cur_match, scan_end) == 0 && - zmemcmp_2(mbase_start+cur_match, scan_start) == 0) + if (zng_memcmp_2(mbase_end+cur_match, scan_end) == 0 && + zng_memcmp_2(mbase_start+cur_match, scan_start) == 0) break; GOTO_NEXT_CHAIN; } # ifdef UNALIGNED64_OK } else if (best_len >= sizeof(uint64_t)) { for (;;) { - if (zmemcmp_8(mbase_end+cur_match, scan_end) == 0 && - zmemcmp_8(mbase_start+cur_match, scan_start) == 0) + if (zng_memcmp_8(mbase_end+cur_match, scan_end) == 0 && + zng_memcmp_8(mbase_start+cur_match, scan_start) == 0) break; GOTO_NEXT_CHAIN; } # endif } else { for (;;) { - if (zmemcmp_4(mbase_end+cur_match, scan_end) == 0 && - zmemcmp_4(mbase_start+cur_match, scan_start) == 0) + if (zng_memcmp_4(mbase_end+cur_match, scan_end) == 0 && + zng_memcmp_4(mbase_start+cur_match, scan_start) == 0) break; GOTO_NEXT_CHAIN; } diff --git a/zbuild.h b/zbuild.h index 901111cd..c7df4b1e 100644 --- a/zbuild.h +++ b/zbuild.h @@ -243,25 +243,6 @@ # endif #endif -/* Force compiler to emit unaligned memory comparisons if unaligned access is supported - on the architecture, otherwise don't assume unaligned access is supported. Older - compilers don't optimize memcmp calls for all integer types to unaligned access instructions - when it is supported on the architecture resulting in significant performance impact. 
*/ -#ifdef UNALIGNED_OK -# define zmemcmp_2(str1, str2) (*((uint16_t *)(str1)) != *((uint16_t *)(str2))) -# define zmemcmp_4(str1, str2) (*((uint32_t *)(str1)) != *((uint32_t *)(str2))) -# if defined(UNALIGNED64_OK) && (UINTPTR_MAX == UINT64_MAX) -# define zmemcmp_8(str1, str2) (*((uint64_t *)(str1)) != *((uint64_t *)(str2))) -# else -# define zmemcmp_8(str1, str2) (((uint32_t *)(str1))[0] != ((uint32_t *)(str2))[0] || \ - ((uint32_t *)(str1))[1] != ((uint32_t *)(str2))[1]) -# endif -#else -# define zmemcmp_2(str1, str2) memcmp(str1, str2, 2) -# define zmemcmp_4(str1, str2) memcmp(str1, str2, 4) -# define zmemcmp_8(str1, str2) memcmp(str1, str2, 8) -#endif - #if defined(__has_feature) # if __has_feature(memory_sanitizer) # define Z_MEMORY_SANITIZER 1 diff --git a/zutil_p.h b/zutil_p.h index 318b4d90..caec91d5 100644 --- a/zutil_p.h +++ b/zutil_p.h @@ -39,4 +39,33 @@ static inline void zng_free(void *ptr) { #endif } +/* Use memcpy instead of memcmp to avoid older compilers not converting memcmp calls to + unaligned comparisons when unaligned access is supported. */ +static inline int32_t zng_memcmp_2(const void *src0, const void *src1) { + uint16_t src0_cmp, src1_cmp; + + memcpy(&src0_cmp, src0, sizeof(src0_cmp)); + memcpy(&src1_cmp, src1, sizeof(src1_cmp)); + + return src0_cmp != src1_cmp; +} + +static inline int32_t zng_memcmp_4(const void *src0, const void *src1) { + uint32_t src0_cmp, src1_cmp; + + memcpy(&src0_cmp, src0, sizeof(src0_cmp)); + memcpy(&src1_cmp, src1, sizeof(src1_cmp)); + + return src0_cmp != src1_cmp; +} + +static inline int32_t zng_memcmp_8(const void *src0, const void *src1) { + uint64_t src0_cmp, src1_cmp; + + memcpy(&src0_cmp, src0, sizeof(src0_cmp)); + memcpy(&src1_cmp, src1, sizeof(src1_cmp)); + + return src0_cmp != src1_cmp; +} + #endif