From: Nathan Moinvaziri Date: Wed, 26 Jan 2022 18:51:23 +0000 (-0800) Subject: Introduce zmemcmp to use unaligned access for architectures we know support unaligned... X-Git-Tag: 2.1.0-beta1~378 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=ab6665b1bee170c27d229899ed23e6053ebd900b;p=thirdparty%2Fzlib-ng.git Introduce zmemcmp to use unaligned access for architectures we know support unaligned access, otherwise use memcmp. --- diff --git a/deflate_quick.c b/deflate_quick.c index a214a7fe..a3162675 100644 --- a/deflate_quick.c +++ b/deflate_quick.c @@ -92,7 +92,7 @@ Z_INTERNAL block_state deflate_quick(deflate_state *s, int flush) { const uint8_t *str_start = s->window + s->strstart; const uint8_t *match_start = s->window + hash_head; - if (*(uint16_t *)str_start == *(uint16_t *)match_start) { + if (zmemcmp_2(str_start, match_start) == 0) { match_len = functable.compare256(str_start+2, match_start+2) + 2; if (match_len >= WANT_MIN_MATCH) { diff --git a/match_tpl.h b/match_tpl.h index 49b9e724..3fc71c15 100644 --- a/match_tpl.h +++ b/match_tpl.h @@ -145,24 +145,24 @@ Z_INTERNAL uint32_t LONGEST_MATCH(deflate_state *const s, Pos cur_match) { #ifdef UNALIGNED_OK if (best_len < sizeof(uint32_t)) { for (;;) { - if (*(uint16_t *)(mbase_end+cur_match) == *(uint16_t *)scan_end && - *(uint16_t *)(mbase_start+cur_match) == *(uint16_t *)scan_start) + if (zmemcmp_2(mbase_end+cur_match, scan_end) == 0 && + zmemcmp_2(mbase_start+cur_match, scan_start) == 0) break; GOTO_NEXT_CHAIN; } # ifdef UNALIGNED64_OK } else if (best_len >= sizeof(uint64_t)) { for (;;) { - if (*(uint64_t *)(mbase_end+cur_match) == *(uint64_t *)scan_end && - *(uint64_t *)(mbase_start+cur_match) == *(uint64_t *)scan_start) + if (zmemcmp_8(mbase_end+cur_match, scan_end) == 0 && + zmemcmp_8(mbase_start+cur_match, scan_start) == 0) break; GOTO_NEXT_CHAIN; } # endif } else { for (;;) { - if (*(uint32_t *)(mbase_end+cur_match) == *(uint32_t *)scan_end && - *(uint32_t *)(mbase_start+cur_match) == *(uint32_t *)scan_start) + if (zmemcmp_4(mbase_end+cur_match, scan_end) == 0 && + zmemcmp_4(mbase_start+cur_match, scan_start) == 0) break; GOTO_NEXT_CHAIN; } diff --git a/zbuild.h b/zbuild.h index 5789eb06..08bc7ef6 100644 --- a/zbuild.h +++ b/zbuild.h @@ -194,19 +194,32 @@ # define Tracecv(c, x) #endif +/* Force compiler to emit unaligned memory accesses if unaligned access is supported + on the architecture, otherwise don't assume unaligned access is supported. Older + compilers don't optimize memcpy and memcmp calls to unaligned access instructions + when it is supported on the architecture resulting in significant performance impact. + Newer compilers might optimize memcpy but not all optimize memcmp for all integer types. */ #ifdef UNALIGNED_OK -# define zmemcpy_2(dest, src) *((uint16_t *)dest) = *((uint16_t *)src) -# define zmemcpy_4(dest, src) *((uint32_t *)dest) = *((uint32_t *)src) +# define zmemcpy_2(dest, src) (*((uint16_t *)(dest)) = *((uint16_t *)(src))) +# define zmemcmp_2(str1, str2) (*((uint16_t *)(str1)) != *((uint16_t *)(str2))) +# define zmemcpy_4(dest, src) (*((uint32_t *)(dest)) = *((uint32_t *)(src))) +# define zmemcmp_4(str1, str2) (*((uint32_t *)(str1)) != *((uint32_t *)(str2))) # if UINTPTR_MAX == UINT64_MAX -# define zmemcpy_8(dest, src) *((uint64_t *)dest) = *((uint64_t *)src) +# define zmemcpy_8(dest, src) (*((uint64_t *)(dest)) = *((uint64_t *)(src))) +# define zmemcmp_8(str1, str2) (*((uint64_t *)(str1)) != *((uint64_t *)(str2))) # else -# define zmemcpy_8(dest, src) ((uint32_t *)dest)[0] = *((uint32_t *)src)[0] \ - ((uint32_t *)dest)[1] = *((uint32_t *)src)[1] +# define zmemcpy_8(dest, src) (((uint32_t *)(dest))[0] = ((uint32_t *)(src))[0], \ + ((uint32_t *)(dest))[1] = ((uint32_t *)(src))[1]) +# define zmemcmp_8(str1, str2) (((uint32_t *)(str1))[0] != ((uint32_t *)(str2))[0] || \ + ((uint32_t *)(str1))[1] != ((uint32_t *)(str2))[1]) # endif #else -# define zmemcpy_2(dest, src) memcpy(dest, src, 2) -# define zmemcpy_4(dest, src) memcpy(dest, src, 4) -# define zmemcpy_8(dest, src) memcpy(dest, src, 8) +# define zmemcpy_2(dest, src) memcpy(dest, src, 2) +# define zmemcmp_2(str1, str2) memcmp(str1, str2, 2) +# define zmemcpy_4(dest, src) memcpy(dest, src, 4) +# define zmemcmp_4(str1, str2) memcmp(str1, str2, 4) +# define zmemcpy_8(dest, src) memcpy(dest, src, 8) +# define zmemcmp_8(str1, str2) memcmp(str1, str2, 8) #endif #endif