From: Adam Stylinski
Date: Fri, 20 Dec 2024 23:53:51 +0000 (-0500)
Subject: Make big endians first class citizens again
X-Git-Tag: 2.2.3~9
X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=04d1b758194099bad80f5769610f2679499e135f;p=thirdparty%2Fzlib-ng.git

Make big endians first class citizens again

No longer does the big iron of yore, which lacks SIMD optimized loads,
need to search strings a byte at a time like primitive machines of the
VAX era. This guard was there mostly because the string comparison
located the first mismatching byte with "count trailing zeros", which
assumes a little-endian byte order. We can conditionally count leading
zeros instead when on big endian and stop falling back to the extremely
naive C implementation. This makes things a tad faster.
---

diff --git a/arch/generic/compare256_c.c b/arch/generic/compare256_c.c
index 3704c2f6..d20c74ce 100644
--- a/arch/generic/compare256_c.c
+++ b/arch/generic/compare256_c.c
@@ -57,7 +57,8 @@ Z_INTERNAL uint32_t compare256_c(const uint8_t *src0, const uint8_t *src1) {
 
 #include "match_tpl.h"
 
-#if BYTE_ORDER == LITTLE_ENDIAN && OPTIMAL_CMP >= 32
+#if OPTIMAL_CMP >= 32
+
 /* 16-bit unaligned integer comparison */
 static inline uint32_t compare256_unaligned_16_static(const uint8_t *src0, const uint8_t *src1) {
     uint32_t len = 0;
@@ -111,7 +112,11 @@ static inline uint32_t compare256_unaligned_32_static(const uint8_t *src0, const
 
         diff = sv ^ mv;
         if (diff) {
+#if BYTE_ORDER == LITTLE_ENDIAN
             uint32_t match_byte = __builtin_ctz(diff) / 8;
+#else
+            uint32_t match_byte = __builtin_clz(diff) / 8;
+#endif
             return len + match_byte;
         }
 
@@ -151,7 +156,11 @@ static inline uint32_t compare256_unaligned_64_static(const uint8_t *src0, const
 
         diff = sv ^ mv;
         if (diff) {
+#if BYTE_ORDER == LITTLE_ENDIAN
             uint64_t match_byte = __builtin_ctzll(diff) / 8;
+#else
+            uint64_t match_byte = __builtin_clzll(diff) / 8;
+#endif
             return len + (uint32_t)match_byte;
         }
 
diff --git a/arch/generic/generic_functions.h b/arch/generic/generic_functions.h
index eaba70c3..3569f1f2 100644
--- a/arch/generic/generic_functions.h
+++ b/arch/generic/generic_functions.h
@@ -28,7 +28,7 @@ void inflate_fast_c(PREFIX3(stream) *strm, uint32_t start);
 uint32_t PREFIX(crc32_braid)(uint32_t crc, const uint8_t *buf, size_t len);
 
 uint32_t compare256_c(const uint8_t *src0, const uint8_t *src1);
-#if BYTE_ORDER == LITTLE_ENDIAN && OPTIMAL_CMP >= 32
+#if OPTIMAL_CMP >= 32
 uint32_t compare256_unaligned_16(const uint8_t *src0, const uint8_t *src1);
 #  ifdef HAVE_BUILTIN_CTZ
 uint32_t compare256_unaligned_32(const uint8_t *src0, const uint8_t *src1);
@@ -44,7 +44,7 @@ void slide_hash_c(deflate_state *s);
 
 uint32_t longest_match_c(deflate_state *const s, Pos cur_match);
 uint32_t longest_match_slow_c(deflate_state *const s, Pos cur_match);
-#if BYTE_ORDER == LITTLE_ENDIAN && OPTIMAL_CMP >= 32
+#if OPTIMAL_CMP >= 32
 uint32_t longest_match_unaligned_16(deflate_state *const s, Pos cur_match);
 uint32_t longest_match_slow_unaligned_16(deflate_state *const s, Pos cur_match);
 #  ifdef HAVE_BUILTIN_CTZ
@@ -59,7 +59,7 @@ uint32_t longest_match_slow_c(deflate_state *const s, Pos cur_match);
 
 
 // Select generic implementation for longest_match, longest_match_slow, longest_match_slow functions.
-#if BYTE_ORDER == LITTLE_ENDIAN && OPTIMAL_CMP >= 32
+#if OPTIMAL_CMP >= 32
 #  if defined(HAVE_BUILTIN_CTZLL) && OPTIMAL_CMP >= 64
 #    define longest_match_generic longest_match_unaligned_64
 #    define longest_match_slow_generic longest_match_slow_unaligned_64
diff --git a/zbuild.h b/zbuild.h
index 4d3fc5f2..f5c800ad 100644
--- a/zbuild.h
+++ b/zbuild.h
@@ -257,9 +257,7 @@
 #    define OPTIMAL_CMP 32
 #  endif
 #elif defined(__powerpc64__) || defined(__ppc64__)
-#  if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
-#    define OPTIMAL_CMP 64
-#  endif
+#  define OPTIMAL_CMP 64
 #endif
 #if defined(NO_UNALIGNED)
 #  undef OPTIMAL_CMP
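
As a standalone illustration of the trick (a minimal sketch, not part of
the patch, assuming a GCC/Clang-style compiler for the __builtin_ctzll /
__builtin_clzll builtins and the __BYTE_ORDER__ predefined macros; the
helper name first_mismatch_byte is hypothetical): when two words loaded
from memory are XORed, the first differing byte in memory order lands in
the least significant bits on little endian, so "count trailing zeros"
finds its index, while on big endian it lands in the most significant
bits, so "count leading zeros" yields the same index.

/* Standalone sketch: index of the first mismatching byte between two
 * 8-byte blocks, resolved with one XOR plus one bit scan.
 * Assumes GCC/Clang builtins; the blocks must not be identical. */
#include <stdint.h>
#include <stdio.h>
#include <string.h>

static uint32_t first_mismatch_byte(const uint8_t *a, const uint8_t *b) {
    uint64_t av, bv, diff;
    memcpy(&av, a, sizeof(av));   /* memcpy keeps the unaligned load legal */
    memcpy(&bv, b, sizeof(bv));
    diff = av ^ bv;               /* nonzero bits mark mismatching bytes */
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
    /* Lowest-addressed byte sits in the least significant bits. */
    return (uint32_t)(__builtin_ctzll(diff) / 8);
#else
    /* Big endian: lowest-addressed byte is the most significant one. */
    return (uint32_t)(__builtin_clzll(diff) / 8);
#endif
}

int main(void) {
    const uint8_t s0[] = "abcdeXgh";
    const uint8_t s1[] = "abcdeYgh";
    /* Prints 5 on both little- and big-endian hosts. */
    printf("first mismatch at byte %u\n", first_mismatch_byte(s0, s1));
    return 0;
}

Dividing the bit index by 8 converts it to a byte index, which is why
the patch only has to swap the bit-scan direction and can otherwise
leave the comparison loop untouched.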