git.ipfire.org Git - thirdparty/zlib-ng.git/commitdiff
Make big endians first class citizens again
author Adam Stylinski <kungfujesus06@gmail.com>
Fri, 20 Dec 2024 23:53:51 +0000 (18:53 -0500)
committer Hans Kristian Rosbach <hk-github@circlestorm.org>
Sat, 21 Dec 2024 12:16:08 +0000 (13:16 +0100)
No longer does the big iron of yore, which lacks SIMD-optimized loads, need
to search strings a byte at a time like primitive machines of the VAX era.
The guard here existed mostly because the string comparison located the
first mismatch with "count trailing zeros", which assumes little-endian
byte order. We can just conditionally use "count leading zeros" on big
endian instead and stop using the extremely naive C implementation. This
makes things a tad faster.
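
For reference, here is a minimal standalone sketch (not the zlib-ng source
itself) of the byte-index trick the patch relies on; the name first_diff_byte
and the memcpy-based loads are illustrative assumptions, and the caller must
guarantee the two words actually differ, since __builtin_ctz/__builtin_clz
are undefined for zero:

#include <stdint.h>
#include <string.h>

/* Index of the first differing byte between two 4-byte regions, found with
 * a single word compare. On little endian the lowest-addressed byte lands
 * in the least significant bits, so "count trailing zeros" points at it;
 * on big endian it lands in the most significant bits, so "count leading
 * zeros" does. */
static inline unsigned first_diff_byte(const uint8_t *a, const uint8_t *b) {
    uint32_t wa, wb, diff;
    memcpy(&wa, a, sizeof(wa));    /* unaligned-safe loads */
    memcpy(&wb, b, sizeof(wb));
    diff = wa ^ wb;                /* nonzero bytes mark mismatches */
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
    return (unsigned)__builtin_ctz(diff) / 8;
#else
    return (unsigned)__builtin_clz(diff) / 8;
#endif
}

Comparing "ABCD" against "ABXD", for example, gives a diff whose only
nonzero byte sits at index 2, and the function returns 2 under either
byte order.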

arch/generic/compare256_c.c
arch/generic/generic_functions.h
zbuild.h

index 3704c2f6cc5ca8f85376528a817d58f959c145d1..d20c74ce8483b3e5b925fbf233f8759657d45cd8 100644 (file)
--- a/arch/generic/compare256_c.c
+++ b/arch/generic/compare256_c.c
@@ -57,7 +57,8 @@ Z_INTERNAL uint32_t compare256_c(const uint8_t *src0, const uint8_t *src1) {
 
 #include "match_tpl.h"
 
-#if BYTE_ORDER == LITTLE_ENDIAN && OPTIMAL_CMP >= 32
+#if OPTIMAL_CMP >= 32
+
 /* 16-bit unaligned integer comparison */
 static inline uint32_t compare256_unaligned_16_static(const uint8_t *src0, const uint8_t *src1) {
     uint32_t len = 0;
@@ -111,7 +112,11 @@ static inline uint32_t compare256_unaligned_32_static(const uint8_t *src0, const
 
         diff = sv ^ mv;
         if (diff) {
+#if BYTE_ORDER == LITTLE_ENDIAN
             uint32_t match_byte = __builtin_ctz(diff) / 8;
+#else
+            uint32_t match_byte = __builtin_clz(diff) / 8;
+#endif
             return len + match_byte;
         }
 
@@ -151,7 +156,11 @@ static inline uint32_t compare256_unaligned_64_static(const uint8_t *src0, const
 
         diff = sv ^ mv;
         if (diff) {
+#if BYTE_ORDER == LITTLE_ENDIAN
             uint64_t match_byte = __builtin_ctzll(diff) / 8;
+#else
+            uint64_t match_byte = __builtin_clzll(diff) / 8;
+#endif
             return len + (uint32_t)match_byte;
         }
 
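
As a sanity check for the new big-endian path, something along these lines
could exercise a compare256-style routine against a plain byte loop; this
harness is a hypothetical sketch, not part of the commit, and the names
check_compare256 and compare256_byte_loop are made up here:

#include <assert.h>
#include <stdint.h>
#include <string.h>

/* Reference implementation: count matching leading bytes, up to 256. */
static uint32_t compare256_byte_loop(const uint8_t *src0, const uint8_t *src1) {
    uint32_t len = 0;
    while (len < 256 && src0[len] == src1[len])
        len++;
    return len;
}

/* Plant a single mismatch at every offset and require the word-at-a-time
 * routine to agree with the byte loop. */
static void check_compare256(uint32_t (*cmp)(const uint8_t *, const uint8_t *)) {
    uint8_t a[256], b[256];
    for (unsigned pos = 0; pos < 256; pos++) {
        memset(a, 0x55, sizeof(a));
        memcpy(b, a, sizeof(b));
        b[pos] ^= 0xFF;                     /* single differing byte */
        assert(cmp(a, b) == pos);
        assert(compare256_byte_loop(a, b) == pos);
    }
}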
index eaba70c31d131e7a2188e69b7ee21bdb3ee6a5d5..3569f1f291c60bb28da8e21d562a6fb03492928b 100644 (file)
--- a/arch/generic/generic_functions.h
+++ b/arch/generic/generic_functions.h
@@ -28,7 +28,7 @@ void     inflate_fast_c(PREFIX3(stream) *strm, uint32_t start);
 uint32_t PREFIX(crc32_braid)(uint32_t crc, const uint8_t *buf, size_t len);
 
 uint32_t compare256_c(const uint8_t *src0, const uint8_t *src1);
-#if BYTE_ORDER == LITTLE_ENDIAN && OPTIMAL_CMP >= 32
+#if OPTIMAL_CMP >= 32
     uint32_t compare256_unaligned_16(const uint8_t *src0, const uint8_t *src1);
 #  ifdef HAVE_BUILTIN_CTZ
         uint32_t compare256_unaligned_32(const uint8_t *src0, const uint8_t *src1);
@@ -44,7 +44,7 @@ void     slide_hash_c(deflate_state *s);
 
 uint32_t longest_match_c(deflate_state *const s, Pos cur_match);
 uint32_t longest_match_slow_c(deflate_state *const s, Pos cur_match);
-#if BYTE_ORDER == LITTLE_ENDIAN && OPTIMAL_CMP >= 32
+#if OPTIMAL_CMP >= 32
     uint32_t longest_match_unaligned_16(deflate_state *const s, Pos cur_match);
     uint32_t longest_match_slow_unaligned_16(deflate_state *const s, Pos cur_match);
 #  ifdef HAVE_BUILTIN_CTZ
@@ -59,7 +59,7 @@ uint32_t longest_match_slow_c(deflate_state *const s, Pos cur_match);
 
 
 // Select generic implementation for longest_match, longest_match_slow and compare256 functions.
-#if BYTE_ORDER == LITTLE_ENDIAN && OPTIMAL_CMP >= 32
+#if OPTIMAL_CMP >= 32
 #  if defined(HAVE_BUILTIN_CTZLL) && OPTIMAL_CMP >= 64
 #    define longest_match_generic longest_match_unaligned_64
 #    define longest_match_slow_generic longest_match_slow_unaligned_64
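
When HAVE_BUILTIN_CTZ/HAVE_BUILTIN_CTZLL are not defined, the selection
above still falls back to the plain C routines. Purely as an illustration
of why the intrinsics are gated, a portable byte-scan equivalent of
__builtin_ctz(diff) / 8 for a little-endian-loaded word could look like
the following (hypothetical helper, not from zlib-ng):

/* Assumes diff != 0 and that the word was loaded little endian, so the
 * lowest-addressed byte occupies the least significant bits. */
static inline unsigned first_diff_byte_portable(uint32_t diff) {
    unsigned idx = 0;
    while ((diff & 0xFF) == 0) {
        diff >>= 8;
        idx++;
    }
    return idx;
}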
index 4d3fc5f2e4ec00e40c9ce3543143ddf4ef64f0cf..f5c800ad8cae3d578f0f55efa80ec4ddec38fef4 100644 (file)
--- a/zbuild.h
+++ b/zbuild.h
 #    define OPTIMAL_CMP 32
 #  endif
 #elif defined(__powerpc64__) || defined(__ppc64__)
-#  if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
-#    define OPTIMAL_CMP 64
-#  endif
+#    define OPTIMAL_CMP 64 
 #endif
 #if defined(NO_UNALIGNED)
 #  undef OPTIMAL_CMP
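
With the little-endian guard gone, big-endian ppc64 builds now also get
OPTIMAL_CMP of 64 (unless NO_UNALIGNED is set). A throwaway probe like the
one below, which is only a hypothetical illustration and not part of the
tree, can confirm which byte order and compare width a given toolchain
would pick:

#include <stdio.h>

int main(void) {
#if defined(__powerpc64__) || defined(__ppc64__)
    const int optimal_cmp = 64;        /* now endian-independent */
#else
    const int optimal_cmp = 32;        /* placeholder for other targets */
#endif
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
    const char *order = "little endian -> count trailing zeros";
#else
    const char *order = "big endian -> count leading zeros";
#endif
    printf("OPTIMAL_CMP=%d, %s\n", optimal_cmp, order);
    return 0;
}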