From: Nathan Moinvaziri Date: Sat, 6 Dec 2025 15:55:07 +0000 (-0800) Subject: Use __builtin_bitreverse16 in bi_reverse if available. X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=46ef5b645c27441323ed33ab07653a96d3ada0d1;p=thirdparty%2Fzlib-ng.git Use __builtin_bitreverse16 in bi_reverse if available. --- diff --git a/CMakeLists.txt b/CMakeLists.txt index 5bb690607..39b0efe75 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -643,6 +643,24 @@ if(HAVE_BUILTIN_CTZLL) endif() set(CMAKE_REQUIRED_FLAGS) +# +# check for __builtin_bitreverse16() support in the compiler +# +set(CMAKE_REQUIRED_FLAGS ${ZNOLTOFLAG}) +check_c_source_compiles( + "int main(void) { + unsigned short val = 0x1234; + unsigned short test = __builtin_bitreverse16(val); + (void)test; + return 0; + }" + HAVE_BUILTIN_BITREVERSE16 +) +if(HAVE_BUILTIN_BITREVERSE16) + add_definitions(-DHAVE_BUILTIN_BITREVERSE16) +endif() +set(CMAKE_REQUIRED_FLAGS) + # # check for ptrdiff_t support # diff --git a/configure b/configure index 1aef2c367..5b95e37d6 100755 --- a/configure +++ b/configure @@ -1143,6 +1143,19 @@ else echo "Checking for __builtin_ctzll ... No." | tee -a configure.log fi +# Check for __builtin_bitreverse16() support in compiler +cat > $test.c << EOF +unsigned short f(unsigned short x) { return __builtin_bitreverse16(x); } +int main(void) { return 0; } +EOF +if try ${CC} ${CFLAGS} $test.c $LDSHAREDLIBC; then + echo "Checking for __builtin_bitreverse16 ... Yes." | tee -a configure.log + CFLAGS="$CFLAGS -DHAVE_BUILTIN_BITREVERSE16" + SFLAGS="$SFLAGS -DHAVE_BUILTIN_BITREVERSE16" +else + echo "Checking for __builtin_bitreverse16 ... No." | tee -a configure.log +fi + check_avx2_intrinsics() { # Check whether compiler supports AVX2 intrinsics cat > $test.c << EOF diff --git a/deflate_p.h b/deflate_p.h index ecaad5c55..991c897a5 100644 --- a/deflate_p.h +++ b/deflate_p.h @@ -10,6 +10,7 @@ #define DEFLATE_P_H #include "functable.h" +#include "fallback_builtins.h" /* Forward declare common non-inlined functions declared in deflate.c */ @@ -127,13 +128,11 @@ Z_FORCEINLINE static void flush_pending_inline(PREFIX3(stream) *strm) { /* =========================================================================== * Reverse the first len bits of a code using bit manipulation */ -static inline uint16_t bi_reverse(unsigned code, int len) { +Z_FORCEINLINE static uint16_t bi_reverse(unsigned code, int len) { /* code: the value to invert */ /* len: its bit length */ Assert(len >= 1 && len <= 15, "code length must be 1-15"); -#define bitrev8(b) \ - (uint8_t)((((uint8_t)(b) * 0x80200802ULL) & 0x0884422110ULL) * 0x0101010101ULL >> 32) - return (bitrev8(code >> 8) | (uint16_t)bitrev8(code) << 8) >> (16 - len); + return __builtin_bitreverse16((uint16_t)code) >> (16 - len); } /* =========================================================================== diff --git a/fallback_builtins.h b/fallback_builtins.h index 8303508fa..89734eda4 100644 --- a/fallback_builtins.h +++ b/fallback_builtins.h @@ -12,7 +12,7 @@ * If tzcnt instruction is not supported, the cpu will itself execute bsf instead. * Performance tzcnt/bsf is identical on Intel cpu, tzcnt is faster than bsf on AMD cpu. */ -static __forceinline int __builtin_ctz(unsigned int value) { +Z_FORCEINLINE static int __builtin_ctz(unsigned int value) { Assert(value != 0, "Invalid input value: 0"); # if defined(X86_FEATURES) && !(_MSC_VER < 1700) return (int)_tzcnt_u32(value); @@ -28,7 +28,7 @@ static __forceinline int __builtin_ctz(unsigned int value) { /* This is not a general purpose replacement for __builtin_ctzll. The function expects that value is != 0. * Because of that assumption trailing_zero is not initialized and the return value is not checked. */ -static __forceinline int __builtin_ctzll(unsigned long long value) { +Z_FORCEINLINE static int __builtin_ctzll(unsigned long long value) { Assert(value != 0, "Invalid input value: 0"); # if defined(X86_FEATURES) && !(_MSC_VER < 1700) return (int)_tzcnt_u64(value); @@ -44,4 +44,16 @@ static __forceinline int __builtin_ctzll(unsigned long long value) { #endif // Microsoft AMD64/IA64/x86/ARM/ARM64 test #endif // _MSC_VER & !clang +#ifndef HAVE_BUILTIN_BITREVERSE16 +/* Bit reversal for 8-bit values using multiplication method */ +#define bitrev8(value) \ + (uint8_t)((((uint8_t)(value) * 0x80200802ULL) & 0x0884422110ULL) * 0x0101010101ULL >> 32) + +/* General purpose bit reversal for 16-bit values */ +Z_FORCEINLINE static uint16_t __builtin_bitreverse16(uint16_t value) { + return ((bitrev8(value >> 8) | (uint16_t)bitrev8(value) << 8)); +} +#define HAVE_BUILTIN_BITREVERSE16 +#endif + #endif // include guard FALLBACK_BUILTINS_H