From: Elliot Gorokhovsky Date: Mon, 31 Jan 2022 19:59:51 +0000 (-0500) Subject: Pull out software fallbacks X-Git-Tag: v1.5.4^2~238^2~6 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=796182652d652d57fca9ece6c63715db2d0e0858;p=thirdparty%2Fzstd.git Pull out software fallbacks --- diff --git a/lib/common/bits.h b/lib/common/bits.h index 67db5a9e7..65fb456db 100644 --- a/lib/common/bits.h +++ b/lib/common/bits.h @@ -13,6 +13,19 @@ #include "mem.h" +MEM_STATIC unsigned ZSTD_highbit32_fallback(U32 val) { + static const U32 DeBruijnClz[32] = { 0, 9, 1, 10, 13, 21, 2, 29, + 11, 14, 16, 18, 22, 25, 3, 30, + 8, 12, 20, 28, 15, 17, 24, 7, + 19, 27, 23, 6, 26, 5, 4, 31 }; + val |= val >> 1; + val |= val >> 2; + val |= val >> 4; + val |= val >> 8; + val |= val >> 16; + return DeBruijnClz[(val * 0x07C4ACDDU) >> 27]; +} + MEM_STATIC unsigned ZSTD_highbit32(U32 val) /* compress, dictBuilder, decodeCorpus */ { assert(val != 0); @@ -35,18 +48,20 @@ MEM_STATIC unsigned ZSTD_highbit32(U32 val) /* compress, dictBuilder, decodeCo # elif defined(__ICCARM__) /* IAR Intrinsic */ return 31 - __CLZ(val); # else /* Software version */ - static const U32 DeBruijnClz[32] = { 0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30, 8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31 }; - U32 v = val; - v |= v >> 1; - v |= v >> 2; - v |= v >> 4; - v |= v >> 8; - v |= v >> 16; - return DeBruijnClz[(v * 0x07C4ACDDU) >> 27]; + return ZSTD_highbit32_fallback(val); # endif } } +MEM_STATIC unsigned ZSTD_countTrailingZeros32_fallback(U32 val) +{ + static const int DeBruijnBytePos[32] = { 0, 1, 28, 2, 29, 14, 24, 3, + 30, 22, 20, 15, 25, 17, 4, 8, + 31, 27, 13, 23, 21, 19, 16, 7, + 26, 12, 18, 6, 11, 5, 10, 9 }; + return DeBruijnBytePos[((U32)((val & -(S32)val) * 0x077CB531U)) >> 27]; +} + MEM_STATIC unsigned ZSTD_countTrailingZeros32(U32 val) { assert(val != 0); @@ -64,14 +79,23 @@ MEM_STATIC unsigned ZSTD_countTrailingZeros32(U32 val) # elif defined(__ICCARM__) /* IAR Intrinsic */ return __CTZ(val); # else - static const int DeBruijnBytePos[32] = { 0, 1, 28, 2, 29, 14, 24, 3, - 30, 22, 20, 15, 25, 17, 4, 8, - 31, 27, 13, 23, 21, 19, 16, 7, - 26, 12, 18, 6, 11, 5, 10, 9 }; - return DeBruijnBytePos[((U32)((val & -(S32)val) * 0x077CB531U)) >> 27]; + return ZSTD_countTrailingZeros32_fallback(val); # endif } +MEM_STATIC unsigned ZSTD_countTrailingZeros64_fallback(U64 val) +{ + static const int DeBruijnBytePos[64] = { 0, 1, 2, 7, 3, 13, 8, 19, + 4, 25, 14, 28, 9, 34, 20, 56, + 5, 17, 26, 54, 15, 41, 29, 43, + 10, 31, 38, 35, 21, 45, 49, 57, + 63, 6, 12, 18, 24, 27, 33, 55, + 16, 53, 40, 42, 30, 37, 44, 48, + 62, 11, 23, 32, 52, 39, 36, 47, + 61, 22, 51, 46, 60, 50, 59, 58 }; + return DeBruijnBytePos[((U64)((val & -(long long)val) * 0x0218A392CDABBD3FULL)) >> 58]; +} + MEM_STATIC unsigned ZSTD_countTrailingZeros64(U64 val) { assert(val != 0); @@ -101,18 +125,46 @@ MEM_STATIC unsigned ZSTD_countTrailingZeros64(U64 val) return (unsigned)__builtin_ctzll(val); } # else - static const int DeBruijnBytePos[64] = { 0, 1, 2, 7, 3, 13, 8, 19, - 4, 25, 14, 28, 9, 34, 20, 56, - 5, 17, 26, 54, 15, 41, 29, 43, - 10, 31, 38, 35, 21, 45, 49, 57, - 63, 6, 12, 18, 24, 27, 33, 55, - 16, 53, 40, 42, 30, 37, 44, 48, - 62, 11, 23, 32, 52, 39, 36, 47, - 61, 22, 51, 46, 60, 50, 59, 58 }; - return DeBruijnBytePos[((U64)((val & -(long long)val) * 0x0218A392CDABBD3FULL)) >> 58]; + return ZSTD_countTrailingZeros64_fallback(val); # endif } +MEM_STATIC unsigned ZSTD_NbCommonBytes_fallback(size_t val) +{ + if (MEM_isLittleEndian()) { + if (MEM_64bits()) { + static const int DeBruijnBytePos[64] = { 0, 0, 0, 0, 0, 1, 1, 2, + 0, 3, 1, 3, 1, 4, 2, 7, + 0, 2, 3, 6, 1, 5, 3, 5, + 1, 3, 4, 4, 2, 5, 6, 7, + 7, 0, 1, 2, 3, 3, 4, 6, + 2, 6, 5, 5, 3, 4, 5, 6, + 7, 1, 2, 4, 6, 4, 4, 5, + 7, 2, 6, 5, 7, 6, 7, 7 }; + return DeBruijnBytePos[((U64)((val & -(long long)val) * 0x0218A392CDABBD3FULL)) >> 58]; + } else { /* 32 bits */ + static const int DeBruijnBytePos[32] = { 0, 0, 3, 0, 3, 1, 3, 0, + 3, 2, 2, 1, 3, 2, 0, 1, + 3, 3, 1, 2, 2, 2, 2, 0, + 3, 1, 2, 0, 1, 0, 1, 1 }; + return DeBruijnBytePos[((U32)((val & -(S32)val) * 0x077CB531U)) >> 27]; + } + } else { /* Big Endian CPU */ + unsigned r; + if (MEM_64bits()) { + const unsigned n32 = sizeof(size_t)*4; /* calculate this way due to compiler complaining in 32-bits mode */ + if (!(val>>n32)) { r=4; } else { r=0; val>>=n32; } + if (!(val>>16)) { r+=2; val>>=8; } else { val>>=24; } + r += (!val); + return r; + } else { /* 32 bits */ + if (!(val>>16)) { r=2; val>>=8; } else { r=0; val>>=24; } + r += (!val); + return r; + } + } +} + MEM_STATIC unsigned ZSTD_NbCommonBytes(size_t val) { if (MEM_isLittleEndian()) { @@ -133,15 +185,7 @@ MEM_STATIC unsigned ZSTD_NbCommonBytes(size_t val) # elif defined(__GNUC__) && (__GNUC__ >= 4) return (unsigned)(__builtin_ctzll((U64)val) >> 3); # else - static const int DeBruijnBytePos[64] = { 0, 0, 0, 0, 0, 1, 1, 2, - 0, 3, 1, 3, 1, 4, 2, 7, - 0, 2, 3, 6, 1, 5, 3, 5, - 1, 3, 4, 4, 2, 5, 6, 7, - 7, 0, 1, 2, 3, 3, 4, 6, - 2, 6, 5, 5, 3, 4, 5, 6, - 7, 1, 2, 4, 6, 4, 4, 5, - 7, 2, 6, 5, 7, 6, 7, 7 }; - return DeBruijnBytePos[((U64)((val & -(long long)val) * 0x0218A392CDABBD3FULL)) >> 58]; + return ZSTD_NbCommonBytes_fallback(val); # endif } else { /* 32 bits */ # if defined(_MSC_VER) @@ -156,11 +200,7 @@ MEM_STATIC unsigned ZSTD_NbCommonBytes(size_t val) # elif defined(__GNUC__) && (__GNUC__ >= 3) return (unsigned)(__builtin_ctz((U32)val) >> 3); # else - static const int DeBruijnBytePos[32] = { 0, 0, 3, 0, 3, 1, 3, 0, - 3, 2, 2, 1, 3, 2, 0, 1, - 3, 3, 1, 2, 2, 2, 2, 0, - 3, 1, 2, 0, 1, 0, 1, 1 }; - return DeBruijnBytePos[((U32)((val & -(S32)val) * 0x077CB531U)) >> 27]; + return ZSTD_NbCommonBytes_fallback(val); # endif } } else { /* Big Endian CPU */ @@ -181,12 +221,7 @@ MEM_STATIC unsigned ZSTD_NbCommonBytes(size_t val) # elif defined(__GNUC__) && (__GNUC__ >= 4) return (unsigned)(__builtin_clzll(val) >> 3); # else - unsigned r; - const unsigned n32 = sizeof(size_t)*4; /* calculate this way due to compiler complaining in 32-bits mode */ - if (!(val>>n32)) { r=4; } else { r=0; val>>=n32; } - if (!(val>>16)) { r+=2; val>>=8; } else { val>>=24; } - r += (!val); - return r; + return ZSTD_NbCommonBytes_fallback(val); # endif } else { /* 32 bits */ # if defined(_MSC_VER) @@ -201,10 +236,7 @@ MEM_STATIC unsigned ZSTD_NbCommonBytes(size_t val) # elif defined(__GNUC__) && (__GNUC__ >= 3) return (unsigned)(__builtin_clz((U32)val) >> 3); # else - unsigned r; - if (!(val>>16)) { r=2; val>>=8; } else { r=0; val>>=24; } - r += (!val); - return r; + return ZSTD_NbCommonBytes_fallback(val); # endif } } } diff --git a/lib/common/mem.h b/lib/common/mem.h index 85581c384..4b10f7c1f 100644 --- a/lib/common/mem.h +++ b/lib/common/mem.h @@ -257,6 +257,14 @@ MEM_STATIC void MEM_write64(void* memPtr, U64 value) #endif /* MEM_FORCE_MEMORY_ACCESS */ +MEM_STATIC U32 MEM_swap32_fallback(U32 in) +{ + return ((in << 24) & 0xff000000 ) | + ((in << 8) & 0x00ff0000 ) | + ((in >> 8) & 0x0000ff00 ) | + ((in >> 24) & 0x000000ff ); +} + MEM_STATIC U32 MEM_swap32(U32 in) { #if defined(_MSC_VER) /* Visual Studio */ @@ -265,22 +273,13 @@ MEM_STATIC U32 MEM_swap32(U32 in) || (defined(__clang__) && __has_builtin(__builtin_bswap32)) return __builtin_bswap32(in); #else - return ((in << 24) & 0xff000000 ) | - ((in << 8) & 0x00ff0000 ) | - ((in >> 8) & 0x0000ff00 ) | - ((in >> 24) & 0x000000ff ); + return MEM_swap32_fallback(in); #endif } -MEM_STATIC U64 MEM_swap64(U64 in) +MEM_STATIC U64 MEM_swap64_fallback(U64 in) { -#if defined(_MSC_VER) /* Visual Studio */ - return _byteswap_uint64(in); -#elif (defined (__GNUC__) && (__GNUC__ * 100 + __GNUC_MINOR__ >= 403)) \ - || (defined(__clang__) && __has_builtin(__builtin_bswap64)) - return __builtin_bswap64(in); -#else - return ((in << 56) & 0xff00000000000000ULL) | + return ((in << 56) & 0xff00000000000000ULL) | ((in << 40) & 0x00ff000000000000ULL) | ((in << 24) & 0x0000ff0000000000ULL) | ((in << 8) & 0x000000ff00000000ULL) | @@ -288,6 +287,17 @@ MEM_STATIC U64 MEM_swap64(U64 in) ((in >> 24) & 0x0000000000ff0000ULL) | ((in >> 40) & 0x000000000000ff00ULL) | ((in >> 56) & 0x00000000000000ffULL); +} + +MEM_STATIC U64 MEM_swap64(U64 in) +{ +#if defined(_MSC_VER) /* Visual Studio */ + return _byteswap_uint64(in); +#elif (defined (__GNUC__) && (__GNUC__ * 100 + __GNUC_MINOR__ >= 403)) \ + || (defined(__clang__) && __has_builtin(__builtin_bswap64)) + return __builtin_bswap64(in); +#else + return MEM_swap64_fallback(in); #endif }