From: Elliot Gorokhovsky Date: Fri, 21 Jan 2022 18:29:14 +0000 (-0700) Subject: Move bitwise builtins into bits.h X-Git-Tag: v1.5.4^2~238^2~7 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=db2f4a6532532b9d532d7db34212fea3b1fc02f9;p=thirdparty%2Fzstd.git Move bitwise builtins into bits.h --- diff --git a/lib/common/bits.h b/lib/common/bits.h new file mode 100644 index 000000000..67db5a9e7 --- /dev/null +++ b/lib/common/bits.h @@ -0,0 +1,212 @@ +/* + * Copyright (c) Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +#ifndef ZSTD_BITS_H +#define ZSTD_BITS_H + +#include "mem.h" + +MEM_STATIC unsigned ZSTD_highbit32(U32 val) /* compress, dictBuilder, decodeCorpus */ +{ + assert(val != 0); + { +# if defined(_MSC_VER) /* Visual */ +# if STATIC_BMI2 == 1 + return _lzcnt_u32(val)^31; +# else + if (val != 0) { + unsigned long r; + _BitScanReverse(&r, val); + return (unsigned)r; + } else { + /* Should not reach this code path */ + __assume(0); + } +# endif +# elif defined(__GNUC__) && (__GNUC__ >= 3) /* GCC Intrinsic */ + return (unsigned)__builtin_clz (val) ^ 31; +# elif defined(__ICCARM__) /* IAR Intrinsic */ + return 31 - __CLZ(val); +# else /* Software version */ + static const U32 DeBruijnClz[32] = { 0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30, 8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31 }; + U32 v = val; + v |= v >> 1; + v |= v >> 2; + v |= v >> 4; + v |= v >> 8; + v |= v >> 16; + return DeBruijnClz[(v * 0x07C4ACDDU) >> 27]; +# endif + } +} + +MEM_STATIC unsigned ZSTD_countTrailingZeros32(U32 val) +{ + assert(val != 0); +# if defined(_MSC_VER) + if (val != 0) { + unsigned long r; + _BitScanForward(&r, val); + return (unsigned)r; + } else { + /* Should not reach this code path */ + __assume(0); + } +# elif defined(__GNUC__) && (__GNUC__ >= 3) + return (unsigned)__builtin_ctz(val); +# elif defined(__ICCARM__) /* IAR Intrinsic */ + return __CTZ(val); +# else + static const int DeBruijnBytePos[32] = { 0, 1, 28, 2, 29, 14, 24, 3, + 30, 22, 20, 15, 25, 17, 4, 8, + 31, 27, 13, 23, 21, 19, 16, 7, + 26, 12, 18, 6, 11, 5, 10, 9 }; + return DeBruijnBytePos[((U32)((val & -(S32)val) * 0x077CB531U)) >> 27]; +# endif +} + +MEM_STATIC unsigned ZSTD_countTrailingZeros64(U64 val) +{ + assert(val != 0); +# if defined(_MSC_VER) && defined(_WIN64) +# if STATIC_BMI2 + return _tzcnt_u64(val); +# else + if (val != 0) { + unsigned long r; + _BitScanForward64(&r, val); + return (unsigned)r; + } else { + /* Should not reach this code path */ + __assume(0); + } +# endif +# elif defined(__GNUC__) && (__GNUC__ >= 4) + if (MEM_32bits()) { + U32 mostSignificantWord = (U32)(val >> 32); + U32 leastSignificantWord = (U32)val; + if (leastSignificantWord == 0) { + return 32 + (unsigned)__builtin_ctz(mostSignificantWord); + } else { + return (unsigned)__builtin_ctz(leastSignificantWord); + } + } else { + return (unsigned)__builtin_ctzll(val); + } +# else + static const int DeBruijnBytePos[64] = { 0, 1, 2, 7, 3, 13, 8, 19, + 4, 25, 14, 28, 9, 34, 20, 56, + 5, 17, 26, 54, 15, 41, 29, 43, + 10, 31, 38, 35, 21, 45, 49, 57, + 63, 6, 12, 18, 24, 27, 33, 55, + 16, 53, 40, 42, 30, 37, 44, 48, + 62, 11, 23, 32, 52, 39, 36, 47, + 61, 22, 51, 46, 60, 50, 59, 58 }; + return DeBruijnBytePos[((U64)((val & -(long long)val) * 0x0218A392CDABBD3FULL)) >> 58]; +# endif +} + +MEM_STATIC unsigned ZSTD_NbCommonBytes(size_t val) +{ + if (MEM_isLittleEndian()) { + if (MEM_64bits()) { +# if defined(_MSC_VER) && defined(_WIN64) +# if STATIC_BMI2 + return _tzcnt_u64(val) >> 3; +# else + if (val != 0) { + unsigned long r; + _BitScanForward64(&r, (U64)val); + return (unsigned)(r >> 3); + } else { + /* Should not reach this code path */ + __assume(0); + } +# endif +# elif defined(__GNUC__) && (__GNUC__ >= 4) + return (unsigned)(__builtin_ctzll((U64)val) >> 3); +# else + static const int DeBruijnBytePos[64] = { 0, 0, 0, 0, 0, 1, 1, 2, + 0, 3, 1, 3, 1, 4, 2, 7, + 0, 2, 3, 6, 1, 5, 3, 5, + 1, 3, 4, 4, 2, 5, 6, 7, + 7, 0, 1, 2, 3, 3, 4, 6, + 2, 6, 5, 5, 3, 4, 5, 6, + 7, 1, 2, 4, 6, 4, 4, 5, + 7, 2, 6, 5, 7, 6, 7, 7 }; + return DeBruijnBytePos[((U64)((val & -(long long)val) * 0x0218A392CDABBD3FULL)) >> 58]; +# endif + } else { /* 32 bits */ +# if defined(_MSC_VER) + if (val != 0) { + unsigned long r; + _BitScanForward(&r, (U32)val); + return (unsigned)(r >> 3); + } else { + /* Should not reach this code path */ + __assume(0); + } +# elif defined(__GNUC__) && (__GNUC__ >= 3) + return (unsigned)(__builtin_ctz((U32)val) >> 3); +# else + static const int DeBruijnBytePos[32] = { 0, 0, 3, 0, 3, 1, 3, 0, + 3, 2, 2, 1, 3, 2, 0, 1, + 3, 3, 1, 2, 2, 2, 2, 0, + 3, 1, 2, 0, 1, 0, 1, 1 }; + return DeBruijnBytePos[((U32)((val & -(S32)val) * 0x077CB531U)) >> 27]; +# endif + } + } else { /* Big Endian CPU */ + if (MEM_64bits()) { +# if defined(_MSC_VER) && defined(_WIN64) +# if STATIC_BMI2 + return _lzcnt_u64(val) >> 3; +# else + if (val != 0) { + unsigned long r; + _BitScanReverse64(&r, (U64)val); + return (unsigned)(r >> 3); + } else { + /* Should not reach this code path */ + __assume(0); + } +# endif +# elif defined(__GNUC__) && (__GNUC__ >= 4) + return (unsigned)(__builtin_clzll(val) >> 3); +# else + unsigned r; + const unsigned n32 = sizeof(size_t)*4; /* calculate this way due to compiler complaining in 32-bits mode */ + if (!(val>>n32)) { r=4; } else { r=0; val>>=n32; } + if (!(val>>16)) { r+=2; val>>=8; } else { val>>=24; } + r += (!val); + return r; +# endif + } else { /* 32 bits */ +# if defined(_MSC_VER) + if (val != 0) { + unsigned long r; + _BitScanReverse(&r, (unsigned long)val); + return (unsigned)(r >> 3); + } else { + /* Should not reach this code path */ + __assume(0); + } +# elif defined(__GNUC__) && (__GNUC__ >= 3) + return (unsigned)(__builtin_clz((U32)val) >> 3); +# else + unsigned r; + if (!(val>>16)) { r=2; val>>=8; } else { r=0; val>>=24; } + r += (!val); + return r; +# endif + } } +} + +#endif /* ZSTD_BITS_H */ diff --git a/lib/common/bitstream.h b/lib/common/bitstream.h index 84b6062ff..103b37832 100644 --- a/lib/common/bitstream.h +++ b/lib/common/bitstream.h @@ -30,6 +30,7 @@ extern "C" { #include "compiler.h" /* UNLIKELY() */ #include "debug.h" /* assert(), DEBUGLOG(), RAWLOG() */ #include "error_private.h" /* error codes and messages */ +#include "bits.h" /* ZSTD_highbit32 */ /*========================================= @@ -132,48 +133,6 @@ MEM_STATIC void BIT_flushBitsFast(BIT_CStream_t* bitC); MEM_STATIC size_t BIT_readBitsFast(BIT_DStream_t* bitD, unsigned nbBits); /* faster, but works only if nbBits >= 1 */ - - -/*-************************************************************** -* Internal functions -****************************************************************/ -MEM_STATIC unsigned BIT_highbit32 (U32 val) -{ - assert(val != 0); - { -# if defined(_MSC_VER) /* Visual */ -# if STATIC_BMI2 == 1 - return _lzcnt_u32(val) ^ 31; -# else - if (val != 0) { - unsigned long r; - _BitScanReverse(&r, val); - return (unsigned)r; - } else { - /* Should not reach this code path */ - __assume(0); - } -# endif -# elif defined(__GNUC__) && (__GNUC__ >= 3) /* Use GCC Intrinsic */ - return __builtin_clz (val) ^ 31; -# elif defined(__ICCARM__) /* IAR Intrinsic */ - return 31 - __CLZ(val); -# else /* Software version */ - static const unsigned DeBruijnClz[32] = { 0, 9, 1, 10, 13, 21, 2, 29, - 11, 14, 16, 18, 22, 25, 3, 30, - 8, 12, 20, 28, 15, 17, 24, 7, - 19, 27, 23, 6, 26, 5, 4, 31 }; - U32 v = val; - v |= v >> 1; - v |= v >> 2; - v |= v >> 4; - v |= v >> 8; - v |= v >> 16; - return DeBruijnClz[ (U32) (v * 0x07C4ACDDU) >> 27]; -# endif - } -} - /*===== Local Constants =====*/ static const unsigned BIT_mask[] = { 0, 1, 3, 7, 0xF, 0x1F, @@ -291,7 +250,7 @@ MEM_STATIC size_t BIT_initDStream(BIT_DStream_t* bitD, const void* srcBuffer, si bitD->ptr = (const char*)srcBuffer + srcSize - sizeof(bitD->bitContainer); bitD->bitContainer = MEM_readLEST(bitD->ptr); { BYTE const lastByte = ((const BYTE*)srcBuffer)[srcSize-1]; - bitD->bitsConsumed = lastByte ? 8 - BIT_highbit32(lastByte) : 0; /* ensures bitsConsumed is always set */ + bitD->bitsConsumed = lastByte ? 8 - ZSTD_highbit32(lastByte) : 0; /* ensures bitsConsumed is always set */ if (lastByte == 0) return ERROR(GENERIC); /* endMark not present */ } } else { bitD->ptr = bitD->start; @@ -319,7 +278,7 @@ MEM_STATIC size_t BIT_initDStream(BIT_DStream_t* bitD, const void* srcBuffer, si default: break; } { BYTE const lastByte = ((const BYTE*)srcBuffer)[srcSize-1]; - bitD->bitsConsumed = lastByte ? 8 - BIT_highbit32(lastByte) : 0; + bitD->bitsConsumed = lastByte ? 8 - ZSTD_highbit32(lastByte) : 0; if (lastByte == 0) return ERROR(corruption_detected); /* endMark not present */ } bitD->bitsConsumed += (U32)(sizeof(bitD->bitContainer) - srcSize)*8; diff --git a/lib/common/entropy_common.c b/lib/common/entropy_common.c index 4229b40c5..98bd4238d 100644 --- a/lib/common/entropy_common.c +++ b/lib/common/entropy_common.c @@ -21,6 +21,7 @@ #include "fse.h" #define HUF_STATIC_LINKING_ONLY /* HUF_TABLELOG_ABSOLUTEMAX */ #include "huf.h" +#include "bits.h" /* ZSDT_highbit32, ZSTD_countTrailingZeros32 */ /*=== Version ===*/ @@ -38,34 +39,6 @@ const char* HUF_getErrorName(size_t code) { return ERR_getErrorName(code); } /*-************************************************************** * FSE NCount encoding-decoding ****************************************************************/ -static U32 FSE_ctz(U32 val) -{ - assert(val != 0); - { -# if defined(_MSC_VER) /* Visual */ - if (val != 0) { - unsigned long r; - _BitScanForward(&r, val); - return (unsigned)r; - } else { - /* Should not reach this code path */ - __assume(0); - } -# elif defined(__GNUC__) && (__GNUC__ >= 3) /* GCC Intrinsic */ - return __builtin_ctz(val); -# elif defined(__ICCARM__) /* IAR Intrinsic */ - return __CTZ(val); -# else /* Software version */ - U32 count = 0; - while ((val & 1) == 0) { - val >>= 1; - ++count; - } - return count; -# endif - } -} - FORCE_INLINE_TEMPLATE size_t FSE_readNCount_body(short* normalizedCounter, unsigned* maxSVPtr, unsigned* tableLogPtr, const void* headerBuffer, size_t hbSize) @@ -113,7 +86,7 @@ size_t FSE_readNCount_body(short* normalizedCounter, unsigned* maxSVPtr, unsigne * repeat. * Avoid UB by setting the high bit to 1. */ - int repeats = FSE_ctz(~bitStream | 0x80000000) >> 1; + int repeats = ZSTD_countTrailingZeros32(~bitStream | 0x80000000) >> 1; while (repeats >= 12) { charnum += 3 * 12; if (LIKELY(ip <= iend-7)) { @@ -124,7 +97,7 @@ size_t FSE_readNCount_body(short* normalizedCounter, unsigned* maxSVPtr, unsigne ip = iend - 4; } bitStream = MEM_readLE32(ip) >> bitCount; - repeats = FSE_ctz(~bitStream | 0x80000000) >> 1; + repeats = ZSTD_countTrailingZeros32(~bitStream | 0x80000000) >> 1; } charnum += 3 * repeats; bitStream >>= 2 * repeats; @@ -189,7 +162,7 @@ size_t FSE_readNCount_body(short* normalizedCounter, unsigned* maxSVPtr, unsigne * know that threshold > 1. */ if (remaining <= 1) break; - nbBits = BIT_highbit32(remaining) + 1; + nbBits = ZSTD_highbit32(remaining) + 1; threshold = 1 << (nbBits - 1); } if (charnum >= maxSV1) break; @@ -312,14 +285,14 @@ HUF_readStats_body(BYTE* huffWeight, size_t hwSize, U32* rankStats, if (weightTotal == 0) return ERROR(corruption_detected); /* get last non-null symbol weight (implied, total must be 2^n) */ - { U32 const tableLog = BIT_highbit32(weightTotal) + 1; + { U32 const tableLog = ZSTD_highbit32(weightTotal) + 1; if (tableLog > HUF_TABLELOG_MAX) return ERROR(corruption_detected); *tableLogPtr = tableLog; /* determine last weight */ { U32 const total = 1 << tableLog; U32 const rest = total - weightTotal; - U32 const verif = 1 << BIT_highbit32(rest); - U32 const lastWeight = BIT_highbit32(rest) + 1; + U32 const verif = 1 << ZSTD_highbit32(rest); + U32 const lastWeight = ZSTD_highbit32(rest) + 1; if (verif != rest) return ERROR(corruption_detected); /* last value must be a clean power of 2 */ huffWeight[oSize] = (BYTE)lastWeight; rankStats[lastWeight]++; diff --git a/lib/common/fse_decompress.c b/lib/common/fse_decompress.c index bc0c1be2f..184b35dd1 100644 --- a/lib/common/fse_decompress.c +++ b/lib/common/fse_decompress.c @@ -24,6 +24,7 @@ #include "error_private.h" #define ZSTD_DEPS_NEED_MALLOC #include "zstd_deps.h" +#include "bits.h" /* ZSTD_highbit32 */ /* ************************************************************** @@ -166,7 +167,7 @@ static size_t FSE_buildDTable_internal(FSE_DTable* dt, const short* normalizedCo for (u=0; u= 3) /* GCC Intrinsic */ - return (U32)__builtin_clz (val) ^ 31; -# elif defined(__ICCARM__) /* IAR Intrinsic */ - return 31 - __CLZ(val); -# else /* Software version */ - static const U32 DeBruijnClz[32] = { 0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30, 8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31 }; - U32 v = val; - v |= v >> 1; - v |= v >> 2; - v |= v >> 4; - v |= v >> 8; - v |= v >> 16; - return DeBruijnClz[(v * 0x07C4ACDDU) >> 27]; -# endif - } -} - -/** - * Counts the number of trailing zeros of a `size_t`. - * Most compilers should support CTZ as a builtin. A backup - * implementation is provided if the builtin isn't supported, but - * it may not be terribly efficient. - */ -MEM_STATIC unsigned ZSTD_countTrailingZeros(size_t val) -{ - if (MEM_64bits()) { -# if defined(_MSC_VER) && defined(_WIN64) -# if STATIC_BMI2 - return _tzcnt_u64(val); -# else - if (val != 0) { - unsigned long r; - _BitScanForward64(&r, (U64)val); - return (unsigned)r; - } else { - /* Should not reach this code path */ - __assume(0); - } -# endif -# elif defined(__GNUC__) && (__GNUC__ >= 4) - return (unsigned)__builtin_ctzll((U64)val); -# else - static const int DeBruijnBytePos[64] = { 0, 1, 2, 7, 3, 13, 8, 19, - 4, 25, 14, 28, 9, 34, 20, 56, - 5, 17, 26, 54, 15, 41, 29, 43, - 10, 31, 38, 35, 21, 45, 49, 57, - 63, 6, 12, 18, 24, 27, 33, 55, - 16, 53, 40, 42, 30, 37, 44, 48, - 62, 11, 23, 32, 52, 39, 36, 47, - 61, 22, 51, 46, 60, 50, 59, 58 }; - return DeBruijnBytePos[((U64)((val & -(long long)val) * 0x0218A392CDABBD3FULL)) >> 58]; -# endif - } else { /* 32 bits */ -# if defined(_MSC_VER) - if (val != 0) { - unsigned long r; - _BitScanForward(&r, (U32)val); - return (unsigned)r; - } else { - /* Should not reach this code path */ - __assume(0); - } -# elif defined(__GNUC__) && (__GNUC__ >= 3) - return (unsigned)__builtin_ctz((U32)val); -# else - static const int DeBruijnBytePos[32] = { 0, 1, 28, 2, 29, 14, 24, 3, - 30, 22, 20, 15, 25, 17, 4, 8, - 31, 27, 13, 23, 21, 19, 16, 7, - 26, 12, 18, 6, 11, 5, 10, 9 }; - return DeBruijnBytePos[((U32)((val & -(S32)val) * 0x077CB531U)) >> 27]; -# endif - } -} - - /* ZSTD_invalidateRepCodes() : * ensures next compression will not use repcodes from previous block. * Note : only works with regular variant; diff --git a/lib/compress/fse_compress.c b/lib/compress/fse_compress.c index 5547b4ac0..21be8c54f 100644 --- a/lib/compress/fse_compress.c +++ b/lib/compress/fse_compress.c @@ -26,6 +26,7 @@ #define ZSTD_DEPS_NEED_MALLOC #define ZSTD_DEPS_NEED_MATH64 #include "../common/zstd_deps.h" /* ZSTD_malloc, ZSTD_free, ZSTD_memcpy, ZSTD_memset */ +#include "../common/bits.h" /* ZSTD_highbit32 */ /* ************************************************************** @@ -191,7 +192,7 @@ size_t FSE_buildCTable_wksp(FSE_CTable* ct, break; default : assert(normalizedCounter[s] > 1); - { U32 const maxBitsOut = tableLog - BIT_highbit32 ((U32)normalizedCounter[s]-1); + { U32 const maxBitsOut = tableLog - ZSTD_highbit32 ((U32)normalizedCounter[s]-1); U32 const minStatePlus = (U32)normalizedCounter[s] << maxBitsOut; symbolTT[s].deltaNbBits = (maxBitsOut << 16) - minStatePlus; symbolTT[s].deltaFindState = (int)(total - (unsigned)normalizedCounter[s]); @@ -355,8 +356,8 @@ void FSE_freeCTable (FSE_CTable* ct) { ZSTD_free(ct); } /* provides the minimum logSize to safely represent a distribution */ static unsigned FSE_minTableLog(size_t srcSize, unsigned maxSymbolValue) { - U32 minBitsSrc = BIT_highbit32((U32)(srcSize)) + 1; - U32 minBitsSymbols = BIT_highbit32(maxSymbolValue) + 2; + U32 minBitsSrc = ZSTD_highbit32((U32)(srcSize)) + 1; + U32 minBitsSymbols = ZSTD_highbit32(maxSymbolValue) + 2; U32 minBits = minBitsSrc < minBitsSymbols ? minBitsSrc : minBitsSymbols; assert(srcSize > 1); /* Not supported, RLE should be used instead */ return minBits; @@ -364,7 +365,7 @@ static unsigned FSE_minTableLog(size_t srcSize, unsigned maxSymbolValue) unsigned FSE_optimalTableLog_internal(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue, unsigned minus) { - U32 maxBitsSrc = BIT_highbit32((U32)(srcSize - 1)) - minus; + U32 maxBitsSrc = ZSTD_highbit32((U32)(srcSize - 1)) - minus; U32 tableLog = maxTableLog; U32 minBits = FSE_minTableLog(srcSize, maxSymbolValue); assert(srcSize > 1); /* Not supported, RLE should be used instead */ diff --git a/lib/compress/huf_compress.c b/lib/compress/huf_compress.c index a8677733b..5d90c162e 100644 --- a/lib/compress/huf_compress.c +++ b/lib/compress/huf_compress.c @@ -32,6 +32,7 @@ #define HUF_STATIC_LINKING_ONLY #include "../common/huf.h" #include "../common/error_private.h" +#include "../common/bits.h" /* ZSTD_highbit32 */ /* ************************************************************** @@ -407,7 +408,7 @@ static U32 HUF_setMaxHeight(nodeElt* huffNode, U32 lastNonNull, U32 targetNbBits /* Try to reduce the next power of 2 above totalCost because we * gain back half the rank. */ - U32 nBitsToDecrease = BIT_highbit32((U32)totalCost) + 1; + U32 nBitsToDecrease = ZSTD_highbit32((U32)totalCost) + 1; for ( ; nBitsToDecrease > 1; nBitsToDecrease--) { U32 const highPos = rankLast[nBitsToDecrease]; U32 const lowPos = rankLast[nBitsToDecrease-1]; @@ -505,7 +506,7 @@ typedef struct { */ #define RANK_POSITION_MAX_COUNT_LOG 32 #define RANK_POSITION_LOG_BUCKETS_BEGIN (RANK_POSITION_TABLE_SIZE - 1) - RANK_POSITION_MAX_COUNT_LOG - 1 /* == 158 */ -#define RANK_POSITION_DISTINCT_COUNT_CUTOFF RANK_POSITION_LOG_BUCKETS_BEGIN + BIT_highbit32(RANK_POSITION_LOG_BUCKETS_BEGIN) /* == 166 */ +#define RANK_POSITION_DISTINCT_COUNT_CUTOFF RANK_POSITION_LOG_BUCKETS_BEGIN + ZSTD_highbit32(RANK_POSITION_LOG_BUCKETS_BEGIN) /* == 166 */ /* Return the appropriate bucket index for a given count. See definition of * RANK_POSITION_DISTINCT_COUNT_CUTOFF for explanation of bucketing strategy. @@ -513,7 +514,7 @@ typedef struct { static U32 HUF_getIndex(U32 const count) { return (count < RANK_POSITION_DISTINCT_COUNT_CUTOFF) ? count - : BIT_highbit32(count) + RANK_POSITION_LOG_BUCKETS_BEGIN; + : ZSTD_highbit32(count) + RANK_POSITION_LOG_BUCKETS_BEGIN; } /* Helper swap function for HUF_quickSortPartition() */ @@ -870,7 +871,7 @@ FORCE_INLINE_TEMPLATE void HUF_addBits(HUF_CStream_t* bitC, HUF_CElt elt, int id #if DEBUGLEVEL >= 1 { size_t const nbBits = HUF_getNbBits(elt); - size_t const dirtyBits = nbBits == 0 ? 0 : BIT_highbit32((U32)nbBits) + 1; + size_t const dirtyBits = nbBits == 0 ? 0 : ZSTD_highbit32((U32)nbBits) + 1; (void)dirtyBits; /* Middle bits are 0. */ assert(((elt >> dirtyBits) << (dirtyBits + nbBits)) == 0); diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index 37b6e2ff4..aa1a9f35b 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -27,6 +27,7 @@ #include "zstd_opt.h" #include "zstd_ldm.h" #include "zstd_compress_superblock.h" +#include "../common/bits.h" /* ZSTD_highbit32 */ /* *************************************************************** * Tuning parameters diff --git a/lib/compress/zstd_compress_internal.h b/lib/compress/zstd_compress_internal.h index f027f4612..bbae53303 100644 --- a/lib/compress/zstd_compress_internal.h +++ b/lib/compress/zstd_compress_internal.h @@ -23,6 +23,7 @@ #ifdef ZSTD_MULTITHREAD # include "zstdmt_compress.h" #endif +#include "../common/bits.h" /* ZSTD_highbit32, ZSTD_NbCommonBytes */ #if defined (__cplusplus) extern "C" { @@ -699,103 +700,6 @@ ZSTD_newRep(U32 const rep[ZSTD_REP_NUM], U32 const offBase, U32 const ll0) /*-************************************* * Match length counter ***************************************/ -static unsigned ZSTD_NbCommonBytes (size_t val) -{ - if (MEM_isLittleEndian()) { - if (MEM_64bits()) { -# if defined(_MSC_VER) && defined(_WIN64) -# if STATIC_BMI2 - return _tzcnt_u64(val) >> 3; -# else - if (val != 0) { - unsigned long r; - _BitScanForward64(&r, (U64)val); - return (unsigned)(r >> 3); - } else { - /* Should not reach this code path */ - __assume(0); - } -# endif -# elif defined(__GNUC__) && (__GNUC__ >= 4) - return (unsigned)(__builtin_ctzll((U64)val) >> 3); -# else - static const int DeBruijnBytePos[64] = { 0, 0, 0, 0, 0, 1, 1, 2, - 0, 3, 1, 3, 1, 4, 2, 7, - 0, 2, 3, 6, 1, 5, 3, 5, - 1, 3, 4, 4, 2, 5, 6, 7, - 7, 0, 1, 2, 3, 3, 4, 6, - 2, 6, 5, 5, 3, 4, 5, 6, - 7, 1, 2, 4, 6, 4, 4, 5, - 7, 2, 6, 5, 7, 6, 7, 7 }; - return DeBruijnBytePos[((U64)((val & -(long long)val) * 0x0218A392CDABBD3FULL)) >> 58]; -# endif - } else { /* 32 bits */ -# if defined(_MSC_VER) - if (val != 0) { - unsigned long r; - _BitScanForward(&r, (U32)val); - return (unsigned)(r >> 3); - } else { - /* Should not reach this code path */ - __assume(0); - } -# elif defined(__GNUC__) && (__GNUC__ >= 3) - return (unsigned)(__builtin_ctz((U32)val) >> 3); -# else - static const int DeBruijnBytePos[32] = { 0, 0, 3, 0, 3, 1, 3, 0, - 3, 2, 2, 1, 3, 2, 0, 1, - 3, 3, 1, 2, 2, 2, 2, 0, - 3, 1, 2, 0, 1, 0, 1, 1 }; - return DeBruijnBytePos[((U32)((val & -(S32)val) * 0x077CB531U)) >> 27]; -# endif - } - } else { /* Big Endian CPU */ - if (MEM_64bits()) { -# if defined(_MSC_VER) && defined(_WIN64) -# if STATIC_BMI2 - return _lzcnt_u64(val) >> 3; -# else - if (val != 0) { - unsigned long r; - _BitScanReverse64(&r, (U64)val); - return (unsigned)(r >> 3); - } else { - /* Should not reach this code path */ - __assume(0); - } -# endif -# elif defined(__GNUC__) && (__GNUC__ >= 4) - return (unsigned)(__builtin_clzll(val) >> 3); -# else - unsigned r; - const unsigned n32 = sizeof(size_t)*4; /* calculate this way due to compiler complaining in 32-bits mode */ - if (!(val>>n32)) { r=4; } else { r=0; val>>=n32; } - if (!(val>>16)) { r+=2; val>>=8; } else { val>>=24; } - r += (!val); - return r; -# endif - } else { /* 32 bits */ -# if defined(_MSC_VER) - if (val != 0) { - unsigned long r; - _BitScanReverse(&r, (unsigned long)val); - return (unsigned)(r >> 3); - } else { - /* Should not reach this code path */ - __assume(0); - } -# elif defined(__GNUC__) && (__GNUC__ >= 3) - return (unsigned)(__builtin_clz((U32)val) >> 3); -# else - unsigned r; - if (!(val>>16)) { r=2; val>>=8; } else { r=0; val>>=24; } - r += (!val); - return r; -# endif - } } -} - - MEM_STATIC size_t ZSTD_count(const BYTE* pIn, const BYTE* pMatch, const BYTE* const pInLimit) { const BYTE* const pStart = pIn; diff --git a/lib/compress/zstd_lazy.c b/lib/compress/zstd_lazy.c index 47c0687c9..c619aea3a 100644 --- a/lib/compress/zstd_lazy.c +++ b/lib/compress/zstd_lazy.c @@ -10,6 +10,7 @@ #include "zstd_compress_internal.h" #include "zstd_lazy.h" +#include "../common/bits.h" /* ZSTD_countTrailingZeros64 */ /*-************************************* @@ -765,44 +766,6 @@ size_t ZSTD_HcFindBestMatch( typedef U64 ZSTD_VecMask; /* Clarifies when we are interacting with a U64 representing a mask of matches */ -/* ZSTD_VecMask_next(): - * Starting from the LSB, returns the idx of the next non-zero bit. - * Basically counting the nb of trailing zeroes. - */ -static U32 ZSTD_VecMask_next(ZSTD_VecMask val) { - assert(val != 0); -# if defined(_MSC_VER) && defined(_WIN64) - if (val != 0) { - unsigned long r; - _BitScanForward64(&r, val); - return (U32)(r); - } else { - /* Should not reach this code path */ - __assume(0); - } -# elif (defined(__GNUC__) && ((__GNUC__ > 3) || ((__GNUC__ == 3) && (__GNUC_MINOR__ >= 4)))) - if (sizeof(size_t) == 4) { - U32 mostSignificantWord = (U32)(val >> 32); - U32 leastSignificantWord = (U32)val; - if (leastSignificantWord == 0) { - return 32 + (U32)__builtin_ctz(mostSignificantWord); - } else { - return (U32)__builtin_ctz(leastSignificantWord); - } - } else { - return (U32)__builtin_ctzll(val); - } -# else - /* Software ctz version: http://aggregate.org/MAGIC/#Trailing%20Zero%20Count - * and: https://stackoverflow.com/questions/2709430/count-number-of-bits-in-a-64-bit-long-big-integer - */ - val = ~val & (val - 1ULL); /* Lowest set bit mask */ - val = val - ((val >> 1) & 0x5555555555555555); - val = (val & 0x3333333333333333ULL) + ((val >> 2) & 0x3333333333333333ULL); - return (U32)((((val + (val >> 4)) & 0xF0F0F0F0F0F0F0FULL) * 0x101010101010101ULL) >> 56); -# endif -} - /* ZSTD_rotateRight_*(): * Rotates a bitfield to the right by "count" bits. * https://en.wikipedia.org/w/index.php?title=Circular_shift&oldid=991635599#Implementing_circular_shifts @@ -1202,7 +1165,7 @@ size_t ZSTD_RowFindBestMatch( /* Cycle through the matches and prefetch */ for (; (matches > 0) && (nbAttempts > 0); --nbAttempts, matches &= (matches - 1)) { - U32 const matchPos = (head + ZSTD_VecMask_next(matches)) & rowMask; + U32 const matchPos = (head + ZSTD_countTrailingZeros64(matches)) & rowMask; U32 const matchIndex = row[matchPos]; assert(numMatches < rowEntries); if (matchIndex < lowLimit) @@ -1270,7 +1233,7 @@ size_t ZSTD_RowFindBestMatch( ZSTD_VecMask matches = ZSTD_row_getMatchMask(dmsTagRow, (BYTE)dmsTag, head, rowEntries); for (; (matches > 0) && (nbAttempts > 0); --nbAttempts, matches &= (matches - 1)) { - U32 const matchPos = (head + ZSTD_VecMask_next(matches)) & rowMask; + U32 const matchPos = (head + ZSTD_countTrailingZeros64(matches)) & rowMask; U32 const matchIndex = dmsRow[matchPos]; if (matchIndex < dmsLowestIndex) break; diff --git a/lib/decompress/huf_decompress.c b/lib/decompress/huf_decompress.c index 202718825..4541ca98c 100644 --- a/lib/decompress/huf_decompress.c +++ b/lib/decompress/huf_decompress.c @@ -23,6 +23,7 @@ #include "../common/huf.h" #include "../common/error_private.h" #include "../common/zstd_internal.h" +#include "../common/bits.h" /* ZSTD_highbit32, ZSTD_countTrailingZeros64 */ /* ************************************************************** * Constants @@ -142,7 +143,7 @@ static DTableDesc HUF_getDTableDesc(const HUF_DTable* table) static size_t HUF_initDStream(BYTE const* ip) { BYTE const lastByte = ip[7]; - size_t const bitsConsumed = lastByte ? 8 - BIT_highbit32(lastByte) : 0; + size_t const bitsConsumed = lastByte ? 8 - ZSTD_highbit32(lastByte) : 0; size_t const value = MEM_readLEST(ip) | 1; assert(bitsConsumed <= 8); return value << bitsConsumed; @@ -263,7 +264,7 @@ static size_t HUF_initRemainingDStream(BIT_DStream_t* bit, HUF_DecompressAsmArgs /* Construct the BIT_DStream_t. */ bit->bitContainer = MEM_readLE64(args->ip[stream]); - bit->bitsConsumed = ZSTD_countTrailingZeros((size_t)args->bits[stream]); + bit->bitsConsumed = ZSTD_countTrailingZeros64(args->bits[stream]); bit->start = (const char*)args->iend[0]; bit->limitPtr = bit->start + sizeof(size_t); bit->ptr = (const char*)args->ip[stream]; diff --git a/lib/decompress/zstd_decompress.c b/lib/decompress/zstd_decompress.c index 44649dd37..8950453f4 100644 --- a/lib/decompress/zstd_decompress.c +++ b/lib/decompress/zstd_decompress.c @@ -66,6 +66,7 @@ #include "zstd_decompress_internal.h" /* ZSTD_DCtx */ #include "zstd_ddict.h" /* ZSTD_DDictDictContent */ #include "zstd_decompress_block.h" /* ZSTD_decompressBlock_internal */ +#include "../common/bits.h" /* ZSTD_highbit32 */ #if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT>=1) # include "../legacy/zstd_legacy.h" diff --git a/lib/decompress/zstd_decompress_block.c b/lib/decompress/zstd_decompress_block.c index 758c7ad99..6f8a75f53 100644 --- a/lib/decompress/zstd_decompress_block.c +++ b/lib/decompress/zstd_decompress_block.c @@ -26,6 +26,7 @@ #include "zstd_decompress_internal.h" /* ZSTD_DCtx */ #include "zstd_ddict.h" /* ZSTD_DDictDictContent */ #include "zstd_decompress_block.h" +#include "../common/bits.h" /* ZSTD_highbit32 */ /*_******************************************************* * Macros @@ -551,7 +552,7 @@ void ZSTD_buildFSETable_body(ZSTD_seqSymbol* dt, for (u=0; u> 3); - } else { - /* Should not reach this code path */ - __assume(0); - } -# elif defined(__GNUC__) && (__GNUC__ >= 3) - return (unsigned)(__builtin_ctzll((U64)val) >> 3); -# else - static const int DeBruijnBytePos[64] = { 0, 0, 0, 0, 0, 1, 1, 2, 0, 3, 1, 3, 1, 4, 2, 7, 0, 2, 3, 6, 1, 5, 3, 5, 1, 3, 4, 4, 2, 5, 6, 7, 7, 0, 1, 2, 3, 3, 4, 6, 2, 6, 5, 5, 3, 4, 5, 6, 7, 1, 2, 4, 6, 4, 4, 5, 7, 2, 6, 5, 7, 6, 7, 7 }; - return DeBruijnBytePos[((U64)((val & -(long long)val) * 0x0218A392CDABBD3FULL)) >> 58]; -# endif - } else { /* 32 bits */ -# if defined(_MSC_VER) - if (val != 0) { - unsigned long r; - _BitScanForward(&r, (U32)val); - return (unsigned)(r >> 3); - } else { - /* Should not reach this code path */ - __assume(0); - } -# elif defined(__GNUC__) && (__GNUC__ >= 3) - return (unsigned)(__builtin_ctz((U32)val) >> 3); -# else - static const int DeBruijnBytePos[32] = { 0, 0, 3, 0, 3, 1, 3, 0, 3, 2, 2, 1, 3, 2, 0, 1, 3, 3, 1, 2, 2, 2, 2, 0, 3, 1, 2, 0, 1, 0, 1, 1 }; - return DeBruijnBytePos[((U32)((val & -(S32)val) * 0x077CB531U)) >> 27]; -# endif - } - } else { /* Big Endian CPU */ - if (MEM_64bits()) { -# if defined(_MSC_VER) && defined(_WIN64) - if (val != 0) { - unsigned long r; - _BitScanReverse64(&r, val); - return (unsigned)(r >> 3); - } else { - /* Should not reach this code path */ - __assume(0); - } -# elif defined(__GNUC__) && (__GNUC__ >= 3) - return (unsigned)(__builtin_clzll(val) >> 3); -# else - unsigned r; - const unsigned n32 = sizeof(size_t)*4; /* calculate this way due to compiler complaining in 32-bits mode */ - if (!(val>>n32)) { r=4; } else { r=0; val>>=n32; } - if (!(val>>16)) { r+=2; val>>=8; } else { val>>=24; } - r += (!val); - return r; -# endif - } else { /* 32 bits */ -# if defined(_MSC_VER) - if (val != 0) { - unsigned long r; - _BitScanReverse(&r, (unsigned long)val); - return (unsigned)(r >> 3); - } else { - /* Should not reach this code path */ - __assume(0); - } -# elif defined(__GNUC__) && (__GNUC__ >= 3) - return (unsigned)(__builtin_clz((U32)val) >> 3); -# else - unsigned r; - if (!(val>>16)) { r=2; val>>=8; } else { r=0; val>>=24; } - r += (!val); - return r; -# endif - } } -} - - /*! ZDICT_count() : Count the nb of common bytes between 2 pointers. Note : this function presumes end of buffer followed by noisy guard band. @@ -223,7 +145,7 @@ static size_t ZDICT_count(const void* pIn, const void* pMatch) pMatch = (const char*)pMatch+sizeof(size_t); continue; } - pIn = (const char*)pIn+ZDICT_NbCommonBytes(diff); + pIn = (const char*)pIn+ZSTD_NbCommonBytes(diff); return (size_t)((const char*)pIn - pStart); } } diff --git a/tests/fuzz/huf_round_trip.c b/tests/fuzz/huf_round_trip.c index 0e26ca9b5..bda9a9842 100644 --- a/tests/fuzz/huf_round_trip.c +++ b/tests/fuzz/huf_round_trip.c @@ -24,11 +24,12 @@ #include "common/huf.h" #include "fuzz_helpers.h" #include "fuzz_data_producer.h" +#include "common/bits.h" static size_t adjustTableLog(size_t tableLog, size_t maxSymbol) { size_t const alphabetSize = maxSymbol + 1; - size_t minTableLog = BIT_highbit32(alphabetSize) + 1; + size_t minTableLog = ZSTD_highbit32(alphabetSize) + 1; if ((alphabetSize & (alphabetSize - 1)) != 0) { ++minTableLog; }