From 529cd7b82174cde8a6ad62a8d6ec666b530cb3ac Mon Sep 17 00:00:00 2001 From: Elliot Gorokhovsky Date: Mon, 7 Feb 2022 12:22:04 -0500 Subject: [PATCH] Fix nits --- lib/common/bits.h | 251 ++++++++++++-------------------- lib/compress/zstd_lazy.c | 12 +- lib/decompress/huf_decompress.c | 1 + 3 files changed, 102 insertions(+), 162 deletions(-) diff --git a/lib/common/bits.h b/lib/common/bits.h index 65fb456db..40d86a750 100644 --- a/lib/common/bits.h +++ b/lib/common/bits.h @@ -14,16 +14,19 @@ #include "mem.h" MEM_STATIC unsigned ZSTD_highbit32_fallback(U32 val) { - static const U32 DeBruijnClz[32] = { 0, 9, 1, 10, 13, 21, 2, 29, - 11, 14, 16, 18, 22, 25, 3, 30, - 8, 12, 20, 28, 15, 17, 24, 7, - 19, 27, 23, 6, 26, 5, 4, 31 }; - val |= val >> 1; - val |= val >> 2; - val |= val >> 4; - val |= val >> 8; - val |= val >> 16; - return DeBruijnClz[(val * 0x07C4ACDDU) >> 27]; + assert(val != 0); + { + static const U32 DeBruijnClz[32] = {0, 9, 1, 10, 13, 21, 2, 29, + 11, 14, 16, 18, 22, 25, 3, 30, + 8, 12, 20, 28, 15, 17, 24, 7, + 19, 27, 23, 6, 26, 5, 4, 31}; + val |= val >> 1; + val |= val >> 2; + val |= val >> 4; + val |= val >> 8; + val |= val >> 16; + return DeBruijnClz[(val * 0x07C4ACDDU) >> 27]; + } } MEM_STATIC unsigned ZSTD_highbit32(U32 val) /* compress, dictBuilder, decodeCorpus */ @@ -34,16 +37,11 @@ MEM_STATIC unsigned ZSTD_highbit32(U32 val) /* compress, dictBuilder, decodeCo # if STATIC_BMI2 == 1 return _lzcnt_u32(val)^31; # else - if (val != 0) { - unsigned long r; - _BitScanReverse(&r, val); - return (unsigned)r; - } else { - /* Should not reach this code path */ - __assume(0); - } + unsigned long r; + _BitScanReverse(&r, val); + return (unsigned)r; # endif -# elif defined(__GNUC__) && (__GNUC__ >= 3) /* GCC Intrinsic */ +# elif defined(__GNUC__) && (__GNUC__ >= 4) /* GCC Intrinsic */ return (unsigned)__builtin_clz (val) ^ 31; # elif defined(__ICCARM__) /* IAR Intrinsic */ return 31 - __CLZ(val); @@ -55,45 +53,54 @@ MEM_STATIC unsigned ZSTD_highbit32(U32 val) 
/* compress, dictBuilder, decodeCo MEM_STATIC unsigned ZSTD_countTrailingZeros32_fallback(U32 val) { - static const int DeBruijnBytePos[32] = { 0, 1, 28, 2, 29, 14, 24, 3, - 30, 22, 20, 15, 25, 17, 4, 8, - 31, 27, 13, 23, 21, 19, 16, 7, - 26, 12, 18, 6, 11, 5, 10, 9 }; - return DeBruijnBytePos[((U32)((val & -(S32)val) * 0x077CB531U)) >> 27]; + assert(val != 0); + { + static const int DeBruijnBytePos[32] = {0, 1, 28, 2, 29, 14, 24, 3, + 30, 22, 20, 15, 25, 17, 4, 8, + 31, 27, 13, 23, 21, 19, 16, 7, + 26, 12, 18, 6, 11, 5, 10, 9}; + return DeBruijnBytePos[((U32) ((val & -(S32) val) * 0x077CB531U)) >> 27]; + } } MEM_STATIC unsigned ZSTD_countTrailingZeros32(U32 val) { assert(val != 0); # if defined(_MSC_VER) - if (val != 0) { - unsigned long r; - _BitScanForward(&r, val); - return (unsigned)r; - } else { - /* Should not reach this code path */ - __assume(0); - } -# elif defined(__GNUC__) && (__GNUC__ >= 3) + unsigned long r; + _BitScanForward(&r, val); + return (unsigned)r; +# elif defined(__GNUC__) && (__GNUC__ >= 4) return (unsigned)__builtin_ctz(val); -# elif defined(__ICCARM__) /* IAR Intrinsic */ - return __CTZ(val); # else return ZSTD_countTrailingZeros32_fallback(val); # endif } -MEM_STATIC unsigned ZSTD_countTrailingZeros64_fallback(U64 val) +MEM_STATIC unsigned ZSTD_countLeadingZeros32_fallback(U32 val) { + assert(val != 0); + { + unsigned result = 0; + while ((val & 0x80000000) == 0) { + result++; + val <<= 1; + } + return result; + } +} + +MEM_STATIC unsigned ZSTD_countLeadingZeros32(U32 val) { - static const int DeBruijnBytePos[64] = { 0, 1, 2, 7, 3, 13, 8, 19, - 4, 25, 14, 28, 9, 34, 20, 56, - 5, 17, 26, 54, 15, 41, 29, 43, - 10, 31, 38, 35, 21, 45, 49, 57, - 63, 6, 12, 18, 24, 27, 33, 55, - 16, 53, 40, 42, 30, 37, 44, 48, - 62, 11, 23, 32, 52, 39, 36, 47, - 61, 22, 51, 46, 60, 50, 59, 58 }; - return DeBruijnBytePos[((U64)((val & -(long long)val) * 0x0218A392CDABBD3FULL)) >> 58]; + assert(val != 0); +# if defined(_MSC_VER) + unsigned long r; + 
_BitScanReverse(&r, val); + return (unsigned)(31 - r); +# elif defined(__GNUC__) && (__GNUC__ >= 4) + return (unsigned)__builtin_clz(val); +# else + return ZSTD_countLeadingZeros32_fallback(val); +# endif } MEM_STATIC unsigned ZSTD_countTrailingZeros64(U64 val) @@ -103,142 +110,66 @@ MEM_STATIC unsigned ZSTD_countTrailingZeros64(U64 val) # if STATIC_BMI2 return _tzcnt_u64(val); # else - if (val != 0) { - unsigned long r; - _BitScanForward64(&r, val); - return (unsigned)r; - } else { - /* Should not reach this code path */ - __assume(0); - } + unsigned long r; + _BitScanForward64(&r, val); + return (unsigned)r; # endif -# elif defined(__GNUC__) && (__GNUC__ >= 4) - if (MEM_32bits()) { +# elif defined(__GNUC__) && (__GNUC__ >= 4) && defined(__LP64__) + return (unsigned)__builtin_ctzll(val); +# else + { U32 mostSignificantWord = (U32)(val >> 32); U32 leastSignificantWord = (U32)val; if (leastSignificantWord == 0) { - return 32 + (unsigned)__builtin_ctz(mostSignificantWord); + return 32 + ZSTD_countTrailingZeros32(mostSignificantWord); } else { - return (unsigned)__builtin_ctz(leastSignificantWord); + return ZSTD_countTrailingZeros32(leastSignificantWord); } - } else { - return (unsigned)__builtin_ctzll(val); } -# else - return ZSTD_countTrailingZeros64_fallback(val); # endif } -MEM_STATIC unsigned ZSTD_NbCommonBytes_fallback(size_t val) +MEM_STATIC unsigned ZSTD_countLeadingZeros64(U64 val) { - if (MEM_isLittleEndian()) { - if (MEM_64bits()) { - static const int DeBruijnBytePos[64] = { 0, 0, 0, 0, 0, 1, 1, 2, - 0, 3, 1, 3, 1, 4, 2, 7, - 0, 2, 3, 6, 1, 5, 3, 5, - 1, 3, 4, 4, 2, 5, 6, 7, - 7, 0, 1, 2, 3, 3, 4, 6, - 2, 6, 5, 5, 3, 4, 5, 6, - 7, 1, 2, 4, 6, 4, 4, 5, - 7, 2, 6, 5, 7, 6, 7, 7 }; - return DeBruijnBytePos[((U64)((val & -(long long)val) * 0x0218A392CDABBD3FULL)) >> 58]; - } else { /* 32 bits */ - static const int DeBruijnBytePos[32] = { 0, 0, 3, 0, 3, 1, 3, 0, - 3, 2, 2, 1, 3, 2, 0, 1, - 3, 3, 1, 2, 2, 2, 2, 0, - 3, 1, 2, 0, 1, 0, 1, 1 }; - return 
DeBruijnBytePos[((U32)((val & -(S32)val) * 0x077CB531U)) >> 27]; - } - } else { /* Big Endian CPU */ - unsigned r; - if (MEM_64bits()) { - const unsigned n32 = sizeof(size_t)*4; /* calculate this way due to compiler complaining in 32-bits mode */ - if (!(val>>n32)) { r=4; } else { r=0; val>>=n32; } - if (!(val>>16)) { r+=2; val>>=8; } else { val>>=24; } - r += (!val); - return r; - } else { /* 32 bits */ - if (!(val>>16)) { r=2; val>>=8; } else { r=0; val>>=24; } - r += (!val); - return r; + assert(val != 0); +# if defined(_MSC_VER) && defined(_WIN64) +# if STATIC_BMI2 + return _lzcnt_u64(val); +# else + unsigned long r; + _BitScanReverse64(&r, val); + return (unsigned)(63 - r); +# endif +# elif defined(__GNUC__) && (__GNUC__ >= 4) + return (unsigned)(__builtin_clzll(val)); +# else + { + U32 mostSignificantWord = (U32)(val >> 32); + U32 leastSignificantWord = (U32)val; + if (mostSignificantWord == 0) { + return 32 + ZSTD_countLeadingZeros32(leastSignificantWord); + } else { + return ZSTD_countLeadingZeros32(mostSignificantWord); + } } - } +# endif } MEM_STATIC unsigned ZSTD_NbCommonBytes(size_t val) { if (MEM_isLittleEndian()) { if (MEM_64bits()) { -# if defined(_MSC_VER) && defined(_WIN64) -# if STATIC_BMI2 - return _tzcnt_u64(val) >> 3; -# else - if (val != 0) { - unsigned long r; - _BitScanForward64(&r, (U64)val); - return (unsigned)(r >> 3); - } else { - /* Should not reach this code path */ - __assume(0); - } -# endif -# elif defined(__GNUC__) && (__GNUC__ >= 4) - return (unsigned)(__builtin_ctzll((U64)val) >> 3); -# else - return ZSTD_NbCommonBytes_fallback(val); -# endif - } else { /* 32 bits */ -# if defined(_MSC_VER) - if (val != 0) { - unsigned long r; - _BitScanForward(&r, (U32)val); - return (unsigned)(r >> 3); - } else { - /* Should not reach this code path */ - __assume(0); - } -# elif defined(__GNUC__) && (__GNUC__ >= 3) - return (unsigned)(__builtin_ctz((U32)val) >> 3); -# else - return ZSTD_NbCommonBytes_fallback(val); -# endif + return 
ZSTD_countTrailingZeros64((U64)val) >> 3; + } else { + return ZSTD_countTrailingZeros32((U32)val) >> 3; } } else { /* Big Endian CPU */ if (MEM_64bits()) { -# if defined(_MSC_VER) && defined(_WIN64) -# if STATIC_BMI2 - return _lzcnt_u64(val) >> 3; -# else - if (val != 0) { - unsigned long r; - _BitScanReverse64(&r, (U64)val); - return (unsigned)(r >> 3); - } else { - /* Should not reach this code path */ - __assume(0); - } -# endif -# elif defined(__GNUC__) && (__GNUC__ >= 4) - return (unsigned)(__builtin_clzll(val) >> 3); -# else - return ZSTD_NbCommonBytes_fallback(val); -# endif - } else { /* 32 bits */ -# if defined(_MSC_VER) - if (val != 0) { - unsigned long r; - _BitScanReverse(&r, (unsigned long)val); - return (unsigned)(r >> 3); - } else { - /* Should not reach this code path */ - __assume(0); - } -# elif defined(__GNUC__) && (__GNUC__ >= 3) - return (unsigned)(__builtin_clz((U32)val) >> 3); -# else - return ZSTD_NbCommonBytes_fallback(val); -# endif - } } + return ZSTD_countLeadingZeros64((U64)val) >> 3; + } else { + return ZSTD_countLeadingZeros32((U32)val) >> 3; + } + } } #endif /* ZSTD_BITS_H */ diff --git a/lib/compress/zstd_lazy.c b/lib/compress/zstd_lazy.c index c619aea3a..278995491 100644 --- a/lib/compress/zstd_lazy.c +++ b/lib/compress/zstd_lazy.c @@ -766,6 +766,14 @@ size_t ZSTD_HcFindBestMatch( typedef U64 ZSTD_VecMask; /* Clarifies when we are interacting with a U64 representing a mask of matches */ +/* ZSTD_VecMask_next(): + * Starting from the LSB, returns the idx of the next non-zero bit. + * Basically counting the nb of trailing zeroes. + */ +MEM_STATIC U32 ZSTD_VecMask_next(ZSTD_VecMask val) { + return ZSTD_countTrailingZeros64(val); +} + /* ZSTD_rotateRight_*(): * Rotates a bitfield to the right by "count" bits. 
* https://en.wikipedia.org/w/index.php?title=Circular_shift&oldid=991635599#Implementing_circular_shifts @@ -1165,7 +1173,7 @@ size_t ZSTD_RowFindBestMatch( /* Cycle through the matches and prefetch */ for (; (matches > 0) && (nbAttempts > 0); --nbAttempts, matches &= (matches - 1)) { - U32 const matchPos = (head + ZSTD_countTrailingZeros64(matches)) & rowMask; + U32 const matchPos = (head + ZSTD_VecMask_next(matches)) & rowMask; U32 const matchIndex = row[matchPos]; assert(numMatches < rowEntries); if (matchIndex < lowLimit) @@ -1233,7 +1241,7 @@ size_t ZSTD_RowFindBestMatch( ZSTD_VecMask matches = ZSTD_row_getMatchMask(dmsTagRow, (BYTE)dmsTag, head, rowEntries); for (; (matches > 0) && (nbAttempts > 0); --nbAttempts, matches &= (matches - 1)) { - U32 const matchPos = (head + ZSTD_countTrailingZeros64(matches)) & rowMask; + U32 const matchPos = (head + ZSTD_VecMask_next(matches)) & rowMask; U32 const matchIndex = dmsRow[matchPos]; if (matchIndex < dmsLowestIndex) break; diff --git a/lib/decompress/huf_decompress.c b/lib/decompress/huf_decompress.c index 4541ca98c..c6fd92860 100644 --- a/lib/decompress/huf_decompress.c +++ b/lib/decompress/huf_decompress.c @@ -263,6 +263,7 @@ static size_t HUF_initRemainingDStream(BIT_DStream_t* bit, HUF_DecompressAsmArgs return ERROR(corruption_detected); /* Construct the BIT_DStream_t. */ + assert(sizeof(size_t) == 8); bit->bitContainer = MEM_readLE64(args->ip[stream]); bit->bitsConsumed = ZSTD_countTrailingZeros64(args->bits[stream]); bit->start = (const char*)args->iend[0]; -- 2.47.2