From 7c3d1cb3ab9b43745c65bb1796960dbbaa237a02 Mon Sep 17 00:00:00 2001 From: Ilya Tokar Date: Tue, 1 Mar 2022 18:49:10 -0500 Subject: [PATCH] Enable STATIC_BMI2 for gcc/clang Some usage (e.g. BIT_getLowerBits) uses it without checking for MSVC, so enabling for clang gives a small performance boost. --- lib/common/bitstream.h | 8 ++++---- lib/common/compiler.h | 2 ++ 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/lib/common/bitstream.h b/lib/common/bitstream.h index 43a221678..731630ea4 100644 --- a/lib/common/bitstream.h +++ b/lib/common/bitstream.h @@ -37,8 +37,8 @@ extern "C" { * Target specific =========================================*/ #ifndef ZSTD_NO_INTRINSICS -# if defined(__BMI__) && defined(__GNUC__) -# include <immintrin.h> /* support for bextr (experimental) */ +# if (defined(__BMI__) || defined(__BMI2__)) && defined(__GNUC__) +# include <immintrin.h> /* support for bextr (experimental)/bzhi */ # elif defined(__ICCARM__) # include <intrinsics.h> # endif @@ -164,8 +164,8 @@ MEM_STATIC size_t BIT_initCStream(BIT_CStream_t* bitC, MEM_STATIC FORCE_INLINE_ATTR size_t BIT_getLowerBits(size_t bitContainer, U32 const nbBits) { -#if defined(STATIC_BMI2) && STATIC_BMI2 == 1 - return _bzhi_u64(bitContainer, nbBits); +#if defined(STATIC_BMI2) && STATIC_BMI2 == 1 && !defined(ZSTD_NO_INTRINSICS) + return _bzhi_u64(bitContainer, nbBits); #else assert(nbBits < BIT_MASK_SIZE); return bitContainer & BIT_mask[nbBits]; diff --git a/lib/common/compiler.h b/lib/common/compiler.h index 516930c01..6c7100e83 100644 --- a/lib/common/compiler.h +++ b/lib/common/compiler.h @@ -181,6 +181,8 @@ # ifdef __AVX2__ //MSVC does not have a BMI2 specific flag, but every CPU that supports AVX2 also supports BMI2 # define STATIC_BMI2 1 # endif +# elif defined(__BMI2__) && defined(__x86_64__) && defined(__GNUC__) +# define STATIC_BMI2 1 # endif #endif -- 2.47.2