From: Yann Collet Date: Wed, 10 Oct 2018 22:48:43 +0000 (-0700) Subject: improved decompression speed X-Git-Tag: v1.3.7~6^2~3 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=d3ec23313d4b9f9856608c18d7d6d729615904f9;p=thirdparty%2Fzstd.git improved decompression speed. While reviewing #1364, I found a decompression speed improvement. On my laptop, the new code decompresses +5-6% faster on clang and +2-3% faster on gcc. Not bad for an accidental optimization... --- diff --git a/lib/common/bitstream.h b/lib/common/bitstream.h index 2f91460c5..00b22049a 100644 --- a/lib/common/bitstream.h +++ b/lib/common/bitstream.h @@ -339,17 +339,8 @@ MEM_STATIC size_t BIT_getUpperBits(size_t bitContainer, U32 const start) MEM_STATIC size_t BIT_getMiddleBits(size_t bitContainer, U32 const start, U32 const nbBits) { -#if defined(__BMI__) && defined(__GNUC__) && __GNUC__*1000+__GNUC_MINOR__ >= 4008 /* experimental */ -# if defined(__x86_64__) - if (sizeof(bitContainer)==8) - return _bextr_u64(bitContainer, start, nbBits); - else -# endif - return _bextr_u32(bitContainer, start, nbBits); -#else assert(nbBits < BIT_MASK_SIZE); return (bitContainer >> start) & BIT_mask[nbBits]; -#endif } MEM_STATIC size_t BIT_getLowerBits(size_t bitContainer, U32 const nbBits) @@ -366,9 +357,11 @@ MEM_STATIC size_t BIT_getLowerBits(size_t bitContainer, U32 const nbBits) * @return : value extracted */ MEM_STATIC size_t BIT_lookBits(const BIT_DStream_t* bitD, U32 nbBits) { -#if defined(__BMI__) && defined(__GNUC__) /* experimental; fails if bitD->bitsConsumed + nbBits > sizeof(bitD->bitContainer)*8 */ +#if 1 + assert(bitD->bitsConsumed + nbBits <= sizeof(bitD->bitContainer)*8); return BIT_getMiddleBits(bitD->bitContainer, (sizeof(bitD->bitContainer)*8) - bitD->bitsConsumed - nbBits, nbBits); #else + /* previous code path, seems slower */ U32 const regMask = sizeof(bitD->bitContainer)*8 - 1; return ((bitD->bitContainer << (bitD->bitsConsumed & regMask)) >> 1) >> ((regMask-nbBits) & 
regMask); #endif