From: Nick Terrell Date: Tue, 15 Aug 2017 00:20:50 +0000 (-0700) Subject: [libzstd] Fix FORCE_INLINE macro X-Git-Tag: v1.3.1^2~11^2~3^2 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=565e925eb7a48a4fe4cda899b00b27863e046ebb;p=thirdparty%2Fzstd.git [libzstd] Fix FORCE_INLINE macro --- diff --git a/lib/common/compiler.h b/lib/common/compiler.h new file mode 100644 index 000000000..69eb7b910 --- /dev/null +++ b/lib/common/compiler.h @@ -0,0 +1,76 @@ +#ifndef ZSTD_COMPILER_H +#define ZSTD_COMPILER_H + +/*-******************************************************* +* Compiler specifics +*********************************************************/ +/* force inlining */ +#if defined (__GNUC__) || defined(__cplusplus) || defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L /* C99 */ +# define INLINE_KEYWORD inline +#else +# define INLINE_KEYWORD +#endif + +#if defined(__GNUC__) +# define FORCE_INLINE_ATTR __attribute__((always_inline)) +#elif defined(_MSC_VER) +# define FORCE_INLINE_ATTR __forceinline +#else +# define FORCE_INLINE_ATTR +#endif + +/** + * FORCE_INLINE_TEMPLATE is used to define C "templates", which take constant + * parameters. They must be inlined for the compiler to eliminate the constant + * branches. + */ +#define FORCE_INLINE_TEMPLATE static INLINE_KEYWORD FORCE_INLINE_ATTR +/** + * HINT_INLINE is used to help the compiler generate better code. It is *not* + * used for "templates", so it can be tweaked based on the compiler's + * performance. + * + * gcc-4.8 and gcc-4.9 have been shown to benefit from leaving off the + * always_inline attribute. + * + * clang up to 5.0.0 (trunk) benefit tremendously from the always_inline + * attribute. 
+ */ +#if !defined(__clang__) && defined(__GNUC__) && __GNUC__ >= 4 && __GNUC_MINOR__ >= 8 && __GNUC__ < 5 +# define HINT_INLINE static INLINE_KEYWORD +#else +# define HINT_INLINE static INLINE_KEYWORD FORCE_INLINE_ATTR +#endif + +/* force no inlining */ +#ifdef _MSC_VER +# define FORCE_NOINLINE static __declspec(noinline) +#else +# ifdef __GNUC__ +# define FORCE_NOINLINE static __attribute__((__noinline__)) +# else +# define FORCE_NOINLINE static +# endif +#endif + +/* prefetch */ +#if defined(_MSC_VER) && (defined(_M_X64) || defined(_M_I86)) /* _mm_prefetch() is not defined outside of x86/x64 */ +# include <mmintrin.h> /* https://msdn.microsoft.com/fr-fr/library/84szxsww(v=vs.90).aspx */ +# define PREFETCH(ptr) _mm_prefetch((const char*)ptr, _MM_HINT_T0) +#elif defined(__GNUC__) +# define PREFETCH(ptr) __builtin_prefetch(ptr, 0, 0) +#else +# define PREFETCH(ptr) /* disabled */ +#endif + +/* disable warnings */ +#ifdef _MSC_VER /* Visual Studio */ +# include <intrin.h> /* For Visual 2005 */ +# pragma warning(disable : 4100) /* disable: C4100: unreferenced formal parameter */ +# pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */ +# pragma warning(disable : 4204) /* disable: C4204: non-constant aggregate initializer */ +# pragma warning(disable : 4214) /* disable: C4214: non-int bitfields */ +# pragma warning(disable : 4324) /* disable: C4324: padded structure */ +#endif + +#endif /* ZSTD_COMPILER_H */ diff --git a/lib/common/fse_decompress.c b/lib/common/fse_decompress.c index 8474a4c07..6bcc6b20a 100644 --- a/lib/common/fse_decompress.c +++ b/lib/common/fse_decompress.c @@ -33,33 +33,13 @@ ****************************************************************** */ -/* ************************************************************** -* Compiler specifics -****************************************************************/ -#ifdef _MSC_VER /* Visual Studio */ -# define FORCE_INLINE static __forceinline -# include <intrin.h> /* For Visual 2005 */ -# pragma warning(disable : 
4127) /* disable: C4127: conditional expression is constant */ -# pragma warning(disable : 4214) /* disable: C4214: non-int bitfields */ -#else -# if defined (__cplusplus) || defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L /* C99 */ -# ifdef __GNUC__ -# define FORCE_INLINE static inline __attribute__((always_inline)) -# else -# define FORCE_INLINE static inline -# endif -# else -# define FORCE_INLINE static -# endif /* __STDC_VERSION__ */ -#endif - - /* ************************************************************** * Includes ****************************************************************/ #include <stdlib.h> /* malloc, free, qsort */ #include <string.h> /* memcpy, memset */ #include "bitstream.h" +#include "compiler.h" #define FSE_STATIC_LINKING_ONLY #include "fse.h" @@ -216,7 +196,7 @@ size_t FSE_buildDTable_raw (FSE_DTable* dt, unsigned nbBits) return 0; } -FORCE_INLINE size_t FSE_decompress_usingDTable_generic( +FORCE_INLINE_TEMPLATE size_t FSE_decompress_usingDTable_generic( void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const FSE_DTable* dt, const unsigned fast) diff --git a/lib/common/xxhash.c b/lib/common/xxhash.c index eb44222c5..9d9c0e963 100644 --- a/lib/common/xxhash.c +++ b/lib/common/xxhash.c @@ -113,19 +113,25 @@ static void* XXH_memcpy(void* dest, const void* src, size_t size) { return memcp /* ************************************* * Compiler Specific Options ***************************************/ -#ifdef _MSC_VER /* Visual Studio */ -# pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */ -# define FORCE_INLINE static __forceinline +#if defined (__GNUC__) || defined(__cplusplus) || defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L /* C99 */ +# define INLINE_KEYWORD inline #else -# if defined (__cplusplus) || defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L /* C99 */ -# ifdef __GNUC__ -# define FORCE_INLINE static inline __attribute__((always_inline)) -# else -# define FORCE_INLINE static 
inline -# endif -# else -# define FORCE_INLINE static -# endif /* __STDC_VERSION__ */ +# define INLINE_KEYWORD +#endif + +#if defined(__GNUC__) +# define FORCE_INLINE_ATTR __attribute__((always_inline)) +#elif defined(_MSC_VER) +# define FORCE_INLINE_ATTR __forceinline +#else +# define FORCE_INLINE_ATTR +#endif + +#define FORCE_INLINE_TEMPLATE static INLINE_KEYWORD FORCE_INLINE_ATTR + + +#ifdef _MSC_VER +# pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */ #endif @@ -248,7 +254,7 @@ typedef enum { XXH_bigEndian=0, XXH_littleEndian=1 } XXH_endianess; *****************************/ typedef enum { XXH_aligned, XXH_unaligned } XXH_alignment; -FORCE_INLINE U32 XXH_readLE32_align(const void* ptr, XXH_endianess endian, XXH_alignment align) +FORCE_INLINE_TEMPLATE U32 XXH_readLE32_align(const void* ptr, XXH_endianess endian, XXH_alignment align) { if (align==XXH_unaligned) return endian==XXH_littleEndian ? XXH_read32(ptr) : XXH_swap32(XXH_read32(ptr)); @@ -256,7 +262,7 @@ FORCE_INLINE U32 XXH_readLE32_align(const void* ptr, XXH_endianess endian, XXH_a return endian==XXH_littleEndian ? *(const U32*)ptr : XXH_swap32(*(const U32*)ptr); } -FORCE_INLINE U32 XXH_readLE32(const void* ptr, XXH_endianess endian) +FORCE_INLINE_TEMPLATE U32 XXH_readLE32(const void* ptr, XXH_endianess endian) { return XXH_readLE32_align(ptr, endian, XXH_unaligned); } @@ -266,7 +272,7 @@ static U32 XXH_readBE32(const void* ptr) return XXH_CPU_LITTLE_ENDIAN ? XXH_swap32(XXH_read32(ptr)) : XXH_read32(ptr); } -FORCE_INLINE U64 XXH_readLE64_align(const void* ptr, XXH_endianess endian, XXH_alignment align) +FORCE_INLINE_TEMPLATE U64 XXH_readLE64_align(const void* ptr, XXH_endianess endian, XXH_alignment align) { if (align==XXH_unaligned) return endian==XXH_littleEndian ? XXH_read64(ptr) : XXH_swap64(XXH_read64(ptr)); @@ -274,7 +280,7 @@ FORCE_INLINE U64 XXH_readLE64_align(const void* ptr, XXH_endianess endian, XXH_a return endian==XXH_littleEndian ? 
*(const U64*)ptr : XXH_swap64(*(const U64*)ptr); } -FORCE_INLINE U64 XXH_readLE64(const void* ptr, XXH_endianess endian) +FORCE_INLINE_TEMPLATE U64 XXH_readLE64(const void* ptr, XXH_endianess endian) { return XXH_readLE64_align(ptr, endian, XXH_unaligned); } @@ -335,7 +341,7 @@ static U32 XXH32_round(U32 seed, U32 input) return seed; } -FORCE_INLINE U32 XXH32_endian_align(const void* input, size_t len, U32 seed, XXH_endianess endian, XXH_alignment align) +FORCE_INLINE_TEMPLATE U32 XXH32_endian_align(const void* input, size_t len, U32 seed, XXH_endianess endian, XXH_alignment align) { const BYTE* p = (const BYTE*)input; const BYTE* bEnd = p + len; @@ -435,7 +441,7 @@ static U64 XXH64_mergeRound(U64 acc, U64 val) return acc; } -FORCE_INLINE U64 XXH64_endian_align(const void* input, size_t len, U64 seed, XXH_endianess endian, XXH_alignment align) +FORCE_INLINE_TEMPLATE U64 XXH64_endian_align(const void* input, size_t len, U64 seed, XXH_endianess endian, XXH_alignment align) { const BYTE* p = (const BYTE*)input; const BYTE* const bEnd = p + len; @@ -584,7 +590,7 @@ XXH_PUBLIC_API XXH_errorcode XXH64_reset(XXH64_state_t* statePtr, unsigned long } -FORCE_INLINE XXH_errorcode XXH32_update_endian (XXH32_state_t* state, const void* input, size_t len, XXH_endianess endian) +FORCE_INLINE_TEMPLATE XXH_errorcode XXH32_update_endian (XXH32_state_t* state, const void* input, size_t len, XXH_endianess endian) { const BYTE* p = (const BYTE*)input; const BYTE* const bEnd = p + len; @@ -654,7 +660,7 @@ XXH_PUBLIC_API XXH_errorcode XXH32_update (XXH32_state_t* state_in, const void* -FORCE_INLINE U32 XXH32_digest_endian (const XXH32_state_t* state, XXH_endianess endian) +FORCE_INLINE_TEMPLATE U32 XXH32_digest_endian (const XXH32_state_t* state, XXH_endianess endian) { const BYTE * p = (const BYTE*)state->mem32; const BYTE* const bEnd = (const BYTE*)(state->mem32) + state->memsize; @@ -704,7 +710,7 @@ XXH_PUBLIC_API unsigned int XXH32_digest (const XXH32_state_t* state_in) /* **** XXH64 
**** */ -FORCE_INLINE XXH_errorcode XXH64_update_endian (XXH64_state_t* state, const void* input, size_t len, XXH_endianess endian) +FORCE_INLINE_TEMPLATE XXH_errorcode XXH64_update_endian (XXH64_state_t* state, const void* input, size_t len, XXH_endianess endian) { const BYTE* p = (const BYTE*)input; const BYTE* const bEnd = p + len; @@ -771,7 +777,7 @@ XXH_PUBLIC_API XXH_errorcode XXH64_update (XXH64_state_t* state_in, const void* -FORCE_INLINE U64 XXH64_digest_endian (const XXH64_state_t* state, XXH_endianess endian) +FORCE_INLINE_TEMPLATE U64 XXH64_digest_endian (const XXH64_state_t* state, XXH_endianess endian) { const BYTE * p = (const BYTE*)state->mem64; const BYTE* const bEnd = (const BYTE*)state->mem64 + state->memsize; diff --git a/lib/common/zstd_internal.h b/lib/common/zstd_internal.h index 1621bca61..66ea832e1 100644 --- a/lib/common/zstd_internal.h +++ b/lib/common/zstd_internal.h @@ -10,42 +10,11 @@ #ifndef ZSTD_CCOMMON_H_MODULE #define ZSTD_CCOMMON_H_MODULE -/*-******************************************************* -* Compiler specifics -*********************************************************/ -#ifdef _MSC_VER /* Visual Studio */ -# define FORCE_INLINE static __forceinline -# include <intrin.h> /* For Visual 2005 */ -# pragma warning(disable : 4100) /* disable: C4100: unreferenced formal parameter */ -# pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */ -# pragma warning(disable : 4204) /* disable: C4204: non-constant aggregate initializer */ -# pragma warning(disable : 4324) /* disable: C4324: padded structure */ -#else -# if defined (__cplusplus) || defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L /* C99 */ -# ifdef __GNUC__ -# define FORCE_INLINE static inline __attribute__((always_inline)) -# else -# define FORCE_INLINE static inline -# endif -# else -# define FORCE_INLINE static -# endif /* __STDC_VERSION__ */ -#endif - -#ifdef _MSC_VER -# define FORCE_NOINLINE static __declspec(noinline) -#else -# ifdef 
__GNUC__ -# define FORCE_NOINLINE static __attribute__((__noinline__)) -# else -# define FORCE_NOINLINE static -# endif -#endif - /*-************************************* * Dependencies ***************************************/ +#include "compiler.h" #include "mem.h" #include "error_private.h" #define ZSTD_STATIC_LINKING_ONLY diff --git a/lib/compress/fse_compress.c b/lib/compress/fse_compress.c index 3a03627cc..50a130250 100644 --- a/lib/compress/fse_compress.c +++ b/lib/compress/fse_compress.c @@ -32,27 +32,6 @@ - Public forum : https://groups.google.com/forum/#!forum/lz4c ****************************************************************** */ -/* ************************************************************** -* Compiler specifics -****************************************************************/ -#ifdef _MSC_VER /* Visual Studio */ -# define FORCE_INLINE static __forceinline -# include <intrin.h> /* For Visual 2005 */ -# pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */ -# pragma warning(disable : 4214) /* disable: C4214: non-int bitfields */ -#else -# if defined (__cplusplus) || defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L /* C99 */ -# ifdef __GNUC__ -# define FORCE_INLINE static inline __attribute__((always_inline)) -# else -# define FORCE_INLINE static inline -# endif -# else -# define FORCE_INLINE static -# endif /* __STDC_VERSION__ */ -#endif - - /* ************************************************************** * Includes ****************************************************************/ @@ -60,6 +39,7 @@ #include <string.h> /* memcpy, memset */ #include <stdio.h> /* printf (debug) */ #include "bitstream.h" +#include "compiler.h" #define FSE_STATIC_LINKING_ONLY #include "fse.h" diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index a70a66684..0a7a98a96 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -1483,7 +1483,7 @@ static void ZSTD_fillHashTable (ZSTD_CCtx* zc, const void* end, const 
U32 mls) } -FORCE_INLINE +FORCE_INLINE_TEMPLATE void ZSTD_compressBlock_fast_generic(ZSTD_CCtx* cctx, const void* src, size_t srcSize, const U32 mls) @@ -1726,7 +1726,7 @@ static void ZSTD_fillDoubleHashTable (ZSTD_CCtx* cctx, const void* end, const U3 } -FORCE_INLINE +FORCE_INLINE_TEMPLATE void ZSTD_compressBlock_doubleFast_generic(ZSTD_CCtx* cctx, const void* src, size_t srcSize, const U32 mls) @@ -2284,7 +2284,7 @@ static size_t ZSTD_BtFindBestMatch_selectMLS_extDict ( /* Update chains up to ip (excluded) Assumption : always within prefix (i.e. not within extDict) */ -FORCE_INLINE +FORCE_INLINE_TEMPLATE U32 ZSTD_insertAndFindFirstIndex (ZSTD_CCtx* zc, const BYTE* ip, U32 mls) { U32* const hashTable = zc->hashTable; @@ -2308,7 +2308,7 @@ U32 ZSTD_insertAndFindFirstIndex (ZSTD_CCtx* zc, const BYTE* ip, U32 mls) /* inlining is important to hardwire a hot branch (template emulation) */ -FORCE_INLINE +FORCE_INLINE_TEMPLATE size_t ZSTD_HcFindBestMatch_generic ( ZSTD_CCtx* zc, /* Index table will be updated */ const BYTE* const ip, const BYTE* const iLimit, @@ -2360,7 +2360,7 @@ size_t ZSTD_HcFindBestMatch_generic ( } -FORCE_INLINE size_t ZSTD_HcFindBestMatch_selectMLS ( +FORCE_INLINE_TEMPLATE size_t ZSTD_HcFindBestMatch_selectMLS ( ZSTD_CCtx* zc, const BYTE* ip, const BYTE* const iLimit, size_t* offsetPtr, @@ -2377,7 +2377,7 @@ FORCE_INLINE size_t ZSTD_HcFindBestMatch_selectMLS ( } -FORCE_INLINE size_t ZSTD_HcFindBestMatch_extDict_selectMLS ( +FORCE_INLINE_TEMPLATE size_t ZSTD_HcFindBestMatch_extDict_selectMLS ( ZSTD_CCtx* zc, const BYTE* ip, const BYTE* const iLimit, size_t* offsetPtr, @@ -2397,7 +2397,7 @@ FORCE_INLINE size_t ZSTD_HcFindBestMatch_extDict_selectMLS ( /* ******************************* * Common parser - lazy strategy *********************************/ -FORCE_INLINE +FORCE_INLINE_TEMPLATE void ZSTD_compressBlock_lazy_generic(ZSTD_CCtx* ctx, const void* src, size_t srcSize, const U32 searchMethod, const U32 depth) @@ -2559,7 +2559,7 @@ static void 
ZSTD_compressBlock_greedy(ZSTD_CCtx* ctx, const void* src, size_t sr } -FORCE_INLINE +FORCE_INLINE_TEMPLATE void ZSTD_compressBlock_lazy_extDict_generic(ZSTD_CCtx* ctx, const void* src, size_t srcSize, const U32 searchMethod, const U32 depth) diff --git a/lib/compress/zstd_opt.h b/lib/compress/zstd_opt.h index 53e806eb7..e0af5bfa5 100644 --- a/lib/compress/zstd_opt.h +++ b/lib/compress/zstd_opt.h @@ -22,7 +22,7 @@ /*-************************************* * Price functions for optimal parser ***************************************/ -FORCE_INLINE void ZSTD_setLog2Prices(optState_t* optPtr) +static void ZSTD_setLog2Prices(optState_t* optPtr) { optPtr->log2matchLengthSum = ZSTD_highbit32(optPtr->matchLengthSum+1); optPtr->log2litLengthSum = ZSTD_highbit32(optPtr->litLengthSum+1); @@ -32,7 +32,7 @@ FORCE_INLINE void ZSTD_setLog2Prices(optState_t* optPtr) } -MEM_STATIC void ZSTD_rescaleFreqs(optState_t* optPtr, const BYTE* src, size_t srcSize) +static void ZSTD_rescaleFreqs(optState_t* optPtr, const BYTE* src, size_t srcSize) { unsigned u; @@ -96,7 +96,7 @@ MEM_STATIC void ZSTD_rescaleFreqs(optState_t* optPtr, const BYTE* src, size_t sr } -FORCE_INLINE U32 ZSTD_getLiteralPrice(optState_t* optPtr, U32 litLength, const BYTE* literals) +static U32 ZSTD_getLiteralPrice(optState_t* optPtr, U32 litLength, const BYTE* literals) { U32 price, u; @@ -137,7 +137,7 @@ FORCE_INLINE U32 ZSTD_getLiteralPrice(optState_t* optPtr, U32 litLength, const B } -FORCE_INLINE U32 ZSTD_getPrice(optState_t* optPtr, U32 litLength, const BYTE* literals, U32 offset, U32 matchLength, const int ultra) +FORCE_INLINE_TEMPLATE U32 ZSTD_getPrice(optState_t* optPtr, U32 litLength, const BYTE* literals, U32 offset, U32 matchLength, const int ultra) { /* offset */ U32 price; @@ -159,7 +159,7 @@ FORCE_INLINE U32 ZSTD_getPrice(optState_t* optPtr, U32 litLength, const BYTE* li } -MEM_STATIC void ZSTD_updatePrice(optState_t* optPtr, U32 litLength, const BYTE* literals, U32 offset, U32 matchLength) +static void 
ZSTD_updatePrice(optState_t* optPtr, U32 litLength, const BYTE* literals, U32 offset, U32 matchLength) { U32 u; @@ -203,7 +203,7 @@ MEM_STATIC void ZSTD_updatePrice(optState_t* optPtr, U32 litLength, const BYTE* /* function safe only for comparisons */ -MEM_STATIC U32 ZSTD_readMINMATCH(const void* memPtr, U32 length) +static U32 ZSTD_readMINMATCH(const void* memPtr, U32 length) { switch (length) { @@ -219,7 +219,7 @@ MEM_STATIC U32 ZSTD_readMINMATCH(const void* memPtr, U32 length) /* Update hashTable3 up to ip (excluded) Assumption : always within prefix (i.e. not within extDict) */ -FORCE_INLINE +static U32 ZSTD_insertAndFindFirstIndexHash3 (ZSTD_CCtx* zc, const BYTE* ip) { U32* const hashTable3 = zc->hashTable3; @@ -412,7 +412,7 @@ static U32 ZSTD_BtGetAllMatches_selectMLS_extDict ( /*-******************************* * Optimal parser *********************************/ -FORCE_INLINE +FORCE_INLINE_TEMPLATE void ZSTD_compressBlock_opt_generic(ZSTD_CCtx* ctx, const void* src, size_t srcSize, const int ultra) { @@ -662,7 +662,7 @@ _storeSequence: /* cur, last_pos, best_mlen, best_off have to be set */ } -FORCE_INLINE +FORCE_INLINE_TEMPLATE void ZSTD_compressBlock_opt_extDict_generic(ZSTD_CCtx* ctx, const void* src, size_t srcSize, const int ultra) { diff --git a/lib/decompress/huf_decompress.c b/lib/decompress/huf_decompress.c index 0a47a3d74..6f6c3d4f0 100644 --- a/lib/decompress/huf_decompress.c +++ b/lib/decompress/huf_decompress.c @@ -32,30 +32,12 @@ - Public forum : https://groups.google.com/forum/#!forum/lz4c ****************************************************************** */ -/* ************************************************************** -* Compiler specifics -****************************************************************/ -#ifdef _MSC_VER /* Visual Studio */ -# define FORCE_INLINE static __forceinline -# pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */ -#else -# if defined (__cplusplus) || defined 
(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L /* C99 */ -# ifdef __GNUC__ -# define FORCE_INLINE static inline __attribute__((always_inline)) -# else -# define FORCE_INLINE static inline -# endif -# else -# define FORCE_INLINE static -# endif /* __STDC_VERSION__ */ -#endif - - /* ************************************************************** * Dependencies ****************************************************************/ #include <string.h> /* memcpy, memset */ #include "bitstream.h" /* BIT_* */ +#include "compiler.h" #include "fse.h" /* header compression */ #define HUF_STATIC_LINKING_ONLY #include "huf.h" @@ -180,7 +162,7 @@ static BYTE HUF_decodeSymbolX2(BIT_DStream_t* Dstream, const HUF_DEltX2* dt, con if (MEM_64bits()) \ HUF_DECODE_SYMBOLX2_0(ptr, DStreamPtr) -FORCE_INLINE size_t HUF_decodeStreamX2(BYTE* p, BIT_DStream_t* const bitDPtr, BYTE* const pEnd, const HUF_DEltX2* const dt, const U32 dtLog) +HINT_INLINE size_t HUF_decodeStreamX2(BYTE* p, BIT_DStream_t* const bitDPtr, BYTE* const pEnd, const HUF_DEltX2* const dt, const U32 dtLog) { BYTE* const pStart = p; @@ -639,7 +621,7 @@ static U32 HUF_decodeLastSymbolX4(void* op, BIT_DStream_t* DStream, const HUF_DE if (MEM_64bits()) \ ptr += HUF_decodeSymbolX4(ptr, DStreamPtr, dt, dtLog) -FORCE_INLINE size_t HUF_decodeStreamX4(BYTE* p, BIT_DStream_t* bitDPtr, BYTE* const pEnd, const HUF_DEltX4* const dt, const U32 dtLog) +HINT_INLINE size_t HUF_decodeStreamX4(BYTE* p, BIT_DStream_t* bitDPtr, BYTE* const pEnd, const HUF_DEltX4* const dt, const U32 dtLog) { BYTE* const pStart = p; diff --git a/lib/decompress/zstd_decompress.c b/lib/decompress/zstd_decompress.c index 159b7b15b..db58a0092 100644 --- a/lib/decompress/zstd_decompress.c +++ b/lib/decompress/zstd_decompress.c @@ -53,15 +53,6 @@ # include "zstd_legacy.h" #endif -#if defined(_MSC_VER) && (defined(_M_X64) || defined(_M_I86)) /* _mm_prefetch() is not defined outside of x86/x64 */ -# include <mmintrin.h> /* https://msdn.microsoft.com/fr-fr/library/84szxsww(v=vs.90).aspx */ -# 
define ZSTD_PREFETCH(ptr) _mm_prefetch((const char*)ptr, _MM_HINT_T0) -#elif defined(__GNUC__) -# define ZSTD_PREFETCH(ptr) __builtin_prefetch(ptr, 0, 0) -#else -# define ZSTD_PREFETCH(ptr) /* disabled */ -#endif - /*-************************************* * Errors @@ -953,7 +944,7 @@ static seq_t ZSTD_decodeSequence(seqState_t* seqState) } -FORCE_INLINE +HINT_INLINE size_t ZSTD_execSequence(BYTE* op, BYTE* const oend, seq_t sequence, const BYTE** litPtr, const BYTE* const litLimit, @@ -1097,7 +1088,7 @@ static size_t ZSTD_decompressSequences( } -FORCE_INLINE seq_t ZSTD_decodeSequenceLong_generic(seqState_t* seqState, int const longOffsets) +FORCE_INLINE_TEMPLATE seq_t ZSTD_decodeSequenceLong_generic(seqState_t* seqState, int const longOffsets) { seq_t seq; @@ -1197,7 +1188,7 @@ static seq_t ZSTD_decodeSequenceLong(seqState_t* seqState, unsigned const window } } -FORCE_INLINE +HINT_INLINE size_t ZSTD_execSequenceLong(BYTE* op, BYTE* const oend, seq_t sequence, const BYTE** litPtr, const BYTE* const litLimit, @@ -1333,7 +1324,7 @@ static size_t ZSTD_decompressSequencesLong( seq_t const sequence = ZSTD_decodeSequenceLong(&seqState, windowSize32); size_t const oneSeqSize = ZSTD_execSequenceLong(op, oend, sequences[(seqNb-ADVANCED_SEQS) & STOSEQ_MASK], &litPtr, litEnd, base, vBase, dictEnd); if (ZSTD_isError(oneSeqSize)) return oneSeqSize; - ZSTD_PREFETCH(sequence.match); + PREFETCH(sequence.match); sequences[seqNb&STOSEQ_MASK] = sequence; op += oneSeqSize; }