git.ipfire.org Git - thirdparty/zstd.git/commitdiff
force inlining of HUF_decodeSymbol*() functions
author Yann Collet <cyan@fb.com>
Thu, 1 Mar 2018 19:28:42 +0000 (11:28 -0800)
committer Yann Collet <cyan@fb.com>
Thu, 1 Mar 2018 19:31:45 +0000 (11:31 -0800)
Inlining of these functions was not done properly by gcc 4.8,
resulting in a major performance difference.

ex :
zstd -b1 silesia.tar
before : dec 680 MB/s
after  : dec 710 MB/s  (without bmi2)
after  : dec 770 MB/s  (with DYNAMIC_BMI2)

lib/decompress/huf_decompress.c

index d2e1bb4ac0c171018d1076621d461a60130c6351..8b292e7923dbc4ad34980acff50978c9c08a3172 100644 (file)
@@ -143,7 +143,8 @@ size_t HUF_readDTableX2(HUF_DTable* DTable, const void* src, size_t srcSize)
 
 typedef struct { U16 sequence; BYTE nbBits; BYTE length; } HUF_DEltX4;  /* double-symbols decoding */
 
-static BYTE HUF_decodeSymbolX2(BIT_DStream_t* Dstream, const HUF_DEltX2* dt, const U32 dtLog)
+FORCE_INLINE_TEMPLATE BYTE
+HUF_decodeSymbolX2(BIT_DStream_t* Dstream, const HUF_DEltX2* dt, const U32 dtLog)
 {
     size_t const val = BIT_lookBitsFast(Dstream, dtLog); /* note : dtLog >= 1 */
     BYTE const c = dt[val].byte;
@@ -305,7 +306,8 @@ HUF_decompress4X2_usingDTable_internal_body(
 }
 
 
-static U32 HUF_decodeSymbolX4(void* op, BIT_DStream_t* DStream, const HUF_DEltX4* dt, const U32 dtLog)
+FORCE_INLINE_TEMPLATE U32
+HUF_decodeSymbolX4(void* op, BIT_DStream_t* DStream, const HUF_DEltX4* dt, const U32 dtLog)
 {
     size_t const val = BIT_lookBitsFast(DStream, dtLog);   /* note : dtLog >= 1 */
     memcpy(op, dt+val, 2);