From: senhuang42 Date: Tue, 6 Jul 2021 14:08:15 +0000 (-0400) Subject: Improve branch misses on FSE symbol spreading X-Git-Tag: v1.5.1~1^2~126^2 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=da095ed8991cdb2932042e208c4453fc681ce10a;p=thirdparty%2Fzstd.git Improve branch misses on FSE symbol spreading --- diff --git a/lib/common/fse.h b/lib/common/fse.h index 19dd4febc..df8280ccd 100644 --- a/lib/common/fse.h +++ b/lib/common/fse.h @@ -337,7 +337,7 @@ size_t FSE_buildCTable_rle (FSE_CTable* ct, unsigned char symbolValue); * Same as FSE_buildCTable(), but using an externally allocated scratch buffer (`workSpace`). * `wkspSize` must be >= `FSE_BUILD_CTABLE_WORKSPACE_SIZE_U32(maxSymbolValue, tableLog)` of `unsigned`. */ -#define FSE_BUILD_CTABLE_WORKSPACE_SIZE_U32(maxSymbolValue, tableLog) (maxSymbolValue + 2 + (1ull << (tableLog - 2))) +#define FSE_BUILD_CTABLE_WORKSPACE_SIZE_U32(maxSymbolValue, tableLog) (((maxSymbolValue + 2) + (1ull << (tableLog)))/2) #define FSE_BUILD_CTABLE_WORKSPACE_SIZE(maxSymbolValue, tableLog) (sizeof(unsigned) * FSE_BUILD_CTABLE_WORKSPACE_SIZE_U32(maxSymbolValue, tableLog)) size_t FSE_buildCTable_wksp(FSE_CTable* ct, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize); diff --git a/lib/compress/fse_compress.c b/lib/compress/fse_compress.c index b4297ec88..52e0bdf2c 100644 --- a/lib/compress/fse_compress.c +++ b/lib/compress/fse_compress.c @@ -75,9 +75,11 @@ size_t FSE_buildCTable_wksp(FSE_CTable* ct, void* const FSCT = ((U32*)ptr) + 1 /* header */ + (tableLog ? tableSize>>1 : 1) ; FSE_symbolCompressionTransform* const symbolTT = (FSE_symbolCompressionTransform*) (FSCT); U32 const step = FSE_TABLESTEP(tableSize); + U32 const maxSV1 = maxSymbolValue+1; - U32* cumul = (U32*)workSpace; - FSE_FUNCTION_TYPE* tableSymbol = (FSE_FUNCTION_TYPE*)(cumul + (maxSymbolValue + 2)); + U16* cumul = (U16*)workSpace; /* size = maxSV1 */ + FSE_FUNCTION_TYPE* tableSymbol = (FSE_FUNCTION_TYPE*)(cumul + (maxSV1+1)); /* size = tableSize */ + BYTE* spread = tableSymbol + tableSize; /* size = tableSize */ U32 highThreshold = tableSize-1; @@ -98,20 +100,59 @@ size_t FSE_buildCTable_wksp(FSE_CTable* ct, /* symbol start positions */ { U32 u; cumul[0] = 0; - for (u=1; u <= maxSymbolValue+1; u++) { + for (u=1; u <= maxSV1; u++) { if (normalizedCounter[u-1]==-1) { /* Low proba symbol */ cumul[u] = cumul[u-1] + 1; tableSymbol[highThreshold--] = (FSE_FUNCTION_TYPE)(u-1); } else { cumul[u] = cumul[u-1] + normalizedCounter[u-1]; } } - cumul[maxSymbolValue+1] = tableSize+1; + cumul[maxSV1] = (U16)(tableSize+1); } /* Spread symbols */ - { U32 position = 0; + if (highThreshold == tableSize - 1) { + /* Case for no low prob count symbols. Lay down 8 bytes at a time + * to reduce branch misses since we are operating on a small block + */ + { + U64 const add = 0x0101010101010101ull; + size_t pos = 0; + U64 sv = 0; + U32 s; + for (s=0; s