From: Yann Collet Date: Thu, 28 Jan 2016 02:24:53 +0000 (+0100) Subject: optimized counting of small segments X-Git-Tag: v0.5.0~1^2~3^2~23 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=4ddb1f556680242c5e614e0884143e58810eed54;p=thirdparty%2Fzstd.git optimized counting of small segments --- diff --git a/lib/fse.c b/lib/fse.c index 309b28caf..61272a624 100644 --- a/lib/fse.c +++ b/lib/fse.c @@ -141,102 +141,6 @@ typedef U32 DTable_max_t[FSE_DTABLE_SIZE_U32(FSE_MAX_TABLELOG)]; /* Function templates */ -size_t FSE_count_generic(unsigned* count, unsigned* maxSymbolValuePtr, const FSE_FUNCTION_TYPE* source, size_t sourceSize, unsigned safe) -{ - const FSE_FUNCTION_TYPE* ip = source; - const FSE_FUNCTION_TYPE* const iend = ip+sourceSize; - unsigned maxSymbolValue = *maxSymbolValuePtr; - unsigned max=0; - int s; - - U32 Counting1[FSE_MAX_SYMBOL_VALUE+1] = { 0 }; - U32 Counting2[FSE_MAX_SYMBOL_VALUE+1] = { 0 }; - U32 Counting3[FSE_MAX_SYMBOL_VALUE+1] = { 0 }; - U32 Counting4[FSE_MAX_SYMBOL_VALUE+1] = { 0 }; - - /* safety checks */ - if (!sourceSize) - { - memset(count, 0, (maxSymbolValue + 1) * sizeof(FSE_FUNCTION_TYPE)); - *maxSymbolValuePtr = 0; - return 0; - } - if (maxSymbolValue > FSE_MAX_SYMBOL_VALUE) return ERROR(GENERIC); /* maxSymbolValue too large : unsupported */ - if (!maxSymbolValue) maxSymbolValue = FSE_MAX_SYMBOL_VALUE; /* 0 == default */ - - if ((safe) || (sizeof(FSE_FUNCTION_TYPE)>1)) - { - /* check input values, to avoid count table overflow */ - while (ip < iend-3) - { - if (*ip>maxSymbolValue) return ERROR(GENERIC); Counting1[*ip++]++; - if (*ip>maxSymbolValue) return ERROR(GENERIC); Counting2[*ip++]++; - if (*ip>maxSymbolValue) return ERROR(GENERIC); Counting3[*ip++]++; - if (*ip>maxSymbolValue) return ERROR(GENERIC); Counting4[*ip++]++; - } - } - else - { - U32 cached = MEM_read32(ip); ip += 4; - while (ip < iend-15) - { - U32 c = cached; cached = MEM_read32(ip); ip += 4; - Counting1[(BYTE) c ]++; - Counting2[(BYTE)(c>>8) ]++; - Counting3[(BYTE)(c>>16)]++; - Counting4[ c>>24 ]++; - c = cached; cached = MEM_read32(ip); ip += 4; - Counting1[(BYTE) c ]++; - Counting2[(BYTE)(c>>8) ]++; - Counting3[(BYTE)(c>>16)]++; - Counting4[ c>>24 ]++; - c = cached; cached = MEM_read32(ip); ip += 4; - Counting1[(BYTE) c ]++; - Counting2[(BYTE)(c>>8) ]++; - Counting3[(BYTE)(c>>16)]++; - Counting4[ c>>24 ]++; - c = cached; cached = MEM_read32(ip); ip += 4; - Counting1[(BYTE) c ]++; - Counting2[(BYTE)(c>>8) ]++; - Counting3[(BYTE)(c>>16)]++; - Counting4[ c>>24 ]++; - } - ip-=4; - } - - /* finish last symbols */ - while (ipmaxSymbolValue)) return ERROR(GENERIC); Counting1[*ip++]++; } - - for (s=0; s<=(int)maxSymbolValue; s++) - { - count[s] = Counting1[s] + Counting2[s] + Counting3[s] + Counting4[s]; - if (count[s] > max) max = count[s]; - } - - while (!count[maxSymbolValue]) maxSymbolValue--; - *maxSymbolValuePtr = maxSymbolValue; - return (size_t)max; -} - -/* hidden fast variant (unsafe) */ -size_t FSE_FUNCTION_NAME(FSE_countFast, FSE_FUNCTION_EXTENSION) -(unsigned* count, unsigned* maxSymbolValuePtr, const FSE_FUNCTION_TYPE* source, size_t sourceSize) -{ - return FSE_count_generic(count, maxSymbolValuePtr, source, sourceSize, 0); -} - -size_t FSE_FUNCTION_NAME(FSE_count, FSE_FUNCTION_EXTENSION) -(unsigned* count, unsigned* maxSymbolValuePtr, const FSE_FUNCTION_TYPE* source, size_t sourceSize) -{ - if ((sizeof(FSE_FUNCTION_TYPE)==1) && (*maxSymbolValuePtr >= 255)) - { - *maxSymbolValuePtr = 255; - return FSE_count_generic(count, maxSymbolValuePtr, source, sourceSize, 0); - } - return FSE_count_generic(count, maxSymbolValuePtr, source, sourceSize, 1); -} - - static U32 FSE_tableStep(U32 tableSize) { return (tableSize>>1) + (tableSize>>3) + 3; } size_t FSE_buildCTable(FSE_CTable* ct, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog) @@ -264,35 +168,28 @@ size_t FSE_buildCTable(FSE_CTable* ct, const short* normalizedCounter, unsigned /* symbol start positions */ cumul[0] = 0; - for (i=1; i<=maxSymbolValue+1; i++) - { - if (normalizedCounter[i-1]==-1) /* Low proba symbol */ - { + for (i=1; i<=maxSymbolValue+1; i++) { + if (normalizedCounter[i-1]==-1) { /* Low proba symbol */ cumul[i] = cumul[i-1] + 1; tableSymbol[highThreshold--] = (FSE_FUNCTION_TYPE)(i-1); - } - else + } else { cumul[i] = cumul[i-1] + normalizedCounter[i-1]; - } + } } cumul[maxSymbolValue+1] = tableSize+1; /* Spread symbols */ - for (symbol=0; symbol<=maxSymbolValue; symbol++) - { + for (symbol=0; symbol<=maxSymbolValue; symbol++) { int nbOccurences; - for (nbOccurences=0; nbOccurences highThreshold) position = (position + step) & tableMask; /* Low proba area */ - } - } + } } if (position!=0) return ERROR(GENERIC); /* Must have gone through all positions */ /* Build table */ - for (i=0; i= largeLimit) noLarge=0; symbolNext[s] = normalizedCounter[s]; - } - } + } } /* Spread symbols */ - for (s=0; s<=maxSymbolValue; s++) - { + for (s=0; s<=maxSymbolValue; s++) { int i; - for (i=0; i highThreshold) position = (position + step) & tableMask; /* lowprob area */ - } - } + } } if (position!=0) return ERROR(GENERIC); /* position must reach all cells once, otherwise normalizedCounter is incorrect */ /* Build Decoding table */ { U32 i; - for (i=0; i max) max = count[s]; + + return (size_t)max; +} + + +static size_t FSE_count_parallel(unsigned* count, unsigned* maxSymbolValuePtr, + const void* source, size_t sourceSize, + unsigned checkMax) +{ + const BYTE* ip = (const BYTE*)source; + const BYTE* const iend = ip+sourceSize; + unsigned maxSymbolValue = *maxSymbolValuePtr; + unsigned max=0; + U32 s; + + U32 Counting1[256] = { 0 }; + U32 Counting2[256] = { 0 }; + U32 Counting3[256] = { 0 }; + U32 Counting4[256] = { 0 }; + + /* safety checks */ + if (!sourceSize) { + memset(count, 0, maxSymbolValue + 1); + *maxSymbolValuePtr = 0; + return 0; + } + if (!maxSymbolValue) maxSymbolValue = 255; /* 0 == default */ + + { /* by stripes of 16 bytes */ + U32 cached = MEM_read32(ip); ip += 4; + while (ip < iend-15) { + U32 c = cached; cached = MEM_read32(ip); ip += 4; + Counting1[(BYTE) c ]++; + Counting2[(BYTE)(c>>8) ]++; + Counting3[(BYTE)(c>>16)]++; + Counting4[ c>>24 ]++; + c = cached; cached = MEM_read32(ip); ip += 4; + Counting1[(BYTE) c ]++; + Counting2[(BYTE)(c>>8) ]++; + Counting3[(BYTE)(c>>16)]++; + Counting4[ c>>24 ]++; + c = cached; cached = MEM_read32(ip); ip += 4; + Counting1[(BYTE) c ]++; + Counting2[(BYTE)(c>>8) ]++; + Counting3[(BYTE)(c>>16)]++; + Counting4[ c>>24 ]++; + c = cached; cached = MEM_read32(ip); ip += 4; + Counting1[(BYTE) c ]++; + Counting2[(BYTE)(c>>8) ]++; + Counting3[(BYTE)(c>>16)]++; + Counting4[ c>>24 ]++; + } + ip-=4; + } + + /* finish last symbols */ + while (ipmaxSymbolValue; s--) + { + Counting1[s] += Counting2[s] + Counting3[s] + Counting4[s]; + if (Counting1[s]) return ERROR(maxSymbolValue_tooSmall); + } } + + for (s=0; s<=maxSymbolValue; s++) { + count[s] = Counting1[s] + Counting2[s] + Counting3[s] + Counting4[s]; + if (count[s] > max) max = count[s]; + } + + while (!count[maxSymbolValue]) maxSymbolValue--; + *maxSymbolValuePtr = maxSymbolValue; + return (size_t)max; +} + +/* fast variant (unsafe : won't check if src contains values beyond count[] limit) */ +size_t FSE_countFast(unsigned* count, unsigned* maxSymbolValuePtr, + const void* source, size_t sourceSize) +{ + if (sourceSize < 1500) return FSE_count_simple(count, maxSymbolValuePtr, source, sourceSize); + return FSE_count_parallel(count, maxSymbolValuePtr, source, sourceSize, 0); +} + +size_t FSE_count(unsigned* count, unsigned* maxSymbolValuePtr, + const void* source, size_t sourceSize) +{ + if (*maxSymbolValuePtr <255) + return FSE_count_parallel(count, maxSymbolValuePtr, source, sourceSize, 1); + *maxSymbolValuePtr = 255; + return FSE_countFast(count, maxSymbolValuePtr, source, sourceSize); +} + + +/*-************************************************************** * FSE Compression Code ****************************************************************/ -/* +/*! FSE_CTable is a variable size structure which contains : U16 tableLog; U16 maxSymbolValue; @@ -657,7 +664,6 @@ void FSE_freeCTable (FSE_CTable* ct) free(ct); } - /* provides the minimum logSize to safely represent a distribution */ static unsigned FSE_minTableLog(size_t srcSize, unsigned maxSymbolValue) { @@ -872,10 +878,8 @@ size_t FSE_buildCTable_rle (FSE_CTable* ct, BYTE symbolValue) tableU16[1] = 0; /* just in case */ /* Build Symbol Transformation Table */ - { - symbolTT[symbolValue].deltaNbBits = 0; - symbolTT[symbolValue].deltaFindState = 0; - } + symbolTT[symbolValue].deltaNbBits = 0; + symbolTT[symbolValue].deltaFindState = 0; return 0; } @@ -905,15 +909,13 @@ static size_t FSE_compress_usingCTable_generic (void* dst, size_t dstSize, #define FSE_FLUSHBITS(s) (fast ? BIT_flushBitsFast(s) : BIT_flushBits(s)) /* join to even */ - if (srcSize & 1) - { + if (srcSize & 1) { FSE_encodeSymbol(&bitC, &CState1, *--ip); FSE_FLUSHBITS(&bitC); } /* join to mod 4 */ - if ((sizeof(bitC.bitContainer)*8 > FSE_MAX_TABLELOG*4+7 ) && (srcSize & 2)) /* test bit 2 */ - { + if ((sizeof(bitC.bitContainer)*8 > FSE_MAX_TABLELOG*4+7 ) && (srcSize & 2)) { /* test bit 2 */ FSE_encodeSymbol(&bitC, &CState2, *--ip); FSE_encodeSymbol(&bitC, &CState1, *--ip); FSE_FLUSHBITS(&bitC); @@ -929,8 +931,7 @@ static size_t FSE_compress_usingCTable_generic (void* dst, size_t dstSize, FSE_encodeSymbol(&bitC, &CState1, *--ip); - if (sizeof(bitC.bitContainer)*8 > FSE_MAX_TABLELOG*4+7 ) /* this test must be static */ - { + if (sizeof(bitC.bitContainer)*8 > FSE_MAX_TABLELOG*4+7 ) { /* this test must be static */ FSE_encodeSymbol(&bitC, &CState2, *--ip); FSE_encodeSymbol(&bitC, &CState1, *--ip); } @@ -1013,7 +1014,7 @@ size_t FSE_compress (void* dst, size_t dstSize, const void* src, size_t srcSize) } -/********************************************************* +/*-******************************************************* * Decompression (Byte symbols) *********************************************************/ size_t FSE_buildDTable_rle (FSE_DTable* dt, BYTE symbolValue) diff --git a/lib/fse.h b/lib/fse.h index e98f1a54a..7e6bfc1a5 100644 --- a/lib/fse.h +++ b/lib/fse.h @@ -124,13 +124,13 @@ or to save and provide normalized distribution using external method. /*! FSE_count(): - Provides the precise count of each symbol within a table 'count' - 'count' is a table of unsigned int, of minimum size (maxSymbolValuePtr[0]+1). - maxSymbolValuePtr[0] will be updated if detected smaller than initially expected - return : the count of the most frequent symbol (which is not identified) - if return == srcSize, there is only one symbol. - if FSE_isError(return), it's an error code. */ -size_t FSE_count(unsigned* count, unsigned* maxSymbolValuePtr, const unsigned char* src, size_t srcSize); + Provides the precise count of each byte within a table 'count' + 'count' is a table of unsigned int, of minimum size (*maxSymbolValuePtr+1). + *maxSymbolValuePtr will be updated if detected smaller than initial value. + @return : the count of the most frequent symbol (which is not identified) + if return == srcSize, there is only one symbol. + Can also return an error code, which can be tested with FSE_isError() */ +size_t FSE_count(unsigned* count, unsigned* maxSymbolValuePtr, const void* src, size_t srcSize); /*! FSE_optimalTableLog(): diff --git a/lib/fse_static.h b/lib/fse_static.h index 3028a2f40..eb031639c 100644 --- a/lib/fse_static.h +++ b/lib/fse_static.h @@ -63,8 +63,8 @@ extern "C" { /* ***************************************** * FSE advanced API *******************************************/ -size_t FSE_countFast(unsigned* count, unsigned* maxSymbolValuePtr, const unsigned char* src, size_t srcSize); -/* same as FSE_count(), but blindly trust that all values within src are <= *maxSymbolValuePtr */ +size_t FSE_countFast(unsigned* count, unsigned* maxSymbolValuePtr, const void* src, size_t srcSize); +/* same as FSE_count(), but blindly trusts that all byte values within src are <= *maxSymbolValuePtr */ size_t FSE_buildCTable_raw (FSE_CTable* ct, unsigned nbBits); /* build a fake FSE_CTable, designed to not compress an input, where each symbol uses nbBits */