From: Yann Collet Date: Sat, 21 Feb 2015 02:31:59 +0000 (+0100) Subject: Updated FSE X-Git-Tag: v0.1.0~4^2^2~2 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=a3c75bad5a4fad5d687709a877392806f7ef9ced;p=thirdparty%2Fzstd.git Updated FSE Faster speed on barely compressible data --- diff --git a/lib/fse.c b/lib/fse.c index 56ac11ae3..a6d30c729 100644 --- a/lib/fse.c +++ b/lib/fse.c @@ -540,7 +540,6 @@ unsigned FSE_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxS if (tableLog==0) tableLog = FSE_DEFAULT_TABLELOG; if ((FSE_highbit32((U32)(srcSize - 1)) - 2) < tableLog) tableLog = FSE_highbit32((U32)(srcSize - 1)) - 2; /* Accuracy can be reduced */ if ((FSE_highbit32(maxSymbolValue+1)+1) > tableLog) tableLog = FSE_highbit32(maxSymbolValue+1)+1; /* Need a minimum to safely represent all symbol values */ - //if ((FSE_highbit32(maxSymbolValue)+2) > tableLog) tableLog = FSE_highbit32(maxSymbolValue)+2; /* Need a minimum to safely represent all symbol values */ if (tableLog < FSE_MIN_TABLELOG) tableLog = FSE_MIN_TABLELOG; if (tableLog > FSE_MAX_TABLELOG) tableLog = FSE_MAX_TABLELOG; return tableLog; @@ -561,6 +560,10 @@ int FSE_compareRankT(const void* r1, const void* r2) return 2 * (R1->count < R2->count) - 1; } +static U32 g_tableLog_test =0; +static U32 g_total_test = 0; + +#if 0 static size_t FSE_adjustNormSlow(short* norm, int pointsToRemove, const unsigned* count, U32 maxSymbolValue) { rank_t rank[FSE_MAX_SYMBOL_VALUE+2]; @@ -602,6 +605,99 @@ static size_t FSE_adjustNormSlow(short* norm, int pointsToRemove, const unsigned return 0; } +#else + +static size_t FSE_adjustNormSlow(short* norm, int pointsToRemove, const unsigned* count, U32 maxSymbolValue) +{ + U32 s; + U32 total = g_total_test; + U32 tableLog = g_tableLog_test; + U32 distributed = 0; + U32 ToDistribute; + + /* Init */ + (void)pointsToRemove; + U32 lowThreshold = (U32)(total >> tableLog); + U32 lowOne = (U32)((total * 3) >> (tableLog + 1)); + + for (s=0; 
s<=maxSymbolValue; s++) + { + if (count[s] == 0) + { + norm[s]=0; + continue; + } + if (count[s] <= lowThreshold) + { + norm[s] = -1; + distributed++; + total -= count[s]; + continue; + } + if (count[s] <= lowOne) + { + norm[s] = 1; + distributed++; + total -= count[s]; + continue; + } + norm[s]=-2; + } + ToDistribute = (1 << tableLog) - distributed; + + if ((total / ToDistribute) > lowOne) + { + /* risk of rounding to zero */ + lowOne = (U32)((total * 3) / (ToDistribute * 2)); + for (s=0; s<=maxSymbolValue; s++) + { + if ((norm[s] == -2) && (count[s] <= lowOne)) + { + norm[s] = 1; + distributed++; + total -= count[s]; + continue; + } + } + ToDistribute = (1 << tableLog) - distributed; + } + + if (distributed == maxSymbolValue+1) + { + /* all values are pretty poor; probably incompressible data (should have already been detected); + find max, then give all remaining to max */ + U32 maxV = 0, maxC =0; + for (s=0; s<=maxSymbolValue; s++) + if (count[s] > maxC) maxV=s, maxC=count[s]; + norm[maxV] += ToDistribute; + return 0; + } + + { + U64 const vStepLog = 62 - tableLog; + U64 const mid = (1ULL << (vStepLog-1)) - 1; + U64 const rStep = ((((U64)1<<vStepLog) * ToDistribute) + mid) / total; + U64 tmpTotal = mid; + for (s=0; s<=maxSymbolValue; s++) + { + if (norm[s]==-2) + { + U64 end = tmpTotal + (count[s] * rStep); + U32 sStart = (U32)(tmpTotal >> vStepLog); + U32 sEnd = (U32)(end >> vStepLog); + U32 weight = sEnd - sStart; + if (weight < 1) + return (size_t)-FSE_ERROR_GENERIC; + norm[s] = weight; + tmpTotal = end; + } + } + } + + return 0; +} +#endif + size_t FSE_normalizeCount (short* normalizedCounter, unsigned tableLog, const unsigned* count, size_t total, @@ -654,12 +750,13 @@ size_t FSE_normalizeCount (short* normalizedCounter, unsigned tableLog, stillToDistribute -= proba; } } + g_tableLog_test = tableLog; + g_total_test = total; if (-stillToDistribute >= (normalizedCounter[largest] >> 1)) { /* corner case, need to converge towards normalization with caution */ size_t errorCode = FSE_adjustNormSlow(normalizedCounter, -stillToDistribute, count, maxSymbolValue); if (FSE_isError(errorCode)) return errorCode; - //FSE_adjustNormSlow(normalizedCounter, 
-stillToDistribute, count, maxSymbolValue); } else normalizedCounter[largest] += (short)stillToDistribute; } @@ -869,12 +966,6 @@ size_t FSE_compress_usingCTable (void* dst, size_t dstSize, } -static size_t FSE_compressRLE (BYTE *out, BYTE symbol) -{ - *out=symbol; - return 1; -} - size_t FSE_compressBound(size_t size) { return FSE_COMPRESSBOUND(size); } @@ -901,8 +992,8 @@ size_t FSE_compress2 (void* dst, size_t dstSize, const void* src, size_t srcSize /* Scan input and build symbol stats */ errorCode = FSE_count (count, ip, srcSize, &maxSymbolValue); if (FSE_isError(errorCode)) return errorCode; - if (errorCode == srcSize) return FSE_compressRLE (ostart, *istart); - if (errorCode < ((srcSize * 7) >> 10)) return 0; /* Heuristic : not compressible enough */ + if (errorCode == srcSize) return 1; + if (errorCode < (srcSize >> 7)) return 0; /* Heuristic : not compressible enough */ tableLog = FSE_optimalTableLog(tableLog, srcSize, maxSymbolValue); errorCode = FSE_normalizeCount (norm, tableLog, count, srcSize, maxSymbolValue); diff --git a/lib/zstd.c b/lib/zstd.c index 51d366d31..e81e63309 100644 --- a/lib/zstd.c +++ b/lib/zstd.c @@ -613,25 +613,27 @@ static size_t ZSTD_compressLiterals (void* dst, size_t dstSize, size_t errorCode; const size_t minGain = ZSTD_minGain(srcSize); - // early out + /* early out */ if (dstSize < FSE_compressBound(srcSize)) return (size_t)-ZSTD_ERROR_maxDstSize_tooSmall; - // Scan input and build symbol stats + /* Scan input and build symbol stats */ errorCode = FSE_count (count, ip, srcSize, &maxSymbolValue); if (FSE_isError(errorCode)) return (size_t)-ZSTD_ERROR_GENERIC; if (errorCode == srcSize) return 1; - if (errorCode < ((srcSize * 7) >> 10)) return 0; + //if (errorCode < ((srcSize * 7) >> 10)) return 0; + //if (errorCode < (srcSize >> 7)) return 0; + if (errorCode < (srcSize >> 6)) return 0; /* heuristic : probably not compressible enough */ tableLog = FSE_optimalTableLog(tableLog, srcSize, maxSymbolValue); errorCode = 
(int)FSE_normalizeCount (norm, tableLog, count, srcSize, maxSymbolValue); if (FSE_isError(errorCode)) return (size_t)-ZSTD_ERROR_GENERIC; - // Write table description header + /* Write table description header */ errorCode = FSE_writeHeader (op, FSE_MAX_HEADERSIZE, norm, maxSymbolValue, tableLog); if (FSE_isError(errorCode)) return (size_t)-ZSTD_ERROR_GENERIC; op += errorCode; - // Compress + /* Compress */ errorCode = FSE_buildCTable (&CTable, norm, maxSymbolValue, tableLog); if (FSE_isError(errorCode)) return (size_t)-ZSTD_ERROR_GENERIC; errorCode = ZSTD_compressLiterals_usingCTable(op, oend - op, ip, srcSize, &CTable);