From: Yann Collet Date: Tue, 18 Dec 2018 20:32:58 +0000 (-0800) Subject: btultra2 and very small srcSize X-Git-Tag: v1.3.8~22^2~1 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=635783da123cef1027a9d7d6fe452ca36d26d58f;p=thirdparty%2Fzstd.git btultra2 and very small srcSize When srcSize is small, the number of symbols produced is likely too small to warrant dedicated probability tables. In that case, predefined distribution tables will be used instead. There is a cheap heuristic in btultra initialization: it presumes the default distributions will be used if srcSize <= 1024. btultra2 now uses the same threshold to shut down probability estimation, since the measured frequencies won't be used at the entropy stage, and therefore relying on them to determine sequence cost is misleading, resulting in worse compression ratios. This fixes the btultra2 performance issue on very small inputs. Note that a proper solution would be to determine which symbols are going to use predefined probabilities and which symbols are going to use dynamic ones. But the current algorithm is unable to make a "per-symbol" decision, so this will require significant modifications. 
--- diff --git a/lib/compress/zstd_opt.c b/lib/compress/zstd_opt.c index ce233f47f..be9fc7dc0 100644 --- a/lib/compress/zstd_opt.c +++ b/lib/compress/zstd_opt.c @@ -17,6 +17,8 @@ #define ZSTD_FREQ_DIV 4 /* log factor when using previous stats to init next stats */ #define ZSTD_MAX_PRICE (1<<30) +#define ZSTD_PREDEF_THRESHOLD 1024 /* if srcSize < ZSTD_PREDEF_THRESHOLD, symbols' cost is assumed static, directly determined by pre-defined distributions */ + /*-************************************* * Price functions for optimal parser @@ -73,7 +75,7 @@ static void ZSTD_setBasePrices(optState_t* optPtr, int optLevel) static U32 ZSTD_downscaleStat(U32* table, U32 lastEltIndex, int malus) { U32 s, sum=0; - DEBUGLOG(2, "ZSTD_downscaleStat (nbElts=%u)", lastEltIndex+1); + DEBUGLOG(5, "ZSTD_downscaleStat (nbElts=%u)", lastEltIndex+1); assert(ZSTD_FREQ_DIV+malus > 0 && ZSTD_FREQ_DIV+malus < 31); for (s=0; s> (ZSTD_FREQ_DIV+malus)); @@ -96,7 +98,7 @@ ZSTD_rescaleFreqs(optState_t* const optPtr, optPtr->priceType = zop_dynamic; if (optPtr->litLengthSum == 0) { /* first block : init */ - if (srcSize <= 1024) /* heuristic */ + if (srcSize <= ZSTD_PREDEF_THRESHOLD) /* heuristic */ optPtr->priceType = zop_predef; assert(optPtr->symbolCosts != NULL); @@ -789,6 +791,7 @@ static U32 ZSTD_totalLen(ZSTD_optimal_t sol) return sol.litlen + sol.mlen; } +#if 0 /* debug */ static void listStats(const U32* table, int lastEltID) @@ -803,6 +806,8 @@ listStats(const U32* table, int lastEltID) RAWLOG(2, " \n"); } +#endif + FORCE_INLINE_TEMPLATE size_t ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms, seqStore_t* seqStore, @@ -834,12 +839,6 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms, ZSTD_rescaleFreqs(optStatePtr, (const BYTE*)src, srcSize, optLevel); ip += (ip==prefixStart); - DEBUGLOG(2, "OffCode table on entry : "); - listStats(optStatePtr->offCodeFreq, MaxOff); - - DEBUGLOG(2, "Literals table on entry : "); - listStats(optStatePtr->litFreq, MaxLit); - /* Match Loop */ while (ip 
< ilimit) { U32 cur, last_pos = 0; @@ -1067,12 +1066,6 @@ _shortestPath: /* cur, last_pos, best_mlen, best_off have to be set */ } /* while (ip < ilimit) */ - DEBUGLOG(2, "OffCode table on exit : "); - listStats(optStatePtr->offCodeFreq, MaxOff); - - DEBUGLOG(2, "Literals table on exit : "); - listStats(optStatePtr->litFreq, MaxLit); - /* Return the last literals size */ return iend - anchor; } @@ -1104,9 +1097,9 @@ static U32 ZSTD_upscaleStat(U32* table, U32 lastEltIndex, int bonus) MEM_STATIC void ZSTD_upscaleStats(optState_t* optPtr) { optPtr->litSum = ZSTD_upscaleStat(optPtr->litFreq, MaxLit, 0); - optPtr->litLengthSum = ZSTD_upscaleStat(optPtr->litLengthFreq, MaxLL, 1); - optPtr->matchLengthSum = ZSTD_upscaleStat(optPtr->matchLengthFreq, MaxML, 1); - optPtr->offCodeSum = ZSTD_upscaleStat(optPtr->offCodeFreq, MaxOff, 1); + optPtr->litLengthSum = ZSTD_upscaleStat(optPtr->litLengthFreq, MaxLL, 0); + optPtr->matchLengthSum = ZSTD_upscaleStat(optPtr->matchLengthFreq, MaxML, 0); + optPtr->offCodeSum = ZSTD_upscaleStat(optPtr->offCodeFreq, MaxOff, 0); } /* ZSTD_initStats_ultra(): @@ -1171,6 +1164,7 @@ size_t ZSTD_compressBlock_btultra2( && (seqStore->sequences == seqStore->sequencesStart) /* no ldm */ && (ms->window.dictLimit == ms->window.lowLimit) /* no dictionary */ && (ms->window.dictLimit - ms->nextToUpdate <= 1) /* no prefix (note: intentional overflow, defined as 2-complement) */ + && (srcSize > ZSTD_PREDEF_THRESHOLD) ) { ZSTD_initStats_ultra(ms, seqStore, rep, src, srcSize); } diff --git a/programs/benchzstd.c b/programs/benchzstd.c index 90357b5b5..30864f478 100644 --- a/programs/benchzstd.c +++ b/programs/benchzstd.c @@ -359,8 +359,7 @@ BMK_benchMemAdvancedNoAlloc( srcPtrs[nbBlocks] = srcPtr; srcSizes[nbBlocks] = thisBlockSize; cPtrs[nbBlocks] = cPtr; -#warning force streaming mode - cCapacities[nbBlocks] = (adv->mode == BMK_decodeOnly) ? thisBlockSize : ZSTD_compressBound(thisBlockSize) - 1; + cCapacities[nbBlocks] = (adv->mode == BMK_decodeOnly) ? 
thisBlockSize : ZSTD_compressBound(thisBlockSize); resPtrs[nbBlocks] = resPtr; resSizes[nbBlocks] = (adv->mode == BMK_decodeOnly) ? (size_t) ZSTD_findDecompressedSize(srcPtr, thisBlockSize) : thisBlockSize; srcPtr += thisBlockSize;