From: Yann Collet Date: Wed, 9 May 2018 00:43:13 +0000 (-0700) Subject: implemented fractional bit cost evaluation X-Git-Tag: v1.3.5~3^2~55^2~10 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=ba2ad9b6b993ace72b153e579be0823a61dc8769;p=thirdparty%2Fzstd.git implemented fractional bit cost evaluation for FSE symbols. While it seems to work, the gains are negligible compared to rough maxNbBits evaluation. There are even a few losses sometimes, that still need to be explained. Furthermore, there are still cases where btlazy2 does a better job than btopt, which seems rather strange too. --- diff --git a/lib/common/fse.h b/lib/common/fse.h index 556e6c523..677078558 100644 --- a/lib/common/fse.h +++ b/lib/common/fse.h @@ -577,10 +577,23 @@ MEM_STATIC void FSE_flushCState(BIT_CStream_t* bitC, const FSE_CState_t* statePt MEM_STATIC U32 FSE_getMaxNbBits(const FSE_symbolCompressionTransform* symbolTT, U32 symbolValue) { - assert(symbolValue <= FSE_MAX_SYMBOL_VALUE); return (symbolTT[symbolValue].deltaNbBits + ((1<<16)-1)) >> 16; } +/* FSE_bitCost_b256() : + * Approximate symbol cost, + * provide fractional value, using fixed-point format (8 bit) */ +MEM_STATIC U32 FSE_bitCost_b256(const FSE_symbolCompressionTransform* symbolTT, U32 tableLog, U32 symbolValue) +{ + U32 const minNbBits = symbolTT[symbolValue].deltaNbBits >> 16; + U32 const threshold = (minNbBits+1) << 16; + assert(symbolTT[symbolValue].deltaNbBits + (1<<tableLog) <= threshold); + U32 const deltaFromThreshold = threshold - (symbolTT[symbolValue].deltaNbBits + (1<<tableLog)); + U32 const normalizedDeltaFromThreshold = (deltaFromThreshold << 8) >> tableLog; /* linear interpolation (very approximate) */ + assert(normalizedDeltaFromThreshold <= 256); + return (minNbBits+1)*256 - normalizedDeltaFromThreshold; +} + /* ====== Decompression ====== */ diff --git a/lib/compress/zstd_opt.c b/lib/compress/zstd_opt.c index d800254cc..80edb1a7b 100644 --- a/lib/compress/zstd_opt.c +++ b/lib/compress/zstd_opt.c @@ -126,11 +126,13 @@ static U32 ZSTD_litLengthPrice(U32 const litLength, const optState_t* const optP U32 const llCode = ZSTD_LLcode(litLength); FSE_CState_t cstate; 
FSE_initCState(&cstate, optPtr->symbolCosts->litlengthCTable); - return (LL_bits[llCode] + FSE_getMaxNbBits(cstate.symbolTT, llCode)) << 8; + U32 const price = LL_bits[llCode]*256 + FSE_bitCost_b256(cstate.symbolTT, cstate.stateLog, llCode); + DEBUGLOG(8, "ZSTD_litLengthPrice: ll=%u, bitCost=%.2f", litLength, (double)price / 256); + return price; } if (optPtr->priceType == zop_predef) return ZSTD_highbit32((U32)litLength+1); - /* literal Length */ + /* dynamic statistics */ { U32 const llCode = ZSTD_LLcode(litLength); return (LL_bits[llCode] + optPtr->log2litLengthSum - ZSTD_highbit32(optPtr->litLengthFreq[llCode]+1)) << 8; } @@ -156,7 +158,9 @@ static int ZSTD_litLengthContribution(U32 const litLength, const optState_t* con U32 const llCode = ZSTD_LLcode(litLength); FSE_CState_t cstate; FSE_initCState(&cstate, optPtr->symbolCosts->litlengthCTable); - return ((int)(LL_bits[llCode] + FSE_getMaxNbBits(cstate.symbolTT, llCode)) - FSE_getMaxNbBits(cstate.symbolTT, 0)) * 256; + return (int)(LL_bits[llCode] * 256) + + FSE_bitCost_b256(cstate.symbolTT, cstate.stateLog, llCode) + - FSE_bitCost_b256(cstate.symbolTT, cstate.stateLog, 0); } if (optPtr->priceType >= zop_predef) return ZSTD_highbit32(litLength+1); @@ -205,9 +209,8 @@ ZSTD_getMatchPrice(U32 const offset, U32 const matchLength, FSE_CState_t mlstate, offstate; FSE_initCState(&mlstate, optPtr->symbolCosts->matchlengthCTable); FSE_initCState(&offstate, optPtr->symbolCosts->offcodeCTable); - return (FSE_getMaxNbBits(offstate.symbolTT, offCode) + offCode - + FSE_getMaxNbBits(mlstate.symbolTT, mlCode) + ML_bits[mlCode]) - * 256; + return FSE_bitCost_b256(offstate.symbolTT, offstate.stateLog, offCode) + offCode*256 + + FSE_bitCost_b256(mlstate.symbolTT, mlstate.stateLog, mlCode) + ML_bits[mlCode]*256; } if (optPtr->priceType == zop_predef) /* fixed scheme, do not use statistics */ diff --git a/lib/decompress/huf_decompress.c b/lib/decompress/huf_decompress.c index 73f5c46c0..2c2d13803 100644 --- 
a/lib/decompress/huf_decompress.c +++ b/lib/decompress/huf_decompress.c @@ -965,7 +965,7 @@ static const algo_time_t algoTime[16 /* Quantization */][3 /* single, double, qu U32 HUF_selectDecoder (size_t dstSize, size_t cSrcSize) { assert(dstSize > 0); - assert(dstSize <= 128 KB); + assert(dstSize <= 128*1024); /* decoder timing evaluation */ { U32 const Q = (cSrcSize >= dstSize) ? 15 : (U32)(cSrcSize * 16 / dstSize); /* Q < 16 */ U32 const D256 = (U32)(dstSize >> 8);