From: Yann Collet Date: Thu, 1 Aug 2019 13:58:17 +0000 (+0200) Subject: fixed compression ratio regression when dictionary-compressing medium-size inputs... X-Git-Tag: v1.4.3^2~5^2~11 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=98692c2838352b4b2696368d338361c62078ca7d;p=thirdparty%2Fzstd.git fixed compression ratio regression when dictionary-compressing medium-size inputs at levels 1-3 --- diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index 8ad15e1cc..1200d828c 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -2270,7 +2270,8 @@ static size_t ZSTD_compressBlock_internal(ZSTD_CCtx* zc, { size_t cSize; DEBUGLOG(5, "ZSTD_compressBlock_internal (dstCapacity=%u, dictLimit=%u, nextToUpdate=%u)", - (unsigned)dstCapacity, (unsigned)zc->blockState.matchState.window.dictLimit, (unsigned)zc->blockState.matchState.nextToUpdate); + (unsigned)dstCapacity, (unsigned)zc->blockState.matchState.window.dictLimit, + (unsigned)zc->blockState.matchState.nextToUpdate); { const size_t bss = ZSTD_buildSeqStore(zc, src, srcSize); FORWARD_IF_ERROR(bss); diff --git a/lib/compress/zstd_compress_internal.h b/lib/compress/zstd_compress_internal.h index 10b59d390..9a511e55c 100644 --- a/lib/compress/zstd_compress_internal.h +++ b/lib/compress/zstd_compress_internal.h @@ -134,9 +134,13 @@ typedef struct { typedef struct ZSTD_matchState_t ZSTD_matchState_t; struct ZSTD_matchState_t { ZSTD_window_t window; /* State for window round buffer management */ - U32 loadedDictEnd; /* index of end of dictionary, within context's referential. When dict referential is copied into active context (i.e. not attached), effectively same value as dictSize, since referential starts from zero */ + U32 loadedDictEnd; /* index of end of dictionary, within context's referential. + * When loadedDictEnd != 0, a dictionary is in use, and still valid. + * When dict referential is copied into active context (i.e. not attached), + * loadedDictEnd == dictSize, since referential starts from zero. + */ U32 nextToUpdate; /* index from which to continue table update */ - U32 hashLog3; /* dispatch table : larger == faster, more memory */ + U32 hashLog3; /* dispatch table for matches of len==3 : larger == faster, more memory */ U32* hashTable; U32* hashTable3; U32* chainTable; @@ -763,24 +767,37 @@ ZSTD_window_enforceMaxDist(ZSTD_window_t* window, /* Similar to ZSTD_window_enforceMaxDist(), * but only invalidates dictionary - * when input progresses beyond window size. */ + * when input progresses beyond window size. + * assumption : loadedDictEndPtr and dictMatchStatePtr are valid (non NULL) + * loadedDictEnd uses same referential as window->base + * maxDist is the window size */ MEM_STATIC void -ZSTD_checkDictValidity(ZSTD_window_t* window, +ZSTD_checkDictValidity(const ZSTD_window_t* window, const void* blockEnd, U32 maxDist, U32* loadedDictEndPtr, const ZSTD_matchState_t** dictMatchStatePtr) { - U32 const blockEndIdx = (U32)((BYTE const*)blockEnd - window->base); - U32 const loadedDictEnd = (loadedDictEndPtr != NULL) ? *loadedDictEndPtr : 0; - DEBUGLOG(5, "ZSTD_checkDictValidity: blockEndIdx=%u, maxDist=%u, loadedDictEnd=%u", - (unsigned)blockEndIdx, (unsigned)maxDist, (unsigned)loadedDictEnd); - - if (loadedDictEnd && (blockEndIdx > maxDist + loadedDictEnd)) { - /* On reaching window size, dictionaries are invalidated */ - if (loadedDictEndPtr) *loadedDictEndPtr = 0; - if (dictMatchStatePtr) *dictMatchStatePtr = NULL; - } + assert(loadedDictEndPtr != NULL); + assert(dictMatchStatePtr != NULL); + { U32 const blockEndIdx = (U32)((BYTE const*)blockEnd - window->base); + U32 const loadedDictEnd = *loadedDictEndPtr; + DEBUGLOG(5, "ZSTD_checkDictValidity: blockEndIdx=%u, maxDist=%u, loadedDictEnd=%u", + (unsigned)blockEndIdx, (unsigned)maxDist, (unsigned)loadedDictEnd); + assert(blockEndIdx >= loadedDictEnd); + + if (blockEndIdx > loadedDictEnd + maxDist) { + /* On reaching window size, dictionaries are invalidated. + * For simplification, if window size is reached anywhere within next block, + * the dictionary is invalidated for the full block. + */ + DEBUGLOG(6, "invalidating dictionary for current block (distance > windowSize)"); + *loadedDictEndPtr = 0; + *dictMatchStatePtr = NULL; + } else { + if (*loadedDictEndPtr != 0) { + DEBUGLOG(6, "dictionary considered valid for current block"); + } } } } /** diff --git a/lib/compress/zstd_double_fast.c b/lib/compress/zstd_double_fast.c index 5957255d9..f1c6520e1 100644 --- a/lib/compress/zstd_double_fast.c +++ b/lib/compress/zstd_double_fast.c @@ -370,8 +370,10 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic( const BYTE* const base = ms->window.base; const U32 endIndex = (U32)((size_t)(istart - base) + srcSize); const U32 maxDistance = 1U << cParams->windowLog; - const U32 lowestValid = ms->window.lowLimit; - const U32 lowLimit = (endIndex - lowestValid > maxDistance) ? endIndex - maxDistance : lowestValid; + const U32 validLowest = ms->window.lowLimit; + const int isDictionary = (ms->loadedDictEnd != 0); + const U32 withinWindow = (endIndex - validLowest > maxDistance) ? endIndex - maxDistance : validLowest; + const U32 lowLimit = isDictionary ? validLowest : withinWindow; const U32 dictStartIndex = lowLimit; const U32 dictLimit = ms->window.dictLimit; const U32 prefixStartIndex = (dictLimit > lowLimit) ? dictLimit : lowLimit; diff --git a/lib/compress/zstd_fast.c b/lib/compress/zstd_fast.c index a05b8a47f..fd7869f5c 100644 --- a/lib/compress/zstd_fast.c +++ b/lib/compress/zstd_fast.c @@ -71,6 +71,7 @@ size_t ZSTD_compressBlock_fast_generic( U32 offsetSaved = 0; /* init */ + DEBUGLOG(5, "ZSTD_compressBlock_fast_generic"); ip0 += (ip0 == prefixStart); ip1 = ip0 + 1; { @@ -239,6 +240,7 @@ size_t ZSTD_compressBlock_fast_dictMatchState_generic( assert(prefixStartIndex >= (U32)(dictEnd - dictBase)); /* init */ + DEBUGLOG(5, "ZSTD_compressBlock_fast_dictMatchState_generic"); ip += (dictAndPrefixLength == 0); /* dictMatchState repCode checks don't currently handle repCode == 0 * disabling. */ @@ -380,8 +382,10 @@ static size_t ZSTD_compressBlock_fast_extDict_generic( const BYTE* anchor = istart; const U32 endIndex = (U32)((size_t)(istart - base) + srcSize); const U32 maxDistance = 1U << cParams->windowLog; - const U32 validLow = ms->window.lowLimit; - const U32 lowLimit = (endIndex - validLow > maxDistance) ? endIndex - maxDistance : validLow; + const U32 validLowest = ms->window.lowLimit; + const int isDictionary = (ms->loadedDictEnd != 0); + const U32 withinWindow = (endIndex - validLowest > maxDistance) ? endIndex - maxDistance : validLowest; + const U32 lowLimit = isDictionary ? validLowest : withinWindow; const U32 dictStartIndex = lowLimit; const BYTE* const dictStart = dictBase + dictStartIndex; const U32 dictLimit = ms->window.dictLimit; @@ -392,6 +396,8 @@ static size_t ZSTD_compressBlock_fast_extDict_generic( const BYTE* const ilimit = iend - 8; U32 offset_1=rep[0], offset_2=rep[1]; + DEBUGLOG(5, "ZSTD_compressBlock_fast_extDict_generic"); + /* switch to "regular" variant if extDict is invalidated due to maxDistance */ if (prefixStartIndex == dictStartIndex) return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, mls); @@ -412,8 +418,8 @@ static size_t ZSTD_compressBlock_fast_extDict_generic( if ( (((U32)((prefixStartIndex-1) - repIndex) >= 3) /* intentional underflow */ & (repIndex > dictStartIndex)) && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) { - const BYTE* repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend; - mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixStart) + 4; + const BYTE* const repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend; + mLength = ZSTD_count_2segments(ip+1 +4, repMatch +4, iend, repMatchEnd, prefixStart) + 4; ip++; ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, 0, mLength-MINMATCH); } else { @@ -423,8 +429,8 @@ static size_t ZSTD_compressBlock_fast_extDict_generic( ip += ((ip-anchor) >> kSearchStrength) + stepSize; continue; } - { const BYTE* matchEnd = matchIndex < prefixStartIndex ? dictEnd : iend; - const BYTE* lowMatchPtr = matchIndex < prefixStartIndex ? dictStart : prefixStart; + { const BYTE* const matchEnd = matchIndex < prefixStartIndex ? dictEnd : iend; + const BYTE* const lowMatchPtr = matchIndex < prefixStartIndex ? dictStart : prefixStart; U32 offset; mLength = ZSTD_count_2segments(ip+4, match+4, iend, matchEnd, prefixStart) + 4; while (((ip>anchor) & (match>lowMatchPtr)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */ @@ -451,7 +457,7 @@ static size_t ZSTD_compressBlock_fast_extDict_generic( && (MEM_read32(repMatch2) == MEM_read32(ip)) ) { const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? dictEnd : iend; size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixStart) + 4; - U32 tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; /* swap offset_2 <=> offset_1 */ + U32 const tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; /* swap offset_2 <=> offset_1 */ ZSTD_storeSeq(seqStore, 0, anchor, 0, repLength2-MINMATCH); hashTable[ZSTD_hashPtr(ip, hlog, mls)] = current2; ip += repLength2; diff --git a/lib/compress/zstd_lazy.c b/lib/compress/zstd_lazy.c index 94d906c01..9d47366fd 100644 --- a/lib/compress/zstd_lazy.c +++ b/lib/compress/zstd_lazy.c @@ -619,12 +619,14 @@ FORCE_INLINE_TEMPLATE size_t ZSTD_HcFindBestMatch_extDict_selectMLS ( /* ******************************* * Common parser - lazy strategy *********************************/ -FORCE_INLINE_TEMPLATE -size_t ZSTD_compressBlock_lazy_generic( +typedef enum { search_hashChain, search_binaryTree } searchMethod_e; + +FORCE_INLINE_TEMPLATE size_t +ZSTD_compressBlock_lazy_generic( ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], const void* src, size_t srcSize, - const U32 searchMethod, const U32 depth, + const searchMethod_e searchMethod, const U32 depth, ZSTD_dictMode_e const dictMode) { const BYTE* const istart = (const BYTE*)src; @@ -640,8 +642,8 @@ size_t ZSTD_compressBlock_lazy_generic( ZSTD_matchState_t* ms, const BYTE* ip, const BYTE* iLimit, size_t* offsetPtr); searchMax_f const searchMax = dictMode == ZSTD_dictMatchState ? - (searchMethod ? ZSTD_BtFindBestMatch_dictMatchState_selectMLS : ZSTD_HcFindBestMatch_dictMatchState_selectMLS) : - (searchMethod ? ZSTD_BtFindBestMatch_selectMLS : ZSTD_HcFindBestMatch_selectMLS); + (searchMethod==search_binaryTree ? ZSTD_BtFindBestMatch_dictMatchState_selectMLS : ZSTD_HcFindBestMatch_dictMatchState_selectMLS) : + (searchMethod==search_binaryTree ? ZSTD_BtFindBestMatch_selectMLS : ZSTD_HcFindBestMatch_selectMLS); U32 offset_1 = rep[0], offset_2 = rep[1], savedOffset=0; const ZSTD_matchState_t* const dms = ms->dictMatchState; @@ -858,56 +860,56 @@ size_t ZSTD_compressBlock_btlazy2( ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], void const* src, size_t srcSize) { - return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, 1, 2, ZSTD_noDict); + return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_binaryTree, 2, ZSTD_noDict); } size_t ZSTD_compressBlock_lazy2( ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], void const* src, size_t srcSize) { - return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, 0, 2, ZSTD_noDict); + return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 2, ZSTD_noDict); } size_t ZSTD_compressBlock_lazy( ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], void const* src, size_t srcSize) { - return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, 0, 1, ZSTD_noDict); + return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 1, ZSTD_noDict); } size_t ZSTD_compressBlock_greedy( ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], void const* src, size_t srcSize) { - return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, 0, 0, ZSTD_noDict); + return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 0, ZSTD_noDict); } size_t ZSTD_compressBlock_btlazy2_dictMatchState( ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], void const* src, size_t srcSize) { - return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, 1, 2, ZSTD_dictMatchState); + return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_binaryTree, 2, ZSTD_dictMatchState); } size_t ZSTD_compressBlock_lazy2_dictMatchState( ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], void const* src, size_t srcSize) { - return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, 0, 2, ZSTD_dictMatchState); + return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 2, ZSTD_dictMatchState); } size_t ZSTD_compressBlock_lazy_dictMatchState( ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], void const* src, size_t srcSize) { - return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, 0, 1, ZSTD_dictMatchState); + return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 1, ZSTD_dictMatchState); } size_t ZSTD_compressBlock_greedy_dictMatchState( ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], void const* src, size_t srcSize) { - return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, 0, 0, ZSTD_dictMatchState); + return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 0, ZSTD_dictMatchState); } @@ -916,7 +918,7 @@ size_t ZSTD_compressBlock_lazy_extDict_generic( ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], const void* src, size_t srcSize, - const U32 searchMethod, const U32 depth) + const searchMethod_e searchMethod, const U32 depth) { const BYTE* const istart = (const BYTE*)src; const BYTE* ip = istart; @@ -934,7 +936,7 @@ size_t ZSTD_compressBlock_lazy_extDict_generic( typedef size_t (*searchMax_f)( ZSTD_matchState_t* ms, const BYTE* ip, const BYTE* iLimit, size_t* offsetPtr); - searchMax_f searchMax = searchMethod ? ZSTD_BtFindBestMatch_extDict_selectMLS : ZSTD_HcFindBestMatch_extDict_selectMLS; + searchMax_f searchMax = searchMethod==search_binaryTree ? ZSTD_BtFindBestMatch_extDict_selectMLS : ZSTD_HcFindBestMatch_extDict_selectMLS; U32 offset_1 = rep[0], offset_2 = rep[1]; @@ -1075,7 +1077,7 @@ _storeSequence: rep[1] = offset_2; /* Return the last literals size */ - return iend - anchor; + return (size_t)(iend - anchor); } @@ -1083,7 +1085,7 @@ size_t ZSTD_compressBlock_greedy_extDict( ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], void const* src, size_t srcSize) { - return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, 0, 0); + return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 0); } size_t ZSTD_compressBlock_lazy_extDict( @@ -1091,7 +1093,7 @@ size_t ZSTD_compressBlock_lazy_extDict( void const* src, size_t srcSize) { - return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, 0, 1); + return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 1); } size_t ZSTD_compressBlock_lazy2_extDict( @@ -1099,7 +1101,7 @@ size_t ZSTD_compressBlock_lazy2_extDict( void const* src, size_t srcSize) { - return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, 0, 2); + return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 2); } size_t ZSTD_compressBlock_btlazy2_extDict( @@ -1107,5 +1109,5 @@ size_t ZSTD_compressBlock_btlazy2_extDict( void const* src, size_t srcSize) { - return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, 1, 2); + return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_binaryTree, 2); } diff --git a/tests/fuzzer.c b/tests/fuzzer.c index f42de9ed5..8ac82a37c 100644 --- a/tests/fuzzer.c +++ b/tests/fuzzer.c @@ -231,7 +231,7 @@ static int FUZ_mallocTests_internal(unsigned seed, double compressibility, unsig /* advanced MT API test */ if (part <= 3) - { unsigned nbThreads; + { int nbThreads; for (nbThreads=1; nbThreads<=4; nbThreads++) { int compressionLevel; for (compressionLevel=1; compressionLevel<=6; compressionLevel++) { @@ -242,7 +242,7 @@ static int FUZ_mallocTests_internal(unsigned seed, double compressibility, unsig CHECK_Z( ZSTD_CCtx_setParameter(cctx, ZSTD_c_nbWorkers, nbThreads) ); CHECK_Z( ZSTD_compress2(cctx, outBuffer, outSize, inBuffer, inSize) ); ZSTD_freeCCtx(cctx); - DISPLAYLEVEL(3, "compress_generic,-T%u,end level %i : ", + DISPLAYLEVEL(3, "compress_generic,-T%i,end level %i : ", nbThreads, compressionLevel); FUZ_displayMallocStats(malcount); } } } @@ -450,7 +450,7 @@ static int basicUnitTests(U32 seed, double compressibility) DISPLAYLEVEL(3, "test%3i : ZSTD_decompressBound test with content size missing : ", testNb++); { /* create compressed buffer with content size missing */ - ZSTD_CCtx* cctx = ZSTD_createCCtx(); + ZSTD_CCtx* const cctx = ZSTD_createCCtx(); CHECK_Z( ZSTD_CCtx_setParameter(cctx, ZSTD_c_contentSizeFlag, 0) ); CHECKPLUS(r, ZSTD_compress2(cctx, compressedBuffer, compressedBufferSize, @@ -777,7 +777,7 @@ static int basicUnitTests(U32 seed, double compressibility) CHECK( ZSTD_initCStream(staticCCtx, 1) ); DISPLAYLEVEL(3, "OK \n"); - DISPLAYLEVEL(3, "test%3i : init CStream with dictionary (should fail) : ", testNb++); + DISPLAYLEVEL(3, "test%3i : init static CStream with dictionary (should fail) : ", testNb++); { size_t const r = ZSTD_initCStream_usingDict(staticCCtx, CNBuffer, 64 KB, 1); if (!ZSTD_isError(r)) goto _output_error; } DISPLAYLEVEL(3, "OK \n"); @@ -1963,7 +1963,7 @@ static int basicUnitTests(U32 seed, double compressibility) DISPLAYLEVEL(3, "test%3i : compress lots 3-bytes sequences : ", testNb++); { CHECK_V(r, ZSTD_compress(compressedBuffer, ZSTD_compressBound(_3BYTESTESTLENGTH), - CNBuffer, _3BYTESTESTLENGTH, 19) ); + CNBuffer, _3BYTESTESTLENGTH, 19) ); cSize = r; } DISPLAYLEVEL(3, "OK (%u bytes : %.2f%%)\n", (unsigned)cSize, (double)cSize/_3BYTESTESTLENGTH*100);