From: Nick Terrell Date: Thu, 13 May 2021 22:44:12 +0000 (-0700) Subject: [lib] Fix determinism bug in the optimal parser X-Git-Tag: v1.5.1~1^2~173^2~3 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=91c9a247b67374b74753a7d3dcc8999db710b139;p=thirdparty%2Fzstd.git [lib] Fix determinism bug in the optimal parser `ZSTD_insertBt1()` has a speed optimization that skips the prefix of very long matches. https://github.com/facebook/zstd/blob/40def70387f99b239f3f566ba399275a8fd44cde/lib/compress/zstd_opt.c#L476 This optimization is based off the length longest match found. However, when indices are reset, we only ensure that we can reference the whole window starting from `ip`. If the previous block ended with a long match then `nextToUpdate` could be much less than `ip`. It might be far enough back that `nextToUpdate < maxDist`, so it doesn't have a full window of data to reference. This can cause non-determinism bugs, because we may find a match that is beyond `ip - maxDist`, and may sometimes be un-referencable, and that match triggers the speed optimization. The fix is to base the `windowLow` off of the `target` of `ZSTD_updateTree_internal()`, because anything below that value will be obsolete by the time `ZSTD_updateTree_internal()` completes. --- diff --git a/lib/compress/zstd_opt.c b/lib/compress/zstd_opt.c index 402a7e5c7..a880fadad 100644 --- a/lib/compress/zstd_opt.c +++ b/lib/compress/zstd_opt.c @@ -364,11 +364,13 @@ static U32 ZSTD_insertAndFindFirstIndexHash3 (ZSTD_matchState_t* ms, * Binary Tree search ***************************************/ /** ZSTD_insertBt1() : add one or multiple positions to tree. - * ip : assumed <= iend-8 . + * @param ip assumed <= iend-8 . + * @param target The target of ZSTD_updateTree_internal() - we are filling to this position * @return : nb of positions added */ static U32 ZSTD_insertBt1( ZSTD_matchState_t* ms, const BYTE* const ip, const BYTE* const iend, + U32 const target, U32 const mls, const int extDict) { const ZSTD_compressionParameters* const cParams = &ms->cParams; @@ -391,7 +393,10 @@ static U32 ZSTD_insertBt1( U32* smallerPtr = bt + 2*(curr&btMask); U32* largerPtr = smallerPtr + 1; U32 dummy32; /* to be nullified at the end */ - U32 const windowLow = ms->window.lowLimit; + /* windowLow is based on target because we're only need positions that will be + * in the window at the end of the tree update. + */ + U32 const windowLow = ZSTD_getLowestMatchIndex(ms, target, cParams->windowLog); U32 matchEndIdx = curr+8+1; size_t bestLength = 8; U32 nbCompares = 1U << cParams->searchLog; @@ -404,6 +409,7 @@ static U32 ZSTD_insertBt1( DEBUGLOG(8, "ZSTD_insertBt1 (%u)", curr); + assert(curr <= target); assert(ip <= iend-8); /* required for h calculation */ hashTable[h] = curr; /* Update Hash Table */ @@ -492,7 +498,7 @@ void ZSTD_updateTree_internal( idx, target, dictMode); while(idx < target) { - U32 const forward = ZSTD_insertBt1(ms, base+idx, iend, mls, dictMode == ZSTD_extDict); + U32 const forward = ZSTD_insertBt1(ms, base+idx, iend, target, mls, dictMode == ZSTD_extDict); assert(idx < (U32)(idx + forward)); idx += forward; } @@ -893,7 +899,7 @@ static void ZSTD_optLdm_processMatchCandidate(ZSTD_optLdm_t* optLdm, ZSTD_match_ */ U32 posOvershoot = currPosInBlock - optLdm->endPosInBlock; ZSTD_optLdm_skipRawSeqStoreBytes(&optLdm->seqStore, posOvershoot); - } + } ZSTD_opt_getNextMatchAndUpdateSeqStore(optLdm, currPosInBlock, remainingBytes); } ZSTD_optLdm_maybeAddMatch(matches, nbMatches, optLdm, currPosInBlock);