From: Yann Collet
Date: Thu, 23 Dec 2021 20:03:33 +0000 (-0800)
Subject: change ZSTD_storeSeq() interface to accept matchLength
X-Git-Tag: v1.5.2^2~23^2~1
X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=b77fcac61fadf665f7522dd0c2e44b373eb7d57d;p=thirdparty%2Fzstd.git

change ZSTD_storeSeq() interface to accept matchLength
instead of mlBase.

This removes the need to do `- MINMATCH` at every call site.

The new interface contract is checked with an `assert()`.
---

diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c
index 34f8e970f..24a740850 100644
--- a/lib/compress/zstd_compress.c
+++ b/lib/compress/zstd_compress.c
@@ -5816,7 +5816,7 @@ static size_t ZSTD_copySequencesToSeqStoreExplicitBlockDelim(ZSTD_CCtx* cctx, ZS
         }
         RETURN_ERROR_IF(idx - seqPos->idx > cctx->seqStore.maxNbSeq, memory_allocation,
                         "Not enough memory allocated. Try adjusting ZSTD_c_minMatch.");
-        ZSTD_storeSeq(&cctx->seqStore, litLength, ip, iend, offCode, matchLength - MINMATCH);
+        ZSTD_storeSeq(&cctx->seqStore, litLength, ip, iend, offCode, matchLength);
         ip += matchLength + litLength;
     }
     ZSTD_memcpy(cctx->blockState.nextCBlock->rep, updatedRepcodes.rep, sizeof(repcodes_t));
@@ -5942,7 +5942,7 @@ static size_t ZSTD_copySequencesToSeqStoreNoBlockDelim(ZSTD_CCtx* cctx, ZSTD_seq
         DEBUGLOG(6, "Storing sequence: (of: %u, ml: %u, ll: %u)", offCode, matchLength, litLength);
         RETURN_ERROR_IF(idx - seqPos->idx > cctx->seqStore.maxNbSeq, memory_allocation,
                         "Not enough memory allocated. Try adjusting ZSTD_c_minMatch.");
-        ZSTD_storeSeq(&cctx->seqStore, litLength, ip, iend, offCode, matchLength - MINMATCH);
+        ZSTD_storeSeq(&cctx->seqStore, litLength, ip, iend, offCode, matchLength);
         ip += matchLength + litLength;
     }
     DEBUGLOG(5, "Ending seq: idx: %u (of: %u ml: %u ll: %u)", idx, inSeqs[idx].offset, inSeqs[idx].matchLength, inSeqs[idx].litLength);
diff --git a/lib/compress/zstd_compress_internal.h b/lib/compress/zstd_compress_internal.h
index 7360ee8e2..ca63903fe 100644
--- a/lib/compress/zstd_compress_internal.h
+++ b/lib/compress/zstd_compress_internal.h
@@ -601,13 +601,13 @@ static void ZSTD_safecopyLiterals(BYTE* op, BYTE const* ip, BYTE const* const ie
 }
 
 /*! ZSTD_storeSeq() :
- *  Store a sequence (litlen, litPtr, offCode and mlBase) into seqStore_t.
+ *  Store a sequence (litlen, litPtr, offCode and matchLength) into seqStore_t.
  *  `offCode` : distance to match + ZSTD_REP_MOVE (values <= ZSTD_REP_MOVE are repCodes).
- *  `mlBase` : matchLength - MINMATCH
+ *  @matchLength : must be >= MINMATCH
  *  Allowed to overread literals up to litLimit.
 */
 HINT_INLINE UNUSED_ATTR
-void ZSTD_storeSeq(seqStore_t* seqStorePtr, size_t litLength, const BYTE* literals, const BYTE* litLimit, U32 offCode, size_t mlBase)
+void ZSTD_storeSeq(seqStore_t* seqStorePtr, size_t litLength, const BYTE* literals, const BYTE* litLimit, U32 offCode, size_t matchLength)
 {
     BYTE const* const litLimit_w = litLimit - WILDCOPY_OVERLENGTH;
     BYTE const* const litEnd = literals + litLength;
@@ -616,7 +616,7 @@ void ZSTD_storeSeq(seqStore_t* seqStorePtr, size_t litLength, const BYTE* litera
     if (g_start==NULL) g_start = (const BYTE*)literals;  /* note : index only works for compression within a single segment */
     {   U32 const pos = (U32)((const BYTE*)literals - g_start);
         DEBUGLOG(6, "Cpos%7u :%3u literals, match%4u bytes at offCode%7u",
-               pos, (U32)litLength, (U32)mlBase+MINMATCH, (U32)offCode);
+               pos, (U32)litLength, (U32)matchLength, (U32)offCode);
     }
 #endif
     assert((size_t)(seqStorePtr->sequences - seqStorePtr->sequencesStart) < seqStorePtr->maxNbSeq);
@@ -650,12 +650,15 @@ void ZSTD_storeSeq(seqStore_t* seqStorePtr, size_t litLength, const BYTE* litera
     seqStorePtr->sequences[0].offset = offCode + 1;
 
     /* match Length */
-    if (mlBase>0xFFFF) {
-        assert(seqStorePtr->longLengthType == ZSTD_llt_none); /* there can only be a single long length */
-        seqStorePtr->longLengthType = ZSTD_llt_matchLength;
-        seqStorePtr->longLengthPos = (U32)(seqStorePtr->sequences - seqStorePtr->sequencesStart);
+    assert(matchLength >= MINMATCH);
+    {   size_t const mlBase = matchLength - MINMATCH;
+        if (mlBase>0xFFFF) {
+            assert(seqStorePtr->longLengthType == ZSTD_llt_none); /* there can only be a single long length */
+            seqStorePtr->longLengthType = ZSTD_llt_matchLength;
+            seqStorePtr->longLengthPos = (U32)(seqStorePtr->sequences - seqStorePtr->sequencesStart);
+        }
+        seqStorePtr->sequences[0].matchLength = (U16)mlBase;
     }
-    seqStorePtr->sequences[0].matchLength = (U16)mlBase;
 
     seqStorePtr->sequences++;
 }
diff --git a/lib/compress/zstd_double_fast.c b/lib/compress/zstd_double_fast.c
index b9393b6a4..c9f7b9d74 100644
--- a/lib/compress/zstd_double_fast.c
+++ b/lib/compress/zstd_double_fast.c
@@ -131,7 +131,7 @@ size_t ZSTD_compressBlock_doubleFast_noDict_generic(
     if ((offset_1 > 0) & (MEM_read32(ip+1-offset_1) == MEM_read32(ip+1))) {
         mLength = ZSTD_count(ip+1+4, ip+1+4-offset_1, iend) + 4;
         ip++;
-        ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, 0, mLength-MINMATCH);
+        ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, 0, mLength);
         goto _match_stored;
     }
 
@@ -217,7 +217,7 @@ _match_found: /* requires ip, offset, mLength */
         hashLong[hl1] = (U32)(ip1 - base);
     }
 
-    ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
+    ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset + ZSTD_REP_MOVE, mLength);
 
 _match_stored:
     /* match found */
@@ -243,7 +243,7 @@ _match_stored:
                 U32 const tmpOff = offset_2; offset_2 = offset_1; offset_1 = tmpOff;  /* swap offset_2 <=> offset_1 */
                 hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = (U32)(ip-base);
                 hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = (U32)(ip-base);
-                ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, rLength-MINMATCH);
+                ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, rLength);
                 ip += rLength;
                 anchor = ip;
                 continue;   /* faster when present ... (?) */
@@ -328,7 +328,7 @@ size_t ZSTD_compressBlock_doubleFast_dictMatchState_generic(
             const BYTE* repMatchEnd = repIndex < prefixLowestIndex ? dictEnd : iend;
             mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixLowest) + 4;
             ip++;
-            ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, 0, mLength-MINMATCH);
+            ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, 0, mLength);
             goto _match_stored;
         }
 
@@ -419,7 +419,7 @@ _match_found:
     offset_2 = offset_1;
     offset_1 = offset;
 
-    ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
+    ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset + ZSTD_REP_MOVE, mLength);
 
 _match_stored:
     /* match found */
@@ -448,7 +448,7 @@ _match_stored:
                 const BYTE* const repEnd2 = repIndex2 < prefixLowestIndex ? dictEnd : iend;
                 size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixLowest) + 4;
                 U32 tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset;   /* swap offset_2 <=> offset_1 */
-                ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, repLength2-MINMATCH);
+                ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, repLength2);
                 hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = current2;
                 hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = current2;
                 ip += repLength2;
@@ -585,7 +585,7 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic(
             const BYTE* repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend;
             mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixStart) + 4;
             ip++;
-            ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, 0, mLength-MINMATCH);
+            ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, 0, mLength);
         } else {
             if ((matchLongIndex > dictStartIndex) && (MEM_read64(matchLong) == MEM_read64(ip))) {
                 const BYTE* const matchEnd = matchLongIndex < prefixStartIndex ? dictEnd : iend;
@@ -596,7 +596,7 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic(
                 while (((ip>anchor) & (matchLong>lowMatchPtr)) && (ip[-1] == matchLong[-1])) { ip--; matchLong--; mLength++; }   /* catch up */
                 offset_2 = offset_1;
                 offset_1 = offset;
-                ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
+                ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset + ZSTD_REP_MOVE, mLength);
 
             } else if ((matchIndex > dictStartIndex) && (MEM_read32(match) == MEM_read32(ip))) {
                 size_t const h3 = ZSTD_hashPtr(ip+1, hBitsL, 8);
@@ -621,7 +621,7 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic(
                 }
                 offset_2 = offset_1;
                 offset_1 = offset;
-                ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
+                ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset + ZSTD_REP_MOVE, mLength);
 
             } else {
                 ip += ((ip-anchor) >> kSearchStrength) + 1;
@@ -653,7 +653,7 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic(
                     const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? dictEnd : iend;
                     size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixStart) + 4;
                     U32 const tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset;   /* swap offset_2 <=> offset_1 */
-                    ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, repLength2-MINMATCH);
+                    ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, repLength2);
                     hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = current2;
                     hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = current2;
                     ip += repLength2;
diff --git a/lib/compress/zstd_fast.c b/lib/compress/zstd_fast.c
index 2bae2f70e..29c00866e 100644
--- a/lib/compress/zstd_fast.c
+++ b/lib/compress/zstd_fast.c
@@ -282,7 +282,7 @@ _match: /* Requires: ip0, match0, offcode */
     /* Count the forward length. */
     mLength += ZSTD_count(ip0 + mLength, match0 + mLength, iend);
 
-    ZSTD_storeSeq(seqStore, (size_t)(ip0 - anchor), anchor, iend, offcode, mLength - MINMATCH);
+    ZSTD_storeSeq(seqStore, (size_t)(ip0 - anchor), anchor, iend, offcode, mLength);
 
     ip0 += mLength;
     anchor = ip0;
@@ -306,7 +306,7 @@ _match: /* Requires: ip0, match0, offcode */
                 { U32 const tmpOff = rep_offset2; rep_offset2 = rep_offset1; rep_offset1 = tmpOff; }  /* swap rep_offset2 <=> rep_offset1 */
                 hashTable[ZSTD_hashPtr(ip0, hlog, mls)] = (U32)(ip0-base);
                 ip0 += rLength;
-                ZSTD_storeSeq(seqStore, 0 /*litLen*/, anchor, iend, 0 /*offCode*/, rLength-MINMATCH);
+                ZSTD_storeSeq(seqStore, 0 /*litLen*/, anchor, iend, 0 /*offCode*/, rLength);
                 anchor = ip0;
                 continue;   /* faster when present (confirmed on gcc-8) ... (?) */
     }   }   }
@@ -439,7 +439,7 @@ size_t ZSTD_compressBlock_fast_dictMatchState_generic(
             const BYTE* const repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend;
             mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixStart) + 4;
             ip++;
-            ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, 0, mLength-MINMATCH);
+            ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, 0, mLength);
         } else if ( (matchIndex <= prefixStartIndex) ) {
             size_t const dictHash = ZSTD_hashPtr(ip, dictHLog, mls);
             U32 const dictMatchIndex = dictHashTable[dictHash];
@@ -459,7 +459,7 @@ size_t ZSTD_compressBlock_fast_dictMatchState_generic(
                 } /* catch up */
                 offset_2 = offset_1;
                 offset_1 = offset;
-                ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
+                ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset + ZSTD_REP_MOVE, mLength);
             }
         } else if (MEM_read32(match) != MEM_read32(ip)) {
             /* it's not a match, and we're not going to check the dictionary */
@@ -474,7 +474,7 @@ size_t ZSTD_compressBlock_fast_dictMatchState_generic(
                  && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */
             offset_2 = offset_1;
             offset_1 = offset;
-            ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
+            ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset + ZSTD_REP_MOVE, mLength);
         }
 
         /* match found */
@@ -499,7 +499,7 @@ size_t ZSTD_compressBlock_fast_dictMatchState_generic(
                 const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? dictEnd : iend;
                 size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixStart) + 4;
                 U32 tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset;   /* swap offset_2 <=> offset_1 */
-                ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, repLength2-MINMATCH);
+                ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, repLength2);
                 hashTable[ZSTD_hashPtr(ip, hlog, mls)] = current2;
                 ip += repLength2;
                 anchor = ip;
@@ -598,7 +598,7 @@ static size_t ZSTD_compressBlock_fast_extDict_generic(
             const BYTE* const repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend;
             size_t const rLength = ZSTD_count_2segments(ip+1 +4, repMatch +4, iend, repMatchEnd, prefixStart) + 4;
             ip++;
-            ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, 0, rLength-MINMATCH);
+            ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, 0, rLength);
             ip += rLength;
             anchor = ip;
         } else {
@@ -614,7 +614,7 @@ static size_t ZSTD_compressBlock_fast_extDict_generic(
            size_t mLength = ZSTD_count_2segments(ip+4, match+4, iend, matchEnd, prefixStart) + 4;
            while (((ip>anchor) & (match>lowMatchPtr)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; }   /* catch up */
            offset_2 = offset_1; offset_1 = offset;  /* update offset history */
-            ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
+            ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset + ZSTD_REP_MOVE, mLength);
            ip += mLength;
            anchor = ip;
    }   }
@@ -633,7 +633,7 @@ static size_t ZSTD_compressBlock_fast_extDict_generic(
                 const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? dictEnd : iend;
                 size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixStart) + 4;
                 { U32 const tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; }  /* swap offset_2 <=> offset_1 */
-                ZSTD_storeSeq(seqStore, 0 /*litlen*/, anchor, iend, 0 /*offcode*/, repLength2-MINMATCH);
+                ZSTD_storeSeq(seqStore, 0 /*litlen*/, anchor, iend, 0 /*offcode*/, repLength2);
                 hashTable[ZSTD_hashPtr(ip, hlog, mls)] = current2;
                 ip += repLength2;
                 anchor = ip;
diff --git a/lib/compress/zstd_lazy.c b/lib/compress/zstd_lazy.c
index c40473cad..83831c673 100644
--- a/lib/compress/zstd_lazy.c
+++ b/lib/compress/zstd_lazy.c
@@ -1668,7 +1668,7 @@ ZSTD_compressBlock_lazy_generic(
         /* store sequence */
 _storeSequence:
         {   size_t const litLength = (size_t)(start - anchor);
-            ZSTD_storeSeq(seqStore, litLength, anchor, iend, (U32)offset, matchLength-MINMATCH);
+            ZSTD_storeSeq(seqStore, litLength, anchor, iend, (U32)offset, matchLength);
             anchor = ip = start + matchLength;
         }
 
@@ -1685,7 +1685,7 @@ _storeSequence:
                 const BYTE* const repEnd2 = repIndex < prefixLowestIndex ? dictEnd : iend;
                 matchLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd2, prefixLowest) + 4;
                 offset = offset_2; offset_2 = offset_1; offset_1 = (U32)offset;   /* swap offset_2 <=> offset_1 */
-                ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, matchLength-MINMATCH);
+                ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, matchLength);
                 ip += matchLength;
                 anchor = ip;
                 continue;
@@ -1700,7 +1700,7 @@ _storeSequence:
             /* store sequence */
             matchLength = ZSTD_count(ip+4, ip+4-offset_2, iend) + 4;
             offset = offset_2; offset_2 = offset_1; offset_1 = (U32)offset; /* swap repcodes */
-            ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, matchLength-MINMATCH);
+            ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, matchLength);
             ip += matchLength;
             anchor = ip;
             continue;   /* faster when present ... (?) */
@@ -2010,7 +2010,7 @@ size_t ZSTD_compressBlock_lazy_extDict_generic(
         /* store sequence */
 _storeSequence:
         {   size_t const litLength = (size_t)(start - anchor);
-            ZSTD_storeSeq(seqStore, litLength, anchor, iend, (U32)offset, matchLength-MINMATCH);
+            ZSTD_storeSeq(seqStore, litLength, anchor, iend, (U32)offset, matchLength);
             anchor = ip = start + matchLength;
         }
 
@@ -2028,7 +2028,7 @@ _storeSequence:
             const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
             matchLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd, prefixStart) + 4;
             offset = offset_2; offset_2 = offset_1; offset_1 = (U32)offset;   /* swap offset history */
-            ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, matchLength-MINMATCH);
+            ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, matchLength);
             ip += matchLength;
             anchor = ip;
             continue;   /* faster when present ... (?) */
diff --git a/lib/compress/zstd_ldm.c b/lib/compress/zstd_ldm.c
index 19b99f278..1e7e626d2 100644
--- a/lib/compress/zstd_ldm.c
+++ b/lib/compress/zstd_ldm.c
@@ -710,7 +710,7 @@ size_t ZSTD_ldm_blockCompress(rawSeqStore_t* rawSeqStore,
         /* Store the sequence */
         ZSTD_storeSeq(seqStore, newLitLength, ip - newLitLength, iend,
                       sequence.offset + ZSTD_REP_MOVE,
-                      sequence.matchLength - MINMATCH);
+                      sequence.matchLength);
         ip += sequence.matchLength;
     }
 }
diff --git a/lib/compress/zstd_opt.c b/lib/compress/zstd_opt.c
index 7e5eb8b57..2356fa2ae 100644
--- a/lib/compress/zstd_opt.c
+++ b/lib/compress/zstd_opt.c
@@ -1286,7 +1286,7 @@ _shortestPath:   /* cur, last_pos, best_mlen, best_off have to be set */
 
                 assert(anchor + llen <= iend);
                 ZSTD_updateStats(optStatePtr, llen, anchor, offCode, mlen);
-                ZSTD_storeSeq(seqStore, llen, anchor, iend, offCode, mlen-MINMATCH);
+                ZSTD_storeSeq(seqStore, llen, anchor, iend, offCode, mlen);
                 anchor += advance;
                 ip = anchor;
     }   }
diff --git a/tests/decodecorpus.c b/tests/decodecorpus.c
index 4f4db8575..75d89f52c 100644
--- a/tests/decodecorpus.c
+++ b/tests/decodecorpus.c
@@ -759,7 +759,7 @@ static U32 generateSequences(U32* seed, frame_t* frame, seqStore_t* seqStore,
         }
         /* use libzstd sequence handling */
         ZSTD_storeSeq(seqStore, literalLen, literals, literals + literalLen,
-                      offsetCode, matchLen - MINMATCH);
+                      offsetCode, matchLen);
         literalsSize -= literalLen;
         excessMatch -= (matchLen - MIN_SEQ_LEN);
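
A minimal, self-contained sketch of the contract this commit introduces (not zstd source: the toy_* names are hypothetical, and MINMATCH is hard-coded to zstd's minimum match length of 3). Call sites now pass the full match length, and the single `- MINMATCH` conversion happens inside the store function, guarded by an assert():

    #include <assert.h>
    #include <stddef.h>
    #include <stdint.h>
    #include <stdio.h>

    #define MINMATCH 3   /* zstd's minimum match length */

    /* toy stand-in for one seqStore_t entry */
    typedef struct { uint32_t litLength; uint32_t offCode; uint16_t mlBase; } toy_seq;

    /* mirrors the new ZSTD_storeSeq() contract: matchLength must be >= MINMATCH */
    static void toy_storeSeq(toy_seq* seq, uint32_t litLength, uint32_t offCode, size_t matchLength)
    {
        assert(matchLength >= MINMATCH);                      /* interface contract */
        seq->litLength = litLength;
        seq->offCode   = offCode;
        seq->mlBase    = (uint16_t)(matchLength - MINMATCH);  /* conversion done once, internally */
    }

    int main(void)
    {
        toy_seq s;
        toy_storeSeq(&s, 5, 0, 8);                            /* caller passes the real match length */
        printf("stored mlBase = %u\n", (unsigned)s.mlBase);   /* prints 5 (= 8 - MINMATCH) */
        return 0;
    }

This mirrors the zstd_compress_internal.h hunk above: the 16-bit seqStore field still holds matchLength - MINMATCH, but the subtraction and the >= MINMATCH precondition check are now centralized in one place instead of being repeated at every call site.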