From 55b90ef010f2a928e3f97e8e47277d1e643874f1 Mon Sep 17 00:00:00 2001 From: senhuang42 Date: Mon, 16 Nov 2020 10:47:26 -0500 Subject: [PATCH] Fix unit tests to agree with new changes --- lib/compress/zstd_compress.c | 51 +++++++++++++++++++++--------------- tests/fuzzer.c | 6 +++-- 2 files changed, 34 insertions(+), 23 deletions(-) diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index 434ad6e1c..263a45025 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -4489,7 +4489,7 @@ size_t ZSTD_compress2(ZSTD_CCtx* cctx, typedef struct { U32 idx; /* Index in array of ZSTD_Sequence */ U32 posInSequence; /* Position within sequence at idx */ - U64 posInSrc; /* Position in src stream */ + U64 posInSrc; /* Number of bytes given by sequences provided so far */ } ZSTD_sequencePosition; #if defined(DEBUGLEVEL) && (DEBUGLEVEL >= 6) @@ -4510,7 +4510,7 @@ static size_t ZSTD_validateSequence(U32 offCode, U32 repCode, U32 matchLength, } #endif -/* Returns offset code, given a raw offset and repcode array */ +/* Returns an offset code, given a sequence's raw offset, the ongoing repcode array, and whether litLength == 0 */ static U32 ZSTD_finalizeOffCode(U32 rawOffset, const U32* const rep, U32 ll0) { U32 offCode = rawOffset + ZSTD_REP_MOVE; U32 repCode = 0; @@ -4525,17 +4525,21 @@ static U32 ZSTD_finalizeOffCode(U32 rawOffset, const U32* const rep, U32 ll0) { repCode = 3; } if (repCode) { + /* ZSTD_storeSeq expects a number in the range [0, 2] to represent a repcode */ offCode = repCode - 1; } return offCode; } +/* Returns 0 on success, and a ZSTD_error otherwise. This function scans through an array of + * ZSTD_Sequence, storing the sequences it finds, until it reaches a block delimiter. + */ static size_t ZSTD_copySequencesToSeqStoreBlockDelim(seqStore_t* seqStore, ZSTD_sequencePosition* seqPos, const ZSTD_Sequence* const inSeqs, size_t inSeqsSize, const void* src, size_t blockSize, ZSTD_CCtx* cctx) { size_t idx = seqPos->idx; BYTE const* ip = (BYTE const*)(src); - BYTE const* iend = ip + blockSize; + const BYTE const* iend = ip + blockSize; repcodes_t updatedRepcodes; U32 dictSize; U32 litLength; @@ -4545,12 +4549,11 @@ static size_t ZSTD_copySequencesToSeqStoreBlockDelim(seqStore_t* seqStore, ZSTD_ if (cctx->cdict) { dictSize = cctx->cdict->dictContentSize; - } else if (cctx->prefixDict.dictSize) { + } else if (cctx->prefixDict.dict) { dictSize = cctx->prefixDict.dictSize; } else { dictSize = 0; } - ZSTD_memcpy(updatedRepcodes.rep, cctx->blockState.prevCBlock->rep, sizeof(repcodes_t)); for (; (inSeqs[idx].matchLength != 0 || inSeqs[idx].offset != 0) && idx < inSeqsSize; ++idx) { litLength = inSeqs[idx].litLength; @@ -4587,7 +4590,15 @@ static size_t ZSTD_copySequencesToSeqStoreBlockDelim(seqStore_t* seqStore, ZSTD_ } /* Returns the number of bytes to move the current read position back by. Only non-zero - * if we ended up splitting a sequence. + * if we ended up splitting a sequence. Otherwise, it may return a ZSTD error if something + * went wrong. + * + * This function will attempt to scan through blockSize bytes represented by the sequences + * in inSeqs, storing any (partial) sequences. + * + * Occasionally, we may want to change the actual number of bytes we consumed from inSeqs to + * avoid splitting a match, or to avoid splitting a match such that it would produce a match + * smaller than MINMATCH. In this case, we return the number of bytes that we didn't read from this block. */ static size_t ZSTD_copySequencesToSeqStore(seqStore_t* seqStore, ZSTD_sequencePosition* seqPos, const ZSTD_Sequence* const inSeqs, size_t inSeqsSize, @@ -4596,7 +4607,7 @@ static size_t ZSTD_copySequencesToSeqStore(seqStore_t* seqStore, ZSTD_sequencePo size_t startPosInSequence = seqPos->posInSequence; size_t endPosInSequence = seqPos->posInSequence + blockSize; BYTE const* ip = (BYTE const*)(src); - BYTE const* iend = ip + blockSize; + BYTE const* iend = ip + blockSize; /* May be adjusted if we decide to process fewer than blockSize bytes */ repcodes_t updatedRepcodes; U32 bytesAdjustment = 0; U32 finalMatchSplit = 0; @@ -4608,11 +4619,11 @@ static size_t ZSTD_copySequencesToSeqStore(seqStore_t* seqStore, ZSTD_sequencePo U32 repCode; if (cctx->cdict) { - dictSize = ZSTD_sizeof_CDict(cctx->cdict); - } else if (cctx->prefixDict.dictSize) { + dictSize = cctx->cdict->dictContentSize; + } else if (cctx->prefixDict.dict) { dictSize = cctx->prefixDict.dictSize; - } else if (ZSTD_sizeof_localDict(cctx->localDict)) { - dictSize = ZSTD_sizeof_localDict(cctx->localDict); + } else { + dictSize = 0; } DEBUGLOG(5, "ZSTD_copySequencesToSeqStore: idx: %zu PIS: %u blockSize: %zu windowSize: %u", idx, startPosInSequence, blockSize, windowSize); DEBUGLOG(5, "Start seq: idx: %u (of: %u ml: %u ll: %u)", idx, inSeqs[idx].offset, inSeqs[idx].matchLength, inSeqs[idx].litLength); @@ -4638,9 +4649,10 @@ static size_t ZSTD_copySequencesToSeqStore(seqStore_t* seqStore, ZSTD_sequencePo startPosInSequence = 0; idx++; } else { - /* This is the final sequence we're adding from inSeqs, and endPosInSequence + /* This is the final (partial) sequence we're adding from inSeqs, and endPosInSequence does not reach the end of the match. So, we have to split the sequence */ - DEBUGLOG(6, "Require a split: diff: %u, idx: %u PIS: %u", currSeq.litLength + currSeq.matchLength - endPosInSequence, idx, endPosInSequence); + DEBUGLOG(6, "Require a split: diff: %u, idx: %u PIS: %u", + currSeq.litLength + currSeq.matchLength - endPosInSequence, idx, endPosInSequence); if (endPosInSequence > litLength) { litLength = startPosInSequence >= litLength ? 0 : litLength - startPosInSequence; U32 firstHalfMatchLength = endPosInSequence - startPosInSequence - litLength; @@ -4660,7 +4672,8 @@ static size_t ZSTD_copySequencesToSeqStore(seqStore_t* seqStore, ZSTD_sequencePo } else { /* Move the position in sequence backwards so that we don't split match, and break to store * the last literals. We use the original currSeq.litLength as a marker for where endPosInSequence - * should go. + * should go. We prefer to do this whenever it is not necessary to split the match, or if doing so + * would cause the first half of the match to be too small */ bytesAdjustment = endPosInSequence - currSeq.litLength; endPosInSequence = currSeq.litLength; @@ -4692,7 +4705,6 @@ static size_t ZSTD_copySequencesToSeqStore(seqStore_t* seqStore, ZSTD_sequencePo assert(endPosInSequence <= inSeqs[idx].litLength + inSeqs[idx].matchLength); seqPos->idx = idx; seqPos->posInSequence = endPosInSequence; - /* Update repcodes */ ZSTD_memcpy(cctx->blockState.nextCBlock->rep, updatedRepcodes.rep, sizeof(repcodes_t)); iend -= bytesAdjustment; @@ -4712,7 +4724,7 @@ static size_t ZSTD_copySequencesToSeqStore(seqStore_t* seqStore, ZSTD_sequencePo /* Compress, block-by-block, all of the sequences given. * - * Returns the cumulative size of all compressed blocks (including their headers), otherwise a ZSTD error + * Returns the cumulative size of all compressed blocks (including their headers), otherwise a ZSTD error. */ static size_t ZSTD_compressSequences_internal(void* dst, size_t dstCapacity, ZSTD_CCtx* cctx, @@ -4849,16 +4861,13 @@ size_t ZSTD_compressSequences(ZSTD_CCtx* const cctx, void* dst, size_t dstCapaci op += frameHeaderSize; dstCapacity -= frameHeaderSize; cSize += frameHeaderSize; - - /* Update checksum if requested */ if (cctx->appliedParams.fParams.checksumFlag && srcSize) { XXH64_update(&cctx->xxhState, src, srcSize); } - /* cSize includes block header size and compressed sequences size */ compressedBlocksSize = ZSTD_compressSequences_internal(op, dstCapacity, - cctx, inSeqs, inSeqsSize, - src, srcSize); + cctx, inSeqs, inSeqsSize, + src, srcSize); FORWARD_IF_ERROR(compressedBlocksSize, "Compressing blocks failed!"); cSize += compressedBlocksSize; dstCapacity -= compressedBlocksSize; diff --git a/tests/fuzzer.c b/tests/fuzzer.c index f40ca08e8..2e5d70ef2 100644 --- a/tests/fuzzer.c +++ b/tests/fuzzer.c @@ -2764,7 +2764,8 @@ static int basicUnitTests(U32 const seed, double compressibility) /* Test with block delimiters roundtrip */ seqsSize = ZSTD_generateSequences(cctx, seqs, srcSize, src, srcSize); ZSTD_CCtx_reset(cctx, ZSTD_reset_session_and_parameters); - compressedSize = ZSTD_compressSequences(cctx, dst, dstSize, seqs, seqsSize, src, srcSize, ZSTD_sf_explicitBlockDelimiters); + ZSTD_CCtx_setParameter(cctx, ZSTD_c_blockDelimiters, ZSTD_sf_explicitBlockDelimiters); + compressedSize = ZSTD_compressSequences(cctx, dst, dstSize, seqs, seqsSize, src, srcSize); if (ZSTD_isError(compressedSize)) { DISPLAY("Error in sequence compression with block delims\n"); goto _output_error; @@ -2779,7 +2780,8 @@ static int basicUnitTests(U32 const seed, double compressibility) /* Test with no block delimiters roundtrip */ seqsSize = ZSTD_mergeBlockDelimiters(seqs, seqsSize); ZSTD_CCtx_reset(cctx, ZSTD_reset_session_and_parameters); - compressedSize = ZSTD_compressSequences(cctx, dst, dstSize, seqs, seqsSize, src, srcSize, ZSTD_sf_noBlockDelimiters); + ZSTD_CCtx_setParameter(cctx, ZSTD_c_blockDelimiters, ZSTD_sf_noBlockDelimiters); + compressedSize = ZSTD_compressSequences(cctx, dst, dstSize, seqs, seqsSize, src, srcSize); if (ZSTD_isError(compressedSize)) { DISPLAY("Error in sequence compression with no block delims\n"); goto _output_error; -- 2.47.3