From: Yann Collet Date: Thu, 12 Dec 2024 00:13:22 +0000 (-0800) Subject: added unit tests to ZSTD_compressSequencesAndLiterals() X-Git-Tag: v1.5.7^2~48^2~39 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=0b013b26884bb25149eedc6d78a5d8f5dc38739a;p=thirdparty%2Fzstd.git added unit tests to ZSTD_compressSequencesAndLiterals() seems to work as expected, correctly control that `litSize` and `srcSize` are exactly correct. --- diff --git a/doc/zstd_manual.html b/doc/zstd_manual.html index eed24a7a1..7cfeda22d 100644 --- a/doc/zstd_manual.html +++ b/doc/zstd_manual.html @@ -1422,7 +1422,7 @@ ZSTD_compressSequencesAndLiterals(ZSTD_CCtx* cctx, This can be useful if the process generating the sequences also happens to generate the buffer of literals, thus skipping an extraction + caching stage. It's essentially a speed optimization when the right conditions are met, - but it also includes so following limitations: + but it also is restricted by the following limitations: - Only supports explicit delimiter mode - Not compatible with frame checksum, which must disabled - Can fail when unable to compress sufficiently diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index d7ee68060..bb92bd1ff 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -7104,7 +7104,7 @@ size_t ZSTD_compressSequences(ZSTD_CCtx* cctx, */ static size_t ZSTD_transferSequencesOnly_wBlockDelim(ZSTD_CCtx* cctx, - ZSTD_SequencePosition* seqPos, + ZSTD_SequencePosition* seqPos, size_t* litConsumedPtr, const ZSTD_Sequence* const inSeqs, size_t nbSequences, size_t blockSize, ZSTD_ParamSwitch_e externalRepSearch) @@ -7114,6 +7114,7 @@ ZSTD_transferSequencesOnly_wBlockDelim(ZSTD_CCtx* cctx, Repcodes_t updatedRepcodes; U32 dictSize; size_t startPosInSrc = seqPos->posInSrc; + size_t litConsumed = 0; DEBUGLOG(5, "ZSTD_transferSequencesOnly_wBlockDelim (blockSize = %zu)", blockSize); @@ -7150,10 +7151,15 @@ 
ZSTD_transferSequencesOnly_wBlockDelim(ZSTD_CCtx* cctx, RETURN_ERROR_IF(idx - seqPos->idx >= cctx->seqStore.maxNbSeq, externalSequences_invalid, "Not enough memory allocated. Try adjusting ZSTD_c_minMatch."); ZSTD_storeSeqOnly(&cctx->seqStore, litLength, offBase, matchLength); + litConsumed += litLength; } /* last sequence (only literals) */ - seqPos->posInSrc += inSeqs[idx].litLength; + { size_t const lastLitLength = inSeqs[idx].litLength; + seqPos->posInSrc += lastLitLength; + cctx->seqStore.lit += lastLitLength; /* register proper length */ + litConsumed += lastLitLength; + } /* blockSize must be exactly correct (checked before calling this function) */ assert((seqPos->posInSrc - startPosInSrc) == blockSize); (void)startPosInSrc; @@ -7184,6 +7190,7 @@ ZSTD_transferSequencesOnly_wBlockDelim(ZSTD_CCtx* cctx, ZSTD_memcpy(cctx->blockState.nextCBlock->rep, updatedRepcodes.rep, sizeof(Repcodes_t)); seqPos->idx = idx+1; + *litConsumedPtr = litConsumed; return blockSize; } @@ -7214,21 +7221,23 @@ ZSTD_compressSequencesAndLiterals_internal(ZSTD_CCtx* cctx, } while (remaining) { - size_t compressedSeqsSize; - size_t cBlockSize; + size_t compressedSeqsSize, cBlockSize, litConsumed; size_t blockSize = determine_blockSize(cctx->appliedParams.blockDelimiters, cctx->blockSize, remaining, inSeqs, nbSequences, seqPos); U32 const lastBlock = (blockSize == remaining); FORWARD_IF_ERROR(blockSize, "Error while trying to determine block size"); + RETURN_ERROR_IF(!lastBlock, GENERIC, "Only supports single block"); assert(blockSize <= remaining); ZSTD_resetSeqStore(&cctx->seqStore); blockSize = ZSTD_transferSequencesOnly_wBlockDelim(cctx, - &seqPos, + &seqPos, &litConsumed, inSeqs, nbSequences, blockSize, cctx->appliedParams.searchForExternalRepcodes); + RETURN_ERROR_IF(blockSize != remaining, GENERIC, "Must consume the entire block"); + RETURN_ERROR_IF(litConsumed != litSize, GENERIC, "Must consume the exact amount of literals provided"); FORWARD_IF_ERROR(blockSize, "Bad sequence 
copy"); /* Note: when blockSize is very small, other variant send it uncompressed. diff --git a/lib/zstd.h b/lib/zstd.h index 67cb4d987..3c0e836c9 100644 --- a/lib/zstd.h +++ b/lib/zstd.h @@ -1666,17 +1666,18 @@ ZSTD_compressSequences(ZSTD_CCtx* cctx, * This can be useful if the process generating the sequences also happens to generate the buffer of literals, * thus skipping an extraction + caching stage. * It's essentially a speed optimization when the right conditions are met, - * but it also is restricted by the following limitations: + * but it also features the following limitations: * - Only supports explicit delimiter mode + * - Supports 1 block only (max input 128 KB) * - Not compatible with frame checksum, which must disabled - * - Can fail when unable to compress sufficiently + * - Can fail (return an error) when input data cannot be compressed sufficiently * Also, to be valid, @litSize must be equal to the sum of all @.litLength fields in @inSeqs. * @return : final compressed size, or a ZSTD error code. 
*/ ZSTDLIB_STATIC_API size_t ZSTD_compressSequencesAndLiterals(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, - const ZSTD_Sequence* inSeqs, size_t inSeqsSize, + const ZSTD_Sequence* inSeqs, size_t nbSequences, const void* literals, size_t litSize, size_t srcSize); diff --git a/tests/fuzzer.c b/tests/fuzzer.c index 2180d1edb..3063ec2d1 100644 --- a/tests/fuzzer.c +++ b/tests/fuzzer.c @@ -40,7 +40,6 @@ #include "datagen.h" /* RDG_genBuffer */ #define XXH_STATIC_LINKING_ONLY /* XXH64_state_t */ #include "xxhash.h" /* XXH64 */ -#include "util.h" #include "timefn.h" /* SEC_TO_MICRO, UTIL_time_t, UTIL_TIME_INITIALIZER, UTIL_clockSpanMicro, UTIL_getTime */ /* must be included after util.h, due to ERROR macro redefinition issue on Visual Studio */ #include "zstd_internal.h" /* ZSTD_WORKSPACETOOLARGE_MAXDURATION, ZSTD_WORKSPACETOOLARGE_FACTOR, KB, MB */ @@ -339,6 +338,35 @@ static void FUZ_decodeSequences(BYTE* dst, ZSTD_Sequence* seqs, size_t seqsSize, } } +static size_t FUZ_getLitSize(const ZSTD_Sequence* seqs, size_t nbSeqs) +{ + size_t n, litSize = 0; + assert(seqs != NULL); + for (n=0; n