From: Yann Collet Date: Sat, 22 Jan 2022 10:30:08 +0000 (-0800) Subject: fix sequence compression API in Explicit Delimiter mode X-Git-Tag: v1.5.4^2~261^2~1 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=87dcd3326a587c7e9d61c2910f38337aa014355d;p=thirdparty%2Fzstd.git fix sequence compression API in Explicit Delimiter mode --- diff --git a/lib/common/error_private.c b/lib/common/error_private.c index 6d1135f8c..38e419d35 100644 --- a/lib/common/error_private.c +++ b/lib/common/error_private.c @@ -27,7 +27,7 @@ const char* ERR_getErrorString(ERR_enum code) case PREFIX(version_unsupported): return "Version not supported"; case PREFIX(frameParameter_unsupported): return "Unsupported frame parameter"; case PREFIX(frameParameter_windowTooLarge): return "Frame requires too much memory for decoding"; - case PREFIX(corruption_detected): return "Corrupted block detected"; + case PREFIX(corruption_detected): return "Input corruption detected"; case PREFIX(checksum_wrong): return "Restored data doesn't match checksum"; case PREFIX(parameter_unsupported): return "Unsupported parameter"; case PREFIX(parameter_outOfBound): return "Parameter is out of bound"; diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index 1cb229f7a..fc25d0541 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -5752,9 +5752,9 @@ size_t ZSTD_compress2(ZSTD_CCtx* cctx, } typedef struct { - U32 idx; /* Index in array of ZSTD_Sequence */ - U32 posInSequence; /* Position within sequence at idx */ - size_t posInSrc; /* Number of bytes given by sequences provided so far */ + U32 idx; /* Index in array of ZSTD_Sequence */ + U32 posInSequence; /* Position within sequence at idx */ + size_t posInSrc; /* Number of bytes given by sequences provided so far */ } ZSTD_sequencePosition; /* ZSTD_validateSequence() : @@ -5809,6 +5809,8 @@ ZSTD_copySequencesToSeqStoreExplicitBlockDelim(ZSTD_CCtx* cctx, repcodes_t updatedRepcodes; U32 dictSize; + DEBUGLOG(5, "ZSTD_copySequencesToSeqStoreExplicitBlockDelim (blockSize = %zu)", blockSize); + if (cctx->cdict) { dictSize = (U32)cctx->cdict->dictContentSize; } else if (cctx->prefixDict.dict) { @@ -5995,6 +5997,56 @@ static ZSTD_sequenceCopier ZSTD_selectSequenceCopier(ZSTD_sequenceFormat_e mode) return sequenceCopier; } +/* Discover the size of next by searching for the block delimiter. + * Note that a block delimiter must exist in this mode, + * otherwise it's an input error. + * The value retrieved will be later compared to ensure it remains within bounds */ +static size_t +blockSize_explicitDelimiter(const ZSTD_Sequence* inSeqs, size_t inSeqsSize, ZSTD_sequencePosition seqPos) +{ + int end = 0; + size_t blockSize = 0; + size_t spos = seqPos.idx; + assert(spos <= inSeqsSize); + while (spos < inSeqsSize) { + end = (inSeqs[spos].offset == 0); + blockSize += inSeqs[spos].litLength + inSeqs[spos].matchLength; + if (end) { + if (inSeqs[spos].matchLength != 0) + RETURN_ERROR(corruption_detected, "delimiter format error : both matchlength and offset must be == 0"); + break; + } + spos++; + } + if (!end) + RETURN_ERROR(corruption_detected, "Reached end of sequences without finding a block delimiter"); + return blockSize; +} + +/* More a "target" block size */ +static size_t blockSize_noDelimiter(size_t blockSize, size_t remaining) +{ + int const lastBlock = (remaining <= blockSize); + return lastBlock ? remaining : blockSize; +} + +static size_t determine_blockSize(ZSTD_sequenceFormat_e mode, + size_t blockSize, size_t remaining, + const ZSTD_Sequence* inSeqs, size_t inSeqsSize, ZSTD_sequencePosition seqPos) +{ + DEBUGLOG(6, "determine_blockSize : remainingSize = %zu", remaining); + if (mode == ZSTD_sf_noBlockDelimiters) + return blockSize_noDelimiter(blockSize, remaining); + { size_t const explicitBlockSize = blockSize_explicitDelimiter(inSeqs, inSeqsSize, seqPos); + FORWARD_IF_ERROR(explicitBlockSize, "Error while determining block size with explicit delimiters"); + if (explicitBlockSize > blockSize) + RETURN_ERROR(corruption_detected, "sequences incorrectly define a too large block"); + if (explicitBlockSize > remaining) + RETURN_ERROR(srcSize_wrong, "sequences define a frame longer than source"); + return explicitBlockSize; + } +} + /* Compress, block-by-block, all of the sequences given. * * Returns the cumulative size of all compressed blocks (including their headers), @@ -6007,9 +6059,6 @@ ZSTD_compressSequences_internal(ZSTD_CCtx* cctx, const void* src, size_t srcSize) { size_t cSize = 0; - U32 lastBlock; - size_t blockSize; - size_t compressedSeqsSize; size_t remaining = srcSize; ZSTD_sequencePosition seqPos = {0, 0, 0}; @@ -6029,10 +6078,15 @@ ZSTD_compressSequences_internal(ZSTD_CCtx* cctx, } while (remaining) { + size_t compressedSeqsSize; size_t cBlockSize; size_t additionalByteAdjustment; - lastBlock = remaining <= cctx->blockSize; - blockSize = lastBlock ? (U32)remaining : (U32)cctx->blockSize; + size_t blockSize = determine_blockSize(cctx->appliedParams.blockDelimiters, + cctx->blockSize, remaining, + inSeqs, inSeqsSize, seqPos); + U32 const lastBlock = (blockSize == remaining); + assert(blockSize <= remaining); + FORWARD_IF_ERROR(blockSize, "Error while trying to determine block size"); ZSTD_resetSeqStore(&cctx->seqStore); DEBUGLOG(4, "Working on new block. Blocksize: %zu", blockSize); @@ -6113,7 +6167,8 @@ ZSTD_compressSequences_internal(ZSTD_CCtx* cctx, return cSize; } -size_t ZSTD_compressSequences(ZSTD_CCtx* const cctx, void* dst, size_t dstCapacity, +size_t ZSTD_compressSequences(ZSTD_CCtx* cctx, + void* dst, size_t dstCapacity, const ZSTD_Sequence* inSeqs, size_t inSeqsSize, const void* src, size_t srcSize) { diff --git a/lib/zstd.h b/lib/zstd.h index 349d8017f..8e90ae5f9 100644 --- a/lib/zstd.h +++ b/lib/zstd.h @@ -1375,23 +1375,25 @@ typedef enum { } ZSTD_sequenceFormat_e; /*! ZSTD_generateSequences() : - * Generate sequences using ZSTD_compress2, given a source buffer. + * Generate sequences using ZSTD_compress2(), given a source buffer. * * Each block will end with a dummy sequence * with offset == 0, matchLength == 0, and litLength == length of last literals. * litLength may be == 0, and if so, then the sequence of (of: 0 ml: 0 ll: 0) * simply acts as a block delimiter. * - * zc can be used to insert custom compression params. - * This function invokes ZSTD_compress2 + * @zc can be used to insert custom compression params. + * This function invokes ZSTD_compress2(). * * The output of this function can be fed into ZSTD_compressSequences() with CCtx * setting of ZSTD_c_blockDelimiters as ZSTD_sf_explicitBlockDelimiters * @return : number of sequences generated */ -ZSTDLIB_STATIC_API size_t ZSTD_generateSequences(ZSTD_CCtx* zc, ZSTD_Sequence* outSeqs, - size_t outSeqsSize, const void* src, size_t srcSize); +ZSTDLIB_STATIC_API size_t +ZSTD_generateSequences( ZSTD_CCtx* zc, + ZSTD_Sequence* outSeqs, size_t outSeqsSize, + const void* src, size_t srcSize); /*! ZSTD_mergeBlockDelimiters() : * Given an array of ZSTD_Sequence, remove all sequences that represent block delimiters/last literals @@ -1432,11 +1434,12 @@ ZSTDLIB_STATIC_API size_t ZSTD_mergeBlockDelimiters(ZSTD_Sequence* sequences, si * Note: Repcodes are, as of now, always re-calculated within this function, so ZSTD_Sequence::rep is unused. * Note 2: Once we integrate ability to ingest repcodes, the explicit block delims mode must respect those repcodes exactly, * and cannot emit an RLE block that disagrees with the repcode history - * @return : final compressed size or a ZSTD error. + * @return : final compressed size, or a ZSTD error code. */ -ZSTDLIB_STATIC_API size_t ZSTD_compressSequences(ZSTD_CCtx* const cctx, void* dst, size_t dstSize, - const ZSTD_Sequence* inSeqs, size_t inSeqsSize, - const void* src, size_t srcSize); +ZSTDLIB_STATIC_API size_t +ZSTD_compressSequences( ZSTD_CCtx* cctx, void* dst, size_t dstSize, + const ZSTD_Sequence* inSeqs, size_t inSeqsSize, + const void* src, size_t srcSize); /*! ZSTD_writeSkippableFrame() :