From: Yann Collet Date: Tue, 17 Dec 2024 02:05:40 +0000 (-0800) Subject: ZSTD_compressSequencesAndLiterals() now supports multi-blocks frames. X-Git-Tag: v1.5.7^2~48^2~22 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=31b5ef25393c3abf4bb9e290cf32b06d97c78b93;p=thirdparty%2Fzstd.git ZSTD_compressSequencesAndLiterals() now supports multi-blocks frames. --- diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index be6107cb8..71e40bdc1 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -7104,7 +7104,7 @@ size_t ZSTD_compressSequences(ZSTD_CCtx* cctx, */ FORCE_INLINE_TEMPLATE size_t ZSTD_transferSequencesOnly_wBlockDelim_internal(ZSTD_CCtx* cctx, - ZSTD_SequencePosition* seqPos, size_t* litConsumedPtr, + ZSTD_SequencePosition* seqPos, const ZSTD_Sequence* const inSeqs, size_t nbSequences, size_t blockSize, int const repcodeResolution, @@ -7197,37 +7197,36 @@ ZSTD_transferSequencesOnly_wBlockDelim_internal(ZSTD_CCtx* cctx, ZSTD_memcpy(cctx->blockState.nextCBlock->rep, updatedRepcodes.rep, sizeof(Repcodes_t)); seqPos->idx = idx+1; - *litConsumedPtr = litConsumed; - return blockSize; + return litConsumed; } typedef size_t (*ZSTD_transferSequencesOnly_f) (ZSTD_CCtx* cctx, - ZSTD_SequencePosition* seqPos, size_t* litConsumedPtr, + ZSTD_SequencePosition* seqPos, const ZSTD_Sequence* const inSeqs, size_t nbSequences, size_t blockSize, int const repcodeResolution); static size_t ZSTD_transferSequencesOnly_wBlockDelim(ZSTD_CCtx* cctx, - ZSTD_SequencePosition* seqPos, size_t* litConsumedPtr, + ZSTD_SequencePosition* seqPos, const ZSTD_Sequence* const inSeqs, size_t nbSequences, size_t blockSize, int const repcodeResolution) { return ZSTD_transferSequencesOnly_wBlockDelim_internal(cctx, - seqPos, litConsumedPtr, inSeqs, nbSequences, blockSize, + seqPos, inSeqs, nbSequences, blockSize, repcodeResolution, 0); } static size_t ZSTD_transferSequencesOnly_wBlockDelim_andCheckSequences(ZSTD_CCtx* cctx, - 
ZSTD_SequencePosition* seqPos, size_t* litConsumedPtr, + ZSTD_SequencePosition* seqPos, const ZSTD_Sequence* const inSeqs, size_t nbSequences, size_t blockSize, int const repcodeResolution) { return ZSTD_transferSequencesOnly_wBlockDelim_internal(cctx, - seqPos, litConsumedPtr, inSeqs, nbSequences, blockSize, + seqPos, inSeqs, nbSequences, blockSize, repcodeResolution, 1); } @@ -7269,28 +7268,26 @@ ZSTD_compressSequencesAndLiterals_internal(ZSTD_CCtx* cctx, inSeqs, nbSequences, seqPos); U32 const lastBlock = (blockSize == remaining); FORWARD_IF_ERROR(blockSize, "Error while trying to determine block size"); - RETURN_ERROR_IF(!lastBlock, srcSize_wrong, "Only supports single block"); assert(blockSize <= remaining); ZSTD_resetSeqStore(&cctx->seqStore); - blockSize = transfer(cctx, - &seqPos, &litConsumed, + litConsumed = transfer(cctx, + &seqPos, inSeqs, nbSequences, blockSize, repcodeResolution); - RETURN_ERROR_IF(blockSize != remaining, externalSequences_invalid, "Must consume the entire block"); - RETURN_ERROR_IF(litConsumed != litSize, externalSequences_invalid, "Must consume the exact amount of literals provided"); - FORWARD_IF_ERROR(blockSize, "Bad sequence copy"); + FORWARD_IF_ERROR(litConsumed, "Bad sequence conversion"); + RETURN_ERROR_IF(litConsumed > litSize, externalSequences_invalid, "discrepancy between literals buffer and Sequences"); /* Note: when blockSize is very small, other variant send it uncompressed. - * Here, we still send the sequences, because we don't have the source to send it uncompressed. - * In theory, it would be possible to reproduce the source from the sequences, - * but that's pretty complex and memory intensive, which goes against the principles of this variant. */ + * Here, we still send the sequences, because we don't have the original source to send it uncompressed. 
+ * One could imagine it possible to reproduce the source from the sequences, + * but that's complex and memory intensive, which goes against the objectives of this variant. */ RETURN_ERROR_IF(dstCapacity < ZSTD_blockHeaderSize, dstSize_tooSmall, "not enough dstCapacity to write a new compressed block"); compressedSeqsSize = ZSTD_entropyCompressSeqStore_wExtLitBuffer( op + ZSTD_blockHeaderSize /* Leave space for block header */, dstCapacity - ZSTD_blockHeaderSize, - literals, litSize, + literals, litConsumed, blockSize, &cctx->seqStore, &cctx->blockState.prevCBlock->entropy, &cctx->blockState.nextCBlock->entropy, @@ -7299,17 +7296,19 @@ ZSTD_compressSequencesAndLiterals_internal(ZSTD_CCtx* cctx, cctx->bmi2); FORWARD_IF_ERROR(compressedSeqsSize, "Compressing sequences of block failed"); DEBUGLOG(5, "Compressed sequences size: %zu", compressedSeqsSize); + litSize -= litConsumed; + literals = (const char*)literals + litConsumed; /* Note: difficult to check source for RLE block when only Literals are provided, * but it could be considered from analyzing the sequence directly */ if (compressedSeqsSize == 0) { - /* Sending uncompressed blocks is difficult, because the source is not provided. + /* Sending uncompressed blocks is out of reach, because the source is not provided. * In theory, one could use the sequences to regenerate the source, like a decompressor, * but it's complex, and memory hungry, killing the purpose of this variant. * Current outcome: generate an error code.
*/ - RETURN_ERROR(dstSize_tooSmall, "Data is not compressible"); /* note: error code might be misleading */ + RETURN_ERROR(dstSize_tooSmall, "Data is not compressible"); /* note: error code could be clearer */ } else { U32 cBlockHeader; assert(compressedSeqsSize > 1); /* no RLE */ @@ -7338,6 +7337,7 @@ ZSTD_compressSequencesAndLiterals_internal(ZSTD_CCtx* cctx, DEBUGLOG(5, "cSize running total: %zu (remaining dstCapacity=%zu)", cSize, dstCapacity); } + RETURN_ERROR_IF(litSize != 0, externalSequences_invalid, "literals must be entirely and exactly consumed"); DEBUGLOG(4, "cSize final total: %zu", cSize); return cSize; } diff --git a/lib/zstd.h b/lib/zstd.h index ffcb059cf..68e78b3ca 100644 --- a/lib/zstd.h +++ b/lib/zstd.h @@ -1674,10 +1674,9 @@ ZSTD_compressSequences(ZSTD_CCtx* cctx, * It's a speed optimization, useful when the right conditions are met, * but it also features the following limitations: * - Only supports explicit delimiter mode - * - Supports 1 block only (max input 128 KB) * - Not compatible with frame checksum, which must disabled - * - Can fail (return an error) when input data cannot be compress sufficiently - * - @litSize must be == sum of all @.litLength fields in @inSeqs. Discrepancy will generate an error. + * - If any block is incompressible, will fail and return an error + * - @litSize must be == sum of all @.litLength fields in @inSeqs. Any discrepancy will generate an error. * - the buffer @literals must be larger than @litSize by at least 8 bytes. * @return : final compressed size, or a ZSTD error code. 
*/ diff --git a/tests/fuzzer.c b/tests/fuzzer.c index 3063ec2d1..4901376ff 100644 --- a/tests/fuzzer.c +++ b/tests/fuzzer.c @@ -3880,7 +3880,7 @@ static int basicUnitTests(U32 const seed, double compressibility) DISPLAYLEVEL(3, "test%3i : ZSTD_compressSequencesAndLiterals : ", testNb++); { - const size_t srcSize = 100 KB; + const size_t srcSize = 500 KB; const BYTE* const src = (BYTE*)CNBuffer; BYTE* const dst = (BYTE*)compressedBuffer; const size_t dstCapacity = ZSTD_compressBound(srcSize);