git.ipfire.org Git - thirdparty/zstd.git/commitdiff
ZSTD_compressSequencesAndLiterals() now supports multi-blocks frames.
author: Yann Collet <cyan@fb.com>
Tue, 17 Dec 2024 02:05:40 +0000 (18:05 -0800)
committer: Yann Collet <cyan@fb.com>
Fri, 20 Dec 2024 18:36:59 +0000 (10:36 -0800)
lib/compress/zstd_compress.c
lib/zstd.h
tests/fuzzer.c

index be6107cb8e136b11c13770be63534f60e00f5638..71e40bdc14657fa7898fa91a1e4b37ec12faf32f 100644 (file)
@@ -7104,7 +7104,7 @@ size_t ZSTD_compressSequences(ZSTD_CCtx* cctx,
  */
 FORCE_INLINE_TEMPLATE size_t
 ZSTD_transferSequencesOnly_wBlockDelim_internal(ZSTD_CCtx* cctx,
-                        ZSTD_SequencePosition* seqPos, size_t* litConsumedPtr,
+                        ZSTD_SequencePosition* seqPos,
                         const ZSTD_Sequence* const inSeqs, size_t nbSequences,
                         size_t blockSize,
                         int const repcodeResolution,
@@ -7197,37 +7197,36 @@ ZSTD_transferSequencesOnly_wBlockDelim_internal(ZSTD_CCtx* cctx,
     ZSTD_memcpy(cctx->blockState.nextCBlock->rep, updatedRepcodes.rep, sizeof(Repcodes_t));
 
     seqPos->idx = idx+1;
-    *litConsumedPtr = litConsumed;
-    return blockSize;
+    return litConsumed;
 }
 
 typedef size_t (*ZSTD_transferSequencesOnly_f) (ZSTD_CCtx* cctx,
-                        ZSTD_SequencePosition* seqPos, size_t* litConsumedPtr,
+                        ZSTD_SequencePosition* seqPos,
                         const ZSTD_Sequence* const inSeqs, size_t nbSequences,
                         size_t blockSize,
                         int const repcodeResolution);
 
 static size_t
 ZSTD_transferSequencesOnly_wBlockDelim(ZSTD_CCtx* cctx,
-                        ZSTD_SequencePosition* seqPos, size_t* litConsumedPtr,
+                        ZSTD_SequencePosition* seqPos,
                         const ZSTD_Sequence* const inSeqs, size_t nbSequences,
                         size_t blockSize,
                         int const repcodeResolution)
 {
     return ZSTD_transferSequencesOnly_wBlockDelim_internal(cctx,
-                seqPos, litConsumedPtr, inSeqs, nbSequences, blockSize,
+                seqPos, inSeqs, nbSequences, blockSize,
                 repcodeResolution, 0);
 }
 
 static size_t
 ZSTD_transferSequencesOnly_wBlockDelim_andCheckSequences(ZSTD_CCtx* cctx,
-                        ZSTD_SequencePosition* seqPos, size_t* litConsumedPtr,
+                        ZSTD_SequencePosition* seqPos,
                         const ZSTD_Sequence* const inSeqs, size_t nbSequences,
                         size_t blockSize,
                         int const repcodeResolution)
 {
     return ZSTD_transferSequencesOnly_wBlockDelim_internal(cctx,
-                seqPos, litConsumedPtr, inSeqs, nbSequences, blockSize,
+                seqPos, inSeqs, nbSequences, blockSize,
                 repcodeResolution, 1);
 }
 
@@ -7269,28 +7268,26 @@ ZSTD_compressSequencesAndLiterals_internal(ZSTD_CCtx* cctx,
                                         inSeqs, nbSequences, seqPos);
         U32 const lastBlock = (blockSize == remaining);
         FORWARD_IF_ERROR(blockSize, "Error while trying to determine block size");
-        RETURN_ERROR_IF(!lastBlock, srcSize_wrong, "Only supports single block");
         assert(blockSize <= remaining);
         ZSTD_resetSeqStore(&cctx->seqStore);
 
-        blockSize = transfer(cctx,
-                            &seqPos, &litConsumed,
+        litConsumed = transfer(cctx,
+                            &seqPos,
                             inSeqs, nbSequences,
                             blockSize,
                             repcodeResolution);
-        RETURN_ERROR_IF(blockSize != remaining, externalSequences_invalid, "Must consume the entire block");
-        RETURN_ERROR_IF(litConsumed != litSize, externalSequences_invalid, "Must consume the exact amount of literals provided");
-        FORWARD_IF_ERROR(blockSize, "Bad sequence copy");
+        FORWARD_IF_ERROR(litConsumed, "Bad sequence conversion");
+        RETURN_ERROR_IF(litConsumed > litSize, externalSequences_invalid, "discrepancy between literals buffer and Sequences");
 
         /* Note: when blockSize is very small, other variant send it uncompressed.
-         * Here, we still send the sequences, because we don't have the source to send it uncompressed.
-         * In theory, it would be possible to reproduce the source from the sequences,
-         * but that's pretty complex and memory intensive, which goes against the principles of this variant. */
+         * Here, we still send the sequences, because we don't have the original source to send it uncompressed.
+         * One could imagine it possible to reproduce the source from the sequences,
+         * but that's complex, costly and memory intensive, which goes against the objectives of this variant. */
 
         RETURN_ERROR_IF(dstCapacity < ZSTD_blockHeaderSize, dstSize_tooSmall, "not enough dstCapacity to write a new compressed block");
         compressedSeqsSize = ZSTD_entropyCompressSeqStore_wExtLitBuffer(
                                 op + ZSTD_blockHeaderSize /* Leave space for block header */, dstCapacity - ZSTD_blockHeaderSize,
-                                literals, litSize,
+                                literals, litConsumed,
                                 blockSize,
                                 &cctx->seqStore,
                                 &cctx->blockState.prevCBlock->entropy, &cctx->blockState.nextCBlock->entropy,
@@ -7299,17 +7296,19 @@ ZSTD_compressSequencesAndLiterals_internal(ZSTD_CCtx* cctx,
                                 cctx->bmi2);
         FORWARD_IF_ERROR(compressedSeqsSize, "Compressing sequences of block failed");
         DEBUGLOG(5, "Compressed sequences size: %zu", compressedSeqsSize);
+        litSize -= litConsumed;
+        literals = (const char*)literals + litConsumed;
 
         /* Note: difficult to check source for RLE block when only Literals are provided,
          * but it could be considered from analyzing the sequence directly */
 
         if (compressedSeqsSize == 0) {
-            /* Sending uncompressed blocks is difficult, because the source is not provided.
+            /* Sending uncompressed blocks is out of reach, because the source is not provided.
              * In theory, one could use the sequences to regenerate the source, like a decompressor,
              * but it's complex, and memory hungry, killing the purpose of this variant.
              * Current outcome: generate an error code.
              */
-            RETURN_ERROR(dstSize_tooSmall, "Data is not compressible"); /* note: error code might be misleading */
+            RETURN_ERROR(dstSize_tooSmall, "Data is not compressible"); /* note: error code could be clearer */
         } else {
             U32 cBlockHeader;
             assert(compressedSeqsSize > 1); /* no RLE */
@@ -7338,6 +7337,7 @@ ZSTD_compressSequencesAndLiterals_internal(ZSTD_CCtx* cctx,
         DEBUGLOG(5, "cSize running total: %zu (remaining dstCapacity=%zu)", cSize, dstCapacity);
     }
 
+    RETURN_ERROR_IF(litSize != 0, externalSequences_invalid, "literals must be entirely and exactly consumed");
     DEBUGLOG(4, "cSize final total: %zu", cSize);
     return cSize;
 }
index ffcb059cf8da7b80216801c1ab5b1fcbbb2048e6..68e78b3cac00c1835f3c5375677596dcb2f1e282 100644 (file)
@@ -1674,10 +1674,9 @@ ZSTD_compressSequences(ZSTD_CCtx* cctx,
  * It's a speed optimization, useful when the right conditions are met,
  * but it also features the following limitations:
  * - Only supports explicit delimiter mode
- * - Supports 1 block only (max input 128 KB)
 * - Not compatible with frame checksum, which must be disabled
- * - Can fail (return an error) when input data cannot be compress sufficiently
- * - @litSize must be == sum of all @.litLength fields in @inSeqs. Discrepancy will generate an error.
+ * - If any block is incompressible, will fail and return an error
+ * - @litSize must be == sum of all @.litLength fields in @inSeqs. Any discrepancy will generate an error.
  * - the buffer @literals must be larger than @litSize by at least 8 bytes.
  * @return : final compressed size, or a ZSTD error code.
  */
index 3063ec2d1a46f75b5d10eea20dc23034b396a2db..4901376ffa2e1255969baa8e9cba7ee0089ce00a 100644 (file)
@@ -3880,7 +3880,7 @@ static int basicUnitTests(U32 const seed, double compressibility)
 
     DISPLAYLEVEL(3, "test%3i : ZSTD_compressSequencesAndLiterals : ", testNb++);
     {
-        const size_t srcSize = 100 KB;
+        const size_t srcSize = 500 KB;
         const BYTE* const src = (BYTE*)CNBuffer;
         BYTE* const dst = (BYTE*)compressedBuffer;
         const size_t dstCapacity = ZSTD_compressBound(srcSize);