]> git.ipfire.org Git - thirdparty/zstd.git/commitdiff
added unit tests to ZSTD_compressSequencesAndLiterals()
authorYann Collet <cyan@fb.com>
Thu, 12 Dec 2024 00:13:22 +0000 (16:13 -0800)
committerYann Collet <cyan@fb.com>
Fri, 20 Dec 2024 18:36:58 +0000 (10:36 -0800)
seems to work as expected:
it correctly verifies that `litSize` and `srcSize` are exactly correct.

doc/zstd_manual.html
lib/compress/zstd_compress.c
lib/zstd.h
tests/fuzzer.c

index eed24a7a1ea84f6ca39460f0d3f02a4848dc68ea..7cfeda22d365478e46e552aa309d66e7a85992f5 100644 (file)
@@ -1422,7 +1422,7 @@ ZSTD_compressSequencesAndLiterals(ZSTD_CCtx* cctx,
  This can be useful if the process generating the sequences also happens to generate the buffer of literals,
  thus skipping an extraction + caching stage.
  It's essentially a speed optimization when the right conditions are met,
- but it also includes so following limitations:
+ but it also is restricted by the following limitations:
  - Only supports explicit delimiter mode
  - Not compatible with frame checksum, which must be disabled
  - Can fail when unable to compress sufficiently
index d7ee68060deac6833e079efb67b86db3c5b80598..bb92bd1ffc01350f2b1ede9d3c843fe1f429469e 100644 (file)
@@ -7104,7 +7104,7 @@ size_t ZSTD_compressSequences(ZSTD_CCtx* cctx,
  */
 static size_t
 ZSTD_transferSequencesOnly_wBlockDelim(ZSTD_CCtx* cctx,
-                        ZSTD_SequencePosition* seqPos,
+                        ZSTD_SequencePosition* seqPos, size_t* litConsumedPtr,
                         const ZSTD_Sequence* const inSeqs, size_t nbSequences,
                         size_t blockSize,
                         ZSTD_ParamSwitch_e externalRepSearch)
@@ -7114,6 +7114,7 @@ ZSTD_transferSequencesOnly_wBlockDelim(ZSTD_CCtx* cctx,
     Repcodes_t updatedRepcodes;
     U32 dictSize;
     size_t startPosInSrc = seqPos->posInSrc;
+    size_t litConsumed = 0;
 
     DEBUGLOG(5, "ZSTD_transferSequencesOnly_wBlockDelim (blockSize = %zu)", blockSize);
 
@@ -7150,10 +7151,15 @@ ZSTD_transferSequencesOnly_wBlockDelim(ZSTD_CCtx* cctx,
         RETURN_ERROR_IF(idx - seqPos->idx >= cctx->seqStore.maxNbSeq, externalSequences_invalid,
                         "Not enough memory allocated. Try adjusting ZSTD_c_minMatch.");
         ZSTD_storeSeqOnly(&cctx->seqStore, litLength, offBase, matchLength);
+        litConsumed += litLength;
     }
 
     /* last sequence (only literals) */
-    seqPos->posInSrc += inSeqs[idx].litLength;
+    {   size_t const lastLitLength = inSeqs[idx].litLength;
+        seqPos->posInSrc += lastLitLength;
+        cctx->seqStore.lit += lastLitLength;  /* register proper length */
+        litConsumed += lastLitLength;
+    }
 
     /* blockSize must be exactly correct (checked before calling this function) */
     assert((seqPos->posInSrc - startPosInSrc) == blockSize); (void)startPosInSrc;
@@ -7184,6 +7190,7 @@ ZSTD_transferSequencesOnly_wBlockDelim(ZSTD_CCtx* cctx,
     ZSTD_memcpy(cctx->blockState.nextCBlock->rep, updatedRepcodes.rep, sizeof(Repcodes_t));
 
     seqPos->idx = idx+1;
+    *litConsumedPtr = litConsumed;
     return blockSize;
 }
 
@@ -7214,21 +7221,23 @@ ZSTD_compressSequencesAndLiterals_internal(ZSTD_CCtx* cctx,
     }
 
     while (remaining) {
-        size_t compressedSeqsSize;
-        size_t cBlockSize;
+        size_t compressedSeqsSize, cBlockSize, litConsumed;
         size_t blockSize = determine_blockSize(cctx->appliedParams.blockDelimiters,
                                         cctx->blockSize, remaining,
                                         inSeqs, nbSequences, seqPos);
         U32 const lastBlock = (blockSize == remaining);
         FORWARD_IF_ERROR(blockSize, "Error while trying to determine block size");
+        RETURN_ERROR_IF(!lastBlock, GENERIC, "Only supports single block");
         assert(blockSize <= remaining);
         ZSTD_resetSeqStore(&cctx->seqStore);
 
         blockSize = ZSTD_transferSequencesOnly_wBlockDelim(cctx,
-                                   &seqPos,
+                                   &seqPos, &litConsumed,
                                    inSeqs, nbSequences,
                                    blockSize,
                                    cctx->appliedParams.searchForExternalRepcodes);
+        RETURN_ERROR_IF(blockSize != remaining, GENERIC, "Must consume the entire block");
+        RETURN_ERROR_IF(litConsumed != litSize, GENERIC, "Must consume the exact amount of literals provided");
         FORWARD_IF_ERROR(blockSize, "Bad sequence copy");
 
         /* Note: when blockSize is very small, other variants send it uncompressed.
index 67cb4d987006d7a4b5de3b149cc95fb6b7b8ec48..3c0e836c9fe88816ff5e40554d90b6ab4cacc307 100644 (file)
@@ -1666,17 +1666,18 @@ ZSTD_compressSequences(ZSTD_CCtx* cctx,
  * This can be useful if the process generating the sequences also happens to generate the buffer of literals,
  * thus skipping an extraction + caching stage.
  * It's essentially a speed optimization when the right conditions are met,
- * but it also is restricted by the following limitations:
+ * but it also features the following limitations:
  * - Only supports explicit delimiter mode
+ * - Supports 1 block only (max input 128 KB)
  * - Not compatible with frame checksum, which must be disabled
- * - Can fail when unable to compress sufficiently
+ * - Can fail (return an error) when input data cannot be compressed sufficiently
  * Also, to be valid, @litSize must be equal to the sum of all @.litLength fields in @inSeqs.
  * @return : final compressed size, or a ZSTD error code.
  */
 ZSTDLIB_STATIC_API size_t
 ZSTD_compressSequencesAndLiterals(ZSTD_CCtx* cctx,
                                   void* dst, size_t dstCapacity,
-                            const ZSTD_Sequence* inSeqs, size_t inSeqsSize,
+                            const ZSTD_Sequence* inSeqs, size_t nbSequences,
                             const void* literals, size_t litSize, size_t srcSize);
 
 
index 2180d1edb3f551e5154a3064792285a1efcbf36d..3063ec2d1a46f75b5d10eea20dc23034b396a2db 100644 (file)
@@ -40,7 +40,6 @@
 #include "datagen.h"      /* RDG_genBuffer */
 #define XXH_STATIC_LINKING_ONLY   /* XXH64_state_t */
 #include "xxhash.h"       /* XXH64 */
-#include "util.h"
 #include "timefn.h"       /* SEC_TO_MICRO, UTIL_time_t, UTIL_TIME_INITIALIZER, UTIL_clockSpanMicro, UTIL_getTime */
 /* must be included after util.h, due to ERROR macro redefinition issue on Visual Studio */
 #include "zstd_internal.h" /* ZSTD_WORKSPACETOOLARGE_MAXDURATION, ZSTD_WORKSPACETOOLARGE_FACTOR, KB, MB */
@@ -339,6 +338,35 @@ static void FUZ_decodeSequences(BYTE* dst, ZSTD_Sequence* seqs, size_t seqsSize,
     }
 }
 
+static size_t FUZ_getLitSize(const ZSTD_Sequence* seqs, size_t nbSeqs)   /* @return : sum of the litLength fields of the first nbSeqs entries of seqs */
+{
+    size_t n, litSize = 0;
+    assert(seqs != NULL);   /* checked unconditionally: NULL is rejected even when nbSeqs == 0 */
+    for (n=0; n<nbSeqs; n++) {
+        litSize += seqs[n].litLength;   /* every entry contributes, whatever its matchLength */
+    }
+    return litSize;
+}
+
+static void   /* copies the literal bytes of src, as laid out by seqs, back-to-back into dst */
+FUZ_transferLiterals(void* dst, size_t dstCapacity,
+                const void* src, size_t srcSize,
+                const ZSTD_Sequence* seqs, size_t nbSeqs)
+{
+    size_t n;
+    const char* ip = (const char*)src;   /* read cursor inside src */
+    char* op = (char*)dst;               /* write cursor inside dst */
+    size_t const litSize = FUZ_getLitSize(seqs, nbSeqs);
+    assert(litSize <= dstCapacity);      /* dst must be able to hold all the literals */
+    for (n=0; n<nbSeqs; n++) {
+        size_t const ll = seqs[n].litLength;
+        memcpy(op, ip, ll);              /* literals of sequence n */
+        op += ll;
+        ip += ll + seqs[n].matchLength;  /* step over the match bytes: they are not copied */
+    }
+    assert((size_t)(ip - (const char*)src) == srcSize);   /* litLength + matchLength over all sequences must add up to srcSize */
+}
+
 #ifdef ZSTD_MULTITHREAD
 
 typedef struct {
@@ -3808,7 +3836,7 @@ static int basicUnitTests(U32 const seed, double compressibility)
         if (seqs == NULL) goto _output_error;
         assert(cctx != NULL);
 
-        /* Populate src with random data */
+        /* Populate src with compressible random data */
         RDG_genBuffer(CNBuffer, srcSize, compressibility, 0., seed);
 
         /* Roundtrip Test with block delimiters generated by ZSTD_generateSequences() */
@@ -3850,6 +3878,79 @@ static int basicUnitTests(U32 const seed, double compressibility)
     }
     DISPLAYLEVEL(3, "OK \n");
 
+    DISPLAYLEVEL(3, "test%3i : ZSTD_compressSequencesAndLiterals : ", testNb++);
+    {
+        const size_t srcSize = 100 KB;
+        const BYTE* const src = (BYTE*)CNBuffer;
+        BYTE* const dst = (BYTE*)compressedBuffer;
+        const size_t dstCapacity = ZSTD_compressBound(srcSize);
+        const size_t decompressSize = srcSize;
+        char* const decompressBuffer = (char*)malloc(decompressSize);
+        char* const litBuffer = (char*)malloc(decompressSize);
+        size_t compressedSize;
+
+        ZSTD_CCtx* const cctx = ZSTD_createCCtx();
+        ZSTD_Sequence* const seqs = (ZSTD_Sequence*)malloc(srcSize * sizeof(ZSTD_Sequence));
+        size_t nbSeqs;
+
+        if (litBuffer == NULL) goto _output_error;
+        if (decompressBuffer == NULL) goto _output_error;
+        if (seqs == NULL) goto _output_error;
+        assert(cctx != NULL);
+
+        /* Populate src with compressible random data */
+        RDG_genBuffer(CNBuffer, srcSize, compressibility, 0., seed);
+
+        /* Roundtrip Test using the AndLiterals() variant */
+        nbSeqs = ZSTD_generateSequences(cctx, seqs, srcSize, src, srcSize);
+        ZSTD_CCtx_reset(cctx, ZSTD_reset_session_and_parameters);
+        ZSTD_CCtx_setParameter(cctx, ZSTD_c_blockDelimiters, ZSTD_sf_explicitBlockDelimiters);
+        {   size_t const litSize = FUZ_getLitSize(seqs, nbSeqs);
+            FUZ_transferLiterals(litBuffer, decompressSize, CNBuffer, srcSize, seqs, nbSeqs);
+
+            /* not enough literals: must fail */
+            compressedSize = ZSTD_compressSequencesAndLiterals(cctx, dst, dstCapacity, seqs, nbSeqs, src, litSize-1, srcSize);
+            if (!ZSTD_isError(compressedSize)) {
+                DISPLAY("ZSTD_compressSequencesAndLiterals() should have failed: not enough literals provided\n");
+                goto _output_error;
+            }
+
+            /* too many literals: must fail */
+            compressedSize = ZSTD_compressSequencesAndLiterals(cctx, dst, dstCapacity, seqs, nbSeqs, src, litSize+1, srcSize);
+            if (!ZSTD_isError(compressedSize)) {
+                DISPLAY("ZSTD_compressSequencesAndLiterals() should have failed: too many literals provided\n");
+                goto _output_error;
+            }
+
+            /* correct amount of literals: should compress successfully */
+            compressedSize = ZSTD_compressSequencesAndLiterals(cctx, dst, dstCapacity, seqs, nbSeqs, litBuffer, litSize, srcSize);
+            if (ZSTD_isError(compressedSize)) {
+                DISPLAY("Error in ZSTD_compressSequencesAndLiterals()\n");
+                goto _output_error;
+            }
+        }
+        {   size_t const dSize = ZSTD_decompress(decompressBuffer, decompressSize, dst, compressedSize);
+            if (ZSTD_isError(dSize)) {
+                DISPLAY("Error during decompression of frame produced by ZSTD_compressSequencesAndLiterals()\n");
+                goto _output_error;
+            }
+            if (dSize != srcSize) {
+                DISPLAY("Error: decompression of frame produced by ZSTD_compressSequencesAndLiterals() has different size\n");
+                goto _output_error;
+            }
+            if (memcmp(decompressBuffer, src, srcSize)) {
+                DISPLAY("Error: decompression of frame produced by ZSTD_compressSequencesAndLiterals() produces a different content (of same size)\n");
+                goto _output_error;
+            }
+        }
+
+        ZSTD_freeCCtx(cctx);
+        free(litBuffer);
+        free(decompressBuffer);
+        free(seqs);
+    }
+    DISPLAYLEVEL(3, "OK \n");
+
     /* Multiple blocks of zeros test */
     #define LONGZEROSLENGTH 1000000 /* 1MB of zeros */
     DISPLAYLEVEL(3, "test%3i : compress %u zeroes : ", testNb++, LONGZEROSLENGTH);