fix test 122

author Yann Collet <cyan@fb.com>

Sat, 3 Aug 2019 14:43:34 +0000 (16:43 +0200)

committer Yann Collet <cyan@fb.com>

Sat, 3 Aug 2019 14:51:26 +0000 (16:51 +0200)
author Yann Collet <cyan@fb.com>
Sat, 3 Aug 2019 14:43:34 +0000 (16:43 +0200)
committer Yann Collet <cyan@fb.com>
Sat, 3 Aug 2019 14:51:26 +0000 (16:51 +0200)
diff --git a/doc/zstd_manual.html b/doc/zstd_manual.html

index 6d8e74fe52746a65a3dd066c16154650b4f03a58..d7058e30e0337dfdb2d01400467d5e3c34d0f5ee 100644 (file)
--- a/doc/zstd_manual.html
+++ b/doc/zstd_manual.html
@@ -1557,7 +1557,7 @@ size_t ZSTD_decodingBufferSize_min(unsigned long long windowSize, unsigned long
  <a name="Chapter23"></a><h2>Block level API</h2><pre></pre>
  
  <pre><b></b><p>    Frame metadata cost is typically ~18 bytes, which can be non-negligible for very small blocks (< 100 bytes).
-    User will have to take in charge required information to regenerate data, such as compressed and content sizes.
+    But users will have to take in charge needed metadata to regenerate data, such as compressed and content sizes.
  
      A few rules to respect :
      - Compressing and decompressing require a context structure
@@ -1568,12 +1568,14 @@ size_t ZSTD_decodingBufferSize_min(unsigned long long windowSize, unsigned long
        + copyCCtx() and copyDCtx() can be used too
      - Block size is limited, it must be <= ZSTD_getBlockSize() <= ZSTD_BLOCKSIZE_MAX == 128 KB
        + If input is larger than a block size, it's necessary to split input data into multiple blocks
-      + For inputs larger than a single block, really consider using regular ZSTD_compress() instead.
-        Frame metadata is not that costly, and quickly becomes negligible as source size grows larger.
-    - When a block is considered not compressible enough, ZSTD_compressBlock() result will be zero.
-      In which case, nothing is produced into `dst` !
-      + User must test for such outcome and deal directly with uncompressed data
-      + ZSTD_decompressBlock() doesn't accept uncompressed data as input !!!
+      + For inputs larger than a single block, consider using regular ZSTD_compress() instead.
+        Frame metadata is not that costly, and quickly becomes negligible as source size grows larger than a block.
+    - When a block is considered not compressible enough, ZSTD_compressBlock() result will be 0 (zero) !
+      ===> In which case, nothing is produced into `dst` !
+      + User __must__ test for such outcome and deal directly with uncompressed data
+      + A block cannot be declared incompressible if ZSTD_compressBlock() return value was != 0.
+        Doing so would mess up with statistics history, leading to potential data corruption.
+      + ZSTD_decompressBlock() _doesn't accept uncompressed data as input_ !!
        + In case of multiple successive blocks, should some of them be uncompressed,
          decoder must be informed of their existence in order to follow proper history.
          Use ZSTD_insertBlock() for such a case.
diff --git a/lib/common/zstd_internal.h b/lib/common/zstd_internal.h

index 81b16eac2ea62391d64aa3113b82c4a208c1f95a..8c96fd68592becacca1305bdef3a6d88868572a3 100644 (file)
--- a/lib/common/zstd_internal.h
+++ b/lib/common/zstd_internal.h
@@ -56,9 +56,9 @@ extern "C" {
  /**
   * Return the specified error if the condition evaluates to true.
   *
- * In debug modes, prints additional information. In order to do that
- * (particularly, printing the conditional that failed), this can't just wrap
- * RETURN_ERROR().
+ * In debug modes, prints additional information.
+ * In order to do that (particularly, printing the conditional that failed),
+ * this can't just wrap RETURN_ERROR().
   */
  #define RETURN_ERROR_IF(cond, err, ...) \
    if (cond) { \
diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c

index 1200d828c063a03e172ccd744e4229d5e28f7587..cd73db13be01a00154553a3656d511d2e742a42a 100644 (file)
--- a/lib/compress/zstd_compress.c
+++ b/lib/compress/zstd_compress.c
@@ -1955,7 +1955,7 @@ ZSTD_compressSequences_internal(seqStore_t* seqStorePtr,
      BYTE* const ostart = (BYTE*)dst;
      BYTE* const oend = ostart + dstCapacity;
      BYTE* op = ostart;
-    size_t const nbSeq = seqStorePtr->sequences - seqStorePtr->sequencesStart;
+    size_t const nbSeq = (size_t)(seqStorePtr->sequences - seqStorePtr->sequencesStart);
      BYTE* seqHead;
      BYTE* lastNCount = NULL;
  
@@ -1964,7 +1964,7 @@ ZSTD_compressSequences_internal(seqStore_t* seqStorePtr,
  
      /* Compress literals */
      {   const BYTE* const literals = seqStorePtr->litStart;
-        size_t const litSize = seqStorePtr->lit - literals;
+        size_t const litSize = (size_t)(seqStorePtr->lit - literals);
          size_t const cSize = ZSTD_compressLiterals(
                                      &prevEntropy->huf, &nextEntropy->huf,
                                      cctxParams->cParams.strategy,
@@ -1991,7 +1991,7 @@ ZSTD_compressSequences_internal(seqStore_t* seqStorePtr,
      if (nbSeq==0) {
          /* Copy the old tables over as if we repeated them */
          memcpy(&nextEntropy->fse, &prevEntropy->fse, sizeof(prevEntropy->fse));
-        return op - ostart;
+        return (size_t)(op - ostart);
      }
  
      /* seqHead : flags for FSE encoding type */
@@ -2012,7 +2012,7 @@ ZSTD_compressSequences_internal(seqStore_t* seqStorePtr,
                                          ZSTD_defaultAllowed, strategy);
          assert(set_basic < set_compressed && set_rle < set_compressed);
          assert(!(LLtype < set_compressed && nextEntropy->fse.litlength_repeatMode != FSE_repeat_none)); /* We don't copy tables */
-        {   size_t const countSize = ZSTD_buildCTable(op, oend - op, CTable_LitLength, LLFSELog, (symbolEncodingType_e)LLtype,
+        {   size_t const countSize = ZSTD_buildCTable(op, (size_t)(oend - op), CTable_LitLength, LLFSELog, (symbolEncodingType_e)LLtype,
                                                      count, max, llCodeTable, nbSeq, LL_defaultNorm, LL_defaultNormLog, MaxLL,
                                                      prevEntropy->fse.litlengthCTable, sizeof(prevEntropy->fse.litlengthCTable),
                                                      workspace, wkspSize);
@@ -2035,7 +2035,7 @@ ZSTD_compressSequences_internal(seqStore_t* seqStorePtr,
                                          OF_defaultNorm, OF_defaultNormLog,
                                          defaultPolicy, strategy);
          assert(!(Offtype < set_compressed && nextEntropy->fse.offcode_repeatMode != FSE_repeat_none)); /* We don't copy tables */
-        {   size_t const countSize = ZSTD_buildCTable(op, oend - op, CTable_OffsetBits, OffFSELog, (symbolEncodingType_e)Offtype,
+        {   size_t const countSize = ZSTD_buildCTable(op, (size_t)(oend - op), CTable_OffsetBits, OffFSELog, (symbolEncodingType_e)Offtype,
                                                      count, max, ofCodeTable, nbSeq, OF_defaultNorm, OF_defaultNormLog, DefaultMaxOff,
                                                      prevEntropy->fse.offcodeCTable, sizeof(prevEntropy->fse.offcodeCTable),
                                                      workspace, wkspSize);
@@ -2056,7 +2056,7 @@ ZSTD_compressSequences_internal(seqStore_t* seqStorePtr,
                                          ML_defaultNorm, ML_defaultNormLog,
                                          ZSTD_defaultAllowed, strategy);
          assert(!(MLtype < set_compressed && nextEntropy->fse.matchlength_repeatMode != FSE_repeat_none)); /* We don't copy tables */
-        {   size_t const countSize = ZSTD_buildCTable(op, oend - op, CTable_MatchLength, MLFSELog, (symbolEncodingType_e)MLtype,
+        {   size_t const countSize = ZSTD_buildCTable(op, (size_t)(oend - op), CTable_MatchLength, MLFSELog, (symbolEncodingType_e)MLtype,
                                                      count, max, mlCodeTable, nbSeq, ML_defaultNorm, ML_defaultNormLog, MaxML,
                                                      prevEntropy->fse.matchlengthCTable, sizeof(prevEntropy->fse.matchlengthCTable),
                                                      workspace, wkspSize);
@@ -2070,7 +2070,7 @@ ZSTD_compressSequences_internal(seqStore_t* seqStorePtr,
      *seqHead = (BYTE)((LLtype<<6) + (Offtype<<4) + (MLtype<<2));
  
      {   size_t const bitstreamSize = ZSTD_encodeSequences(
-                                        op, oend - op,
+                                        op, (size_t)(oend - op),
                                          CTable_MatchLength, mlCodeTable,
                                          CTable_OffsetBits, ofCodeTable,
                                          CTable_LitLength, llCodeTable,
@@ -2097,7 +2097,7 @@ ZSTD_compressSequences_internal(seqStore_t* seqStorePtr,
      }
  
      DEBUGLOG(5, "compressed block size : %u", (unsigned)(op - ostart));
-    return op - ostart;
+    return (size_t)(op - ostart);
  }
  
  MEM_STATIC size_t
@@ -2539,8 +2539,9 @@ size_t ZSTD_getBlockSize(const ZSTD_CCtx* cctx)
  
  size_t ZSTD_compressBlock(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize)
  {
-    size_t const blockSizeMax = ZSTD_getBlockSize(cctx);
-    RETURN_ERROR_IF(srcSize > blockSizeMax, srcSize_wrong);
+    DEBUGLOG(5, "ZSTD_compressBlock: srcSize = %u", (unsigned)srcSize);
+    { size_t const blockSizeMax = ZSTD_getBlockSize(cctx);
+      RETURN_ERROR_IF(srcSize > blockSizeMax, srcSize_wrong); }
  
      return ZSTD_compressContinue_internal(cctx, dst, dstCapacity, src, srcSize, 0 /* frame mode */, 0 /* last chunk */);
  }
@@ -2565,7 +2566,7 @@ static size_t ZSTD_loadDictionaryContent(ZSTD_matchState_t* ms,
      if (srcSize <= HASH_READ_SIZE) return 0;
  
      while (iend - ip > HASH_READ_SIZE) {
-        size_t const remaining = iend - ip;
+        size_t const remaining = (size_t)(iend - ip);
          size_t const chunk = MIN(remaining, ZSTD_CHUNKSIZE_MAX);
          const BYTE* const ichunk = ip + chunk;
  
diff --git a/lib/decompress/zstd_decompress.c b/lib/decompress/zstd_decompress.c

index cf99a137725a0f9c4e4bbda7bcd929557d4cb9ff..751060b2cd1331480a9f2617ce27e056ed318a19 100644 (file)
--- a/lib/decompress/zstd_decompress.c
+++ b/lib/decompress/zstd_decompress.c
@@ -574,9 +574,10 @@ void ZSTD_checkContinuity(ZSTD_DCtx* dctx, const void* dst)
  }
  
  /** ZSTD_insertBlock() :
-    insert `src` block into `dctx` history. Useful to track uncompressed blocks. */
+ *  insert `src` block into `dctx` history. Useful to track uncompressed blocks. */
  size_t ZSTD_insertBlock(ZSTD_DCtx* dctx, const void* blockStart, size_t blockSize)
  {
+    DEBUGLOG(5, "ZSTD_insertBlock: %u bytes", (unsigned)blockSize);
      ZSTD_checkContinuity(dctx, blockStart);
      dctx->previousDstEnd = (const char*)blockStart + blockSize;
      return blockSize;
diff --git a/lib/decompress/zstd_decompress_block.c b/lib/decompress/zstd_decompress_block.c

index 24f4859c56c94fd9d42d803bcbe464be18d85b9f..cbcfc0840610260135060be7add2d567089cbb08 100644 (file)
--- a/lib/decompress/zstd_decompress_block.c
+++ b/lib/decompress/zstd_decompress_block.c
@@ -79,6 +79,7 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
  size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
                            const void* src, size_t srcSize)   /* note : srcSize < BLOCKSIZE */
  {
+    DEBUGLOG(5, "ZSTD_decodeLiteralsBlock");
      RETURN_ERROR_IF(srcSize < MIN_CBLOCK_SIZE, corruption_detected);
  
      {   const BYTE* const istart = (const BYTE*) src;
@@ -87,6 +88,7 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
          switch(litEncType)
          {
          case set_repeat:
+            DEBUGLOG(5, "set_repeat flag : re-using stats from previous compressed literals block");
              RETURN_ERROR_IF(dctx->litEntropy==0, dictionary_corrupted);
              /* fall-through */
  
@@ -116,7 +118,7 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
                      /* 2 - 2 - 18 - 18 */
                      lhSize = 5;
                      litSize  = (lhc >> 4) & 0x3FFFF;
-                    litCSize = (lhc >> 22) + (istart[4] << 10);
+                    litCSize = (lhc >> 22) + ((size_t)istart[4] << 10);
                      break;
                  }
                  RETURN_ERROR_IF(litSize > ZSTD_BLOCKSIZE_MAX, corruption_detected);
@@ -391,7 +393,8 @@ ZSTD_buildFSETable(ZSTD_seqSymbol* dt,
                      symbolNext[s] = 1;
                  } else {
                      if (normalizedCounter[s] >= largeLimit) DTableH.fastMode=0;
-                    symbolNext[s] = normalizedCounter[s];
+                    assert(normalizedCounter[s]>=0);
+                    symbolNext[s] = (U16)normalizedCounter[s];
          }   }   }
          memcpy(dt, &DTableH, sizeof(DTableH));
      }
diff --git a/lib/zstd.h b/lib/zstd.h

index 4a1f81610c212282f357cdd6c8a8687a35d4ad1e..923517c0ba81799fb8081bab21b120d2e195d4ae 100644 (file)
--- a/lib/zstd.h
+++ b/lib/zstd.h
@@ -1909,7 +1909,7 @@ ZSTDLIB_API ZSTD_nextInputType_e ZSTD_nextInputType(ZSTD_DCtx* dctx);
  /*!
      Block functions produce and decode raw zstd blocks, without frame metadata.
      Frame metadata cost is typically ~18 bytes, which can be non-negligible for very small blocks (< 100 bytes).
-    User will have to take in charge required information to regenerate data, such as compressed and content sizes.
+    But users will have to take in charge needed metadata to regenerate data, such as compressed and content sizes.
  
      A few rules to respect :
      - Compressing and decompressing require a context structure
@@ -1920,12 +1920,14 @@ ZSTDLIB_API ZSTD_nextInputType_e ZSTD_nextInputType(ZSTD_DCtx* dctx);
        + copyCCtx() and copyDCtx() can be used too
      - Block size is limited, it must be <= ZSTD_getBlockSize() <= ZSTD_BLOCKSIZE_MAX == 128 KB
        + If input is larger than a block size, it's necessary to split input data into multiple blocks
-      + For inputs larger than a single block, really consider using regular ZSTD_compress() instead.
-        Frame metadata is not that costly, and quickly becomes negligible as source size grows larger.
-    - When a block is considered not compressible enough, ZSTD_compressBlock() result will be zero.
-      In which case, nothing is produced into `dst` !
-      + User must test for such outcome and deal directly with uncompressed data
-      + ZSTD_decompressBlock() doesn't accept uncompressed data as input !!!
+      + For inputs larger than a single block, consider using regular ZSTD_compress() instead.
+        Frame metadata is not that costly, and quickly becomes negligible as source size grows larger than a block.
+    - When a block is considered not compressible enough, ZSTD_compressBlock() result will be 0 (zero) !
+      ===> In which case, nothing is produced into `dst` !
+      + User __must__ test for such outcome and deal directly with uncompressed data
+      + A block cannot be declared incompressible if ZSTD_compressBlock() return value was != 0.
+        Doing so would mess up with statistics history, leading to potential data corruption.
+      + ZSTD_decompressBlock() _doesn't accept uncompressed data as input_ !!
        + In case of multiple successive blocks, should some of them be uncompressed,
          decoder must be informed of their existence in order to follow proper history.
          Use ZSTD_insertBlock() for such a case.
diff --git a/tests/fuzzer.c b/tests/fuzzer.c

index a663111da23f4968a1e7e0363421f007299a01c0..2de7c0096ff975513520c646648dcc80c06cb0d9 100644 (file)
--- a/tests/fuzzer.c
+++ b/tests/fuzzer.c
@@ -1907,8 +1907,10 @@ static int basicUnitTests(U32 const seed, double compressibility)
          DISPLAYLEVEL(3, "test%3i : Dictionary Block compression test : ", testNb++);
          CHECK( ZSTD_compressBegin_usingDict(cctx, CNBuffer, dictSize, 5) );
          CHECK_VAR(cSize,  ZSTD_compressBlock(cctx, compressedBuffer, ZSTD_compressBound(blockSize), (char*)CNBuffer+dictSize, blockSize));
-        CHECK( ZSTD_compressBlock(cctx, (char*)compressedBuffer+cSize, ZSTD_compressBound(blockSize), (char*)CNBuffer+dictSize+blockSize, blockSize) );  /* just to ensure cctx history consistency */
-        memcpy((char*)compressedBuffer+cSize, (char*)CNBuffer+dictSize+blockSize, blockSize);   /* fake non-compressed block (without header) */
+        RDG_genBuffer((char*)CNBuffer+dictSize+blockSize, blockSize, 0.0, 0.0, seed);  /* create a non-compressible second block */
+        { CHECK_NEWV(r, ZSTD_compressBlock(cctx, (char*)compressedBuffer+cSize, ZSTD_compressBound(blockSize), (char*)CNBuffer+dictSize+blockSize, blockSize) );  /* for cctx history consistency */
+          assert(r == 0); /* non-compressible block */ }
+        memcpy((char*)compressedBuffer+cSize, (char*)CNBuffer+dictSize+blockSize, blockSize);   /* send non-compressed block (without header) */
          CHECK_VAR(cSize2, ZSTD_compressBlock(cctx, (char*)compressedBuffer+cSize+blockSize, ZSTD_compressBound(blockSize),
                                                     (char*)CNBuffer+dictSize+2*blockSize, blockSize));
          DISPLAYLEVEL(3, "OK \n");
@@ -1927,6 +1929,7 @@ static int basicUnitTests(U32 const seed, double compressibility)
                  DISPLAYLEVEL(1, "ZSTD_decompressBlock() with _usingDict() and after insertBlock() fails : %u, instead of %u expected \n", (unsigned)r, (unsigned)blockSize);
                  goto _output_error;
          }   }
+        assert(memcpy((char*)CNBuffer+dictSize, decodedBuffer, blockSize*3));  /* ensure regenerated content is identical to origin */
          DISPLAYLEVEL(3, "OK \n");
  
          DISPLAYLEVEL(3, "test%3i : Block compression with CDict : ", testNb++);
author	Yann Collet <cyan@fb.com>
	Sat, 3 Aug 2019 14:43:34 +0000 (16:43 +0200)
committer	Yann Collet <cyan@fb.com>
	Sat, 3 Aug 2019 14:51:26 +0000 (16:51 +0200)
doc/zstd_manual.html		patch | blob | blame | history
lib/common/zstd_internal.h		patch | blob | blame | history
lib/compress/zstd_compress.c		patch | blob | blame | history
lib/decompress/zstd_decompress.c		patch | blob | blame | history
lib/decompress/zstd_decompress_block.c		patch | blob | blame | history
lib/zstd.h		patch | blob | blame | history
tests/fuzzer.c		patch | blob | blame | history