]> git.ipfire.org Git - thirdparty/zstd.git/commitdiff
Add experimental param for sequence validation 2387/head
authorsenhuang42 <senhuang96@fb.com>
Fri, 20 Nov 2020 16:23:22 +0000 (11:23 -0500)
committersenhuang42 <senhuang96@fb.com>
Fri, 20 Nov 2020 16:57:41 +0000 (11:57 -0500)
lib/compress/zstd_compress.c
lib/compress/zstd_compress_internal.h
lib/zstd.h
tests/fuzz/sequence_compression_api [deleted file]

index e8f59b344ca59b66cdcb48b447c5a89fb1238cc7..9f88e8b419fa84d4ade73438a295ff7620ba78f4 100644 (file)
@@ -462,6 +462,11 @@ ZSTD_bounds ZSTD_cParam_getBounds(ZSTD_cParameter param)
         bounds.lowerBound = (int)ZSTD_sf_noBlockDelimiters;
         bounds.upperBound = (int)ZSTD_sf_explicitBlockDelimiters;
         return bounds;
+    
+    case ZSTD_c_validateSequences:
+        bounds.lowerBound = 0;
+        bounds.upperBound = 1;
+        return bounds;
 
     default:
         bounds.error = ERROR(parameter_unsupported);
@@ -523,6 +528,7 @@ static int ZSTD_isUpdateAuthorized(ZSTD_cParameter param)
     case ZSTD_c_stableInBuffer:
     case ZSTD_c_stableOutBuffer:
     case ZSTD_c_blockDelimiters:
+    case ZSTD_c_validateSequences:
     default:
         return 0;
     }
@@ -574,6 +580,7 @@ size_t ZSTD_CCtx_setParameter(ZSTD_CCtx* cctx, ZSTD_cParameter param, int value)
     case ZSTD_c_stableInBuffer:
     case ZSTD_c_stableOutBuffer:
     case ZSTD_c_blockDelimiters:
+    case ZSTD_c_validateSequences:
         break;
 
     default: RETURN_ERROR(parameter_unsupported, "unknown parameter");
@@ -779,6 +786,11 @@ size_t ZSTD_CCtxParams_setParameter(ZSTD_CCtx_params* CCtxParams,
         BOUNDCHECK(ZSTD_c_blockDelimiters, value);
         CCtxParams->blockDelimiters = (ZSTD_sequenceFormat_e)value;
         return CCtxParams->blockDelimiters;
+    
+    case ZSTD_c_validateSequences:
+        BOUNDCHECK(ZSTD_c_validateSequences, value);
+        CCtxParams->validateSequences = value;
+        return CCtxParams->validateSequences;
 
     default: RETURN_ERROR(parameter_unsupported, "unknown parameter");
     }
@@ -900,6 +912,9 @@ size_t ZSTD_CCtxParams_getParameter(
     case ZSTD_c_blockDelimiters :
         *value = (int)CCtxParams->blockDelimiters;
         break;
+    case ZSTD_c_validateSequences :
+        *value = (int)CCtxParams->validateSequences;
+        break;
     default: RETURN_ERROR(parameter_unsupported, "unknown parameter");
     }
     return 0;
@@ -4561,10 +4576,12 @@ static size_t ZSTD_copySequencesToSeqStoreExplicitBlockDelim(ZSTD_CCtx* cctx, ZS
         updatedRepcodes = ZSTD_updateRep(updatedRepcodes.rep, offCode, ll0);
 
         DEBUGLOG(6, "Storing sequence: (of: %u, ml: %u, ll: %u)", offCode, matchLength, litLength);
-        seqPos->posInSrc += litLength + matchLength;
-        FORWARD_IF_ERROR(ZSTD_validateSequence(offCode, matchLength, seqPos->posInSrc,
-                                               cctx->appliedParams.cParams.windowLog, dictSize),
-                                               "Sequence validation failed");
+        if (cctx->appliedParams.validateSequences) {
+            seqPos->posInSrc += litLength + matchLength;
+            FORWARD_IF_ERROR(ZSTD_validateSequence(offCode, matchLength, seqPos->posInSrc,
+                                                cctx->appliedParams.cParams.windowLog, dictSize),
+                                                "Sequence validation failed");
+        }
         ZSTD_storeSeq(&cctx->seqStore, litLength, ip, iend, offCode, matchLength - MINMATCH);
         ip += matchLength + litLength;
     }
@@ -4681,10 +4698,12 @@ static size_t ZSTD_copySequencesToSeqStoreNoBlockDelim(ZSTD_CCtx* cctx, ZSTD_seq
             updatedRepcodes = ZSTD_updateRep(updatedRepcodes.rep, offCode, ll0);
         }
 
-        seqPos->posInSrc += litLength + matchLength;
-        FORWARD_IF_ERROR(ZSTD_validateSequence(offCode, matchLength, seqPos->posInSrc,
-                                               cctx->appliedParams.cParams.windowLog, dictSize),
-                         "Sequence validation failed");
+        if (cctx->appliedParams.validateSequences) {
+            seqPos->posInSrc += litLength + matchLength;
+            FORWARD_IF_ERROR(ZSTD_validateSequence(offCode, matchLength, seqPos->posInSrc,
+                                                   cctx->appliedParams.cParams.windowLog, dictSize),
+                                                   "Sequence validation failed");
+        }
         DEBUGLOG(6, "Storing sequence: (of: %u, ml: %u, ll: %u)", offCode, matchLength, litLength);
         ZSTD_storeSeq(&cctx->seqStore, litLength, ip, iend, offCode, matchLength - MINMATCH);
         ip += matchLength + litLength;
@@ -4712,17 +4731,12 @@ typedef size_t (*ZSTD_sequenceCopier) (ZSTD_CCtx* cctx, ZSTD_sequencePosition* s
                                        const ZSTD_Sequence* const inSeqs, size_t inSeqsSize,
                                        const void* src, size_t blockSize);
 static ZSTD_sequenceCopier ZSTD_selectSequenceCopier(ZSTD_sequenceFormat_e mode) {
+    ZSTD_sequenceCopier sequenceCopier = NULL;
     assert(ZSTD_cParam_withinBounds(ZSTD_c_blockDelimiters, mode));
-    ZSTD_sequenceCopier sequenceCopier;
-    switch (mode) {
-        case ZSTD_sf_noBlockDelimiters:
-            sequenceCopier = ZSTD_copySequencesToSeqStoreNoBlockDelim;
-            break;
-        case ZSTD_sf_explicitBlockDelimiters:
-            sequenceCopier = ZSTD_copySequencesToSeqStoreExplicitBlockDelim;
-            break;
-        default:
-            assert(0); /* Unreachable due to as param validated in bounds */
+    if (mode == ZSTD_sf_explicitBlockDelimiters) {
+        return ZSTD_copySequencesToSeqStoreExplicitBlockDelim;
+    } else if (mode == ZSTD_sf_noBlockDelimiters) {
+        return ZSTD_copySequencesToSeqStoreNoBlockDelim;
     }
     assert(sequenceCopier != NULL);
     return sequenceCopier;
@@ -4745,7 +4759,7 @@ static size_t ZSTD_compressSequences_internal(ZSTD_CCtx* cctx,
     
     BYTE const* ip = (BYTE const*)src;
     BYTE* op = (BYTE*)dst;
-    const ZSTD_sequenceCopier sequenceCopier = ZSTD_selectSequenceCopier(cctx->appliedParams.blockDelimiters);
+    ZSTD_sequenceCopier sequenceCopier = ZSTD_selectSequenceCopier(cctx->appliedParams.blockDelimiters);
 
     DEBUGLOG(4, "ZSTD_compressSequences_internal srcSize: %zu, inSeqsSize: %zu", srcSize, inSeqsSize);
     /* Special case: empty frame */
index 51cb66d7b17664da3ece2737d5304be9c43ff693..ee0523458a97bff6dc7b9a80bd97f8da4f2642fe 100644 (file)
@@ -244,6 +244,7 @@ struct ZSTD_CCtx_params_s {
 
     /* Sequence compression API */
     ZSTD_sequenceFormat_e blockDelimiters;
+    int validateSequences;
 
     /* Internal use, for createCCtxParams() and freeCCtxParams() only */
     ZSTD_customMem customMem;
index 103c9e969341d92e064dea2f03ac83217d6a67aa..1b525bd72d54fd0bcd4b5761bec3cd42491ba2c7 100644 (file)
@@ -418,6 +418,7 @@ typedef enum {
      * ZSTD_c_stableInBuffer
      * ZSTD_c_stableOutBuffer
      * ZSTD_c_blockDelimiters
+     * ZSTD_c_validateSequences
      * Because they are not stable, it's necessary to define ZSTD_STATIC_LINKING_ONLY to access them.
      * note : never ever use experimentalParam? names directly;
      *        also, the enums values themselves are unstable and can still change.
@@ -432,7 +433,8 @@ typedef enum {
      ZSTD_c_experimentalParam8=1005,
      ZSTD_c_experimentalParam9=1006,
      ZSTD_c_experimentalParam10=1007,
-     ZSTD_c_experimentalParam11=1008
+     ZSTD_c_experimentalParam11=1008,
+     ZSTD_c_experimentalParam12=1009
 } ZSTD_cParameter;
 
 typedef struct {
@@ -1316,8 +1318,8 @@ typedef enum {
  * zc can be used to insert custom compression params.
  * This function invokes ZSTD_compress2
  * 
- * The output of this function can be fed into ZSTD_compressSequences() with ZSTD_c_blockDelimiters
- * set to ZSTD_sf_explicitBlockDelimiters
+ * The output of this function can be fed into ZSTD_compressSequences() with CCtx
+ * setting of ZSTD_c_blockDelimiters as ZSTD_sf_explicitBlockDelimiters
  * @return : number of sequences generated
  */
 
@@ -1331,8 +1333,8 @@ ZSTDLIB_API size_t ZSTD_generateSequences(ZSTD_CCtx* zc, ZSTD_Sequence* outSeqs,
  * As such, the final generated result has no explicit representation of block boundaries,
  * and the final last literals segment is not represented in the sequences.
  * 
- * The output of this function can be fed into ZSTD_compressSequences() with ZSTD_c_blockDelimiters
- * set to ZSTD_sf_noBlockDelimiters
+ * The output of this function can be fed into ZSTD_compressSequences() with CCtx
+ * setting of ZSTD_c_blockDelimiters as ZSTD_sf_noBlockDelimiters
  * @return : number of sequences left after merging
  */
 ZSTDLIB_API size_t ZSTD_mergeBlockDelimiters(ZSTD_Sequence* sequences, size_t seqsSize);
@@ -1348,16 +1350,20 @@ ZSTDLIB_API size_t ZSTD_mergeBlockDelimiters(ZSTD_Sequence* sequences, size_t se
  *    the block size derived from the cctx, and sequences may be split. This is the default setting.
  * 
  *    If ZSTD_c_blockDelimiters == ZSTD_sf_explicitBlockDelimiters, the array of ZSTD_Sequence is expected to contain
- *    block delimiters (defined in ZSTD_Sequence).
+ *    block delimiters (defined in ZSTD_Sequence). Behavior is undefined if no block delimiters are provided.
  * 
- *    In addition to ZSTD_c_blockDelimiters, other noteworthy cctx parameters are the compression level and window log.
+ *    If ZSTD_c_validateSequences == 0, this function will blindly accept the sequences provided. Invalid sequences cause undefined
+ *    behavior. If ZSTD_c_validateSequences == 1 and a sequence is invalid (see doc/zstd_compression_format.md for
+ *    specifics regarding offset/matchlength requirements), the function will bail out and return an error.
+ * 
+ *    In addition to the two adjustable experimental params, other noteworthy cctx parameters are the compression level and window log.
  *    - The compression level accordingly adjusts the strength of the entropy coder, as it would in typical compression.
  *    - The window log affects offset validation: this function will return an error at higher debug levels if a provided offset
  *      is larger than what the spec allows for a given window log and dictionary (if present). See: doc/zstd_compression_format.md
  * 
- * Note:
- * - Repcodes are, as of now, always re-calculated, so ZSTD_Sequence::rep is never used.
- * 
+ * Note: Repcodes are, as of now, always re-calculated within this function, so ZSTD_Sequence::rep is unused.
+ * Note 2: Once we integrate ability to ingest repcodes, the explicit block delims mode must respect those repcodes exactly,
+ *         and cannot emit an RLE block that disagrees with the repcode history
  * @return : final compressed size or a ZSTD error.
  */
 ZSTDLIB_API size_t ZSTD_compressSequences(ZSTD_CCtx* const cctx, void* dst, size_t dstSize,
@@ -1766,11 +1772,30 @@ ZSTDLIB_API size_t ZSTD_CCtx_refPrefix_advanced(ZSTD_CCtx* cctx, const void* pre
  * Default is 0 == ZSTD_sf_noBlockDelimiters.
  * 
  * For use with sequence compression API: ZSTD_compressSequences().
+ * 
  * Designates whether or not the given array of ZSTD_Sequence contains block delimiters
- * which are defined as sequences with offset == 0 and matchLength == 0.
+ * and last literals, which are defined as sequences with offset == 0 and matchLength == 0.
+ * See the definition of ZSTD_Sequence for more specifics.
  */
 #define ZSTD_c_blockDelimiters ZSTD_c_experimentalParam11
 
+/* ZSTD_c_validateSequences
+ * Default is 0 == disabled. Set to 1 to enable sequence validation.
+ * 
+ * For use with sequence compression API: ZSTD_compressSequences().
+ * Designates whether or not we validate sequences provided to ZSTD_compressSequences()
+ * during function execution.
+ * 
+ * Without validation, providing a sequence that does not conform to the zstd spec will cause
+ * undefined behavior, and may produce a corrupted block.
+ * 
+ * With validation enabled, if a sequence is invalid (see doc/zstd_compression_format.md for
+ * specifics regarding offset/matchlength requirements) then the function will bail out and
+ * return an error.
+ * 
+ */
+#define ZSTD_c_validateSequences ZSTD_c_experimentalParam12
+
 /*! ZSTD_CCtx_getParameter() :
  *  Get the requested compression parameter value, selected by enum ZSTD_cParameter,
  *  and store it into int* value.
diff --git a/tests/fuzz/sequence_compression_api b/tests/fuzz/sequence_compression_api
deleted file mode 100755 (executable)
index 4f75a14..0000000
Binary files a/tests/fuzz/sequence_compression_api and /dev/null differ