From: senhuang42 Date: Fri, 20 Nov 2020 16:23:22 +0000 (-0500) Subject: Add experimental param for sequence validation X-Git-Tag: v1.4.7~30^2 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=7742f076b4b91b9e721a6ba1abdb2461a4311a52;p=thirdparty%2Fzstd.git Add experimental param for sequence validation --- diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index e8f59b344..9f88e8b41 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -462,6 +462,11 @@ ZSTD_bounds ZSTD_cParam_getBounds(ZSTD_cParameter param) bounds.lowerBound = (int)ZSTD_sf_noBlockDelimiters; bounds.upperBound = (int)ZSTD_sf_explicitBlockDelimiters; return bounds; + + case ZSTD_c_validateSequences: + bounds.lowerBound = 0; + bounds.upperBound = 1; + return bounds; default: bounds.error = ERROR(parameter_unsupported); @@ -523,6 +528,7 @@ static int ZSTD_isUpdateAuthorized(ZSTD_cParameter param) case ZSTD_c_stableInBuffer: case ZSTD_c_stableOutBuffer: case ZSTD_c_blockDelimiters: + case ZSTD_c_validateSequences: default: return 0; } @@ -574,6 +580,7 @@ size_t ZSTD_CCtx_setParameter(ZSTD_CCtx* cctx, ZSTD_cParameter param, int value) case ZSTD_c_stableInBuffer: case ZSTD_c_stableOutBuffer: case ZSTD_c_blockDelimiters: + case ZSTD_c_validateSequences: break; default: RETURN_ERROR(parameter_unsupported, "unknown parameter"); @@ -779,6 +786,11 @@ size_t ZSTD_CCtxParams_setParameter(ZSTD_CCtx_params* CCtxParams, BOUNDCHECK(ZSTD_c_blockDelimiters, value); CCtxParams->blockDelimiters = (ZSTD_sequenceFormat_e)value; return CCtxParams->blockDelimiters; + + case ZSTD_c_validateSequences: + BOUNDCHECK(ZSTD_c_validateSequences, value); + CCtxParams->validateSequences = value; + return CCtxParams->validateSequences; default: RETURN_ERROR(parameter_unsupported, "unknown parameter"); } @@ -900,6 +912,9 @@ size_t ZSTD_CCtxParams_getParameter( case ZSTD_c_blockDelimiters : *value = (int)CCtxParams->blockDelimiters; break; + case 
ZSTD_c_validateSequences : + *value = (int)CCtxParams->validateSequences; + break; default: RETURN_ERROR(parameter_unsupported, "unknown parameter"); } return 0; @@ -4561,10 +4576,12 @@ static size_t ZSTD_copySequencesToSeqStoreExplicitBlockDelim(ZSTD_CCtx* cctx, ZS updatedRepcodes = ZSTD_updateRep(updatedRepcodes.rep, offCode, ll0); DEBUGLOG(6, "Storing sequence: (of: %u, ml: %u, ll: %u)", offCode, matchLength, litLength); - seqPos->posInSrc += litLength + matchLength; - FORWARD_IF_ERROR(ZSTD_validateSequence(offCode, matchLength, seqPos->posInSrc, - cctx->appliedParams.cParams.windowLog, dictSize), - "Sequence validation failed"); + if (cctx->appliedParams.validateSequences) { + seqPos->posInSrc += litLength + matchLength; + FORWARD_IF_ERROR(ZSTD_validateSequence(offCode, matchLength, seqPos->posInSrc, + cctx->appliedParams.cParams.windowLog, dictSize), + "Sequence validation failed"); + } ZSTD_storeSeq(&cctx->seqStore, litLength, ip, iend, offCode, matchLength - MINMATCH); ip += matchLength + litLength; } @@ -4681,10 +4698,12 @@ static size_t ZSTD_copySequencesToSeqStoreNoBlockDelim(ZSTD_CCtx* cctx, ZSTD_seq updatedRepcodes = ZSTD_updateRep(updatedRepcodes.rep, offCode, ll0); } - seqPos->posInSrc += litLength + matchLength; - FORWARD_IF_ERROR(ZSTD_validateSequence(offCode, matchLength, seqPos->posInSrc, - cctx->appliedParams.cParams.windowLog, dictSize), - "Sequence validation failed"); + if (cctx->appliedParams.validateSequences) { + seqPos->posInSrc += litLength + matchLength; + FORWARD_IF_ERROR(ZSTD_validateSequence(offCode, matchLength, seqPos->posInSrc, + cctx->appliedParams.cParams.windowLog, dictSize), + "Sequence validation failed"); + } DEBUGLOG(6, "Storing sequence: (of: %u, ml: %u, ll: %u)", offCode, matchLength, litLength); ZSTD_storeSeq(&cctx->seqStore, litLength, ip, iend, offCode, matchLength - MINMATCH); ip += matchLength + litLength; @@ -4712,17 +4731,12 @@ typedef size_t (*ZSTD_sequenceCopier) (ZSTD_CCtx* cctx, ZSTD_sequencePosition* s const 
ZSTD_Sequence* const inSeqs, size_t inSeqsSize, const void* src, size_t blockSize); static ZSTD_sequenceCopier ZSTD_selectSequenceCopier(ZSTD_sequenceFormat_e mode) { + ZSTD_sequenceCopier sequenceCopier = NULL; assert(ZSTD_cParam_withinBounds(ZSTD_c_blockDelimiters, mode)); - ZSTD_sequenceCopier sequenceCopier; - switch (mode) { - case ZSTD_sf_noBlockDelimiters: - sequenceCopier = ZSTD_copySequencesToSeqStoreNoBlockDelim; - break; - case ZSTD_sf_explicitBlockDelimiters: - sequenceCopier = ZSTD_copySequencesToSeqStoreExplicitBlockDelim; - break; - default: - assert(0); /* Unreachable due to as param validated in bounds */ + if (mode == ZSTD_sf_explicitBlockDelimiters) { + return ZSTD_copySequencesToSeqStoreExplicitBlockDelim; + } else if (mode == ZSTD_sf_noBlockDelimiters) { + return ZSTD_copySequencesToSeqStoreNoBlockDelim; } assert(sequenceCopier != NULL); return sequenceCopier; @@ -4745,7 +4759,7 @@ static size_t ZSTD_compressSequences_internal(ZSTD_CCtx* cctx, BYTE const* ip = (BYTE const*)src; BYTE* op = (BYTE*)dst; - const ZSTD_sequenceCopier sequenceCopier = ZSTD_selectSequenceCopier(cctx->appliedParams.blockDelimiters); + ZSTD_sequenceCopier sequenceCopier = ZSTD_selectSequenceCopier(cctx->appliedParams.blockDelimiters); DEBUGLOG(4, "ZSTD_compressSequences_internal srcSize: %zu, inSeqsSize: %zu", srcSize, inSeqsSize); /* Special case: empty frame */ diff --git a/lib/compress/zstd_compress_internal.h b/lib/compress/zstd_compress_internal.h index 51cb66d7b..ee0523458 100644 --- a/lib/compress/zstd_compress_internal.h +++ b/lib/compress/zstd_compress_internal.h @@ -244,6 +244,7 @@ struct ZSTD_CCtx_params_s { /* Sequence compression API */ ZSTD_sequenceFormat_e blockDelimiters; + int validateSequences; /* Internal use, for createCCtxParams() and freeCCtxParams() only */ ZSTD_customMem customMem; diff --git a/lib/zstd.h b/lib/zstd.h index 103c9e969..1b525bd72 100644 --- a/lib/zstd.h +++ b/lib/zstd.h @@ -418,6 +418,7 @@ typedef enum { * ZSTD_c_stableInBuffer * 
ZSTD_c_stableOutBuffer * ZSTD_c_blockDelimiters + * ZSTD_c_validateSequences * Because they are not stable, it's necessary to define ZSTD_STATIC_LINKING_ONLY to access them. * note : never ever use experimentalParam? names directly; * also, the enums values themselves are unstable and can still change. @@ -432,7 +433,8 @@ typedef enum { ZSTD_c_experimentalParam8=1005, ZSTD_c_experimentalParam9=1006, ZSTD_c_experimentalParam10=1007, - ZSTD_c_experimentalParam11=1008 + ZSTD_c_experimentalParam11=1008, + ZSTD_c_experimentalParam12=1009 } ZSTD_cParameter; typedef struct { @@ -1316,8 +1318,8 @@ typedef enum { * zc can be used to insert custom compression params. * This function invokes ZSTD_compress2 * - * The output of this function can be fed into ZSTD_compressSequences() with ZSTD_c_blockDelimiters - * set to ZSTD_sf_explicitBlockDelimiters + * The output of this function can be fed into ZSTD_compressSequences() with CCtx + * setting of ZSTD_c_blockDelimiters as ZSTD_sf_explicitBlockDelimiters * @return : number of sequences generated */ @@ -1331,8 +1333,8 @@ ZSTDLIB_API size_t ZSTD_generateSequences(ZSTD_CCtx* zc, ZSTD_Sequence* outSeqs, * As such, the final generated result has no explicit representation of block boundaries, * and the final last literals segment is not represented in the sequences. * - * The output of this function can be fed into ZSTD_compressSequences() with ZSTD_c_blockDelimiters - * set to ZSTD_sf_noBlockDelimiters + * The output of this function can be fed into ZSTD_compressSequences() with CCtx + * setting of ZSTD_c_blockDelimiters as ZSTD_sf_noBlockDelimiters * @return : number of sequences left after merging */ ZSTDLIB_API size_t ZSTD_mergeBlockDelimiters(ZSTD_Sequence* sequences, size_t seqsSize); @@ -1348,16 +1350,20 @@ ZSTDLIB_API size_t ZSTD_mergeBlockDelimiters(ZSTD_Sequence* sequences, size_t se * the block size derived from the cctx, and sequences may be split. This is the default setting. 
* * If ZSTD_c_blockDelimiters == ZSTD_sf_explicitBlockDelimiters, the array of ZSTD_Sequence is expected to contain - * block delimiters (defined in ZSTD_Sequence). + * block delimiters (defined in ZSTD_Sequence). Behavior is undefined if no block delimiters are provided. * - * In addition to ZSTD_c_blockDelimiters, other noteworthy cctx parameters are the compression level and window log. + * If ZSTD_c_validateSequences == 0, this function will blindly accept the sequences provided. Invalid sequences cause undefined + * behavior. If ZSTD_c_validateSequences == 1, then if a sequence is invalid (see doc/zstd_compression_format.md for + * specifics regarding offset/matchlength requirements) then the function will bail out and return an error. + * + * In addition to the two adjustable experimental params, other noteworthy cctx parameters are the compression level and window log. * - The compression level accordingly adjusts the strength of the entropy coder, as it would in typical compression. * - The window log affects offset validation: this function will return an error at higher debug levels if a provided offset * is larger than what the spec allows for a given window log and dictionary (if present). See: doc/zstd_compression_format.md * - * Note: - * - Repcodes are, as of now, always re-calculated, so ZSTD_Sequence::rep is never used. - * + * Note: Repcodes are, as of now, always re-calculated within this function, so ZSTD_Sequence::rep is unused. + * Note 2: Once we integrate ability to ingest repcodes, the explicit block delims mode must respect those repcodes exactly, + * and cannot emit an RLE block that disagrees with the repcode history * @return : final compressed size or a ZSTD error. */ ZSTDLIB_API size_t ZSTD_compressSequences(ZSTD_CCtx* const cctx, void* dst, size_t dstSize, @@ -1766,11 +1772,30 @@ ZSTDLIB_API size_t ZSTD_CCtx_refPrefix_advanced(ZSTD_CCtx* cctx, const void* pre * Default is 0 == ZSTD_sf_noBlockDelimiters. 
* * For use with sequence compression API: ZSTD_compressSequences(). + * * Designates whether or not the given array of ZSTD_Sequence contains block delimiters - * which are defined as sequences with offset == 0 and matchLength == 0. + * and last literals, which are defined as sequences with offset == 0 and matchLength == 0. + * See the definition of ZSTD_Sequence for more specifics. */ #define ZSTD_c_blockDelimiters ZSTD_c_experimentalParam11 +/* ZSTD_c_validateSequences + * Default is 0 == disabled. Set to 1 to enable sequence validation. + * + * For use with sequence compression API: ZSTD_compressSequences(). + * Designates whether or not we validate sequences provided to ZSTD_compressSequences() + * during function execution. + * + * Without validation, providing a sequence that does not conform to the zstd spec will cause + * undefined behavior, and may produce a corrupted block. + * + * With validation enabled, if a sequence is invalid (see doc/zstd_compression_format.md for + * specifics regarding offset/matchlength requirements) then the function will bail out and + * return an error. + * + */ +#define ZSTD_c_validateSequences ZSTD_c_experimentalParam12 + /*! ZSTD_CCtx_getParameter() : * Get the requested compression parameter value, selected by enum ZSTD_cParameter, * and store it into int* value. diff --git a/tests/fuzz/sequence_compression_api b/tests/fuzz/sequence_compression_api deleted file mode 100755 index 4f75a144f..000000000 Binary files a/tests/fuzz/sequence_compression_api and /dev/null differ