From: Nick Terrell Date: Tue, 12 May 2020 02:05:42 +0000 (-0700) Subject: [lib][fuzz] Assert sequences are valid in round trip tests X-Git-Tag: v1.4.5^2~17^2~4 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=4b88bd3ee0474b09770af15effa141e889144a41;p=thirdparty%2Fzstd.git [lib][fuzz] Assert sequences are valid in round trip tests --- diff --git a/lib/compress/zstd_lazy.c b/lib/compress/zstd_lazy.c index def421e90..65ad2f27f 100644 --- a/lib/compress/zstd_lazy.c +++ b/lib/compress/zstd_lazy.c @@ -929,11 +929,11 @@ size_t ZSTD_compressBlock_lazy_extDict_generic( const BYTE* const ilimit = iend - 8; const BYTE* const base = ms->window.base; const U32 dictLimit = ms->window.dictLimit; - const U32 lowestIndex = ms->window.lowLimit; const BYTE* const prefixStart = base + dictLimit; const BYTE* const dictBase = ms->window.dictBase; const BYTE* const dictEnd = dictBase + dictLimit; - const BYTE* const dictStart = dictBase + lowestIndex; + const BYTE* const dictStart = dictBase + ms->window.lowLimit; + const U32 windowLog = ms->cParams.windowLog; typedef size_t (*searchMax_f)( ZSTD_matchState_t* ms, @@ -951,10 +951,10 @@ size_t ZSTD_compressBlock_lazy_extDict_generic( size_t offset=0; const BYTE* start=ip+1; U32 current = (U32)(ip-base); - U32 windowLow = ZSTD_getLowestMatchIndex(ms, current, ms->cParams.windowLog); /* check repCode */ - { const U32 repIndex = (U32)(current+1 - offset_1); + { const U32 windowLow = ZSTD_getLowestMatchIndex(ms, current, windowLog); + const U32 repIndex = (U32)(current+1 - offset_1); const BYTE* const repBase = repIndex < dictLimit ? dictBase : base; const BYTE* const repMatch = repBase + repIndex; if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > windowLow)) /* intentional overflow */ @@ -982,13 +982,12 @@ size_t ZSTD_compressBlock_lazy_extDict_generic( while (ipcParams.windowLog); /* check repCode */ if (offset) { + const U32 windowLow = ZSTD_getLowestMatchIndex(ms, current, windowLog); const U32 repIndex = (U32)(current - offset_1); const BYTE* const repBase = repIndex < dictLimit ? dictBase : base; const BYTE* const repMatch = repBase + repIndex; - windowLow = ZSTD_getLowestMatchIndex(ms, current, ms->cParams.windowLog); if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > windowLow)) /* intentional overflow */ if (MEM_read32(ip) == MEM_read32(repMatch)) { /* repcode detected */ @@ -1014,9 +1013,9 @@ size_t ZSTD_compressBlock_lazy_extDict_generic( if ((depth==2) && (ipcParams.windowLog); /* check repCode */ if (offset) { + const U32 windowLow = ZSTD_getLowestMatchIndex(ms, current, windowLog); const U32 repIndex = (U32)(current - offset_1); const BYTE* const repBase = repIndex < dictLimit ? dictBase : base; const BYTE* const repMatch = repBase + repIndex; @@ -1061,10 +1060,10 @@ _storeSequence: /* check immediate repcode */ while (ip <= ilimit) { + const U32 windowLow = ZSTD_getLowestMatchIndex(ms, current, windowLog); const U32 repIndex = (U32)((ip-base) - offset_2); const BYTE* const repBase = repIndex < dictLimit ? dictBase : base; const BYTE* const repMatch = repBase + repIndex; - windowLow = ZSTD_getLowestMatchIndex(ms, current, ms->cParams.windowLog); if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > windowLow)) /* intentional overflow */ if (MEM_read32(ip) == MEM_read32(repMatch)) { /* repcode detected we should take it */ diff --git a/lib/decompress/zstd_ddict.c b/lib/decompress/zstd_ddict.c index 042a7bc85..b0e025e7a 100644 --- a/lib/decompress/zstd_ddict.c +++ b/lib/decompress/zstd_ddict.c @@ -65,6 +65,9 @@ void ZSTD_copyDDictParameters(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict) dctx->virtualStart = ddict->dictContent; dctx->dictEnd = (const BYTE*)ddict->dictContent + ddict->dictSize; dctx->previousDstEnd = dctx->dictEnd; +#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION + dctx->dictContentEndForFuzzing = dctx->previousDstEnd; +#endif if (ddict->entropyPresent) { dctx->litEntropy = 1; dctx->fseEntropy = 1; diff --git a/lib/decompress/zstd_decompress.c b/lib/decompress/zstd_decompress.c index 90ddeaabe..d8f6eb7db 100644 --- a/lib/decompress/zstd_decompress.c +++ b/lib/decompress/zstd_decompress.c @@ -114,6 +114,9 @@ static void ZSTD_initDCtx_internal(ZSTD_DCtx* dctx) dctx->oversizedDuration = 0; dctx->bmi2 = ZSTD_cpuid_bmi2(ZSTD_cpuid()); dctx->outBufferMode = ZSTD_obm_buffered; +#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION + dctx->dictContentEndForFuzzing = NULL; +#endif } ZSTD_DCtx* ZSTD_initStaticDCtx(void *workspace, size_t workspaceSize) @@ -1039,6 +1042,9 @@ static size_t ZSTD_refDictContent(ZSTD_DCtx* dctx, const void* dict, size_t dict dctx->virtualStart = (const char*)dict - ((const char*)(dctx->previousDstEnd) - (const char*)(dctx->prefixStart)); dctx->prefixStart = dict; dctx->previousDstEnd = (const char*)dict + dictSize; +#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION + dctx->dictContentEndForFuzzing = dctx->previousDstEnd; +#endif return 0; } diff --git a/lib/decompress/zstd_decompress_block.c b/lib/decompress/zstd_decompress_block.c index 3b9ba3771..b2a10c0d8 100644 --- a/lib/decompress/zstd_decompress_block.c +++ b/lib/decompress/zstd_decompress_block.c @@ -947,13 +947,52 @@ ZSTD_decodeSequence(seqState_t* seqState, const ZSTD_longOffset_e longOffsets, c return seq; } +#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION +static int ZSTD_dictionaryIsActive(ZSTD_DCtx const* dctx, BYTE const* prefixStart, BYTE const* oLitEnd) +{ + size_t const windowSize = dctx->fParams.windowSize; + /* No dictionary used. */ + if (dctx->dictContentEndForFuzzing == NULL) return 0; + /* Dictionary is not our ext-dict. */ + if (dctx->dictEnd != dctx->dictContentEndForFuzzing) return 0; + /* Dictionary is not within our window size. */ + if ((size_t)(oLitEnd - prefixStart) >= windowSize) return 0; + /* Dictionary is active. */ + return 1; +} + +MEM_STATIC void ZSTD_assertValidSequence( + ZSTD_DCtx const* dctx, + BYTE const* op, BYTE const* oend, + seq_t const seq, + BYTE const* prefixStart, BYTE const* virtualStart) +{ + size_t const windowSize = dctx->fParams.windowSize; + size_t const sequenceSize = seq.litLength + seq.matchLength; + BYTE const* const oLitEnd = op + seq.litLength; + assert(op <= oend); + assert((size_t)(oend - op) >= sequenceSize); + assert(sequenceSize <= ZSTD_BLOCKSIZE_MAX); + if (ZSTD_dictionaryIsActive(dctx, prefixStart, oLitEnd)) { + size_t const dictSize = (size_t)(prefixStart - virtualStart); + /* Offset must be within the dictionary. */ + assert(seq.offset <= (size_t)(oLitEnd - virtualStart)); + assert(seq.offset <= windowSize + dictSize); + } else { + /* Offset must be within our window. */ + assert(seq.offset <= windowSize); + } +} +#endif + #ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG FORCE_INLINE_TEMPLATE size_t DONT_VECTORIZE ZSTD_decompressSequences_body( ZSTD_DCtx* dctx, void* dst, size_t maxDstSize, const void* seqStart, size_t seqSize, int nbSeq, - const ZSTD_longOffset_e isLongOffset) + const ZSTD_longOffset_e isLongOffset, + const int frame) { const BYTE* ip = (const BYTE*)seqStart; const BYTE* const iend = ip + seqSize; @@ -966,6 +1005,7 @@ ZSTD_decompressSequences_body( ZSTD_DCtx* dctx, const BYTE* const vBase = (const BYTE*) (dctx->virtualStart); const BYTE* const dictEnd = (const BYTE*) (dctx->dictEnd); DEBUGLOG(5, "ZSTD_decompressSequences_body"); + (void)frame; /* Regen sequences */ if (nbSeq) { @@ -1030,6 +1070,9 @@ ZSTD_decompressSequences_body( ZSTD_DCtx* dctx, for ( ; ; ) { seq_t const sequence = ZSTD_decodeSequence(&seqState, isLongOffset, ZSTD_p_noPrefetch); size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequence, &litPtr, litEnd, prefixStart, vBase, dictEnd); +#if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE) + if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequence, prefixStart, vBase); +#endif DEBUGLOG(6, "regenerated sequence size : %u", (U32)oneSeqSize); BIT_reloadDStream(&(seqState.DStream)); /* gcc and clang both don't like early returns in this loop. @@ -1068,9 +1111,10 @@ static size_t ZSTD_decompressSequences_default(ZSTD_DCtx* dctx, void* dst, size_t maxDstSize, const void* seqStart, size_t seqSize, int nbSeq, - const ZSTD_longOffset_e isLongOffset) + const ZSTD_longOffset_e isLongOffset, + const int frame) { - return ZSTD_decompressSequences_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset); + return ZSTD_decompressSequences_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame); } #endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG */ @@ -1080,7 +1124,8 @@ ZSTD_decompressSequencesLong_body( ZSTD_DCtx* dctx, void* dst, size_t maxDstSize, const void* seqStart, size_t seqSize, int nbSeq, - const ZSTD_longOffset_e isLongOffset) + const ZSTD_longOffset_e isLongOffset, + const int frame) { const BYTE* ip = (const BYTE*)seqStart; const BYTE* const iend = ip + seqSize; @@ -1092,6 +1137,7 @@ ZSTD_decompressSequencesLong_body( const BYTE* const prefixStart = (const BYTE*) (dctx->prefixStart); const BYTE* const dictStart = (const BYTE*) (dctx->virtualStart); const BYTE* const dictEnd = (const BYTE*) (dctx->dictEnd); + (void)frame; /* Regen sequences */ if (nbSeq) { @@ -1127,6 +1173,9 @@ ZSTD_decompressSequencesLong_body( for ( ; (BIT_reloadDStream(&(seqState.DStream)) <= BIT_DStream_completed) && (seqNbbmi2) { - return ZSTD_decompressSequences_bmi2(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset); + return ZSTD_decompressSequences_bmi2(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame); } #endif - return ZSTD_decompressSequences_default(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset); + return ZSTD_decompressSequences_default(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame); } #endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG */ @@ -1230,15 +1287,16 @@ static size_t ZSTD_decompressSequencesLong(ZSTD_DCtx* dctx, void* dst, size_t maxDstSize, const void* seqStart, size_t seqSize, int nbSeq, - const ZSTD_longOffset_e isLongOffset) + const ZSTD_longOffset_e isLongOffset, + const int frame) { DEBUGLOG(5, "ZSTD_decompressSequencesLong"); #if DYNAMIC_BMI2 if (dctx->bmi2) { - return ZSTD_decompressSequencesLong_bmi2(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset); + return ZSTD_decompressSequencesLong_bmi2(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame); } #endif - return ZSTD_decompressSequencesLong_default(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset); + return ZSTD_decompressSequencesLong_default(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame); } #endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT */ @@ -1333,12 +1391,12 @@ ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx, if (usePrefetchDecoder) #endif #ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT - return ZSTD_decompressSequencesLong(dctx, dst, dstCapacity, ip, srcSize, nbSeq, isLongOffset); + return ZSTD_decompressSequencesLong(dctx, dst, dstCapacity, ip, srcSize, nbSeq, isLongOffset, frame); #endif #ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG /* else */ - return ZSTD_decompressSequences(dctx, dst, dstCapacity, ip, srcSize, nbSeq, isLongOffset); + return ZSTD_decompressSequences(dctx, dst, dstCapacity, ip, srcSize, nbSeq, isLongOffset, frame); #endif } } diff --git a/lib/decompress/zstd_decompress_internal.h b/lib/decompress/zstd_decompress_internal.h index 8c4f79f46..1aa1face0 100644 --- a/lib/decompress/zstd_decompress_internal.h +++ b/lib/decompress/zstd_decompress_internal.h @@ -160,6 +160,10 @@ struct ZSTD_DCtx_s BYTE headerBuffer[ZSTD_FRAMEHEADERSIZE_MAX]; size_t oversizedDuration; + +#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION + void const* dictContentEndForFuzzing; +#endif }; /* typedef'd to ZSTD_DCtx within "zstd.h" */ diff --git a/tests/fuzz/Makefile b/tests/fuzz/Makefile index 3c7dc09e2..d71c34d68 100644 --- a/tests/fuzz/Makefile +++ b/tests/fuzz/Makefile @@ -176,8 +176,8 @@ zstd_frame_info: $(FUZZ_HEADERS) $(FUZZ_DECOMPRESS_OBJ) d_fuzz_zstd_frame_info.o dictionary_loader: $(FUZZ_HEADERS) $(FUZZ_ROUND_TRIP_OBJ) rt_fuzz_dictionary_loader.o $(CXX) $(FUZZ_TARGET_FLAGS) $(FUZZ_ROUND_TRIP_OBJ) rt_fuzz_dictionary_loader.o $(LIB_FUZZING_ENGINE) -o $@ -libregression.a: $(FUZZ_HEADERS) $(PRGDIR)/util.h $(PRGDIR)/util.c regression_driver.o - $(AR) $(FUZZ_ARFLAGS) $@ regression_driver.o +libregression.a: $(FUZZ_HEADERS) $(PRGDIR)/util.h $(PRGDIR)/util.c d_fuzz_regression_driver.o + $(AR) $(FUZZ_ARFLAGS) $@ d_fuzz_regression_driver.o corpora/%_seed_corpus.zip: @mkdir -p corpora