From: Elliot Gorokhovsky Date: Tue, 31 Jan 2023 18:55:48 +0000 (-0500) Subject: Guard against invalid sequences from external matchfinders (#3465) X-Git-Tag: v1.5.4^2~14 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=64052ef57d09a9117ccf1c535d3387f0d5270ca9;p=thirdparty%2Fzstd.git Guard against invalid sequences from external matchfinders (#3465) --- diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index 579044dc9..de2db8221 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -277,14 +277,8 @@ static ZSTD_paramSwitch_e ZSTD_resolveEnableLdm(ZSTD_paramSwitch_e mode, return (cParams->strategy >= ZSTD_btopt && cParams->windowLog >= 27) ? ZSTD_ps_enable : ZSTD_ps_disable; } -/* Enables validation for external sequences in debug builds. */ static int ZSTD_resolveExternalSequenceValidation(int mode) { -#if defined(DEBUGLEVEL) && (DEBUGLEVEL>=2) - (void)mode; - return 1; -#else return mode; -#endif } /* Resolves maxBlockSize to the default if no value is present. */ @@ -3050,6 +3044,23 @@ static size_t ZSTD_postProcessExternalMatchFinderResult( } } +/* ZSTD_fastSequenceLengthSum() : + * Returns sum(litLen) + sum(matchLen) + lastLits for *seqBuf*. + * Similar to another function in zstd_compress.c (determine_blockSize), + * except it doesn't check for a block delimiter to end summation. + * Removing the early exit allows the compiler to auto-vectorize (https://godbolt.org/z/cY1cajz9P). + * This function can be deleted and replaced by determine_blockSize after we resolve issue #3456. */ +static size_t ZSTD_fastSequenceLengthSum(ZSTD_Sequence const* seqBuf, size_t seqBufSize) { + size_t matchLenSum, litLenSum, i; + matchLenSum = 0; + litLenSum = 0; + for (i = 0; i < seqBufSize; i++) { + litLenSum += seqBuf[i].litLength; + matchLenSum += seqBuf[i].matchLength; + } + return litLenSum + matchLenSum; +} + typedef enum { ZSTDbss_compress, ZSTDbss_noCompress } ZSTD_buildSeqStore_e; static size_t ZSTD_buildSeqStore(ZSTD_CCtx* zc, const void* src, size_t srcSize) @@ -3167,8 +3178,15 @@ static size_t ZSTD_buildSeqStore(ZSTD_CCtx* zc, const void* src, size_t srcSize) /* Return early if there is no error, since we don't need to worry about last literals */ if (!ZSTD_isError(nbPostProcessedSeqs)) { ZSTD_sequencePosition seqPos = {0,0,0}; - ZSTD_copySequencesToSeqStoreExplicitBlockDelim( - zc, &seqPos, zc->externalMatchCtx.seqBuffer, nbPostProcessedSeqs, src, srcSize + size_t const seqLenSum = ZSTD_fastSequenceLengthSum(zc->externalMatchCtx.seqBuffer, nbPostProcessedSeqs); + RETURN_ERROR_IF(seqLenSum > srcSize, externalSequences_invalid, "External sequences imply too large a block!"); + FORWARD_IF_ERROR( + ZSTD_copySequencesToSeqStoreExplicitBlockDelim( + zc, &seqPos, + zc->externalMatchCtx.seqBuffer, nbPostProcessedSeqs, + src, srcSize + ), + "Failed to copy external sequences to seqStore!" ); ms->ldmSeqStore = NULL; DEBUGLOG(5, "Copied %lu sequences from external matchfinder to internal seqStore.", (unsigned long)nbExternalSeqs); @@ -6286,7 +6304,7 @@ ZSTD_copySequencesToSeqStoreExplicitBlockDelim(ZSTD_CCtx* cctx, ip += inSeqs[idx].litLength; seqPos->posInSrc += inSeqs[idx].litLength; } - RETURN_ERROR_IF(ip != iend, corruption_detected, "Blocksize doesn't agree with block delimiter!"); + RETURN_ERROR_IF(ip != iend, externalSequences_invalid, "Blocksize doesn't agree with block delimiter!"); seqPos->idx = idx+1; return 0; } @@ -6444,13 +6462,13 @@ blockSize_explicitDelimiter(const ZSTD_Sequence* inSeqs, size_t inSeqsSize, ZSTD blockSize += inSeqs[spos].litLength + inSeqs[spos].matchLength; if (end) { if (inSeqs[spos].matchLength != 0) - RETURN_ERROR(corruption_detected, "delimiter format error : both matchlength and offset must be == 0"); + RETURN_ERROR(externalSequences_invalid, "delimiter format error : both matchlength and offset must be == 0"); break; } spos++; } if (!end) - RETURN_ERROR(corruption_detected, "Reached end of sequences without finding a block delimiter"); + RETURN_ERROR(externalSequences_invalid, "Reached end of sequences without finding a block delimiter"); return blockSize; } @@ -6471,9 +6489,9 @@ static size_t determine_blockSize(ZSTD_sequenceFormat_e mode, { size_t const explicitBlockSize = blockSize_explicitDelimiter(inSeqs, inSeqsSize, seqPos); FORWARD_IF_ERROR(explicitBlockSize, "Error while determining block size with explicit delimiters"); if (explicitBlockSize > blockSize) - RETURN_ERROR(corruption_detected, "sequences incorrectly define a too large block"); + RETURN_ERROR(externalSequences_invalid, "sequences incorrectly define a too large block"); if (explicitBlockSize > remaining) - RETURN_ERROR(srcSize_wrong, "sequences define a frame longer than source"); + RETURN_ERROR(externalSequences_invalid, "sequences define a frame longer than source"); return explicitBlockSize; } } diff --git a/tests/external_matchfinder.c b/tests/external_matchfinder.c index 8ae76d519..97c47caff 100644 --- a/tests/external_matchfinder.c +++ b/tests/external_matchfinder.c @@ -108,6 +108,29 @@ size_t zstreamExternalMatchFinder( compressionLevel, windowSize ); + case EMF_INVALID_OFFSET: + outSeqs[0].offset = 1 << 20; + outSeqs[0].matchLength = 4; + outSeqs[0].litLength = (U32)(srcSize - 4); + return 1; + case EMF_INVALID_MATCHLEN: + outSeqs[0].offset = 1; + outSeqs[0].matchLength = (U32)(srcSize); + outSeqs[0].litLength = 1; + return 1; + case EMF_INVALID_LITLEN: + outSeqs[0].offset = 0; + outSeqs[0].matchLength = 0; + outSeqs[0].litLength = (U32)(srcSize + 1); + return 1; + case EMF_INVALID_LAST_LITS: + outSeqs[0].offset = 1; + outSeqs[0].matchLength = 1; + outSeqs[0].litLength = 1; + outSeqs[1].offset = 0; + outSeqs[1].matchLength = 0; + outSeqs[1].litLength = (U32)(srcSize - 1); + return 2; case EMF_SMALL_ERROR: return outSeqsCapacity + 1; case EMF_BIG_ERROR: diff --git a/tests/external_matchfinder.h b/tests/external_matchfinder.h index e4f7c95f0..7550bbceb 100644 --- a/tests/external_matchfinder.h +++ b/tests/external_matchfinder.h @@ -20,7 +20,11 @@ typedef enum { EMF_ONE_BIG_SEQ = 1, EMF_LOTS_OF_SEQS = 2, EMF_BIG_ERROR = 3, - EMF_SMALL_ERROR = 4 + EMF_SMALL_ERROR = 4, + EMF_INVALID_OFFSET = 5, + EMF_INVALID_MATCHLEN = 6, + EMF_INVALID_LITLEN = 7, + EMF_INVALID_LAST_LITS = 8 } EMF_testCase; size_t zstreamExternalMatchFinder( diff --git a/tests/fuzz/zstd_helpers.c b/tests/fuzz/zstd_helpers.c index 43a26405f..310012cb5 100644 --- a/tests/fuzz/zstd_helpers.c +++ b/tests/fuzz/zstd_helpers.c @@ -128,6 +128,7 @@ void FUZZ_setRandomParameters(ZSTD_CCtx *cctx, size_t srcSize, FUZZ_dataProducer setRand(cctx, ZSTD_c_deterministicRefPrefix, 0, 1, producer); setRand(cctx, ZSTD_c_prefetchCDictTables, 0, 2, producer); setRand(cctx, ZSTD_c_maxBlockSize, ZSTD_BLOCKSIZE_MAX_MIN, ZSTD_BLOCKSIZE_MAX, producer); + setRand(cctx, ZSTD_c_validateSequences, 0, 1, producer); if (FUZZ_dataProducer_uint32Range(producer, 0, 1) == 0) { setRand(cctx, ZSTD_c_srcSizeHint, ZSTD_SRCSIZEHINT_MIN, 2 * srcSize, producer); } diff --git a/tests/zstreamtest.c b/tests/zstreamtest.c index db1e25188..aff847b4d 100644 --- a/tests/zstreamtest.c +++ b/tests/zstreamtest.c @@ -1873,48 +1873,61 @@ static int basicUnitTests(U32 seed, double compressibility, int bigTests) * check that the reference is preserved across compressions */ ZSTD_registerExternalMatchFinder(zc, &externalMatchState, zstreamExternalMatchFinder); - for (enableFallback = 0; enableFallback < 1; enableFallback++) { + for (enableFallback = 0; enableFallback <= 1; enableFallback++) { size_t testCaseId; + size_t const numTestCases = 9; - EMF_testCase const EMF_successCases[] = { + EMF_testCase const testCases[] = { EMF_ONE_BIG_SEQ, EMF_LOTS_OF_SEQS, - }; - size_t const EMF_numSuccessCases = 2; - - EMF_testCase const EMF_failureCases[] = { EMF_ZERO_SEQS, EMF_BIG_ERROR, EMF_SMALL_ERROR, + EMF_INVALID_OFFSET, + EMF_INVALID_MATCHLEN, + EMF_INVALID_LITLEN, + EMF_INVALID_LAST_LITS }; - size_t const EMF_numFailureCases = 3; - /* Test external matchfinder success scenarios */ - for (testCaseId = 0; testCaseId < EMF_numSuccessCases; testCaseId++) { - size_t res; - externalMatchState = EMF_successCases[testCaseId]; - ZSTD_CCtx_reset(zc, ZSTD_reset_session_only); - CHECK_Z(ZSTD_CCtx_setParameter(zc, ZSTD_c_enableMatchFinderFallback, enableFallback)); - res = ZSTD_compress2(zc, dstBuf, dstBufSize, CNBuffer, CNBufferSize); - CHECK(ZSTD_isError(res), "EMF: Compression error: %s", ZSTD_getErrorName(res)); - CHECK_Z(ZSTD_decompress(checkBuf, checkBufSize, dstBuf, res)); - CHECK(memcmp(CNBuffer, checkBuf, CNBufferSize) != 0, "EMF: Corruption!"); - } + ZSTD_ErrorCode const errorCodes[] = { + ZSTD_error_no_error, + ZSTD_error_no_error, + ZSTD_error_externalMatchFinder_failed, + ZSTD_error_externalMatchFinder_failed, + ZSTD_error_externalMatchFinder_failed, + ZSTD_error_externalSequences_invalid, + ZSTD_error_externalSequences_invalid, + ZSTD_error_externalSequences_invalid, + ZSTD_error_externalSequences_invalid + }; - /* Test external matchfinder failure scenarios */ - for (testCaseId = 0; testCaseId < EMF_numFailureCases; testCaseId++) { + for (testCaseId = 0; testCaseId < numTestCases; testCaseId++) { size_t res; - externalMatchState = EMF_failureCases[testCaseId]; + + int const compressionShouldSucceed = ( + (errorCodes[testCaseId] == ZSTD_error_no_error) || + (enableFallback && errorCodes[testCaseId] == ZSTD_error_externalMatchFinder_failed) + ); + + int const testWithSequenceValidation = ( + testCases[testCaseId] == EMF_INVALID_OFFSET + ); + + externalMatchState = testCases[testCaseId]; + ZSTD_CCtx_reset(zc, ZSTD_reset_session_only); + CHECK_Z(ZSTD_CCtx_setParameter(zc, ZSTD_c_validateSequences, testWithSequenceValidation)); CHECK_Z(ZSTD_CCtx_setParameter(zc, ZSTD_c_enableMatchFinderFallback, enableFallback)); res = ZSTD_compress2(zc, dstBuf, dstBufSize, CNBuffer, CNBufferSize); - if (enableFallback) { + + if (compressionShouldSucceed) { + CHECK(ZSTD_isError(res), "EMF: Compression error: %s", ZSTD_getErrorName(res)); CHECK_Z(ZSTD_decompress(checkBuf, checkBufSize, dstBuf, res)); CHECK(memcmp(CNBuffer, checkBuf, CNBufferSize) != 0, "EMF: Corruption!"); } else { CHECK(!ZSTD_isError(res), "EMF: Should have raised an error!"); CHECK( - ZSTD_getErrorCode(res) != ZSTD_error_externalMatchFinder_failed, + ZSTD_getErrorCode(res) != errorCodes[testCaseId], "EMF: Wrong error code: %s", ZSTD_getErrorName(res) ); }