]> git.ipfire.org Git - thirdparty/zstd.git/commitdiff
Guard against invalid sequences from external matchfinders (#3465)
authorElliot Gorokhovsky <embg@fb.com>
Tue, 31 Jan 2023 18:55:48 +0000 (13:55 -0500)
committerGitHub <noreply@github.com>
Tue, 31 Jan 2023 18:55:48 +0000 (13:55 -0500)
lib/compress/zstd_compress.c
tests/external_matchfinder.c
tests/external_matchfinder.h
tests/fuzz/zstd_helpers.c
tests/zstreamtest.c

index 579044dc9df99b377a1728666059b87dcf16f200..de2db82216fc913a3a4aa91467eb023f55969e1b 100644 (file)
@@ -277,14 +277,8 @@ static ZSTD_paramSwitch_e ZSTD_resolveEnableLdm(ZSTD_paramSwitch_e mode,
     return (cParams->strategy >= ZSTD_btopt && cParams->windowLog >= 27) ? ZSTD_ps_enable : ZSTD_ps_disable;
 }
 
-/* Enables validation for external sequences in debug builds. */
 static int ZSTD_resolveExternalSequenceValidation(int mode) {
-#if defined(DEBUGLEVEL) && (DEBUGLEVEL>=2)
-    (void)mode;
-    return 1;
-#else
     return mode;
-#endif
 }
 
 /* Resolves maxBlockSize to the default if no value is present. */
@@ -3050,6 +3044,23 @@ static size_t ZSTD_postProcessExternalMatchFinderResult(
     }
 }
 
+/* ZSTD_fastSequenceLengthSum() :
+ * Returns sum(litLen) + sum(matchLen) + lastLits for *seqBuf*.
+ * Similar to another function in zstd_compress.c (determine_blockSize),
+ * except it doesn't check for a block delimiter to end summation.
+ * Removing the early exit allows the compiler to auto-vectorize (https://godbolt.org/z/cY1cajz9P).
+ * This function can be deleted and replaced by determine_blockSize after we resolve issue #3456. */
+static size_t ZSTD_fastSequenceLengthSum(ZSTD_Sequence const* seqBuf, size_t seqBufSize) {
+    size_t matchLenSum, litLenSum, i;
+    matchLenSum = 0;
+    litLenSum = 0;
+    for (i = 0; i < seqBufSize; i++) {
+        litLenSum += seqBuf[i].litLength;
+        matchLenSum += seqBuf[i].matchLength;
+    }
+    return litLenSum + matchLenSum;
+}
+
 typedef enum { ZSTDbss_compress, ZSTDbss_noCompress } ZSTD_buildSeqStore_e;
 
 static size_t ZSTD_buildSeqStore(ZSTD_CCtx* zc, const void* src, size_t srcSize)
@@ -3167,8 +3178,15 @@ static size_t ZSTD_buildSeqStore(ZSTD_CCtx* zc, const void* src, size_t srcSize)
                 /* Return early if there is no error, since we don't need to worry about last literals */
                 if (!ZSTD_isError(nbPostProcessedSeqs)) {
                     ZSTD_sequencePosition seqPos = {0,0,0};
-                    ZSTD_copySequencesToSeqStoreExplicitBlockDelim(
-                        zc, &seqPos, zc->externalMatchCtx.seqBuffer, nbPostProcessedSeqs, src, srcSize
+                    size_t const seqLenSum = ZSTD_fastSequenceLengthSum(zc->externalMatchCtx.seqBuffer, nbPostProcessedSeqs);
+                    RETURN_ERROR_IF(seqLenSum > srcSize, externalSequences_invalid, "External sequences imply too large a block!");
+                    FORWARD_IF_ERROR(
+                        ZSTD_copySequencesToSeqStoreExplicitBlockDelim(
+                            zc, &seqPos,
+                            zc->externalMatchCtx.seqBuffer, nbPostProcessedSeqs,
+                            src, srcSize
+                        ),
+                        "Failed to copy external sequences to seqStore!"
                     );
                     ms->ldmSeqStore = NULL;
                     DEBUGLOG(5, "Copied %lu sequences from external matchfinder to internal seqStore.", (unsigned long)nbExternalSeqs);
@@ -6286,7 +6304,7 @@ ZSTD_copySequencesToSeqStoreExplicitBlockDelim(ZSTD_CCtx* cctx,
         ip += inSeqs[idx].litLength;
         seqPos->posInSrc += inSeqs[idx].litLength;
     }
-    RETURN_ERROR_IF(ip != iend, corruption_detected, "Blocksize doesn't agree with block delimiter!");
+    RETURN_ERROR_IF(ip != iend, externalSequences_invalid, "Blocksize doesn't agree with block delimiter!");
     seqPos->idx = idx+1;
     return 0;
 }
@@ -6444,13 +6462,13 @@ blockSize_explicitDelimiter(const ZSTD_Sequence* inSeqs, size_t inSeqsSize, ZSTD
         blockSize += inSeqs[spos].litLength + inSeqs[spos].matchLength;
         if (end) {
             if (inSeqs[spos].matchLength != 0)
-                RETURN_ERROR(corruption_detected, "delimiter format error : both matchlength and offset must be == 0");
+                RETURN_ERROR(externalSequences_invalid, "delimiter format error : both matchlength and offset must be == 0");
             break;
         }
         spos++;
     }
     if (!end)
-        RETURN_ERROR(corruption_detected, "Reached end of sequences without finding a block delimiter");
+        RETURN_ERROR(externalSequences_invalid, "Reached end of sequences without finding a block delimiter");
     return blockSize;
 }
 
@@ -6471,9 +6489,9 @@ static size_t determine_blockSize(ZSTD_sequenceFormat_e mode,
     {   size_t const explicitBlockSize = blockSize_explicitDelimiter(inSeqs, inSeqsSize, seqPos);
         FORWARD_IF_ERROR(explicitBlockSize, "Error while determining block size with explicit delimiters");
         if (explicitBlockSize > blockSize)
-            RETURN_ERROR(corruption_detected, "sequences incorrectly define a too large block");
+            RETURN_ERROR(externalSequences_invalid, "sequences incorrectly define a too large block");
         if (explicitBlockSize > remaining)
-            RETURN_ERROR(srcSize_wrong, "sequences define a frame longer than source");
+            RETURN_ERROR(externalSequences_invalid, "sequences define a frame longer than source");
         return explicitBlockSize;
     }
 }
index 8ae76d519ef24304bcc712c7ae6b3cafac1e0385..97c47caffc43a97e56fcd2a4014789392416258f 100644 (file)
@@ -108,6 +108,29 @@ size_t zstreamExternalMatchFinder(
                 compressionLevel,
                 windowSize
             );
+        case EMF_INVALID_OFFSET:
+            outSeqs[0].offset = 1 << 20;
+            outSeqs[0].matchLength = 4;
+            outSeqs[0].litLength = (U32)(srcSize - 4);
+            return 1;
+        case EMF_INVALID_MATCHLEN:
+            outSeqs[0].offset = 1;
+            outSeqs[0].matchLength = (U32)(srcSize);
+            outSeqs[0].litLength = 1;
+            return 1;
+        case EMF_INVALID_LITLEN:
+            outSeqs[0].offset = 0;
+            outSeqs[0].matchLength = 0;
+            outSeqs[0].litLength = (U32)(srcSize + 1);
+            return 1;
+        case EMF_INVALID_LAST_LITS:
+            outSeqs[0].offset = 1;
+            outSeqs[0].matchLength = 1;
+            outSeqs[0].litLength = 1;
+            outSeqs[1].offset = 0;
+            outSeqs[1].matchLength = 0;
+            outSeqs[1].litLength = (U32)(srcSize - 1);
+            return 2;
         case EMF_SMALL_ERROR:
             return outSeqsCapacity + 1;
         case EMF_BIG_ERROR:
index e4f7c95f0ca4e5c9163feea91ee3e08f2e95153d..7550bbcebdb6d9d1f974d59bf101c319c5962d4e 100644 (file)
@@ -20,7 +20,11 @@ typedef enum {
     EMF_ONE_BIG_SEQ = 1,
     EMF_LOTS_OF_SEQS = 2,
     EMF_BIG_ERROR = 3,
-    EMF_SMALL_ERROR = 4
+    EMF_SMALL_ERROR = 4,
+    EMF_INVALID_OFFSET = 5,
+    EMF_INVALID_MATCHLEN = 6,
+    EMF_INVALID_LITLEN = 7,
+    EMF_INVALID_LAST_LITS = 8
 } EMF_testCase;
 
 size_t zstreamExternalMatchFinder(
index 43a26405fc5f3d135085f41b13115ac26719ad1a..310012cb57313d585d3f261cd39968b692acc233 100644 (file)
@@ -128,6 +128,7 @@ void FUZZ_setRandomParameters(ZSTD_CCtx *cctx, size_t srcSize, FUZZ_dataProducer
     setRand(cctx, ZSTD_c_deterministicRefPrefix, 0, 1, producer);
     setRand(cctx, ZSTD_c_prefetchCDictTables, 0, 2, producer);
     setRand(cctx, ZSTD_c_maxBlockSize, ZSTD_BLOCKSIZE_MAX_MIN, ZSTD_BLOCKSIZE_MAX, producer);
+    setRand(cctx, ZSTD_c_validateSequences, 0, 1, producer);
     if (FUZZ_dataProducer_uint32Range(producer, 0, 1) == 0) {
       setRand(cctx, ZSTD_c_srcSizeHint, ZSTD_SRCSIZEHINT_MIN, 2 * srcSize, producer);
     }
index db1e25188ccf1accb7477f20d361497fe0652597..aff847b4d93d82480fa1ac9b682a536e5a473058 100644 (file)
@@ -1873,48 +1873,61 @@ static int basicUnitTests(U32 seed, double compressibility, int bigTests)
          * check that the reference is preserved across compressions */
         ZSTD_registerExternalMatchFinder(zc, &externalMatchState, zstreamExternalMatchFinder);
 
-        for (enableFallback = 0; enableFallback < 1; enableFallback++) {
+        for (enableFallback = 0; enableFallback <= 1; enableFallback++) {
             size_t testCaseId;
+            size_t const numTestCases = 9;
 
-            EMF_testCase const EMF_successCases[] = {
+            EMF_testCase const testCases[] = {
                 EMF_ONE_BIG_SEQ,
                 EMF_LOTS_OF_SEQS,
-            };
-            size_t const EMF_numSuccessCases = 2;
-
-            EMF_testCase const EMF_failureCases[] = {
                 EMF_ZERO_SEQS,
                 EMF_BIG_ERROR,
                 EMF_SMALL_ERROR,
+                EMF_INVALID_OFFSET,
+                EMF_INVALID_MATCHLEN,
+                EMF_INVALID_LITLEN,
+                EMF_INVALID_LAST_LITS
             };
-            size_t const EMF_numFailureCases = 3;
 
-            /* Test external matchfinder success scenarios */
-            for (testCaseId = 0; testCaseId < EMF_numSuccessCases; testCaseId++) {
-                size_t res;
-                externalMatchState = EMF_successCases[testCaseId];
-                ZSTD_CCtx_reset(zc, ZSTD_reset_session_only);
-                CHECK_Z(ZSTD_CCtx_setParameter(zc, ZSTD_c_enableMatchFinderFallback, enableFallback));
-                res = ZSTD_compress2(zc, dstBuf, dstBufSize, CNBuffer, CNBufferSize);
-                CHECK(ZSTD_isError(res), "EMF: Compression error: %s", ZSTD_getErrorName(res));
-                CHECK_Z(ZSTD_decompress(checkBuf, checkBufSize, dstBuf, res));
-                CHECK(memcmp(CNBuffer, checkBuf, CNBufferSize) != 0, "EMF: Corruption!");
-            }
+            ZSTD_ErrorCode const errorCodes[] = {
+                ZSTD_error_no_error,
+                ZSTD_error_no_error,
+                ZSTD_error_externalMatchFinder_failed,
+                ZSTD_error_externalMatchFinder_failed,
+                ZSTD_error_externalMatchFinder_failed,
+                ZSTD_error_externalSequences_invalid,
+                ZSTD_error_externalSequences_invalid,
+                ZSTD_error_externalSequences_invalid,
+                ZSTD_error_externalSequences_invalid
+            };
 
-            /* Test external matchfinder failure scenarios */
-            for (testCaseId = 0; testCaseId < EMF_numFailureCases; testCaseId++) {
+            for (testCaseId = 0; testCaseId < numTestCases; testCaseId++) {
                 size_t res;
-                externalMatchState = EMF_failureCases[testCaseId];
+
+                int const compressionShouldSucceed = (
+                    (errorCodes[testCaseId] == ZSTD_error_no_error) ||
+                    (enableFallback && errorCodes[testCaseId] == ZSTD_error_externalMatchFinder_failed)
+                );
+
+                int const testWithSequenceValidation = (
+                    testCases[testCaseId] == EMF_INVALID_OFFSET
+                );
+
+                externalMatchState = testCases[testCaseId];
+
                 ZSTD_CCtx_reset(zc, ZSTD_reset_session_only);
+                CHECK_Z(ZSTD_CCtx_setParameter(zc, ZSTD_c_validateSequences, testWithSequenceValidation));
                 CHECK_Z(ZSTD_CCtx_setParameter(zc, ZSTD_c_enableMatchFinderFallback, enableFallback));
                 res = ZSTD_compress2(zc, dstBuf, dstBufSize, CNBuffer, CNBufferSize);
-                if (enableFallback) {
+
+                if (compressionShouldSucceed) {
+                    CHECK(ZSTD_isError(res), "EMF: Compression error: %s", ZSTD_getErrorName(res));
                     CHECK_Z(ZSTD_decompress(checkBuf, checkBufSize, dstBuf, res));
                     CHECK(memcmp(CNBuffer, checkBuf, CNBufferSize) != 0, "EMF: Corruption!");
                 } else {
                     CHECK(!ZSTD_isError(res), "EMF: Should have raised an error!");
                     CHECK(
-                        ZSTD_getErrorCode(res) != ZSTD_error_externalMatchFinder_failed,
+                        ZSTD_getErrorCode(res) != errorCodes[testCaseId],
                         "EMF: Wrong error code: %s", ZSTD_getErrorName(res)
                     );
                 }