]> git.ipfire.org Git - thirdparty/zstd.git/commitdiff
[opt] Fix too short of match getting generated
authorNick Terrell <terrelln@meta.com>
Fri, 20 Dec 2024 22:32:28 +0000 (17:32 -0500)
committerNick Terrell <nickrterrell@gmail.com>
Fri, 3 Jan 2025 16:38:41 +0000 (11:38 -0500)
The optimal parser with LDM enabled using minMatch > 3 could generate a match
length of 3 when minMatch >= 4. This is not allowed.

1. Fix the bug
2. Add validation logic to `ZSTD_buildSeqStore()` in debug mode for all block
   compressors that checks we never generate too short a match. This way we don't
   rely on the `generate_sequences` fuzzer to find this issue.

Credit to OSS-Fuzz

lib/compress/zstd_compress.c
lib/compress/zstd_opt.c

index 3af69c1aec38c57d884f6ef4eae3ec50657eea70..52c54b4da1796fc3085052d31eaaf4653d89a9cb 100644 (file)
@@ -3240,6 +3240,27 @@ static size_t ZSTD_fastSequenceLengthSum(ZSTD_Sequence const* seqBuf, size_t seq
     return litLenSum + matchLenSum;
 }
 
+/**
+ * Function to validate sequences produced by a block compressor.
+ */
+static void ZSTD_validateSeqStore(const SeqStore_t* seqStore, const ZSTD_compressionParameters* cParams)
+{
+#if DEBUGLEVEL >= 1
+    const SeqDef* seq = seqStore->sequencesStart;
+    const SeqDef* const seqEnd = seqStore->sequences;
+    size_t const matchLenLowerBound = cParams->minMatch == 3 ? 3 : 4;
+    for (; seq < seqEnd; ++seq) {
+        const ZSTD_SequenceLength seqLength = ZSTD_getSequenceLength(seqStore, seq);
+        assert(seqLength.matchLength >= matchLenLowerBound);
+        (void)seqLength;
+        (void)matchLenLowerBound;
+    }
+#else
+    (void)seqStore;
+    (void)cParams;
+#endif
+}
+
 static size_t
 ZSTD_transferSequences_wBlockDelim(ZSTD_CCtx* cctx,
                                    ZSTD_SequencePosition* seqPos,
@@ -3410,6 +3431,7 @@ static size_t ZSTD_buildSeqStore(ZSTD_CCtx* zc, const void* src, size_t srcSize)
         {   const BYTE* const lastLiterals = (const BYTE*)src + srcSize - lastLLSize;
             ZSTD_storeLastLiterals(&zc->seqStore, lastLiterals, lastLLSize);
     }   }
+    ZSTD_validateSeqStore(&zc->seqStore, &zc->appliedParams.cParams);
     return ZSTDbss_compress;
 }
 
index 0ab33d56eb7e84a17b4d6de4b0ab4eaf40c635b8..3d7171b755bde44cf324fb31d4a791e8554f268a 100644 (file)
@@ -972,7 +972,7 @@ ZSTD_opt_getNextMatchAndUpdateSeqStore(ZSTD_optLdm_t* optLdm, U32 currPosInBlock
         return;
     }
 
-    /* Matches may be < MINMATCH by this process. In that case, we will reject them
+    /* Matches may be < minMatch by this process. In that case, we will reject them
        when we are deciding whether or not to add the ldm */
     optLdm->startPosInBlock = currPosInBlock + literalsBytesRemaining;
     optLdm->endPosInBlock = optLdm->startPosInBlock + matchBytesRemaining;
@@ -994,7 +994,8 @@ ZSTD_opt_getNextMatchAndUpdateSeqStore(ZSTD_optLdm_t* optLdm, U32 currPosInBlock
  * into 'matches'. Maintains the correct ordering of 'matches'.
  */
 static void ZSTD_optLdm_maybeAddMatch(ZSTD_match_t* matches, U32* nbMatches,
-                                      const ZSTD_optLdm_t* optLdm, U32 currPosInBlock)
+                                      const ZSTD_optLdm_t* optLdm, U32 currPosInBlock,
+                                      U32 minMatch)
 {
     U32 const posDiff = currPosInBlock - optLdm->startPosInBlock;
     /* Note: ZSTD_match_t actually contains offBase and matchLength (before subtracting MINMATCH) */
@@ -1003,7 +1004,7 @@ static void ZSTD_optLdm_maybeAddMatch(ZSTD_match_t* matches, U32* nbMatches,
     /* Ensure that current block position is not outside of the match */
     if (currPosInBlock < optLdm->startPosInBlock
       || currPosInBlock >= optLdm->endPosInBlock
-      || candidateMatchLength < MINMATCH) {
+      || candidateMatchLength < minMatch) {
         return;
     }
 
@@ -1023,7 +1024,8 @@ static void ZSTD_optLdm_maybeAddMatch(ZSTD_match_t* matches, U32* nbMatches,
 static void
 ZSTD_optLdm_processMatchCandidate(ZSTD_optLdm_t* optLdm,
                                   ZSTD_match_t* matches, U32* nbMatches,
-                                  U32 currPosInBlock, U32 remainingBytes)
+                                  U32 currPosInBlock, U32 remainingBytes,
+                                  U32 minMatch)
 {
     if (optLdm->seqStore.size == 0 || optLdm->seqStore.pos >= optLdm->seqStore.size) {
         return;
@@ -1040,7 +1042,7 @@ ZSTD_optLdm_processMatchCandidate(ZSTD_optLdm_t* optLdm,
         }
         ZSTD_opt_getNextMatchAndUpdateSeqStore(optLdm, currPosInBlock, remainingBytes);
     }
-    ZSTD_optLdm_maybeAddMatch(matches, nbMatches, optLdm, currPosInBlock);
+    ZSTD_optLdm_maybeAddMatch(matches, nbMatches, optLdm, currPosInBlock, minMatch);
 }
 
 
@@ -1122,7 +1124,8 @@ ZSTD_compressBlock_opt_generic(ZSTD_MatchState_t* ms,
             U32 const ll0 = !litlen;
             U32 nbMatches = getAllMatches(matches, ms, &nextToUpdate3, ip, iend, rep, ll0, minMatch);
             ZSTD_optLdm_processMatchCandidate(&optLdm, matches, &nbMatches,
-                                              (U32)(ip-istart), (U32)(iend-ip));
+                                              (U32)(ip-istart), (U32)(iend-ip),
+                                              minMatch);
             if (!nbMatches) {
                 DEBUGLOG(8, "no match found at cPos %u", (unsigned)(ip-istart));
                 ip++;
@@ -1276,7 +1279,8 @@ ZSTD_compressBlock_opt_generic(ZSTD_MatchState_t* ms,
                 U32 matchNb;
 
                 ZSTD_optLdm_processMatchCandidate(&optLdm, matches, &nbMatches,
-                                                  (U32)(inr-istart), (U32)(iend-inr));
+                                                  (U32)(inr-istart), (U32)(iend-inr),
+                                                  minMatch);
 
                 if (!nbMatches) {
                     DEBUGLOG(7, "rPos:%u : no match found", cur);