]> git.ipfire.org Git - thirdparty/zstd.git/commitdiff
Fix external sequence corner cases
author Nick Terrell <terrelln@fb.com>
Tue, 20 Mar 2018 21:31:43 +0000 (14:31 -0700)
committer Nick Terrell <terrelln@fb.com>
Tue, 20 Mar 2018 21:50:28 +0000 (14:50 -0700)
* Clear external sequences when we reset the `ZSTD_CCtx`.
* Skip external sequences when a block is too small to compress.

lib/compress/zstd_compress.c
lib/compress/zstd_ldm.c
lib/compress/zstd_ldm.h

index db8d7a8c38dcf6bcc0816c434a0f577646353a76..642296fb618aff8d5c922f0217012e1767ce7d72 100644 (file)
@@ -922,6 +922,7 @@ static size_t ZSTD_continueCCtx(ZSTD_CCtx* cctx, ZSTD_CCtx_params params, U64 pl
     cctx->dictID = 0;
     if (params.ldmParams.enableLdm)
         ZSTD_window_clear(&cctx->ldmState.window);
+    ZSTD_referenceExternalSequences(cctx, NULL, 0);
     ZSTD_invalidateMatchState(&cctx->blockState.matchState);
     ZSTD_reset_compressedBlockState(cctx->blockState.prevCBlock);
     XXH64_reset(&cctx->xxhState, 0);
@@ -1108,6 +1109,7 @@ static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc,
             ptr = zc->ldmState.bucketOffsets + ldmBucketSize;
             ZSTD_window_clear(&zc->ldmState.window);
         }
+        ZSTD_referenceExternalSequences(zc, NULL, 0);
 
         /* buffers */
         zc->inBuffSize = buffInSize;
@@ -1818,8 +1820,10 @@ static size_t ZSTD_compressBlock_internal(ZSTD_CCtx* zc,
     ZSTD_matchState_t* const ms = &zc->blockState.matchState;
     DEBUGLOG(5, "ZSTD_compressBlock_internal (dstCapacity=%u, dictLimit=%u, nextToUpdate=%u)",
                 (U32)dstCapacity, ms->window.dictLimit, ms->nextToUpdate);
-    if (srcSize < MIN_CBLOCK_SIZE+ZSTD_blockHeaderSize+1)
+    if (srcSize < MIN_CBLOCK_SIZE+ZSTD_blockHeaderSize+1) {
+        ZSTD_ldm_skipSequences(&zc->externSeqStore, srcSize, zc->appliedParams.cParams.searchLength);
         return 0;   /* don't even attempt compression below a certain srcSize */
+    }
     ZSTD_resetSeqStore(&(zc->seqStore));
 
     /* limited update after a very long match */
index 5c9c0d2b6d735f57c345ccdb2ad5a8f84215d596..aff9dd2be654d56a393e97b59c5756362dcd7d27 100644 (file)
@@ -536,6 +536,34 @@ size_t ZSTD_ldm_generateSequences(
     return 0;
 }
 
+void ZSTD_ldm_skipSequences(rawSeqStore_t* rawSeqStore, size_t srcSize, U32 const minMatch) {  /* Consume `srcSize` bytes worth of sequences, starting at rawSeqStore->pos; edits the stored sequences in place. */
+    while (srcSize > 0 && rawSeqStore->pos < rawSeqStore->size) {  /* stop once every byte is accounted for or no sequences remain */
+        rawSeq* seq = rawSeqStore->seq + rawSeqStore->pos;  /* first sequence that has not been consumed yet */
+        if (srcSize <= seq->litLength) {
+            /* The skipped bytes end inside this sequence's literals: shorten the literal run and stop. */
+            seq->litLength -= srcSize;
+            return;
+        }
+        srcSize -= seq->litLength;  /* all of this sequence's literals are skipped */
+        seq->litLength = 0;
+        if (srcSize < seq->matchLength) {
+            /* The skipped bytes end inside the match: only the tail of the match remains. */
+            seq->matchLength -= srcSize;
+            if (seq->matchLength < minMatch) {
+                /* The remaining tail is shorter than minMatch, so drop the match entirely. */
+                if (rawSeqStore->pos + 1 < rawSeqStore->size) {
+                    seq[1].litLength += seq[0].matchLength;  /* fold the dropped tail into the next sequence's literals, when a next sequence exists */
+                }
+                rawSeqStore->pos++;
+            }
+            return;
+        }
+        srcSize -= seq->matchLength;  /* the whole match is skipped; continue with the next sequence */
+        seq->matchLength = 0;
+        rawSeqStore->pos++;
+    }
+}
+
 /**
  * If the sequence length is longer than remaining then the sequence is split
  * between this block and the next.
@@ -546,51 +574,24 @@ size_t ZSTD_ldm_generateSequences(
 static rawSeq maybeSplitSequence(rawSeqStore_t* rawSeqStore,
                                  U32 const remaining, U32 const minMatch)
 {
-    size_t const pos = rawSeqStore->pos;
     rawSeq sequence = rawSeqStore->seq[rawSeqStore->pos];
     assert(sequence.offset > 0);
-    /* Handle partial sequences */
+    /* Likely: No partial sequence */
+    if (remaining >= sequence.litLength + sequence.matchLength) {
+        rawSeqStore->pos++;
+        return sequence;
+    }
+    /* Cut the sequence short (offset == 0 ==> rest is literals). */
     if (remaining <= sequence.litLength) {
-        /* Split the literals that we have out of the sequence.
-         * They will become the last literals of this block.
-         * The next block starts off with the remaining literals.
-         */
-        rawSeqStore->seq[pos].litLength -= remaining;
         sequence.offset = 0;
     } else if (remaining < sequence.litLength + sequence.matchLength) {
-        /* Split the match up into two sequences. One in this block, and one
-         * in the next with no literals. If either match would be shorter
-         * than searchLength we omit it.
-         */
-        U32 const matchPrefix = remaining - sequence.litLength;
-        U32 const matchSuffix = sequence.matchLength - matchPrefix;
-
-        assert(remaining > sequence.litLength);
-        assert(matchPrefix < sequence.matchLength);
-        assert(matchPrefix + matchSuffix == sequence.matchLength);
-        /* Update the first sequence */
-        sequence.matchLength = matchPrefix;
-        /* Update the second sequence */
-        if (matchSuffix >= minMatch) {
-            /* Update the second sequence, since the suffix is long enough */
-            rawSeqStore->seq[pos].litLength = 0;
-            rawSeqStore->seq[pos].matchLength = matchSuffix;
-        } else {
-            /* Omit the second sequence since the match suffix is too short.
-             * Add to the next sequences literals (if any).
-             */
-            if (pos + 1 < rawSeqStore->size)
-                rawSeqStore->seq[pos + 1].litLength += matchSuffix;
-            rawSeqStore->pos++; /* Consume the sequence */
-        }
+        sequence.matchLength = remaining - sequence.litLength;
         if (sequence.matchLength < minMatch) {
-            /* Skip the current sequence if it is too short */
             sequence.offset = 0;
         }
-    } else {
-      /* No partial sequence */
-      rawSeqStore->pos++; /* Consume the sequence */
     }
+    /* Skip past `remaining` bytes for the future sequences. */
+    ZSTD_ldm_skipSequences(rawSeqStore, remaining, minMatch);
     return sequence;
 }
 
index 9d2f7c391770b72764a1218d13fdd3e4c9209777..84d3723cee9c9d212174ea52d98079a2b2e96e3c 100644 (file)
@@ -65,6 +65,16 @@ size_t ZSTD_ldm_blockCompress(rawSeqStore_t* rawSeqStore,
     ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize,
     int const extDict);
 
+/**
+ * ZSTD_ldm_skipSequences():
+ *
+ * Skip past `srcSize` bytes worth of sequences in `rawSeqStore`.
+ * Avoids emitting matches less than `minMatch` bytes.
+ * Must be called for data that is not passed to ZSTD_ldm_blockCompress().
+ */
+void ZSTD_ldm_skipSequences(rawSeqStore_t* rawSeqStore, size_t srcSize,
+    U32 const minMatch);
+
 
 /** ZSTD_ldm_initializeParameters() :
  *  Initialize the long distance matching parameters to their default values. */