git.ipfire.org Git - thirdparty/zstd.git/commitdiff
[ldm] Reset loadedDictEnd when the context is reset
author: Nick Terrell <terrelln@fb.com>
Mon, 18 May 2020 19:35:44 +0000 (12:35 -0700)
committer: Nick Terrell <terrelln@fb.com>
Mon, 18 May 2020 19:35:44 +0000 (12:35 -0700)
lib/compress/zstd_compress.c
lib/compress/zstd_ldm.c
lib/compress/zstdmt_compress.c

index 2cfa93051fd40660e4a5e5a3a9d880b29d95596b..3f963b1cfff8bc11c9c6d6e8a053132feee728dd 100644 (file)
@@ -1576,6 +1576,7 @@ static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc,
 
             ZSTD_window_init(&zc->ldmState.window);
             ZSTD_window_clear(&zc->ldmState.window);
+            zc->ldmState.loadedDictEnd = 0;
         }
 
         DEBUGLOG(3, "wksp: finished allocating, %zd bytes remain available", ZSTD_cwksp_available_space(ws));
index 22f3628070a2a5cd9ceaf5c123dfb3417a993aa1..869986a0af253ed1d04fe55c757bbb0ee741de1a 100644 (file)
@@ -463,6 +463,8 @@ size_t ZSTD_ldm_generateSequences(
             U32 const correction = ZSTD_window_correctOverflow(
                 &ldmState->window, /* cycleLog */ 0, maxDist, chunkStart);
             ZSTD_ldm_reduceTable(ldmState->hashTable, ldmHSize, correction);
+            /* invalidate dictionaries on overflow correction */
+            ldmState->loadedDictEnd = 0;
         }
         /* 2. We enforce the maximum offset allowed.
          *
@@ -471,6 +473,12 @@ size_t ZSTD_ldm_generateSequences(
          * TODO: * Test the chunk size.
          *       * Try invalidation after the sequence generation and test the
          *         the offset against maxDist directly.
+         *
+         * NOTE: Because of dictionaries + sequence splitting we MUST make sure
+         * that any offset used is valid at the END of the sequence, since it may
+         * be split into two sequences. This condition holds when using
+         * ZSTD_window_enforceMaxDist(), but if we move to checking offsets
+         * against maxDist directly, we'll have to carefully handle that case.
          */
         ZSTD_window_enforceMaxDist(&ldmState->window, chunkEnd, maxDist, &ldmState->loadedDictEnd, NULL);
         /* 3. Generate the sequences for the chunk, and get newLeftoverSize. */
index f6b26a757fa6c9adde79afd59900b303a55e6264..77a137bdf9cb37d0c1d118040290e3f0b89ec316 100644 (file)
@@ -493,7 +493,6 @@ static int ZSTDMT_serialState_reset(serialState_t* serialState,
         ZSTDMT_setNbSeq(seqPool, ZSTD_ldm_getMaxNbSeq(params.ldmParams, jobSize));
         /* Reset the window */
         ZSTD_window_init(&serialState->ldmState.window);
-        serialState->ldmWindow = serialState->ldmState.window;
         /* Resize tables and output space if necessary. */
         if (serialState->ldmState.hashTable == NULL || serialState->params.ldmParams.hashLog < hashLog) {
             ZSTD_free(serialState->ldmState.hashTable, cMem);
@@ -508,12 +507,20 @@ static int ZSTDMT_serialState_reset(serialState_t* serialState,
         /* Zero the tables */
         memset(serialState->ldmState.hashTable, 0, hashSize);
         memset(serialState->ldmState.bucketOffsets, 0, bucketSize);
+
+        /* Update window state and fill hash table with dict */
+        if (dictSize > 0) {
+            BYTE const* const dictEnd = (const BYTE*)dict + dictSize;
+            ZSTD_window_update(&serialState->ldmState.window, dict, dictSize);
+            ZSTD_ldm_fillHashTable(&serialState->ldmState, (const BYTE*)dict, dictEnd, &params.ldmParams);
+            serialState->ldmState.loadedDictEnd = params.forceWindow ? 0 : (U32)(dictEnd - serialState->ldmState.window.base);
+        }
+
+        /* Initialize serialState's copy of ldmWindow. */
+        serialState->ldmWindow = serialState->ldmState.window;
     }
 
-    /* Update window state and fill hash table with dict */
     if (params.ldmParams.enableLdm && dict) {
-        ZSTD_window_update(&serialState->ldmState.window, dict, dictSize);
-        ZSTD_ldm_fillHashTable(&serialState->ldmState, (const BYTE*)dict, (const BYTE*)dict + dictSize, &params.ldmParams);
     }
 
     serialState->params = params;