]> git.ipfire.org Git - thirdparty/zstd.git/commitdiff
Only load extra table positions for CDicts 1098/head
authorNick Terrell <terrelln@fb.com>
Mon, 2 Apr 2018 21:41:30 +0000 (14:41 -0700)
committerNick Terrell <terrelln@fb.com>
Mon, 2 Apr 2018 21:41:30 +0000 (14:41 -0700)
Zstdmt uses prefixes to load the overlap between segments. Loading extra
positions makes compression non-deterministic, depending on the previous
job the context was used for. Since loading extra position takes extra
time as well, only do it when creating a `ZSTD_CDict`.

Fixes #1077.

lib/compress/zstd_compress.c
lib/compress/zstd_compress_internal.h
lib/compress/zstd_double_fast.c
lib/compress/zstd_double_fast.h
lib/compress/zstd_fast.c
lib/compress/zstd_fast.h
lib/compress/zstd_ldm.c
lib/compress/zstdmt_compress.c

index 2aa26da4cd43a9422138e02d88280927b4e6030e..36b91030f54b52ed307421034240eab0a4f14e67 100644 (file)
@@ -2190,7 +2190,10 @@ size_t ZSTD_compressBlock(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const
 /*! ZSTD_loadDictionaryContent() :
  *  @return : 0, or an error code
  */
-static size_t ZSTD_loadDictionaryContent(ZSTD_matchState_t* ms, ZSTD_CCtx_params const* params, const void* src, size_t srcSize)
+static size_t ZSTD_loadDictionaryContent(ZSTD_matchState_t* ms,
+                                         ZSTD_CCtx_params const* params,
+                                         const void* src, size_t srcSize,
+                                         ZSTD_dictTableLoadMethod_e dtlm)
 {
     const BYTE* const ip = (const BYTE*) src;
     const BYTE* const iend = ip + srcSize;
@@ -2204,10 +2207,10 @@ static size_t ZSTD_loadDictionaryContent(ZSTD_matchState_t* ms, ZSTD_CCtx_params
     switch(params->cParams.strategy)
     {
     case ZSTD_fast:
-        ZSTD_fillHashTable(ms, cParams, iend);
+        ZSTD_fillHashTable(ms, cParams, iend, dtlm);
         break;
     case ZSTD_dfast:
-        ZSTD_fillDoubleHashTable(ms, cParams, iend);
+        ZSTD_fillDoubleHashTable(ms, cParams, iend, dtlm);
         break;
 
     case ZSTD_greedy:
@@ -2256,7 +2259,12 @@ static size_t ZSTD_checkDictNCount(short* normalizedCounter, unsigned dictMaxSym
  *  assumptions : magic number supposed already checked
  *                dictSize supposed > 8
  */
-static size_t ZSTD_loadZstdDictionary(ZSTD_compressedBlockState_t* bs, ZSTD_matchState_t* ms, ZSTD_CCtx_params const* params, const void* dict, size_t dictSize, void* workspace)
+static size_t ZSTD_loadZstdDictionary(ZSTD_compressedBlockState_t* bs,
+                                      ZSTD_matchState_t* ms,
+                                      ZSTD_CCtx_params const* params,
+                                      const void* dict, size_t dictSize,
+                                      ZSTD_dictTableLoadMethod_e dtlm,
+                                      void* workspace)
 {
     const BYTE* dictPtr = (const BYTE*)dict;
     const BYTE* const dictEnd = dictPtr + dictSize;
@@ -2336,7 +2344,7 @@ static size_t ZSTD_loadZstdDictionary(ZSTD_compressedBlockState_t* bs, ZSTD_matc
         bs->entropy.offcode_repeatMode = FSE_repeat_valid;
         bs->entropy.matchlength_repeatMode = FSE_repeat_valid;
         bs->entropy.litlength_repeatMode = FSE_repeat_valid;
-        CHECK_F(ZSTD_loadDictionaryContent(ms, params, dictPtr, dictContentSize));
+        CHECK_F(ZSTD_loadDictionaryContent(ms, params, dictPtr, dictContentSize, dtlm));
         return dictID;
     }
 }
@@ -2347,6 +2355,7 @@ static size_t ZSTD_compress_insertDictionary(ZSTD_compressedBlockState_t* bs, ZS
                                              ZSTD_CCtx_params const* params,
                                        const void* dict, size_t dictSize,
                                              ZSTD_dictContentType_e dictContentType,
+                                             ZSTD_dictTableLoadMethod_e dtlm,
                                              void* workspace)
 {
     DEBUGLOG(4, "ZSTD_compress_insertDictionary (dictSize=%u)", (U32)dictSize);
@@ -2356,12 +2365,12 @@ static size_t ZSTD_compress_insertDictionary(ZSTD_compressedBlockState_t* bs, ZS
 
     /* dict restricted modes */
     if (dictContentType == ZSTD_dct_rawContent)
-        return ZSTD_loadDictionaryContent(ms, params, dict, dictSize);
+        return ZSTD_loadDictionaryContent(ms, params, dict, dictSize, dtlm);
 
     if (MEM_readLE32(dict) != ZSTD_MAGIC_DICTIONARY) {
         if (dictContentType == ZSTD_dct_auto) {
             DEBUGLOG(4, "raw content dictionary detected");
-            return ZSTD_loadDictionaryContent(ms, params, dict, dictSize);
+            return ZSTD_loadDictionaryContent(ms, params, dict, dictSize, dtlm);
         }
         if (dictContentType == ZSTD_dct_fullDict)
             return ERROR(dictionary_wrong);
@@ -2369,7 +2378,7 @@ static size_t ZSTD_compress_insertDictionary(ZSTD_compressedBlockState_t* bs, ZS
     }
 
     /* dict as full zstd dictionary */
-    return ZSTD_loadZstdDictionary(bs, ms, params, dict, dictSize, workspace);
+    return ZSTD_loadZstdDictionary(bs, ms, params, dict, dictSize, dtlm, workspace);
 }
 
 /*! ZSTD_compressBegin_internal() :
@@ -2377,6 +2386,7 @@ static size_t ZSTD_compress_insertDictionary(ZSTD_compressedBlockState_t* bs, ZS
 size_t ZSTD_compressBegin_internal(ZSTD_CCtx* cctx,
                              const void* dict, size_t dictSize,
                              ZSTD_dictContentType_e dictContentType,
+                             ZSTD_dictTableLoadMethod_e dtlm,
                              const ZSTD_CDict* cdict,
                              ZSTD_CCtx_params params, U64 pledgedSrcSize,
                              ZSTD_buffered_policy_e zbuff)
@@ -2397,7 +2407,7 @@ size_t ZSTD_compressBegin_internal(ZSTD_CCtx* cctx,
     {
         size_t const dictID = ZSTD_compress_insertDictionary(
                 cctx->blockState.prevCBlock, &cctx->blockState.matchState,
-                &params, dict, dictSize, dictContentType, cctx->entropyWorkspace);
+                &params, dict, dictSize, dictContentType, dtlm, cctx->entropyWorkspace);
         if (ZSTD_isError(dictID)) return dictID;
         assert(dictID <= (size_t)(U32)-1);
         cctx->dictID = (U32)dictID;
@@ -2408,6 +2418,7 @@ size_t ZSTD_compressBegin_internal(ZSTD_CCtx* cctx,
 size_t ZSTD_compressBegin_advanced_internal(ZSTD_CCtx* cctx,
                                     const void* dict, size_t dictSize,
                                     ZSTD_dictContentType_e dictContentType,
+                                    ZSTD_dictTableLoadMethod_e dtlm,
                                     const ZSTD_CDict* cdict,
                                     ZSTD_CCtx_params params,
                                     unsigned long long pledgedSrcSize)
@@ -2416,7 +2427,7 @@ size_t ZSTD_compressBegin_advanced_internal(ZSTD_CCtx* cctx,
     /* compression parameters verification and optimization */
     CHECK_F( ZSTD_checkCParams(params.cParams) );
     return ZSTD_compressBegin_internal(cctx,
-                                       dict, dictSize, dictContentType,
+                                       dict, dictSize, dictContentType, dtlm,
                                        cdict,
                                        params, pledgedSrcSize,
                                        ZSTDb_not_buffered);
@@ -2431,7 +2442,7 @@ size_t ZSTD_compressBegin_advanced(ZSTD_CCtx* cctx,
     ZSTD_CCtx_params const cctxParams =
             ZSTD_assignParamsToCCtxParams(cctx->requestedParams, params);
     return ZSTD_compressBegin_advanced_internal(cctx,
-                                            dict, dictSize, ZSTD_dct_auto,
+                                            dict, dictSize, ZSTD_dct_auto, ZSTD_dtlm_fast,
                                             NULL /*cdict*/,
                                             cctxParams, pledgedSrcSize);
 }
@@ -2442,7 +2453,7 @@ size_t ZSTD_compressBegin_usingDict(ZSTD_CCtx* cctx, const void* dict, size_t di
     ZSTD_CCtx_params const cctxParams =
             ZSTD_assignParamsToCCtxParams(cctx->requestedParams, params);
     DEBUGLOG(4, "ZSTD_compressBegin_usingDict (dictSize=%u)", (U32)dictSize);
-    return ZSTD_compressBegin_internal(cctx, dict, dictSize, ZSTD_dct_auto, NULL,
+    return ZSTD_compressBegin_internal(cctx, dict, dictSize, ZSTD_dct_auto, ZSTD_dtlm_fast, NULL,
                                        cctxParams, ZSTD_CONTENTSIZE_UNKNOWN, ZSTDb_not_buffered);
 }
 
@@ -2553,7 +2564,7 @@ size_t ZSTD_compress_advanced_internal(
 {
     DEBUGLOG(4, "ZSTD_compress_advanced_internal (srcSize:%u)",
                 (U32)srcSize);
-    CHECK_F( ZSTD_compressBegin_internal(cctx, dict, dictSize, ZSTD_dct_auto, NULL,
+    CHECK_F( ZSTD_compressBegin_internal(cctx, dict, dictSize, ZSTD_dct_auto, ZSTD_dtlm_fast, NULL,
                                          params, srcSize, ZSTDb_not_buffered) );
     return ZSTD_compressEnd(cctx, dst, dstCapacity, src, srcSize);
 }
@@ -2654,7 +2665,7 @@ static size_t ZSTD_initCDict_internal(
         {   size_t const dictID = ZSTD_compress_insertDictionary(
                     &cdict->cBlockState, &cdict->matchState, &params,
                     cdict->dictContent, cdict->dictContentSize,
-                    dictContentType, cdict->workspace);
+                    dictContentType, ZSTD_dtlm_full, cdict->workspace);
             if (ZSTD_isError(dictID)) return dictID;
             assert(dictID <= (size_t)(U32)-1);
             cdict->dictID = (U32)dictID;
@@ -2799,7 +2810,7 @@ size_t ZSTD_compressBegin_usingCDict_advanced(
         }
         params.fParams = fParams;
         return ZSTD_compressBegin_internal(cctx,
-                                           NULL, 0, ZSTD_dct_auto,
+                                           NULL, 0, ZSTD_dct_auto, ZSTD_dtlm_fast,
                                            cdict,
                                            params, pledgedSrcSize,
                                            ZSTDb_not_buffered);
@@ -2889,7 +2900,7 @@ static size_t ZSTD_resetCStream_internal(ZSTD_CStream* cctx,
     assert(!((dict) && (cdict)));  /* either dict or cdict, not both */
 
     CHECK_F( ZSTD_compressBegin_internal(cctx,
-                                         dict, dictSize, dictContentType,
+                                         dict, dictSize, dictContentType, ZSTD_dtlm_fast,
                                          cdict,
                                          params, pledgedSrcSize,
                                          ZSTDb_buffered) );
index 81f12ca6df5e5f656859b127daa37bee2c5d54e5..0a19b3ecd8ea86c7efb48a6fa920229df71612ac 100644 (file)
@@ -235,6 +235,7 @@ struct ZSTD_CCtx_s {
 #endif
 };
 
+typedef enum { ZSTD_dtlm_fast, ZSTD_dtlm_full } ZSTD_dictTableLoadMethod_e;
 
 typedef size_t (*ZSTD_blockCompressor) (
         ZSTD_matchState_t* bs, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
@@ -640,7 +641,7 @@ MEM_STATIC U32 ZSTD_window_update(ZSTD_window_t* window,
  * ============================================================== */
 
 /* ZSTD_getCParamsFromCCtxParams() :
- * cParams are built depending on compressionLevel, src size hints, 
+ * cParams are built depending on compressionLevel, src size hints,
  * LDM and manually set compression parameters.
  */
 ZSTD_compressionParameters ZSTD_getCParamsFromCCtxParams(
@@ -672,6 +673,7 @@ ZSTD_compressionParameters ZSTD_getCParamsFromCDict(const ZSTD_CDict* cdict);
 size_t ZSTD_compressBegin_advanced_internal(ZSTD_CCtx* cctx,
                                     const void* dict, size_t dictSize,
                                     ZSTD_dictContentType_e dictContentType,
+                                    ZSTD_dictTableLoadMethod_e dtlm,
                                     const ZSTD_CDict* cdict,
                                     ZSTD_CCtx_params params,
                                     unsigned long long pledgedSrcSize);
index 86e6b39621b2a45cc425d03c5f9f1fcba655b2c7..a4feafbf2617f9f0014358143bb1c99b781078b4 100644 (file)
@@ -14,7 +14,7 @@
 
 void ZSTD_fillDoubleHashTable(ZSTD_matchState_t* ms,
                               ZSTD_compressionParameters const* cParams,
-                              void const* end)
+                              void const* end, ZSTD_dictTableLoadMethod_e dtlm)
 {
     U32* const hashLarge = ms->hashTable;
     U32  const hBitsL = cParams->hashLog;
@@ -40,6 +40,9 @@ void ZSTD_fillDoubleHashTable(ZSTD_matchState_t* ms,
                 hashSmall[smHash] = current + i;
             if (i == 0 || hashLarge[lgHash] == 0)
                 hashLarge[lgHash] = current + i;
+            /* Only load extra positions for ZSTD_dtlm_full */
+            if (dtlm == ZSTD_dtlm_fast)
+                break;
         }
     }
 }
index 6d80b2774c0e02ec50600b56bc11c6f191e98da3..3f5a24eb3760b05928a141debb23553f48848720 100644 (file)
@@ -20,7 +20,7 @@ extern "C" {
 
 void ZSTD_fillDoubleHashTable(ZSTD_matchState_t* ms,
                               ZSTD_compressionParameters const* cParams,
-                              void const* end);
+                              void const* end, ZSTD_dictTableLoadMethod_e dtlm);
 size_t ZSTD_compressBlock_doubleFast(
         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
         ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize);
index df4d28b3402bdf4302e72dfe1605aa7711d38002..22b84d1c608748233aa24df20c30f5b5879771a0 100644 (file)
@@ -14,7 +14,7 @@
 
 void ZSTD_fillHashTable(ZSTD_matchState_t* ms,
                         ZSTD_compressionParameters const* cParams,
-                        void const* end)
+                        void const* end, ZSTD_dictTableLoadMethod_e dtlm)
 {
     U32* const hashTable = ms->hashTable;
     U32  const hBits = cParams->hashLog;
@@ -34,6 +34,9 @@ void ZSTD_fillHashTable(ZSTD_matchState_t* ms,
             size_t const hash = ZSTD_hashPtr(ip + i, hBits, mls);
             if (i == 0 || hashTable[hash] == 0)
                 hashTable[hash] = current + i;
+            /* Only load extra positions for ZSTD_dtlm_full */
+            if (dtlm == ZSTD_dtlm_fast)
+                break;
         }
     }
 }
index f0438ad5b411dfb372719c88e41c56a6bbe94201..746849fcee96f45a9cbbdc530fc0df92c8d089a7 100644 (file)
@@ -20,7 +20,7 @@ extern "C" {
 
 void ZSTD_fillHashTable(ZSTD_matchState_t* ms,
                         ZSTD_compressionParameters const* cParams,
-                        void const* end);
+                        void const* end, ZSTD_dictTableLoadMethod_e dtlm);
 size_t ZSTD_compressBlock_fast(
         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
         ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize);
index bffd8a3dfaa50def364bf65522aab6c5f2d27ae7..9d825e695aed2c2eed5fe28cd259543f8fac1649 100644 (file)
@@ -224,12 +224,12 @@ static size_t ZSTD_ldm_fillFastTables(ZSTD_matchState_t* ms,
     switch(cParams->strategy)
     {
     case ZSTD_fast:
-        ZSTD_fillHashTable(ms, cParams, iend);
+        ZSTD_fillHashTable(ms, cParams, iend, ZSTD_dtlm_fast);
         ms->nextToUpdate = (U32)(iend - ms->window.base);
         break;
 
     case ZSTD_dfast:
-        ZSTD_fillDoubleHashTable(ms, cParams, iend);
+        ZSTD_fillDoubleHashTable(ms, cParams, iend, ZSTD_dtlm_fast);
         ms->nextToUpdate = (U32)(iend - ms->window.base);
         break;
 
index c7a205d8c757e0a6af197d06511831c7842daf99..62afdd6c27f3b1a66772e69391f0337c68a478e5 100644 (file)
@@ -625,7 +625,7 @@ void ZSTDMT_compressionJob(void* jobDescription)
 
     /* init */
     if (job->cdict) {
-        size_t const initError = ZSTD_compressBegin_advanced_internal(cctx, NULL, 0, ZSTD_dct_auto, job->cdict, jobParams, job->fullFrameSize);
+        size_t const initError = ZSTD_compressBegin_advanced_internal(cctx, NULL, 0, ZSTD_dct_auto, ZSTD_dtlm_fast, job->cdict, jobParams, job->fullFrameSize);
         assert(job->firstJob);  /* only allowed for first job */
         if (ZSTD_isError(initError)) { job->cSize = initError; goto _endJob; }
     } else {  /* srcStart points at reloaded section */
@@ -637,6 +637,7 @@ void ZSTDMT_compressionJob(void* jobDescription)
         }   }
         {   size_t const initError = ZSTD_compressBegin_advanced_internal(cctx,
                                         job->prefix.start, job->prefix.size, ZSTD_dct_rawContent, /* load dictionary in "content-only" mode (no header analysis) */
+                                        ZSTD_dtlm_fast,
                                         NULL, /*cdict*/
                                         jobParams, pledgedSrcSize);
             if (ZSTD_isError(initError)) {