git.ipfire.org Git - thirdparty/zstd.git/commitdiff
Move hashEveryLog to cctxParams and update cli
author Stella Lau <laus@fb.com>
Fri, 1 Sep 2017 21:52:51 +0000 (14:52 -0700)
committer Stella Lau <laus@fb.com>
Fri, 1 Sep 2017 22:05:47 +0000 (15:05 -0700)
lib/common/zstd_internal.h
lib/compress/zstd_compress.c
lib/zstd.h
programs/bench.c
programs/bench.h
programs/fileio.c
programs/fileio.h
programs/zstdcli.c
tests/fuzzer.c
tests/zstreamtest.c

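diff --git a/lib/common/zstd_internal.h b/lib/common/zstd_internal.h
index b3d9a6c6ef72d24f167dfe3ab64f7e5badf01aad..cd4146462197392f6302a136b81de4351742dca6 100644
--- a/lib/common/zstd_internal.h
+++ b/lib/common/zstd_internal.h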
@@ -256,7 +256,6 @@ typedef struct {
 typedef struct {
     ldmEntry_t* hashTable;
     BYTE* bucketOffsets;    /* Next position in bucket to insert entry */
-    U32 hashEveryLog;       /* Log number of entries to skip */
     U64 hashPower;          /* Used to compute the rolling hash.
                              * Depends on ldmParams.minMatchLength */
 } ldmState_t;
@@ -266,6 +265,7 @@ typedef struct {
     U32 hashLog;            /* Log size of hashTable */
     U32 bucketLog;          /* Log number of buckets, at most 4 */
     U32 minMatchLength;     /* Minimum match length */
+    U32 hashEveryLog;       /* Log number of entries to skip */
 } ldmParams_t;
 
 typedef struct {
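diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c
index e74787f3704e7e9d92e97ea614926c94cfe541ab..fc8e9b0f3a09ff8920f17a82f9dc42cc0fdc07c3 100644
--- a/lib/compress/zstd_compress.c
+++ b/lib/compress/zstd_compress.c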
@@ -42,6 +42,7 @@ typedef enum { ZSTDcs_created=0, ZSTDcs_init, ZSTDcs_ongoing, ZSTDcs_ending } ZS
 #define LDM_WINDOW_LOG 27
 #define LDM_HASH_LOG 20
 #define LDM_HASH_CHAR_OFFSET 10
+#define LDM_HASHEVERYLOG_NOTSET 9999
 
 
 /*-*************************************
@@ -320,6 +321,7 @@ static size_t ZSTD_ldm_initializeParameters(ldmParams_t* params, U32 enableLdm)
     params->hashLog = LDM_HASH_LOG;
     params->bucketLog = LDM_BUCKET_SIZE_LOG;
     params->minMatchLength = LDM_MIN_MATCH_LENGTH;
+    params->hashEveryLog = LDM_HASHEVERYLOG_NOTSET;
     return 0;
 }
 
@@ -385,6 +387,10 @@ size_t ZSTD_CCtx_setParameter(ZSTD_CCtx* cctx, ZSTD_cParameter param, unsigned v
         if (cctx->cdict) return ERROR(stage_wrong);
         return ZSTD_CCtxParam_setParameter(&cctx->requestedParams, param, value);
 
+    case ZSTD_p_ldmHashEveryLog:
+        if (cctx->cdict) return ERROR(stage_wrong);
+        return ZSTD_CCtxParam_setParameter(&cctx->requestedParams, param, value);
+
     default: return ERROR(parameter_unsupported);
     }
 }
@@ -503,6 +509,13 @@ size_t ZSTD_CCtxParam_setParameter(
         params->ldmParams.minMatchLength = value;
         return 0;
 
+    case ZSTD_p_ldmHashEveryLog :
+        if (value > ZSTD_WINDOWLOG_MAX - ZSTD_HASHLOG_MIN) {
+            return ERROR(parameter_outOfBound);
+        }
+        params->ldmParams.hashEveryLog = value;
+        return 0;
+
     default: return ERROR(parameter_unsupported);
     }
 }
@@ -538,7 +551,7 @@ size_t ZSTD_CCtx_setParametersUsingCCtxParams(
                     cctx, ZSTD_p_overlapSizeLog, params->overlapSizeLog) );
     }
 
-    /* Copy long distance matching parameter */
+    /* Copy long distance matching parameters */
     cctx->requestedParams.ldmParams = params->ldmParams;
 
     /* customMem is used only for create/free params and can be ignored */
@@ -742,7 +755,6 @@ size_t ZSTD_estimateCCtxSize_advanced_usingCCtxParams(const ZSTD_CCtx_params* pa
                 + (ZSTD_OPT_NUM+1)*(sizeof(ZSTD_match_t) + sizeof(ZSTD_optimal_t));
         size_t const optSpace = ((cParams.strategy == ZSTD_btopt) || (cParams.strategy == ZSTD_btultra)) ? optBudget : 0;
 
-        /* Ldm parameters can not currently be changed */
         size_t const ldmSpace = params->ldmParams.enableLdm ?
             ZSTD_ldm_getTableSize(params->ldmParams.hashLog,
                                   params->ldmParams.bucketLog) : 0;
@@ -813,7 +825,8 @@ static U32 ZSTD_equivalentLdmParams(ldmParams_t ldmParams1,
            (ldmParams1.enableLdm == ldmParams2.enableLdm &&
             ldmParams1.hashLog == ldmParams2.hashLog &&
             ldmParams1.bucketLog == ldmParams2.bucketLog &&
-            ldmParams1.minMatchLength == ldmParams2.minMatchLength);
+            ldmParams1.minMatchLength == ldmParams2.minMatchLength &&
+            ldmParams1.hashEveryLog == ldmParams2.hashEveryLog);
 }
 
 /** Equivalence for resetCCtx purposes */
@@ -866,6 +879,8 @@ static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc,
     if (crp == ZSTDcrp_continue) {
         if (ZSTD_equivalentParams(params, zc->appliedParams)) {
             DEBUGLOG(5, "ZSTD_equivalentParams()==1");
+            assert(!(params.ldmParams.enableLdm &&
+                     params.ldmParams.hashEveryLog == LDM_HASHEVERYLOG_NOTSET));
             zc->entropy->hufCTable_repeatMode = HUF_repeat_none;
             zc->entropy->offcode_repeatMode = FSE_repeat_none;
             zc->entropy->matchlength_repeatMode = FSE_repeat_none;
@@ -874,9 +889,11 @@ static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc,
     }   }
 
     if (params.ldmParams.enableLdm) {
-        zc->ldmState.hashEveryLog =
-            params.cParams.windowLog < params.ldmParams.hashLog ?
-                0 : params.cParams.windowLog - params.ldmParams.hashLog;
+        if (params.ldmParams.hashEveryLog == LDM_HASHEVERYLOG_NOTSET) {
+            params.ldmParams.hashEveryLog =
+                    params.cParams.windowLog < params.ldmParams.hashLog ?
+                    0 : params.cParams.windowLog - params.ldmParams.hashLog;
+        }
         zc->ldmState.hashPower =
                 ZSTD_ldm_getHashPower(params.ldmParams.minMatchLength);
     }
@@ -3159,19 +3176,19 @@ static void ZSTD_ldm_insertEntry(ldmState_t* ldmState,
  *
  *  Gets the small hash, checksum, and tag from the rollingHash.
  *
- *  If the tag matches (1 << ldmState->hashEveryLog)-1, then
+ *  If the tag matches (1 << ldmParams.hashEveryLog)-1, then
  *  creates an ldmEntry from the offset, and inserts it into the hash table.
  *
  *  hBits is the length of the small hash, which is the most significant hBits
  *  of rollingHash. The checksum is the next 32 most significant bits, followed
- *  by ldmState->hashEveryLog bits that make up the tag. */
+ *  by ldmParams.hashEveryLog bits that make up the tag. */
 static void ZSTD_ldm_makeEntryAndInsertByTag(ldmState_t* ldmState,
                                              U64 rollingHash, U32 hBits,
                                              U32 const offset,
                                              ldmParams_t const ldmParams)
 {
-    U32 const tag = ZSTD_ldm_getTag(rollingHash, hBits, ldmState->hashEveryLog);
-    U32 const tagMask = (1 << ldmState->hashEveryLog) - 1;
+    U32 const tag = ZSTD_ldm_getTag(rollingHash, hBits, ldmParams.hashEveryLog);
+    U32 const tagMask = (1 << ldmParams.hashEveryLog) - 1;
     if (tag == tagMask) {
         U32 const hash = ZSTD_ldm_getSmallHash(rollingHash, hBits);
         U32 const checksum = ZSTD_ldm_getChecksum(rollingHash, hBits);
@@ -3349,7 +3366,7 @@ size_t ZSTD_compressBlock_ldm_generic(ZSTD_CCtx* cctx,
     const U64 hashPower = ldmState->hashPower;
     const U32 hBits = ldmParams.hashLog - ldmParams.bucketLog;
     const U32 ldmBucketSize = (1 << ldmParams.bucketLog);
-    const U32 ldmTagMask = (1 << ldmState->hashEveryLog) - 1;
+    const U32 ldmTagMask = (1 << ldmParams.hashEveryLog) - 1;
     seqStore_t* const seqStorePtr = &(cctx->seqStore);
     const BYTE* const base = cctx->base;
     const BYTE* const istart = (const BYTE*)src;
@@ -3388,7 +3405,7 @@ size_t ZSTD_compressBlock_ldm_generic(ZSTD_CCtx* cctx,
         lastHashed = ip;
 
         /* Do not insert and do not look for a match */
-        if (ZSTD_ldm_getTag(rollingHash, hBits, ldmState->hashEveryLog) !=
+        if (ZSTD_ldm_getTag(rollingHash, hBits, ldmParams.hashEveryLog) !=
                 ldmTagMask) {
            ip++;
            continue;
@@ -3546,12 +3563,12 @@ static size_t ZSTD_compressBlock_ldm_extDict_generic(
                                  ZSTD_CCtx* ctx,
                                  const void* src, size_t srcSize)
 {
-    ldmState_t* ldmState = &(ctx->ldmState);
+    ldmState_t* const ldmState = &(ctx->ldmState);
     const ldmParams_t ldmParams = ctx->appliedParams.ldmParams;
     const U64 hashPower = ldmState->hashPower;
     const U32 hBits = ldmParams.hashLog - ldmParams.bucketLog;
     const U32 ldmBucketSize = (1 << ldmParams.bucketLog);
-    const U32 ldmTagMask = (1 << ctx->ldmState.hashEveryLog) - 1;
+    const U32 ldmTagMask = (1 << ldmParams.hashEveryLog) - 1;
     seqStore_t* const seqStorePtr = &(ctx->seqStore);
     const BYTE* const base = ctx->base;
     const BYTE* const dictBase = ctx->dictBase;
@@ -3594,7 +3611,7 @@ static size_t ZSTD_compressBlock_ldm_extDict_generic(
         }
         lastHashed = ip;
 
-        if (ZSTD_ldm_getTag(rollingHash, hBits, ldmState->hashEveryLog) !=
+        if (ZSTD_ldm_getTag(rollingHash, hBits, ldmParams.hashEveryLog) !=
                 ldmTagMask) {
             /* Don't insert and don't look for a match */
            ip++;
diff --git a/lib/zstd.h b/lib/zstd.h
index bed583c929393297c7c530d114391d3ec44fad0b..a7ad9771b88f155215c2e03411886a6a15eeace3 100644
--- a/lib/zstd.h
+++ b/lib/zstd.h
@@ -984,11 +984,16 @@ typedef enum {
                                    * ZSTD_p_compressionLevel and before
                                    * ZSTD_p_windowLog and other LDM parameters. */
     ZSTD_p_ldmHashLog,   /* Size of the table for long distance matching.
-                           * Must be clamped between ZSTD_HASHLOG_MIN and
-                           * ZSTD_HASHLOG_MAX */
+                          * Must be clamped between ZSTD_HASHLOG_MIN and
+                          * ZSTD_HASHLOG_MAX */
     ZSTD_p_ldmMinMatch,  /* Minimum size of searched matches for long distance matcher.
-                           * Must be clamped between ZSTD_LDM_SEARCHLENGTH_MIN
-                           * and ZSTD_LDM_SEARCHLENGTH_MAX. */
+                          * Must be clamped between ZSTD_LDM_SEARCHLENGTH_MIN
+                          * and ZSTD_LDM_SEARCHLENGTH_MAX. */
+    ZSTD_p_ldmHashEveryLog,  /* Frequency of inserting/looking up entries in the
+                              * LDM hash table. The default is
+                              * (windowLog - ldmHashLog) to optimize hash table
+                              * usage. Must be clamped between 0 and
+                              * ZSTD_WINDOWLOG_MAX - ZSTD_HASHLOG_MIN. */
 
 } ZSTD_cParameter;
 
diff --git a/programs/bench.c b/programs/bench.c
index d77f9a20dc0c5a966ef6c1de4393155c492f2540..2a2510a2cfe1d44444c0800edd917988f694d069 100644
--- a/programs/bench.c
+++ b/programs/bench.c
@@ -134,6 +134,23 @@ void BMK_setLdmFlag(unsigned ldmFlag) {
     g_ldmFlag = ldmFlag;
 }
 
+static U32 g_ldmMinMatch = 0;
+void BMK_setLdmMinMatch(unsigned ldmMinMatch) {
+    g_ldmMinMatch = ldmMinMatch;
+}
+
+static U32 g_ldmHashLog = 0;
+void BMK_setLdmHashLog(unsigned ldmHashLog) {
+    g_ldmHashLog = ldmHashLog;
+}
+
+#define BMK_LDM_HASHEVERYLOG_NOTSET 9999
+static U32 g_ldmHashEveryLog = BMK_LDM_HASHEVERYLOG_NOTSET;
+void BMK_setLdmHashEveryLog(unsigned ldmHashEveryLog) {
+    g_ldmHashEveryLog = ldmHashEveryLog;
+}
+
+
 /* ********************************************************
 *  Bench functions
 **********************************************************/
@@ -270,6 +287,11 @@ static int BMK_benchMem(const void* srcBuffer, size_t srcSize,
                     ZSTD_CCtx_setParameter(ctx, ZSTD_p_nbThreads, g_nbThreads);
                     ZSTD_CCtx_setParameter(ctx, ZSTD_p_compressionLevel, cLevel);
                     ZSTD_CCtx_setParameter(ctx, ZSTD_p_longDistanceMatching, g_ldmFlag);
+                    ZSTD_CCtx_setParameter(ctx, ZSTD_p_ldmMinMatch, g_ldmMinMatch);
+                    ZSTD_CCtx_setParameter(ctx, ZSTD_p_ldmHashLog, g_ldmHashLog);
+                    if (g_ldmHashEveryLog != BMK_LDM_HASHEVERYLOG_NOTSET) {
+                      ZSTD_CCtx_setParameter(ctx, ZSTD_p_ldmHashEveryLog, g_ldmHashEveryLog);
+                    }
                     ZSTD_CCtx_setParameter(ctx, ZSTD_p_windowLog, comprParams->windowLog);
                     ZSTD_CCtx_setParameter(ctx, ZSTD_p_chainLog, comprParams->chainLog);
                     ZSTD_CCtx_setParameter(ctx, ZSTD_p_searchLog, comprParams->searchLog);
diff --git a/programs/bench.h b/programs/bench.h
index 7fb73c4d8d115d949c06660f4e94827ae22e42af..04d220a92679feb5866cd3f267b215853cfe886f 100644
--- a/programs/bench.h
+++ b/programs/bench.h
@@ -26,5 +26,8 @@ void BMK_setNotificationLevel(unsigned level);
 void BMK_setAdditionalParam(int additionalParam);
 void BMK_setDecodeOnlyMode(unsigned decodeFlag);
 void BMK_setLdmFlag(unsigned ldmFlag);
+void BMK_setLdmMinMatch(unsigned ldmMinMatch);
+void BMK_setLdmHashLog(unsigned ldmHashLog);
+void BMK_setLdmHashEveryLog(unsigned ldmHashEveryLog);
 
 #endif   /* BENCH_H_121279284357 */
diff --git a/programs/fileio.c b/programs/fileio.c
index c86b25339b75ecd72f6be1ed34b8db3df0c1d394..fc390afedccc40fbe9c22de299c93c8f85b71b49 100644
--- a/programs/fileio.c
+++ b/programs/fileio.c
@@ -217,6 +217,20 @@ static U32 g_ldmFlag = 0;
 void FIO_setLdmFlag(unsigned ldmFlag) {
     g_ldmFlag = (ldmFlag>0);
 }
+static U32 g_ldmHashLog = 0;
+void FIO_setLdmHashLog(unsigned ldmHashLog) {
+    g_ldmHashLog = ldmHashLog;
+}
+static U32 g_ldmMinMatch = 0;
+void FIO_setLdmMinMatch(unsigned ldmMinMatch) {
+    g_ldmMinMatch = ldmMinMatch;
+}
+#define FIO_LDM_HASHEVERYLOG_NOTSET 9999
+static U32 g_ldmHashEveryLog = FIO_LDM_HASHEVERYLOG_NOTSET;
+void FIO_setLdmHashEveryLog(unsigned ldmHashEveryLog) {
+    g_ldmHashEveryLog = ldmHashEveryLog;
+}
+
 
 
 /*-*************************************
@@ -406,6 +420,11 @@ static cRess_t FIO_createCResources(const char* dictFileName, int cLevel,
             CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_p_compressionLevel, cLevel) );
             /* long distance matching */
             CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_p_longDistanceMatching, g_ldmFlag) );
+            CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_p_ldmHashLog, g_ldmHashLog) );
+            CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_p_ldmMinMatch, g_ldmMinMatch) );
+            if (g_ldmHashEveryLog != FIO_LDM_HASHEVERYLOG_NOTSET) {
+                CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_p_ldmHashEveryLog, g_ldmHashEveryLog) );
+            }
             /* compression parameters */
             CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_p_windowLog, comprParams->windowLog) );
             CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_p_chainLog, comprParams->chainLog) );
diff --git a/programs/fileio.h b/programs/fileio.h
index 7e200b06e5622686f56708dc7a27253efd38a74e..fabb46db9547fb83e0b9a28b5e7ef0a1f1f60535 100644
--- a/programs/fileio.h
+++ b/programs/fileio.h
@@ -57,6 +57,9 @@ void FIO_setNbThreads(unsigned nbThreads);
 void FIO_setBlockSize(unsigned blockSize);
 void FIO_setOverlapLog(unsigned overlapLog);
 void FIO_setLdmFlag(unsigned ldmFlag);
+void FIO_setLdmHashLog(unsigned ldmHashLog);
+void FIO_setLdmMinMatch(unsigned ldmMinMatch);
+void FIO_setLdmHashEveryLog(unsigned ldmHashEveryLog);
 
 
 /*-*************************************
diff --git a/programs/zstdcli.c b/programs/zstdcli.c
index b5fd1be5a0d4ab31aede51b4e76d14c44b584af8..78d6b339e536788251afbc1ac1006b184b78bc14 100644
--- a/programs/zstdcli.c
+++ b/programs/zstdcli.c
@@ -72,7 +72,11 @@ static const unsigned g_defaultMaxDictSize = 110 KB;
 static const int      g_defaultDictCLevel = 3;
 static const unsigned g_defaultSelectivityLevel = 9;
 #define OVERLAP_LOG_DEFAULT 9999
+#define LDM_HASHEVERYLOG_DEFAULT 9999
 static U32 g_overlapLog = OVERLAP_LOG_DEFAULT;
+static U32 g_ldmHashLog = 0;
+static U32 g_ldmMinMatch = 0;
+static U32 g_ldmHashEveryLog = LDM_HASHEVERYLOG_DEFAULT;
 
 
 /*-************************************
@@ -305,6 +309,9 @@ static unsigned parseCompressionParameters(const char* stringPtr, ZSTD_compressi
         if (longCommandWArg(&stringPtr, "targetLength=") || longCommandWArg(&stringPtr, "tlen=")) { params->targetLength = readU32FromChar(&stringPtr); if (stringPtr[0]==',') { stringPtr++; continue; } else break; }
         if (longCommandWArg(&stringPtr, "strategy=") || longCommandWArg(&stringPtr, "strat=")) { params->strategy = (ZSTD_strategy)(readU32FromChar(&stringPtr)); if (stringPtr[0]==',') { stringPtr++; continue; } else break; }
         if (longCommandWArg(&stringPtr, "overlapLog=") || longCommandWArg(&stringPtr, "ovlog=")) { g_overlapLog = readU32FromChar(&stringPtr); if (stringPtr[0]==',') { stringPtr++; continue; } else break; }
+        if (longCommandWArg(&stringPtr, "ldmHashLog=") || longCommandWArg(&stringPtr, "ldmHlog=")) { g_ldmHashLog = readU32FromChar(&stringPtr); if (stringPtr[0]==',') { stringPtr++; continue; } else break; }
+        if (longCommandWArg(&stringPtr, "ldmSearchLength=") || longCommandWArg(&stringPtr, "ldmSlen=")) { g_ldmMinMatch = readU32FromChar(&stringPtr); if (stringPtr[0]==',') { stringPtr++; continue; } else break; }
+        if (longCommandWArg(&stringPtr, "ldmHashEveryLog=")) { g_ldmHashEveryLog = readU32FromChar(&stringPtr); if (stringPtr[0]==',') { stringPtr++; continue; } else break; }
         return 0;
     }
 
@@ -724,6 +731,9 @@ int main(int argCount, const char* argv[])
         BMK_setNbThreads(nbThreads);
         BMK_setNbSeconds(bench_nbSeconds);
         BMK_setLdmFlag(ldmFlag);
+        BMK_setLdmMinMatch(g_ldmMinMatch);
+        BMK_setLdmHashLog(g_ldmHashLog);
+        BMK_setLdmHashEveryLog(g_ldmHashEveryLog);
         BMK_benchFiles(filenameTable, filenameIdx, dictFileName, cLevel, cLevelLast, &compressionParams, setRealTimePrio);
 #endif
         (void)bench_nbSeconds; (void)blockSize; (void)setRealTimePrio;
@@ -792,6 +802,12 @@ int main(int argCount, const char* argv[])
         FIO_setNbThreads(nbThreads);
         FIO_setBlockSize((U32)blockSize);
         FIO_setLdmFlag(ldmFlag);
+        FIO_setLdmHashLog(g_ldmHashLog);
+        FIO_setLdmMinMatch(g_ldmMinMatch);
+        if (g_ldmHashEveryLog != LDM_HASHEVERYLOG_DEFAULT) {
+            FIO_setLdmHashEveryLog(g_ldmHashEveryLog);
+        }
+
         if (g_overlapLog!=OVERLAP_LOG_DEFAULT) FIO_setOverlapLog(g_overlapLog);
         if ((filenameIdx==1) && outFileName)
           operationResult = FIO_compressFilename(outFileName, filenameTable[0], dictFileName, cLevel, &compressionParams);
diff --git a/tests/fuzzer.c b/tests/fuzzer.c
index 3b3e23b3897f957b81a569e4c0496dc845384075..b2349870570fb2fef2608c7d248052f01dc9eb7e 100644
--- a/tests/fuzzer.c
+++ b/tests/fuzzer.c
@@ -1342,7 +1342,6 @@ static int fuzzerTests(U32 seed, U32 nbTests, unsigned startTest, U32 const maxD
             dictSize = FUZ_rLogLength(&lseed, dictLog);   /* needed also for decompression */
             dict = srcBuffer + (FUZ_rand(&lseed) % (srcBufferSize - dictSize));
 
-            CHECK_Z ( ZSTD_CCtx_setParameter(refCtx, ZSTD_p_longDistanceMatching, FUZ_rand(&lseed)&255) );
             if (FUZ_rand(&lseed) & 0xF) {
                 CHECK_Z ( ZSTD_compressBegin_usingDict(refCtx, dict, dictSize, cLevel) );
             } else {
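diff --git a/tests/zstreamtest.c b/tests/zstreamtest.c
index dedb7eb39302c2b06d384e2dd05ef6913901ebf9..f248d2609b7bc84731c41048828285a0602eea8f 100644
--- a/tests/zstreamtest.c
+++ b/tests/zstreamtest.c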
@@ -1381,8 +1381,6 @@ static int fuzzerTests_newAPI(U32 seed, U32 nbTests, unsigned startTest, double
                 if (FUZ_rand(&lseed) & 1) CHECK_Z( setCCtxParameter(zc, cctxParams, ZSTD_p_targetLength, cParams.targetLength, useOpaqueAPI) );
 
                 if (FUZ_rand(&lseed) & 1) CHECK_Z( setCCtxParameter(zc, cctxParams, ZSTD_p_longDistanceMatching, FUZ_rand(&lseed) & 63, useOpaqueAPI) );
-                if (FUZ_rand(&lseed) & 7) CHECK_Z( setCCtxParameter(zc, cctxParams, ZSTD_p_ldmMinMatch, FUZ_rand(&lseed) % 128 + 4, useOpaqueAPI ) );
-                if (FUZ_rand(&lseed) & 7) CHECK_Z( setCCtxParameter(zc, cctxParams, ZSTD_p_ldmHashLog, FUZ_rand(&lseed) % 18 + 10,  useOpaqueAPI ) );
 
                 /* unconditionally set, to be sync with decoder */
                 /* mess with frame parameters */