git.ipfire.org Git - thirdparty/zstd.git/commitdiff
make it possible to specify LDM parameters in any order
author Yann Collet <cyan@fb.com>
Mon, 19 Mar 2018 18:07:04 +0000 (11:07 -0700)
committer Yann Collet <cyan@fb.com>
Mon, 19 Mar 2018 18:07:04 +0000 (11:07 -0700)
lib/compress/zstd_compress.c
lib/compress/zstd_ldm.c
lib/compress/zstd_ldm.h
lib/zstd.h

index e70c2a54da241b05739397c30699c9ee70911f42..42bcc4c0547bb596e77fa548aa278d5ec347c093 100644 (file)
@@ -478,17 +478,15 @@ size_t ZSTD_CCtxParam_setParameter(
         return ZSTD_ldm_initializeParameters(&CCtxParams->ldmParams, value);
 
     case ZSTD_p_ldmHashLog :
-        if (value) { /* 0 : does not change current ldmHashLog */
+        if (value>0)   /* 0 ==> auto */
             CLAMPCHECK(value, ZSTD_HASHLOG_MIN, ZSTD_HASHLOG_MAX);
-            CCtxParams->ldmParams.hashLog = value;
-        }
+        CCtxParams->ldmParams.hashLog = value;
         return CCtxParams->ldmParams.hashLog;
 
     case ZSTD_p_ldmMinMatch :
-        if (value) { /* 0 : does not change current ldmMinMatch */
+        if (value>0)   /* 0 ==> default */
             CLAMPCHECK(value, ZSTD_LDM_MINMATCH_MIN, ZSTD_LDM_MINMATCH_MAX);
-            CCtxParams->ldmParams.minMatchLength = value;
-        }
+        CCtxParams->ldmParams.minMatchLength = value;
         return CCtxParams->ldmParams.minMatchLength;
 
     case ZSTD_p_ldmBucketSizeLog :
@@ -988,8 +986,6 @@ static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc,
                                 zbuff, pledgedSrcSize)) {
             DEBUGLOG(4, "ZSTD_equivalentParams()==1 -> continue mode (wLog1=%u, blockSize1=%u)",
                         zc->appliedParams.cParams.windowLog, (U32)zc->blockSize);
-            assert(!(params.ldmParams.enableLdm &&
-                     params.ldmParams.hashEveryLog == ZSTD_LDM_HASHEVERYLOG_NOTSET));
             return ZSTD_continueCCtx(zc, params, pledgedSrcSize);
     }   }
     DEBUGLOG(4, "ZSTD_equivalentParams()==0 -> reset CCtx");
index fa395ed7daa9b0a5a79dbb83608cf88208561d38..28126071295fd0bb0a1e3bc1f7993eee6cd07b01 100644 (file)
@@ -21,10 +21,8 @@ size_t ZSTD_ldm_initializeParameters(ldmParams_t* params, U32 enableLdm)
 {
     ZSTD_STATIC_ASSERT(LDM_BUCKET_SIZE_LOG <= ZSTD_LDM_BUCKETSIZELOG_MAX);
     params->enableLdm = enableLdm>0;
-    params->hashLog = 0;
-    params->bucketSizeLog = LDM_BUCKET_SIZE_LOG;
-    params->minMatchLength = LDM_MIN_MATCH_LENGTH;
-    params->hashEveryLog = ZSTD_LDM_HASHEVERYLOG_NOTSET;
+    if (!params->bucketSizeLog) params->bucketSizeLog = LDM_BUCKET_SIZE_LOG;
+    if (!params->minMatchLength) params->minMatchLength = LDM_MIN_MATCH_LENGTH;
     return 0;
 }
 
@@ -32,6 +30,9 @@ void ZSTD_ldm_adjustParameters(ldmParams_t* params,
                                ZSTD_compressionParameters const* cParams)
 {
     U32 const windowLog = cParams->windowLog;
+    DEBUGLOG(4, "ZSTD_ldm_adjustParameters");
+    if (!params->bucketSizeLog) params->bucketSizeLog = LDM_BUCKET_SIZE_LOG;
+    if (!params->minMatchLength) params->minMatchLength = LDM_MIN_MATCH_LENGTH;
     if (cParams->strategy >= ZSTD_btopt) {
       /* Get out of the way of the optimal parser */
       U32 const minMatch = MAX(cParams->targetLength, params->minMatchLength);
@@ -43,7 +44,7 @@ void ZSTD_ldm_adjustParameters(ldmParams_t* params,
         params->hashLog = MAX(ZSTD_HASHLOG_MIN, windowLog - LDM_HASH_RLOG);
         assert(params->hashLog <= ZSTD_HASHLOG_MAX);
     }
-    if (params->hashEveryLog == ZSTD_LDM_HASHEVERYLOG_NOTSET) {
+    if (params->hashEveryLog == 0) {
         params->hashEveryLog =
                 windowLog < params->hashLog ? 0 : windowLog - params->hashLog;
     }
@@ -183,6 +184,7 @@ static U64 ZSTD_ldm_ipow(U64 base, U64 exp)
 }
 
 U64 ZSTD_ldm_getHashPower(U32 minMatchLength) {
+    DEBUGLOG(4, "ZSTD_ldm_getHashPower: mml=%u", minMatchLength);
     assert(minMatchLength >= ZSTD_LDM_MINMATCH_MIN);
     return ZSTD_ldm_ipow(prime8bytes, minMatchLength - 1);
 }
index 9d2f7c391770b72764a1218d13fdd3e4c9209777..d719a3c041e6e14668e1e7684bbfb10e6b2a3bfa 100644 (file)
@@ -22,7 +22,6 @@ extern "C" {
 ***************************************/
 
 #define ZSTD_LDM_DEFAULT_WINDOW_LOG ZSTD_WINDOWLOG_DEFAULTMAX
-#define ZSTD_LDM_HASHEVERYLOG_NOTSET 9999
 
 /**
  * ZSTD_ldm_generateSequences():
@@ -39,8 +38,8 @@ extern "C" {
  *       sequences.
  */
 size_t ZSTD_ldm_generateSequences(
-        ldmState_t* ldms, rawSeqStore_t* sequences,
-        ldmParams_t const* params, void const* src, size_t srcSize);
+            ldmState_t* ldms, rawSeqStore_t* sequences,
+            ldmParams_t const* params, void const* src, size_t srcSize);
 
 /**
  * ZSTD_ldm_blockCompress():
@@ -61,9 +60,10 @@ size_t ZSTD_ldm_generateSequences(
  * NOTE: This function does not return any errors.
  */
 size_t ZSTD_ldm_blockCompress(rawSeqStore_t* rawSeqStore,
-    ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
-    ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize,
-    int const extDict);
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        ZSTD_compressionParameters const* cParams,
+        void const* src, size_t srcSize,
+        int const extDict);
 
 
 /** ZSTD_ldm_initializeParameters() :
index 4b1b92c18d2e324e165bc494efafedcdd9a72a08..367eab905e4212f1b62817e4ab0b656c9fbfbd1a 100644 (file)
@@ -991,38 +991,33 @@ typedef enum {
                               * Special: value 0 means "do not change strategy". */
 
     ZSTD_p_enableLongDistanceMatching=160, /* Enable long distance matching.
-                                         * This parameter is designed to improve the compression
-                                         * ratio for large inputs with long distance matches.
-                                         * This increases the memory usage as well as window size.
-                                         * Note: setting this parameter sets all the LDM parameters
-                                         * as well as ZSTD_p_windowLog. It should be set after
-                                         * ZSTD_p_compressionLevel and before ZSTD_p_windowLog and
-                                         * other LDM parameters. Setting the compression level
-                                         * after this parameter overrides the window log, though LDM
-                                         * will remain enabled until explicitly disabled. */
+                                         * This parameter is designed to improve compression ratio
+                                         * for large inputs, thanks to long distance matches.
+                                         * It increases memory usage and window size.
+                                         * Note: setting this parameter sets ZSTD_p_windowLog.
+                                         * Setting compression level after LDM overrides the window log,
+                                         * though LDM will remain enabled until explicitly disabled. */
     ZSTD_p_ldmHashLog,       /* Size of the table for long distance matching, as a power of 2.
                               * Larger values increase memory usage and compression ratio, but decrease
                               * compression speed.
                               * Must be clamped between ZSTD_HASHLOG_MIN and ZSTD_HASHLOG_MAX
                               * (default: windowlog - 7).
-                              * Special: value 0 means "do not change ldmHashLog". */
+                              * Special: value 0 means "automatically determine hashlog". */
     ZSTD_p_ldmMinMatch,      /* Minimum size of searched matches for long distance matcher.
                               * Larger/too small values usually decrease compression ratio.
-                              * Must be clamped between ZSTD_LDM_MINMATCH_MIN
-                              * and ZSTD_LDM_MINMATCH_MAX (default: 64).
-                              * Special: value 0 means "do not change ldmMinMatch". */
+                              * Must be clamped between ZSTD_LDM_MINMATCH_MIN and ZSTD_LDM_MINMATCH_MAX.
+                              * Special: value 0 means "use default value" (default: 64). */
     ZSTD_p_ldmBucketSizeLog, /* Log size of each bucket in the LDM hash table for collision resolution.
-                              * Larger values usually improve collision resolution but may decrease
-                              * compression speed.
-                              * The maximum value is ZSTD_LDM_BUCKETSIZELOG_MAX (default: 3).
-                              * note : 0 is a valid value */
+                              * Larger values improve collision resolution but decrease compression speed.
+                              * The maximum value is ZSTD_LDM_BUCKETSIZELOG_MAX .
+                              * Special: value 0 means "use default value" (default: 3). */
     ZSTD_p_ldmHashEveryLog,  /* Frequency of inserting/looking up entries in the LDM hash table.
                               * The default is MAX(0, (windowLog - ldmHashLog)) to
                               * optimize hash table usage.
-                              * Larger values improve compression speed. Deviating far from the
-                              * default value will likely result in a decrease in compression ratio.
-                              * Must be clamped between 0 and ZSTD_WINDOWLOG_MAX - ZSTD_HASHLOG_MIN.
-                              * note : 0 is a valid value */
+                              * Larger values improve compression speed.
+                              * Deviating far from default value will likely result in a compression ratio decrease.
+                              * Must be clamped between 0 and (ZSTD_WINDOWLOG_MAX - ZSTD_HASHLOG_MIN).
+                              * Special: value 0 means "automatically determine hashEveryLog". */
 
     /* frame parameters */
     ZSTD_p_contentSizeFlag=200, /* Content size will be written into frame header _whenever known_ (default:1)