From: Yann Collet Date: Mon, 19 Mar 2018 18:07:04 +0000 (-0700) Subject: make it possible to specify LDM parameters in any order X-Git-Tag: v1.3.4~1^2~12^2~3 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=9618c0c8045e95e8c30b3bf2cfc5b9dd0fc61cad;p=thirdparty%2Fzstd.git make it possible to specify LDM parameters in any order --- diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index e70c2a54d..42bcc4c05 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -478,17 +478,15 @@ size_t ZSTD_CCtxParam_setParameter( return ZSTD_ldm_initializeParameters(&CCtxParams->ldmParams, value); case ZSTD_p_ldmHashLog : - if (value) { /* 0 : does not change current ldmHashLog */ + if (value>0) /* 0 ==> auto */ CLAMPCHECK(value, ZSTD_HASHLOG_MIN, ZSTD_HASHLOG_MAX); - CCtxParams->ldmParams.hashLog = value; - } + CCtxParams->ldmParams.hashLog = value; return CCtxParams->ldmParams.hashLog; case ZSTD_p_ldmMinMatch : - if (value) { /* 0 : does not change current ldmMinMatch */ + if (value>0) /* 0 ==> default */ CLAMPCHECK(value, ZSTD_LDM_MINMATCH_MIN, ZSTD_LDM_MINMATCH_MAX); - CCtxParams->ldmParams.minMatchLength = value; - } + CCtxParams->ldmParams.minMatchLength = value; return CCtxParams->ldmParams.minMatchLength; case ZSTD_p_ldmBucketSizeLog : @@ -988,8 +986,6 @@ static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc, zbuff, pledgedSrcSize)) { DEBUGLOG(4, "ZSTD_equivalentParams()==1 -> continue mode (wLog1=%u, blockSize1=%u)", zc->appliedParams.cParams.windowLog, (U32)zc->blockSize); - assert(!(params.ldmParams.enableLdm && - params.ldmParams.hashEveryLog == ZSTD_LDM_HASHEVERYLOG_NOTSET)); return ZSTD_continueCCtx(zc, params, pledgedSrcSize); } } DEBUGLOG(4, "ZSTD_equivalentParams()==0 -> reset CCtx"); diff --git a/lib/compress/zstd_ldm.c b/lib/compress/zstd_ldm.c index fa395ed7d..281260712 100644 --- a/lib/compress/zstd_ldm.c +++ b/lib/compress/zstd_ldm.c @@ -21,10 +21,8 @@ size_t ZSTD_ldm_initializeParameters(ldmParams_t* params, U32 enableLdm) { ZSTD_STATIC_ASSERT(LDM_BUCKET_SIZE_LOG <= ZSTD_LDM_BUCKETSIZELOG_MAX); params->enableLdm = enableLdm>0; - params->hashLog = 0; - params->bucketSizeLog = LDM_BUCKET_SIZE_LOG; - params->minMatchLength = LDM_MIN_MATCH_LENGTH; - params->hashEveryLog = ZSTD_LDM_HASHEVERYLOG_NOTSET; + if (!params->bucketSizeLog) params->bucketSizeLog = LDM_BUCKET_SIZE_LOG; + if (!params->minMatchLength) params->minMatchLength = LDM_MIN_MATCH_LENGTH; return 0; } @@ -32,6 +30,9 @@ void ZSTD_ldm_adjustParameters(ldmParams_t* params, ZSTD_compressionParameters const* cParams) { U32 const windowLog = cParams->windowLog; + DEBUGLOG(4, "ZSTD_ldm_adjustParameters"); + if (!params->bucketSizeLog) params->bucketSizeLog = LDM_BUCKET_SIZE_LOG; + if (!params->minMatchLength) params->minMatchLength = LDM_MIN_MATCH_LENGTH; if (cParams->strategy >= ZSTD_btopt) { /* Get out of the way of the optimal parser */ U32 const minMatch = MAX(cParams->targetLength, params->minMatchLength); @@ -43,7 +44,7 @@ void ZSTD_ldm_adjustParameters(ldmParams_t* params, params->hashLog = MAX(ZSTD_HASHLOG_MIN, windowLog - LDM_HASH_RLOG); assert(params->hashLog <= ZSTD_HASHLOG_MAX); } - if (params->hashEveryLog == ZSTD_LDM_HASHEVERYLOG_NOTSET) { + if (params->hashEveryLog == 0) { params->hashEveryLog = windowLog < params->hashLog ? 0 : windowLog - params->hashLog; } @@ -183,6 +184,7 @@ static U64 ZSTD_ldm_ipow(U64 base, U64 exp) } U64 ZSTD_ldm_getHashPower(U32 minMatchLength) { + DEBUGLOG(4, "ZSTD_ldm_getHashPower: mml=%u", minMatchLength); assert(minMatchLength >= ZSTD_LDM_MINMATCH_MIN); return ZSTD_ldm_ipow(prime8bytes, minMatchLength - 1); } diff --git a/lib/compress/zstd_ldm.h b/lib/compress/zstd_ldm.h index 9d2f7c391..d719a3c04 100644 --- a/lib/compress/zstd_ldm.h +++ b/lib/compress/zstd_ldm.h @@ -22,7 +22,6 @@ extern "C" { ***************************************/ #define ZSTD_LDM_DEFAULT_WINDOW_LOG ZSTD_WINDOWLOG_DEFAULTMAX -#define ZSTD_LDM_HASHEVERYLOG_NOTSET 9999 /** * ZSTD_ldm_generateSequences(): @@ -39,8 +38,8 @@ extern "C" { * sequences. */ size_t ZSTD_ldm_generateSequences( - ldmState_t* ldms, rawSeqStore_t* sequences, - ldmParams_t const* params, void const* src, size_t srcSize); + ldmState_t* ldms, rawSeqStore_t* sequences, + ldmParams_t const* params, void const* src, size_t srcSize); /** * ZSTD_ldm_blockCompress(): @@ -61,9 +60,10 @@ size_t ZSTD_ldm_generateSequences( * NOTE: This function does not return any errors. */ size_t ZSTD_ldm_blockCompress(rawSeqStore_t* rawSeqStore, - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize, - int const extDict); + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + ZSTD_compressionParameters const* cParams, + void const* src, size_t srcSize, + int const extDict); /** ZSTD_ldm_initializeParameters() : diff --git a/lib/zstd.h b/lib/zstd.h index 4b1b92c18..367eab905 100644 --- a/lib/zstd.h +++ b/lib/zstd.h @@ -991,38 +991,33 @@ typedef enum { * Special: value 0 means "do not change strategy". */ ZSTD_p_enableLongDistanceMatching=160, /* Enable long distance matching. - * This parameter is designed to improve the compression - * ratio for large inputs with long distance matches. - * This increases the memory usage as well as window size. - * Note: setting this parameter sets all the LDM parameters - * as well as ZSTD_p_windowLog. It should be set after - * ZSTD_p_compressionLevel and before ZSTD_p_windowLog and - * other LDM parameters. Setting the compression level - * after this parameter overrides the window log, though LDM - * will remain enabled until explicitly disabled. */ + * This parameter is designed to improve compression ratio + * for large inputs, thanks to long distance matches. + * It increases memory usage and window size. + * Note: setting this parameter sets ZSTD_p_windowLog. + * Setting compression level after LDM overrides the window log, + * though LDM will remain enabled until explicitly disabled. */ ZSTD_p_ldmHashLog, /* Size of the table for long distance matching, as a power of 2. * Larger values increase memory usage and compression ratio, but decrease * compression speed. * Must be clamped between ZSTD_HASHLOG_MIN and ZSTD_HASHLOG_MAX * (default: windowlog - 7). - * Special: value 0 means "do not change ldmHashLog". */ + * Special: value 0 means "automatically determine hashlog". */ ZSTD_p_ldmMinMatch, /* Minimum size of searched matches for long distance matcher. * Larger/too small values usually decrease compression ratio. - * Must be clamped between ZSTD_LDM_MINMATCH_MIN - * and ZSTD_LDM_MINMATCH_MAX (default: 64). - * Special: value 0 means "do not change ldmMinMatch". */ + * Must be clamped between ZSTD_LDM_MINMATCH_MIN and ZSTD_LDM_MINMATCH_MAX. + * Special: value 0 means "use default value" (default: 64). */ ZSTD_p_ldmBucketSizeLog, /* Log size of each bucket in the LDM hash table for collision resolution. - * Larger values usually improve collision resolution but may decrease - * compression speed. - * The maximum value is ZSTD_LDM_BUCKETSIZELOG_MAX (default: 3). - * note : 0 is a valid value */ + * Larger values improve collision resolution but decrease compression speed. + * The maximum value is ZSTD_LDM_BUCKETSIZELOG_MAX . + * Special: value 0 means "use default value" (default: 3). */ ZSTD_p_ldmHashEveryLog, /* Frequency of inserting/looking up entries in the LDM hash table. * The default is MAX(0, (windowLog - ldmHashLog)) to * optimize hash table usage. - * Larger values improve compression speed. Deviating far from the - * default value will likely result in a decrease in compression ratio. - * Must be clamped between 0 and ZSTD_WINDOWLOG_MAX - ZSTD_HASHLOG_MIN. - * note : 0 is a valid value */ + * Larger values improve compression speed. + * Deviating far from default value will likely result in a compression ratio decrease. + * Must be clamped between 0 and (ZSTD_WINDOWLOG_MAX - ZSTD_HASHLOG_MIN). + * Special: value 0 means "automatically determine hashEveryLog". */ /* frame parameters */ ZSTD_p_contentSizeFlag=200, /* Content size will be written into frame header _whenever known_ (default:1)