/* Parameters controlling long distance matching (LDM). */
typedef struct {
U32 enableLdm; /* 1 to enable long distance matching, 0 otherwise */
U32 hashLog; /* Log size of hashTable */
- U32 bucketLog; /* Log number of buckets, at most 4 */
+ U32 bucketSizeLog; /* Log bucket size for collision resolution, at most ZSTD_LDM_BUCKETSIZELOG_MAX (8) */
U32 minMatchLength; /* Minimum match length */
U32 hashEveryLog; /* Log number of entries to skip */
} ldmParams_t;
/* Compression stage identifiers; values increase in declaration order,
 * starting from ZSTDcs_created == 0. */
typedef enum {
    ZSTDcs_created = 0,
    ZSTDcs_init,
    ZSTDcs_ongoing,
    ZSTDcs_ending
} ZSTD_compressionStage_e;
/* Default long distance matching settings: the bucket-size, min-match and
 * hash-log values are the ones ZSTD_ldm_initializeParameters() stores, and
 * LDM_WINDOW_LOG is the window log applied when LDM is switched on. */
#define LDM_BUCKET_SIZE_LOG 3
-#define LDM_BUCKET_SIZE_LOG_MAX 4
#define LDM_MIN_MATCH_LENGTH 64
#define LDM_WINDOW_LOG 27
#define LDM_HASH_LOG 20
+ cctx->outBuffSize + cctx->inBuffSize
+ ZSTDMT_sizeof_CCtx(cctx->mtctx);
}
+#if 0
+static void ZSTD_debugPrintCCtxParams(ZSTD_CCtx_params* params) /* debug helper: dumps the fields of *params through DEBUGLOG at level 2; compiled out by the enclosing #if 0 */
+{
+ DEBUGLOG(2, "======CCtxParams======");
+ DEBUGLOG(2, "cParams: %u %u %u %u %u %u %u", /* compression parameters, in the order listed below */
+ params->cParams.windowLog,
+ params->cParams.chainLog,
+ params->cParams.hashLog,
+ params->cParams.searchLog,
+ params->cParams.searchLength,
+ params->cParams.targetLength,
+ params->cParams.strategy);
+ DEBUGLOG(2, "fParams: %u %u %u", /* frame parameters */
+ params->fParams.contentSizeFlag,
+ params->fParams.checksumFlag,
+ params->fParams.noDictIDFlag);
+ DEBUGLOG(2, "cLevel, forceWindow: %u %u", /* NOTE(review): %u assumes both fields are unsigned-int compatible -- confirm the type of compressionLevel */
+ params->compressionLevel,
+ params->forceWindow);
+ DEBUGLOG(2, "ldm: %u %u %u %u %u", /* long distance matching parameters (ldmParams_t fields) */
+ params->ldmParams.enableLdm,
+ params->ldmParams.hashLog,
+ params->ldmParams.bucketSizeLog,
+ params->ldmParams.minMatchLength,
+ params->ldmParams.hashEveryLog);
+}
+#endif
size_t ZSTD_sizeof_CStream(const ZSTD_CStream* zcs)
{
static size_t ZSTD_ldm_initializeParameters(ldmParams_t* params, U32 enableLdm)
{
- assert(LDM_BUCKET_SIZE_LOG <= LDM_BUCKET_SIZE_LOG_MAX);
+ assert(LDM_BUCKET_SIZE_LOG <= ZSTD_LDM_BUCKETSIZELOG_MAX);
params->enableLdm = enableLdm>0;
params->hashLog = LDM_HASH_LOG;
- params->bucketLog = LDM_BUCKET_SIZE_LOG;
+ params->bucketSizeLog = LDM_BUCKET_SIZE_LOG;
params->minMatchLength = LDM_MIN_MATCH_LENGTH;
params->hashEveryLog = LDM_HASHEVERYLOG_NOTSET;
return 0;
DEBUGLOG(5, " setting overlap with nbThreads == %u", cctx->requestedParams.nbThreads);
return ZSTD_CCtxParam_setParameter(&cctx->requestedParams, param, value);
- case ZSTD_p_longDistanceMatching:
+ case ZSTD_p_enableLongDistanceMatching:
if (cctx->cdict) return ERROR(stage_wrong);
if (value != 0) {
ZSTD_cLevelToCParams(cctx);
if (cctx->cdict) return ERROR(stage_wrong);
return ZSTD_CCtxParam_setParameter(&cctx->requestedParams, param, value);
+ case ZSTD_p_ldmBucketSizeLog:
case ZSTD_p_ldmHashEveryLog:
if (cctx->cdict) return ERROR(stage_wrong);
return ZSTD_CCtxParam_setParameter(&cctx->requestedParams, param, value);
if (params->nbThreads <= 1) return ERROR(parameter_unsupported);
return ZSTDMT_CCtxParam_setMTCtxParameter(params, ZSTDMT_p_overlapSectionLog, value);
- case ZSTD_p_longDistanceMatching :
+ case ZSTD_p_enableLongDistanceMatching :
if (value != 0) {
ZSTD_cLevelToCCtxParams(params);
params->cParams.windowLog = LDM_WINDOW_LOG;
params->ldmParams.minMatchLength = value;
return 0;
+ case ZSTD_p_ldmBucketSizeLog :
+ if (value > ZSTD_LDM_BUCKETSIZELOG_MAX) {
+ return ERROR(parameter_outOfBound);
+ }
+ params->ldmParams.bucketSizeLog = value;
+ return 0;
+
case ZSTD_p_ldmHashEveryLog :
if (value > ZSTD_WINDOWLOG_MAX - ZSTD_HASHLOG_MIN) {
return ERROR(parameter_outOfBound);
}
/* Estimate the space needed for long distance matching tables. */
-static size_t ZSTD_ldm_getTableSize(U32 ldmHashLog, U32 bucketLog) {
- size_t const ldmHSize = ((size_t)1) << ldmHashLog;
- size_t const ldmBucketLog =
- MIN(bucketLog, LDM_BUCKET_SIZE_LOG_MAX);
+static size_t ZSTD_ldm_getTableSize(U32 hashLog, U32 bucketSizeLog) {
+ size_t const ldmHSize = ((size_t)1) << hashLog; /* number of ldmEntry_t slots in the hash table */
+ size_t const ldmBucketSizeLog = MIN(bucketSizeLog, hashLog); /* clamp so the subtraction in the shift below cannot wrap around */
size_t const ldmBucketSize =
- ((size_t)1) << (ldmHashLog - ldmBucketLog);
+ ((size_t)1) << (hashLog - ldmBucketSizeLog); /* 2^(hashLog - bucketSizeLog); presumably one offset byte per bucket -- confirm against bucketOffsets */
return ldmBucketSize + (ldmHSize * (sizeof(ldmEntry_t))); /* total = ldmBucketSize + one ldmEntry_t per hash-table slot */
}
size_t const ldmSpace = params->ldmParams.enableLdm ?
ZSTD_ldm_getTableSize(params->ldmParams.hashLog,
- params->ldmParams.bucketLog) : 0;
+ params->ldmParams.bucketSizeLog) : 0;
size_t const neededSpace = entropySpace + tableSpace + tokenSpace +
optSpace + ldmSpace;
return (!ldmParams1.enableLdm && !ldmParams2.enableLdm) ||
(ldmParams1.enableLdm == ldmParams2.enableLdm &&
ldmParams1.hashLog == ldmParams2.hashLog &&
- ldmParams1.bucketLog == ldmParams2.bucketLog &&
+ ldmParams1.bucketSizeLog == ldmParams2.bucketSizeLog &&
ldmParams1.minMatchLength == ldmParams2.minMatchLength &&
ldmParams1.hashEveryLog == ldmParams2.hashEveryLog);
}
} }
if (params.ldmParams.enableLdm) {
+ /* Adjust long distance matching parameters */
if (params.ldmParams.hashEveryLog == LDM_HASHEVERYLOG_NOTSET) {
params.ldmParams.hashEveryLog =
params.cParams.windowLog < params.ldmParams.hashLog ?
0 : params.cParams.windowLog - params.ldmParams.hashLog;
}
+ params.ldmParams.bucketSizeLog =
+ MIN(params.ldmParams.bucketSizeLog, params.ldmParams.hashLog);
zc->ldmState.hashPower =
ZSTD_ldm_getHashPower(params.ldmParams.minMatchLength);
}
size_t const ldmSpace = params.ldmParams.enableLdm ?
ZSTD_ldm_getTableSize(params.ldmParams.hashLog,
- params.ldmParams.bucketLog) : 0;
+ params.ldmParams.bucketSizeLog) : 0;
/* Check if workSpace is large enough, alloc a new one if needed */
{ size_t const entropySpace = sizeof(ZSTD_entropyCTables_t);
if (params.ldmParams.enableLdm) {
size_t const ldmHSize = ((size_t)1) << params.ldmParams.hashLog;
size_t const ldmBucketSize =
- ((size_t)1) << (params.ldmParams.hashLog - params.ldmParams.bucketLog);
+ ((size_t)1) << (params.ldmParams.hashLog - params.ldmParams.bucketSizeLog);
memset(ptr, 0, ldmSpace);
assert(((size_t)ptr & 3) == 0); /* ensure ptr is properly aligned */
zc->ldmState.hashTable = (ldmEntry_t*)ptr;
/* Returns a pointer into ldmState->hashTable at the first entry of the
 * bucket selected by `hash`. */
static ldmEntry_t* ZSTD_ldm_getBucket(
ldmState_t* ldmState, size_t hash, ldmParams_t const ldmParams)
{
- return ldmState->hashTable + (hash << ldmParams.bucketLog);
+ return ldmState->hashTable + (hash << ldmParams.bucketSizeLog); /* each bucket spans (1 << bucketSizeLog) consecutive entries */
}
/** ZSTD_ldm_insertEntry() :
BYTE* const bucketOffsets = ldmState->bucketOffsets;
*(ZSTD_ldm_getBucket(ldmState, hash, ldmParams) + bucketOffsets[hash]) = entry;
bucketOffsets[hash]++;
- bucketOffsets[hash] &= (1 << ldmParams.bucketLog) - 1;
+ bucketOffsets[hash] &= (1 << ldmParams.bucketSizeLog) - 1;
}
/** ZSTD_ldm_makeEntryAndInsertByTag() :
ldmState_t* const ldmState = &(cctx->ldmState);
const ldmParams_t ldmParams = cctx->appliedParams.ldmParams;
const U64 hashPower = ldmState->hashPower;
- const U32 hBits = ldmParams.hashLog - ldmParams.bucketLog;
- const U32 ldmBucketSize = (1 << ldmParams.bucketLog);
+ const U32 hBits = ldmParams.hashLog - ldmParams.bucketSizeLog;
+ const U32 ldmBucketSize = (1 << ldmParams.bucketSizeLog);
const U32 ldmTagMask = (1 << ldmParams.hashEveryLog) - 1;
seqStore_t* const seqStorePtr = &(cctx->seqStore);
const BYTE* const base = cctx->base;
ldmState_t* const ldmState = &(ctx->ldmState);
const ldmParams_t ldmParams = ctx->appliedParams.ldmParams;
const U64 hashPower = ldmState->hashPower;
- const U32 hBits = ldmParams.hashLog - ldmParams.bucketLog;
- const U32 ldmBucketSize = (1 << ldmParams.bucketLog);
+ const U32 hBits = ldmParams.hashLog - ldmParams.bucketSizeLog;
+ const U32 ldmBucketSize = (1 << ldmParams.bucketSizeLog);
const U32 ldmTagMask = (1 << ldmParams.hashEveryLog) - 1;
seqStore_t* const seqStorePtr = &(ctx->seqStore);
const BYTE* const base = ctx->base;
return flushMin;
}
#endif
-
CHECK_F( ZSTD_compressStream_generic(cctx, output, input, endOp) );
DEBUGLOG(5, "completed ZSTD_compress_generic");
return cctx->outBuffContentSize - cctx->outBuffFlushedSize; /* remaining to flush */
#define ZSTD_TARGETLENGTH_MAX 999
#define ZSTD_LDM_SEARCHLENGTH_MIN 4
#define ZSTD_LDM_SEARCHLENGTH_MAX 4096
+#define ZSTD_LDM_BUCKETSIZELOG_MAX 8
#define ZSTD_FRAMEHEADERSIZE_MAX 18 /* for static allocation */
#define ZSTD_FRAMEHEADERSIZE_MIN 6
/* advanced parameters - may not remain available after API update */
ZSTD_p_forceMaxWindow=1100, /* Force back-reference distances to remain < windowSize,
* even when referencing into Dictionary content (default:0) */
- ZSTD_p_longDistanceMatching, /* Enable long distance matching. This
- * increases the memory usage as well as the
- * window size. Note: this should be set after
- * ZSTD_p_compressionLevel and before
- * ZSTD_p_windowLog and other LDM parameters. */
+ ZSTD_p_enableLongDistanceMatching, /* Enable long distance matching. This increases the memory
+ * usage as well as window size. Note: setting this
+ * parameter resets all the LDM parameters as well as
+ * ZSTD_p_windowLog. It should be set after
+ * ZSTD_p_compressionLevel and before ZSTD_p_windowLog and
+ * other LDM parameters. Setting the compression level
+ * after this parameter overrides the window log, though LDM
+ * will remain enabled until explicitly disabled. */
ZSTD_p_ldmHashLog, /* Size of the table for long distance matching.
- * Must be clamped between ZSTD_HASHLOG_MIN and
- * ZSTD_HASHLOG_MAX */
+ * Must be clamped between ZSTD_HASHLOG_MIN and ZSTD_HASHLOG_MAX. */
ZSTD_p_ldmMinMatch, /* Minimum size of searched matches for long distance matcher.
* Must be clamped between ZSTD_LDM_SEARCHLENGTH_MIN
* and ZSTD_LDM_SEARCHLENGTH_MAX. */
- ZSTD_p_ldmHashEveryLog, /* Frequency of inserting/looking up entries in the
- * LDM hash table. The default is
- * (windowLog - ldmHashLog) to optimize hash table
- * usage. Must be clamped between 0 and
- * ZSTD_WINDOWLOG_MAX - ZSTD_HASHLOG_MIN. */
+ ZSTD_p_ldmBucketSizeLog, /* Log size of each bucket in the hash table for collision resolution.
+ * The maximum value is ZSTD_LDM_BUCKETSIZELOG_MAX. */
+ ZSTD_p_ldmHashEveryLog, /* Frequency of inserting/looking up entries in the LDM hash table.
+ * The default is MAX(0, (windowLog - ldmHashLog)) to
+ * optimize hash table usage.
+ * Must be clamped between 0 and ZSTD_WINDOWLOG_MAX - ZSTD_HASHLOG_MIN. */
} ZSTD_cParameter;
g_ldmHashLog = ldmHashLog;
}
-#define BMK_LDM_HASHEVERYLOG_NOTSET 9999
-static U32 g_ldmHashEveryLog = BMK_LDM_HASHEVERYLOG_NOTSET;
+#define BMK_LDM_PARAM_NOTSET 9999
+static U32 g_ldmBucketSizeLog = BMK_LDM_PARAM_NOTSET;
+void BMK_setLdmBucketSizeLog(unsigned ldmBucketSizeLog) { /* records the LDM bucket size log in the file-static g_ldmBucketSizeLog */
+ g_ldmBucketSizeLog = ldmBucketSizeLog; /* stays BMK_LDM_PARAM_NOTSET until this setter is called */
+}
+
+static U32 g_ldmHashEveryLog = BMK_LDM_PARAM_NOTSET;
/* Records the LDM "hash every" log in the file-static g_ldmHashEveryLog. */
void BMK_setLdmHashEveryLog(unsigned ldmHashEveryLog)
{
    g_ldmHashEveryLog = ldmHashEveryLog;
}
#ifdef ZSTD_NEWAPI
ZSTD_CCtx_setParameter(ctx, ZSTD_p_nbThreads, g_nbThreads);
ZSTD_CCtx_setParameter(ctx, ZSTD_p_compressionLevel, cLevel);
- ZSTD_CCtx_setParameter(ctx, ZSTD_p_longDistanceMatching, g_ldmFlag);
+ ZSTD_CCtx_setParameter(ctx, ZSTD_p_enableLongDistanceMatching, g_ldmFlag);
ZSTD_CCtx_setParameter(ctx, ZSTD_p_ldmMinMatch, g_ldmMinMatch);
ZSTD_CCtx_setParameter(ctx, ZSTD_p_ldmHashLog, g_ldmHashLog);
- if (g_ldmHashEveryLog != BMK_LDM_HASHEVERYLOG_NOTSET) {
+ if (g_ldmBucketSizeLog != BMK_LDM_PARAM_NOTSET) {
+ ZSTD_CCtx_setParameter(ctx, ZSTD_p_ldmBucketSizeLog, g_ldmBucketSizeLog);
+ }
+ if (g_ldmHashEveryLog != BMK_LDM_PARAM_NOTSET) {
ZSTD_CCtx_setParameter(ctx, ZSTD_p_ldmHashEveryLog, g_ldmHashEveryLog);
}
ZSTD_CCtx_setParameter(ctx, ZSTD_p_windowLog, comprParams->windowLog);
void BMK_setLdmFlag(unsigned ldmFlag);
void BMK_setLdmMinMatch(unsigned ldmMinMatch);
void BMK_setLdmHashLog(unsigned ldmHashLog);
+void BMK_setLdmBucketSizeLog(unsigned ldmBucketSizeLog);
void BMK_setLdmHashEveryLog(unsigned ldmHashEveryLog);
#endif /* BENCH_H_121279284357 */
/* Records the LDM minimum match length in the file-level g_ldmMinMatch. */
void FIO_setLdmMinMatch(unsigned ldmMinMatch)
{
    g_ldmMinMatch = ldmMinMatch;
}
-#define FIO_LDM_HASHEVERYLOG_NOTSET 9999
-static U32 g_ldmHashEveryLog = FIO_LDM_HASHEVERYLOG_NOTSET;
+
+#define FIO_LDM_PARAM_NOTSET 9999
+static U32 g_ldmBucketSizeLog = FIO_LDM_PARAM_NOTSET;
+void FIO_setLdmBucketSizeLog(unsigned ldmBucketSizeLog) { /* records the LDM bucket size log in the file-static g_ldmBucketSizeLog */
+ g_ldmBucketSizeLog = ldmBucketSizeLog; /* stays FIO_LDM_PARAM_NOTSET until this setter is called */
+}
+
+static U32 g_ldmHashEveryLog = FIO_LDM_PARAM_NOTSET;
/* Records the LDM "hash every" log in the file-static g_ldmHashEveryLog. */
void FIO_setLdmHashEveryLog(unsigned ldmHashEveryLog)
{
    g_ldmHashEveryLog = ldmHashEveryLog;
}
/* compression level */
CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_p_compressionLevel, cLevel) );
/* long distance matching */
- CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_p_longDistanceMatching, g_ldmFlag) );
+ CHECK( ZSTD_CCtx_setParameter(
+ ress.cctx, ZSTD_p_enableLongDistanceMatching, g_ldmFlag) );
CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_p_ldmHashLog, g_ldmHashLog) );
CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_p_ldmMinMatch, g_ldmMinMatch) );
- if (g_ldmHashEveryLog != FIO_LDM_HASHEVERYLOG_NOTSET) {
+ if (g_ldmBucketSizeLog != FIO_LDM_PARAM_NOTSET) {
+ CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_p_ldmBucketSizeLog, g_ldmBucketSizeLog) );
+ }
+ if (g_ldmHashEveryLog != FIO_LDM_PARAM_NOTSET) {
CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_p_ldmHashEveryLog, g_ldmHashEveryLog) );
}
/* compression parameters */
void FIO_setLdmFlag(unsigned ldmFlag);
void FIO_setLdmHashLog(unsigned ldmHashLog);
void FIO_setLdmMinMatch(unsigned ldmMinMatch);
+void FIO_setLdmBucketSizeLog(unsigned ldmBucketSizeLog);
void FIO_setLdmHashEveryLog(unsigned ldmHashEveryLog);
static const int g_defaultDictCLevel = 3;
static const unsigned g_defaultSelectivityLevel = 9;
#define OVERLAP_LOG_DEFAULT 9999
-#define LDM_HASHEVERYLOG_DEFAULT 9999
+#define LDM_PARAM_DEFAULT 9999 /* Default for parameters where 0 is valid */
static U32 g_overlapLog = OVERLAP_LOG_DEFAULT;
static U32 g_ldmHashLog = 0;
static U32 g_ldmMinMatch = 0;
-static U32 g_ldmHashEveryLog = LDM_HASHEVERYLOG_DEFAULT;
+static U32 g_ldmHashEveryLog = LDM_PARAM_DEFAULT;
+static U32 g_ldmBucketSizeLog = LDM_PARAM_DEFAULT;
/*-************************************
if (longCommandWArg(&stringPtr, "overlapLog=") || longCommandWArg(&stringPtr, "ovlog=")) { g_overlapLog = readU32FromChar(&stringPtr); if (stringPtr[0]==',') { stringPtr++; continue; } else break; }
if (longCommandWArg(&stringPtr, "ldmHashLog=") || longCommandWArg(&stringPtr, "ldmHlog=")) { g_ldmHashLog = readU32FromChar(&stringPtr); if (stringPtr[0]==',') { stringPtr++; continue; } else break; }
if (longCommandWArg(&stringPtr, "ldmSearchLength=") || longCommandWArg(&stringPtr, "ldmSlen=")) { g_ldmMinMatch = readU32FromChar(&stringPtr); if (stringPtr[0]==',') { stringPtr++; continue; } else break; }
+ if (longCommandWArg(&stringPtr, "ldmBucketSizeLog=")) { g_ldmBucketSizeLog = readU32FromChar(&stringPtr); if (stringPtr[0]==',') { stringPtr++; continue; } else break; }
if (longCommandWArg(&stringPtr, "ldmHashEveryLog=")) { g_ldmHashEveryLog = readU32FromChar(&stringPtr); if (stringPtr[0]==',') { stringPtr++; continue; } else break; }
return 0;
}
BMK_setLdmFlag(ldmFlag);
BMK_setLdmMinMatch(g_ldmMinMatch);
BMK_setLdmHashLog(g_ldmHashLog);
- BMK_setLdmHashEveryLog(g_ldmHashEveryLog);
+ if (g_ldmBucketSizeLog != LDM_PARAM_DEFAULT) {
+ BMK_setLdmBucketSizeLog(g_ldmBucketSizeLog);
+ }
+ if (g_ldmHashEveryLog != LDM_PARAM_DEFAULT) {
+ BMK_setLdmHashEveryLog(g_ldmHashEveryLog);
+ }
BMK_benchFiles(filenameTable, filenameIdx, dictFileName, cLevel, cLevelLast, &compressionParams, setRealTimePrio);
#endif
(void)bench_nbSeconds; (void)blockSize; (void)setRealTimePrio;
FIO_setLdmFlag(ldmFlag);
FIO_setLdmHashLog(g_ldmHashLog);
FIO_setLdmMinMatch(g_ldmMinMatch);
- if (g_ldmHashEveryLog != LDM_HASHEVERYLOG_DEFAULT) {
+ if (g_ldmBucketSizeLog != LDM_PARAM_DEFAULT) {
+ FIO_setLdmBucketSizeLog(g_ldmBucketSizeLog);
+ }
+ if (g_ldmHashEveryLog != LDM_PARAM_DEFAULT) {
FIO_setLdmHashEveryLog(g_ldmHashEveryLog);
}
if (FUZ_rand(&lseed) & 1) CHECK_Z( setCCtxParameter(zc, cctxParams, ZSTD_p_minMatch, cParams.searchLength, useOpaqueAPI) );
if (FUZ_rand(&lseed) & 1) CHECK_Z( setCCtxParameter(zc, cctxParams, ZSTD_p_targetLength, cParams.targetLength, useOpaqueAPI) );
- if (FUZ_rand(&lseed) & 1) CHECK_Z( setCCtxParameter(zc, cctxParams, ZSTD_p_longDistanceMatching, FUZ_rand(&lseed) & 63, useOpaqueAPI) );
+ if (FUZ_rand(&lseed) & 1) CHECK_Z( setCCtxParameter(zc, cctxParams, ZSTD_p_enableLongDistanceMatching, FUZ_rand(&lseed) & 63, useOpaqueAPI) );
/* unconditionally set, to be sync with decoder */
/* mess with frame parameters */