*.code-workspace
compile_commands.json
.clangd
+perf.data
+perf.data.old
ZSTD_customMem customMem;
U32 dictID;
int compressionLevel; /* 0 indicates that advanced API was used to select CDict params */
+ ZSTD_useRowMatchFinderMode_e useRowMatchFinder; /* Indicates whether the CDict was created with params that would use
+ * row-based matchfinder. Unless the cdict is reloaded, we will use
+ * the same greedy/lazy matchfinder at compression time.
+ */
}; /* typedef'd to ZSTD_CDict within "zstd.h" */
ZSTD_CCtx* ZSTD_createCCtx(void)
/* private API call, for dictBuilder only */
const seqStore_t* ZSTD_getSeqStore(const ZSTD_CCtx* ctx) { return &(ctx->seqStore); }
+/* Returns true if the strategy supports using a row based matchfinder */
+static int ZSTD_rowMatchFinderSupported(const ZSTD_strategy strategy) {
+ return (strategy >= ZSTD_greedy && strategy <= ZSTD_lazy2);
+}
+
+/* Returns true if the strategy and useRowMatchFinder mode indicate that we will use the row based matchfinder
+ * for this compression.
+ */
+static int ZSTD_rowMatchFinderUsed(const ZSTD_strategy strategy, const ZSTD_useRowMatchFinderMode_e mode) {
+ assert(mode != ZSTD_urm_auto);
+ return ZSTD_rowMatchFinderSupported(strategy) && (mode == ZSTD_urm_enableRowMatchFinder);
+}
+
+/* Returns row matchfinder usage enum given an initial mode and cParams */
+static ZSTD_useRowMatchFinderMode_e ZSTD_resolveRowMatchFinderMode(ZSTD_useRowMatchFinderMode_e mode,
+ const ZSTD_compressionParameters* const cParams) {
+#if !defined(ZSTD_NO_INTRINSICS) && (defined(__SSE2__) || defined(__ARM_NEON))
+ int const kHasSIMD128 = 1;
+#else
+ int const kHasSIMD128 = 0;
+#endif
+ if (mode != ZSTD_urm_auto) return mode; /* if requested enabled, but no SIMD, we still will use row matchfinder */
+ mode = ZSTD_urm_disableRowMatchFinder;
+ if (!ZSTD_rowMatchFinderSupported(cParams->strategy)) return mode;
+ if (kHasSIMD128) {
+ if (cParams->windowLog > 14) mode = ZSTD_urm_enableRowMatchFinder;
+ } else {
+ if (cParams->windowLog > 17) mode = ZSTD_urm_enableRowMatchFinder;
+ }
+ return mode;
+}
+
+/* Returns 1 if the arguments indicate that we should allocate a chainTable, 0 otherwise */
+static int ZSTD_allocateChainTable(const ZSTD_strategy strategy,
+ const ZSTD_useRowMatchFinderMode_e useRowMatchFinder,
+ const U32 forDDSDict) {
+ assert(useRowMatchFinder != ZSTD_urm_auto);
+ /* We always should allocate a chaintable if we are allocating a matchstate for a DDS dictionary matchstate.
+ * We do not allocate a chaintable if we are using ZSTD_fast, or are using the row-based matchfinder.
+ */
+ return forDDSDict || ((strategy != ZSTD_fast) && !ZSTD_rowMatchFinderUsed(strategy, useRowMatchFinder));
+}
+
/* Returns 1 if compression parameters are such that we should
* enable long distance matching (wlog >= 27, strategy >= btopt).
* Returns 0 otherwise.
cctxParams.splitBlocks = 1;
}
+ cctxParams.useRowMatchFinder = ZSTD_resolveRowMatchFinderMode(cctxParams.useRowMatchFinder, &cParams);
assert(!ZSTD_checkCParams(cParams));
return cctxParams;
}
* But, set it for tracing anyway.
*/
cctxParams->compressionLevel = compressionLevel;
+ cctxParams->useRowMatchFinder = ZSTD_resolveRowMatchFinderMode(cctxParams->useRowMatchFinder, ¶ms->cParams);
+ DEBUGLOG(4, "ZSTD_CCtxParams_init_internal: useRowMatchFinder=%d", cctxParams->useRowMatchFinder);
}
size_t ZSTD_CCtxParams_init_advanced(ZSTD_CCtx_params* cctxParams, ZSTD_parameters params)
bounds.lowerBound = 0;
bounds.upperBound = 1;
return bounds;
-
+
case ZSTD_c_splitBlocks:
bounds.lowerBound = 0;
bounds.upperBound = 1;
return bounds;
+ case ZSTD_c_useRowMatchFinder:
+ bounds.lowerBound = (int)ZSTD_urm_auto;
+ bounds.upperBound = (int)ZSTD_urm_enableRowMatchFinder;
+ return bounds;
+
default:
bounds.error = ERROR(parameter_unsupported);
return bounds;
case ZSTD_c_blockDelimiters:
case ZSTD_c_validateSequences:
case ZSTD_c_splitBlocks:
+ case ZSTD_c_useRowMatchFinder:
default:
return 0;
}
case ZSTD_c_blockDelimiters:
case ZSTD_c_validateSequences:
case ZSTD_c_splitBlocks:
+ case ZSTD_c_useRowMatchFinder:
break;
default: RETURN_ERROR(parameter_unsupported, "unknown parameter");
CCtxParams->splitBlocks = value;
return CCtxParams->splitBlocks;
+ case ZSTD_c_useRowMatchFinder:
+ BOUNDCHECK(ZSTD_c_useRowMatchFinder, value);
+ CCtxParams->useRowMatchFinder = (ZSTD_useRowMatchFinderMode_e)value;
+ return CCtxParams->useRowMatchFinder;
+
default: RETURN_ERROR(parameter_unsupported, "unknown parameter");
}
}
case ZSTD_c_splitBlocks :
*value = (int)CCtxParams->splitBlocks;
break;
+ case ZSTD_c_useRowMatchFinder :
+ *value = (int)CCtxParams->useRowMatchFinder;
+ break;
default: RETURN_ERROR(parameter_unsupported, "unknown parameter");
}
return 0;
static size_t
ZSTD_sizeof_matchState(const ZSTD_compressionParameters* const cParams,
+ const ZSTD_useRowMatchFinderMode_e useRowMatchFinder,
+ const U32 enableDedicatedDictSearch,
const U32 forCCtx)
{
- size_t const chainSize = (cParams->strategy == ZSTD_fast) ? 0 : ((size_t)1 << cParams->chainLog);
+ /* chain table size should be 0 for fast or row-hash strategies */
+ size_t const chainSize = ZSTD_allocateChainTable(cParams->strategy, useRowMatchFinder, enableDedicatedDictSearch && !forCCtx)
+ ? ((size_t)1 << cParams->chainLog)
+ : 0;
size_t const hSize = ((size_t)1) << cParams->hashLog;
U32 const hashLog3 = (forCCtx && cParams->minMatch==3) ? MIN(ZSTD_HASHLOG3_MAX, cParams->windowLog) : 0;
size_t const h3Size = hashLog3 ? ((size_t)1) << hashLog3 : 0;
+ ZSTD_cwksp_aligned_alloc_size((1<<Litbits) * sizeof(U32))
+ ZSTD_cwksp_aligned_alloc_size((ZSTD_OPT_NUM+1) * sizeof(ZSTD_match_t))
+ ZSTD_cwksp_aligned_alloc_size((ZSTD_OPT_NUM+1) * sizeof(ZSTD_optimal_t));
+ size_t const lazyAdditionalSpace = ZSTD_rowMatchFinderUsed(cParams->strategy, useRowMatchFinder)
+ ? ZSTD_cwksp_aligned_alloc_size(hSize*sizeof(U16))
+ : 0;
size_t const optSpace = (forCCtx && (cParams->strategy >= ZSTD_btopt))
? optPotentialSpace
: 0;
/* tables are guaranteed to be sized in multiples of 64 bytes (or 16 uint32_t) */
ZSTD_STATIC_ASSERT(ZSTD_HASHLOG_MIN >= 4 && ZSTD_WINDOWLOG_MIN >= 4 && ZSTD_CHAINLOG_MIN >= 4);
+ assert(useRowMatchFinder != ZSTD_urm_auto);
DEBUGLOG(4, "chainSize: %u - hSize: %u - h3Size: %u",
(U32)chainSize, (U32)hSize, (U32)h3Size);
- return tableSpace + optSpace + slackSpace;
+ return tableSpace + optSpace + slackSpace + lazyAdditionalSpace;
}
static size_t ZSTD_estimateCCtxSize_usingCCtxParams_internal(
const ZSTD_compressionParameters* cParams,
const ldmParams_t* ldmParams,
const int isStatic,
+ const ZSTD_useRowMatchFinderMode_e useRowMatchFinder,
const size_t buffInSize,
const size_t buffOutSize,
const U64 pledgedSrcSize)
+ 3 * ZSTD_cwksp_alloc_size(maxNbSeq * sizeof(BYTE));
size_t const entropySpace = ZSTD_cwksp_alloc_size(ENTROPY_WORKSPACE_SIZE);
size_t const blockStateSpace = 2 * ZSTD_cwksp_alloc_size(sizeof(ZSTD_compressedBlockState_t));
- size_t const matchStateSize = ZSTD_sizeof_matchState(cParams, /* forCCtx */ 1);
+ size_t const matchStateSize = ZSTD_sizeof_matchState(cParams, useRowMatchFinder, /* enableDedicatedDictSearch */ 0, /* forCCtx */ 1);
size_t const ldmSpace = ZSTD_ldm_getTableSize(*ldmParams);
size_t const maxNbLdmSeq = ZSTD_ldm_getMaxNbSeq(*ldmParams, blockSize);
{
ZSTD_compressionParameters const cParams =
ZSTD_getCParamsFromCCtxParams(params, ZSTD_CONTENTSIZE_UNKNOWN, 0, ZSTD_cpm_noAttachDict);
+ ZSTD_useRowMatchFinderMode_e const useRowMatchFinder = ZSTD_resolveRowMatchFinderMode(params->useRowMatchFinder,
+ &cParams);
RETURN_ERROR_IF(params->nbWorkers > 0, GENERIC, "Estimate CCtx size is supported for single-threaded compression only.");
/* estimateCCtxSize is for one-shot compression. So no buffers should
* be needed. However, we still allocate two 0-sized buffers, which can
* take space under ASAN. */
return ZSTD_estimateCCtxSize_usingCCtxParams_internal(
- &cParams, ¶ms->ldmParams, 1, 0, 0, ZSTD_CONTENTSIZE_UNKNOWN);
+ &cParams, ¶ms->ldmParams, 1, useRowMatchFinder, 0, 0, ZSTD_CONTENTSIZE_UNKNOWN);
}
size_t ZSTD_estimateCCtxSize_usingCParams(ZSTD_compressionParameters cParams)
{
- ZSTD_CCtx_params const params = ZSTD_makeCCtxParamsFromCParams(cParams);
- return ZSTD_estimateCCtxSize_usingCCtxParams(¶ms);
+ ZSTD_CCtx_params initialParams = ZSTD_makeCCtxParamsFromCParams(cParams);
+ if (ZSTD_rowMatchFinderSupported(cParams.strategy)) {
+ /* Pick bigger of not using and using row-based matchfinder for greedy and lazy strategies */
+ size_t noRowCCtxSize;
+ size_t rowCCtxSize;
+ initialParams.useRowMatchFinder = ZSTD_urm_disableRowMatchFinder;
+ noRowCCtxSize = ZSTD_estimateCCtxSize_usingCCtxParams(&initialParams);
+ initialParams.useRowMatchFinder = ZSTD_urm_enableRowMatchFinder;
+ rowCCtxSize = ZSTD_estimateCCtxSize_usingCCtxParams(&initialParams);
+ return MAX(noRowCCtxSize, rowCCtxSize);
+ } else {
+ return ZSTD_estimateCCtxSize_usingCCtxParams(&initialParams);
+ }
}
static size_t ZSTD_estimateCCtxSize_internal(int compressionLevel)
size_t const outBuffSize = (params->outBufferMode == ZSTD_bm_buffered)
? ZSTD_compressBound(blockSize) + 1
: 0;
+ ZSTD_useRowMatchFinderMode_e const useRowMatchFinder = ZSTD_resolveRowMatchFinderMode(params->useRowMatchFinder, ¶ms->cParams);
return ZSTD_estimateCCtxSize_usingCCtxParams_internal(
- &cParams, ¶ms->ldmParams, 1, inBuffSize, outBuffSize,
+ &cParams, ¶ms->ldmParams, 1, useRowMatchFinder, inBuffSize, outBuffSize,
ZSTD_CONTENTSIZE_UNKNOWN);
}
}
size_t ZSTD_estimateCStreamSize_usingCParams(ZSTD_compressionParameters cParams)
{
- ZSTD_CCtx_params const params = ZSTD_makeCCtxParamsFromCParams(cParams);
- return ZSTD_estimateCStreamSize_usingCCtxParams(¶ms);
+ ZSTD_CCtx_params initialParams = ZSTD_makeCCtxParamsFromCParams(cParams);
+ if (ZSTD_rowMatchFinderSupported(cParams.strategy)) {
+ /* Pick bigger of not using and using row-based matchfinder for greedy and lazy strategies */
+ size_t noRowCCtxSize;
+ size_t rowCCtxSize;
+ initialParams.useRowMatchFinder = ZSTD_urm_disableRowMatchFinder;
+ noRowCCtxSize = ZSTD_estimateCStreamSize_usingCCtxParams(&initialParams);
+ initialParams.useRowMatchFinder = ZSTD_urm_enableRowMatchFinder;
+ rowCCtxSize = ZSTD_estimateCStreamSize_usingCCtxParams(&initialParams);
+ return MAX(noRowCCtxSize, rowCCtxSize);
+ } else {
+ return ZSTD_estimateCStreamSize_usingCCtxParams(&initialParams);
+ }
}
static size_t ZSTD_estimateCStreamSize_internal(int compressionLevel)
ZSTD_resetTarget_CCtx
} ZSTD_resetTarget_e;
+
static size_t
ZSTD_reset_matchState(ZSTD_matchState_t* ms,
ZSTD_cwksp* ws,
const ZSTD_compressionParameters* cParams,
+ const ZSTD_useRowMatchFinderMode_e useRowMatchFinder,
const ZSTD_compResetPolicy_e crp,
const ZSTD_indexResetPolicy_e forceResetIndex,
const ZSTD_resetTarget_e forWho)
{
- size_t const chainSize = (cParams->strategy == ZSTD_fast) ? 0 : ((size_t)1 << cParams->chainLog);
+ /* disable chain table allocation for fast or row-based strategies */
+ size_t const chainSize = ZSTD_allocateChainTable(cParams->strategy, useRowMatchFinder,
+ ms->dedicatedDictSearch && (forWho == ZSTD_resetTarget_CDict))
+ ? ((size_t)1 << cParams->chainLog)
+ : 0;
size_t const hSize = ((size_t)1) << cParams->hashLog;
U32 const hashLog3 = ((forWho == ZSTD_resetTarget_CCtx) && cParams->minMatch==3) ? MIN(ZSTD_HASHLOG3_MAX, cParams->windowLog) : 0;
size_t const h3Size = hashLog3 ? ((size_t)1) << hashLog3 : 0;
DEBUGLOG(4, "reset indices : %u", forceResetIndex == ZSTDirp_reset);
+ assert(useRowMatchFinder != ZSTD_urm_auto);
if (forceResetIndex == ZSTDirp_reset) {
ZSTD_window_init(&ms->window);
ZSTD_cwksp_mark_tables_dirty(ws);
ms->opt.priceTable = (ZSTD_optimal_t*)ZSTD_cwksp_reserve_aligned(ws, (ZSTD_OPT_NUM+1) * sizeof(ZSTD_optimal_t));
}
+ if (ZSTD_rowMatchFinderUsed(cParams->strategy, useRowMatchFinder)) {
+ { /* Row match finder needs an additional table of hashes ("tags") */
+ size_t const tagTableSize = hSize*sizeof(U16);
+ ms->tagTable = (U16*)ZSTD_cwksp_reserve_aligned(ws, tagTableSize);
+ if (ms->tagTable) ZSTD_memset(ms->tagTable, 0, tagTableSize);
+ }
+ { /* Switch to 32-entry rows if searchLog is 5 (or more) */
+ U32 const rowLog = cParams->searchLog < 5 ? 4 : 5;
+ assert(cParams->hashLog > rowLog);
+ ms->rowHashLog = ZSTD_highbit32((U32)1 << (cParams->hashLog - rowLog));
+ }
+ }
+
ms->cParams = *cParams;
RETURN_ERROR_IF(ZSTD_cwksp_reserve_failed(ws), memory_allocation,
"failed a workspace allocation in ZSTD_reset_matchState");
-
return 0;
}
ZSTD_buffered_policy_e const zbuff)
{
ZSTD_cwksp* const ws = &zc->workspace;
- DEBUGLOG(4, "ZSTD_resetCCtx_internal: pledgedSrcSize=%u, wlog=%u",
- (U32)pledgedSrcSize, params.cParams.windowLog);
+ DEBUGLOG(4, "ZSTD_resetCCtx_internal: pledgedSrcSize=%u, wlog=%u, useRowMatchFinder=%d",
+ (U32)pledgedSrcSize, params.cParams.windowLog, (int)params.useRowMatchFinder);
assert(!ZSTD_isError(ZSTD_checkCParams(params.cParams)));
zc->isFirstBlock = 1;
+ assert(params.useRowMatchFinder != ZSTD_urm_auto);
if (params.ldmParams.enableLdm) {
/* Adjust long distance matching parameters */
ZSTD_ldm_adjustParameters(¶ms.ldmParams, ¶ms.cParams);
size_t const neededSpace =
ZSTD_estimateCCtxSize_usingCCtxParams_internal(
- ¶ms.cParams, ¶ms.ldmParams, zc->staticSize != 0,
+ ¶ms.cParams, ¶ms.ldmParams, zc->staticSize != 0, params.useRowMatchFinder,
buffInSize, buffOutSize, pledgedSrcSize);
int resizeWorkspace;
&zc->blockState.matchState,
ws,
¶ms.cParams,
+ params.useRowMatchFinder,
crp,
needsIndexReset,
ZSTD_resetTarget_CCtx), "");
U64 pledgedSrcSize,
ZSTD_buffered_policy_e zbuff)
{
+ DEBUGLOG(4, "ZSTD_resetCCtx_byAttachingCDict() pledgedSrcSize=%zu", pledgedSrcSize);
{
ZSTD_compressionParameters adjusted_cdict_cParams = cdict->matchState.cParams;
unsigned const windowLog = params.cParams.windowLog;
params.cParams = ZSTD_adjustCParams_internal(adjusted_cdict_cParams, pledgedSrcSize,
cdict->dictContentSize, ZSTD_cpm_attachDict);
params.cParams.windowLog = windowLog;
+ params.useRowMatchFinder = cdict->useRowMatchFinder; /* cdict overrides */
FORWARD_IF_ERROR(ZSTD_resetCCtx_internal(cctx, params, pledgedSrcSize,
ZSTDcrp_makeClean, zbuff), "");
assert(cctx->appliedParams.cParams.strategy == adjusted_cdict_cParams.strategy);
const ZSTD_compressionParameters *cdict_cParams = &cdict->matchState.cParams;
assert(!cdict->matchState.dedicatedDictSearch);
-
- DEBUGLOG(4, "copying dictionary into context");
+ DEBUGLOG(4, "ZSTD_resetCCtx_byCopyingCDict() pledgedSrcSize=%zu", pledgedSrcSize);
{ unsigned const windowLog = params.cParams.windowLog;
assert(windowLog != 0);
/* Copy only compression parameters related to tables. */
params.cParams = *cdict_cParams;
params.cParams.windowLog = windowLog;
+ params.useRowMatchFinder = cdict->useRowMatchFinder;
FORWARD_IF_ERROR(ZSTD_resetCCtx_internal(cctx, params, pledgedSrcSize,
ZSTDcrp_leaveDirty, zbuff), "");
assert(cctx->appliedParams.cParams.strategy == cdict_cParams->strategy);
}
ZSTD_cwksp_mark_tables_dirty(&cctx->workspace);
+ assert(params.useRowMatchFinder != ZSTD_urm_auto);
/* copy tables */
- { size_t const chainSize = (cdict_cParams->strategy == ZSTD_fast) ? 0 : ((size_t)1 << cdict_cParams->chainLog);
+ { size_t const chainSize = ZSTD_allocateChainTable(cdict_cParams->strategy, cdict->useRowMatchFinder, 0 /* DDS guaranteed disabled */)
+ ? ((size_t)1 << cdict_cParams->chainLog)
+ : 0;
size_t const hSize = (size_t)1 << cdict_cParams->hashLog;
ZSTD_memcpy(cctx->blockState.matchState.hashTable,
cdict->matchState.hashTable,
hSize * sizeof(U32));
- ZSTD_memcpy(cctx->blockState.matchState.chainTable,
+ /* Do not copy cdict's chainTable if cctx has parameters such that it would not use chainTable */
+ if (ZSTD_allocateChainTable(cctx->appliedParams.cParams.strategy, cctx->appliedParams.useRowMatchFinder, 0 /* forDDSDict */)) {
+ ZSTD_memcpy(cctx->blockState.matchState.chainTable,
cdict->matchState.chainTable,
chainSize * sizeof(U32));
+ }
+ /* copy tag table */
+ if (ZSTD_rowMatchFinderUsed(cdict_cParams->strategy, cdict->useRowMatchFinder)) {
+ size_t const tagTableSize = hSize*sizeof(U16);
+ ZSTD_memcpy(cctx->blockState.matchState.tagTable,
+ cdict->matchState.tagTable,
+ tagTableSize);
+ }
}
/* Zero the hashTable3, since the cdict never fills it */
U64 pledgedSrcSize,
ZSTD_buffered_policy_e zbuff)
{
- DEBUGLOG(5, "ZSTD_copyCCtx_internal");
RETURN_ERROR_IF(srcCCtx->stage!=ZSTDcs_init, stage_wrong,
"Can't copy a ctx that's not in init stage.");
-
+ DEBUGLOG(5, "ZSTD_copyCCtx_internal");
ZSTD_memcpy(&dstCCtx->customMem, &srcCCtx->customMem, sizeof(ZSTD_customMem));
{ ZSTD_CCtx_params params = dstCCtx->requestedParams;
/* Copy only compression parameters related to tables. */
params.cParams = srcCCtx->appliedParams.cParams;
+ assert(srcCCtx->appliedParams.useRowMatchFinder != ZSTD_urm_auto);
+ params.useRowMatchFinder = srcCCtx->appliedParams.useRowMatchFinder;
params.fParams = fParams;
ZSTD_resetCCtx_internal(dstCCtx, params, pledgedSrcSize,
ZSTDcrp_leaveDirty, zbuff);
ZSTD_cwksp_mark_tables_dirty(&dstCCtx->workspace);
/* copy tables */
- { size_t const chainSize = (srcCCtx->appliedParams.cParams.strategy == ZSTD_fast) ? 0 : ((size_t)1 << srcCCtx->appliedParams.cParams.chainLog);
+ { size_t const chainSize = ZSTD_allocateChainTable(srcCCtx->appliedParams.cParams.strategy,
+ srcCCtx->appliedParams.useRowMatchFinder,
+ 0 /* forDDSDict */)
+ ? ((size_t)1 << srcCCtx->appliedParams.cParams.chainLog)
+ : 0;
size_t const hSize = (size_t)1 << srcCCtx->appliedParams.cParams.hashLog;
int const h3log = srcCCtx->blockState.matchState.hashLog3;
size_t const h3Size = h3log ? ((size_t)1 << h3log) : 0;
/* ZSTD_buildSequencesStatistics():
* Returns the size of the statistics for a given set of sequences, or a ZSTD error code,
* Also modifies LLtype, Offtype, MLtype, and lastNCount to the appropriate values.
- *
+ *
* entropyWkspSize must be of size at least ENTROPY_WORKSPACE_SIZE - (MaxSeq + 1)*sizeof(U32)
*/
static ZSTD_symbolEncodingTypeStats_t
/* ZSTD_selectBlockCompressor() :
* Not static, but internal use only (used by long distance matcher)
* assumption : strat is a valid strategy */
-ZSTD_blockCompressor ZSTD_selectBlockCompressor(ZSTD_strategy strat, ZSTD_dictMode_e dictMode)
+ZSTD_blockCompressor ZSTD_selectBlockCompressor(ZSTD_strategy strat, ZSTD_useRowMatchFinderMode_e useRowMatchFinder, ZSTD_dictMode_e dictMode)
{
static const ZSTD_blockCompressor blockCompressor[4][ZSTD_STRATEGY_MAX+1] = {
{ ZSTD_compressBlock_fast /* default for 0 */,
ZSTD_STATIC_ASSERT((unsigned)ZSTD_fast == 1);
assert(ZSTD_cParam_withinBounds(ZSTD_c_strategy, strat));
- selectedCompressor = blockCompressor[(int)dictMode][(int)strat];
+ DEBUGLOG(4, "Selected block compressor: dictMode=%d strat=%d rowMatchfinder=%d", (int)dictMode, (int)strat, (int)useRowMatchFinder);
+ if (ZSTD_rowMatchFinderUsed(strat, useRowMatchFinder)) {
+ static const ZSTD_blockCompressor rowBasedBlockCompressors[4][3] = {
+ { ZSTD_compressBlock_greedy_row,
+ ZSTD_compressBlock_lazy_row,
+ ZSTD_compressBlock_lazy2_row },
+ { ZSTD_compressBlock_greedy_extDict_row,
+ ZSTD_compressBlock_lazy_extDict_row,
+ ZSTD_compressBlock_lazy2_extDict_row },
+ { ZSTD_compressBlock_greedy_dictMatchState_row,
+ ZSTD_compressBlock_lazy_dictMatchState_row,
+ ZSTD_compressBlock_lazy2_dictMatchState_row },
+ { ZSTD_compressBlock_greedy_dedicatedDictSearch_row,
+ ZSTD_compressBlock_lazy_dedicatedDictSearch_row,
+ ZSTD_compressBlock_lazy2_dedicatedDictSearch_row }
+ };
+ DEBUGLOG(4, "Selecting a row-based matchfinder");
+ assert(useRowMatchFinder != ZSTD_urm_auto);
+ selectedCompressor = rowBasedBlockCompressors[(int)dictMode][(int)strat - (int)ZSTD_greedy];
+ } else {
+ selectedCompressor = blockCompressor[(int)dictMode][(int)strat];
+ }
assert(selectedCompressor != NULL);
return selectedCompressor;
}
ZSTD_ldm_blockCompress(&zc->externSeqStore,
ms, &zc->seqStore,
zc->blockState.nextCBlock->rep,
+ zc->appliedParams.useRowMatchFinder,
src, srcSize);
assert(zc->externSeqStore.pos <= zc->externSeqStore.size);
} else if (zc->appliedParams.ldmParams.enableLdm) {
ZSTD_ldm_blockCompress(&ldmSeqStore,
ms, &zc->seqStore,
zc->blockState.nextCBlock->rep,
+ zc->appliedParams.useRowMatchFinder,
src, srcSize);
assert(ldmSeqStore.pos == ldmSeqStore.size);
} else { /* not long range mode */
- ZSTD_blockCompressor const blockCompressor = ZSTD_selectBlockCompressor(zc->appliedParams.cParams.strategy, dictMode);
+ ZSTD_blockCompressor const blockCompressor = ZSTD_selectBlockCompressor(zc->appliedParams.cParams.strategy,
+ zc->appliedParams.useRowMatchFinder,
+ dictMode);
ms->ldmSeqStore = NULL;
lastLLSize = blockCompressor(ms, &zc->seqStore, zc->blockState.nextCBlock->rep, src, srcSize);
}
/** ZSTD_buildBlockEntropyStats() :
* Builds entropy for the block.
* Requires workspace size ENTROPY_WORKSPACE_SIZE
- *
+ *
* @return : 0 on success or error code
*/
size_t ZSTD_buildBlockEntropyStats(seqStore_t* seqStorePtr,
/* ZSTD_compressSeqStore_singleBlock():
* Compresses a seqStore into a block with a block header, into the buffer dst.
- *
+ *
* Returns the total size of that block (including header) or a ZSTD error code.
*/
static size_t ZSTD_compressSeqStore_singleBlock(ZSTD_CCtx* zc, seqStore_t* const seqStore,
* Estimates the cost of seqStore prior to split, and estimates the cost of splitting the sequences in half.
* If advantageous to split, then we recurse down the two sub-blocks. If not, or if an error occurred in estimation, then
* we do not recurse.
- *
+ *
* Note: The recursion depth is capped by a heuristic minimum number of sequences, defined by MIN_SEQUENCES_BLOCK_SPLITTING.
* In theory, this means the absolute largest recursion depth is 10 == log2(maxNbSeqInBlock/MIN_SEQUENCES_BLOCK_SPLITTING).
* In practice, recursion depth usually doesn't go beyond 4.
- *
+ *
* Furthermore, the number of splits is capped by MAX_NB_SPLITS. At MAX_NB_SPLITS == 196 with the current existing blockSize
* maximum of 128 KB, this value is actually impossible to reach.
*/
/* ZSTD_compressBlock_splitBlock():
* Attempts to split a given block into multiple blocks to improve compression ratio.
- *
+ *
* Returns combined size of all blocks (which includes headers), or a ZSTD error code.
*/
static size_t ZSTD_compressBlock_splitBlock_internal(ZSTD_CCtx* zc, void* dst, size_t dstCapacity,
const BYTE* ip = (const BYTE*) src;
const BYTE* const iend = ip + srcSize;
+ DEBUGLOG(4, "ZSTD_loadDictionaryContent(): useRowMatchFinder=%d", (int)params->useRowMatchFinder);
ZSTD_window_update(&ms->window, src, srcSize);
ms->loadedDictEnd = params->forceWindow ? 0 : (U32)(iend - ms->window.base);
case ZSTD_greedy:
case ZSTD_lazy:
case ZSTD_lazy2:
- if (chunk >= HASH_READ_SIZE && ms->dedicatedDictSearch) {
- assert(chunk == remaining); /* must load everything in one go */
- ZSTD_dedicatedDictSearch_lazy_loadDictionary(ms, ichunk-HASH_READ_SIZE);
- } else if (chunk >= HASH_READ_SIZE) {
- ZSTD_insertAndFindFirstIndex(ms, ichunk-HASH_READ_SIZE);
+ if (chunk >= HASH_READ_SIZE) {
+ if (ms->dedicatedDictSearch) {
+ assert(chunk == remaining); /* must load everything in one go */
+ assert(ms->chainTable != NULL);
+ ZSTD_dedicatedDictSearch_lazy_loadDictionary(ms, ichunk-HASH_READ_SIZE);
+ } else {
+ assert(params->useRowMatchFinder != ZSTD_urm_auto);
+ if (params->useRowMatchFinder == ZSTD_urm_enableRowMatchFinder) {
+ size_t const tagTableSize = ((size_t)1 << params->cParams.hashLog) * sizeof(U16);
+ if (ip == src)
+ ZSTD_memset(ms->tagTable, 0, tagTableSize);
+ ZSTD_row_update(ms, ichunk-HASH_READ_SIZE);
+ DEBUGLOG(4, "Using row-based hash table for lazy dict");
+ } else {
+ ZSTD_insertAndFindFirstIndex(ms, ichunk-HASH_READ_SIZE);
+ DEBUGLOG(4, "Using chain-based hash table for lazy dict");
+ }
+ }
}
break;
const BYTE* const dictEnd = dictPtr + dictSize;
size_t dictID;
size_t eSize;
-
ZSTD_STATIC_ASSERT(HUF_WORKSPACE_SIZE >= (1<<MAX(MLFSELog,LLFSELog)));
assert(dictSize >= 8);
assert(MEM_readLE32(dictPtr) == ZSTD_MAGIC_DICTIONARY);
DEBUGLOG(5, "sizeof(ZSTD_CDict) : %u", (unsigned)sizeof(ZSTD_CDict));
return ZSTD_cwksp_alloc_size(sizeof(ZSTD_CDict))
+ ZSTD_cwksp_alloc_size(HUF_WORKSPACE_SIZE)
- + ZSTD_sizeof_matchState(&cParams, /* forCCtx */ 0)
+ /* enableDedicatedDictSearch == 1 ensures that CDict estimation will not be too small
+ * in case we are using DDS with row-hash. */
+ + ZSTD_sizeof_matchState(&cParams, ZSTD_resolveRowMatchFinderMode(ZSTD_urm_auto, &cParams),
+ /* enableDedicatedDictSearch */ 1, /* forCCtx */ 0)
+ (dictLoadMethod == ZSTD_dlm_byRef ? 0
: ZSTD_cwksp_alloc_size(ZSTD_cwksp_align(dictSize, sizeof(void *))));
}
&cdict->matchState,
&cdict->workspace,
¶ms.cParams,
+ params.useRowMatchFinder,
ZSTDcrp_makeClean,
ZSTDirp_reset,
ZSTD_resetTarget_CDict), "");
static ZSTD_CDict* ZSTD_createCDict_advanced_internal(size_t dictSize,
ZSTD_dictLoadMethod_e dictLoadMethod,
- ZSTD_compressionParameters cParams, ZSTD_customMem customMem)
+ ZSTD_compressionParameters cParams,
+ ZSTD_useRowMatchFinderMode_e useRowMatchFinder,
+ U32 enableDedicatedDictSearch,
+ ZSTD_customMem customMem)
{
if ((!customMem.customAlloc) ^ (!customMem.customFree)) return NULL;
{ size_t const workspaceSize =
ZSTD_cwksp_alloc_size(sizeof(ZSTD_CDict)) +
ZSTD_cwksp_alloc_size(HUF_WORKSPACE_SIZE) +
- ZSTD_sizeof_matchState(&cParams, /* forCCtx */ 0) +
+ ZSTD_sizeof_matchState(&cParams, useRowMatchFinder, enableDedicatedDictSearch, /* forCCtx */ 0) +
(dictLoadMethod == ZSTD_dlm_byRef ? 0
: ZSTD_cwksp_alloc_size(ZSTD_cwksp_align(dictSize, sizeof(void*))));
void* const workspace = ZSTD_customMalloc(workspaceSize, customMem);
ZSTD_cwksp_move(&cdict->workspace, &ws);
cdict->customMem = customMem;
cdict->compressionLevel = ZSTD_NO_CLEVEL; /* signals advanced API usage */
-
+ cdict->useRowMatchFinder = useRowMatchFinder;
return cdict;
}
}
&cctxParams, ZSTD_CONTENTSIZE_UNKNOWN, dictSize, ZSTD_cpm_createCDict);
}
+ DEBUGLOG(3, "ZSTD_createCDict_advanced2: DDS: %u", cctxParams.enableDedicatedDictSearch);
cctxParams.cParams = cParams;
+ cctxParams.useRowMatchFinder = ZSTD_resolveRowMatchFinderMode(cctxParams.useRowMatchFinder, &cParams);
cdict = ZSTD_createCDict_advanced_internal(dictSize,
dictLoadMethod, cctxParams.cParams,
+ cctxParams.useRowMatchFinder, cctxParams.enableDedicatedDictSearch,
customMem);
if (ZSTD_isError( ZSTD_initCDict_internal(cdict,
ZSTD_dictContentType_e dictContentType,
ZSTD_compressionParameters cParams)
{
- size_t const matchStateSize = ZSTD_sizeof_matchState(&cParams, /* forCCtx */ 0);
+ ZSTD_useRowMatchFinderMode_e const useRowMatchFinder = ZSTD_resolveRowMatchFinderMode(ZSTD_urm_auto, &cParams);
+ /* enableDedicatedDictSearch == 1 ensures matchstate is not too small in case this CDict will be used for DDS + row hash */
+ size_t const matchStateSize = ZSTD_sizeof_matchState(&cParams, useRowMatchFinder, /* enableDedicatedDictSearch */ 1, /* forCCtx */ 0);
size_t const neededSize = ZSTD_cwksp_alloc_size(sizeof(ZSTD_CDict))
+ (dictLoadMethod == ZSTD_dlm_byRef ? 0
: ZSTD_cwksp_alloc_size(ZSTD_cwksp_align(dictSize, sizeof(void*))))
ZSTD_CCtxParams_init(¶ms, 0);
params.cParams = cParams;
+ params.useRowMatchFinder = useRowMatchFinder;
+ cdict->useRowMatchFinder = useRowMatchFinder;
if (ZSTD_isError( ZSTD_initCDict_internal(cdict,
dict, dictSize,
params.splitBlocks = 1;
}
+ params.useRowMatchFinder = ZSTD_resolveRowMatchFinderMode(params.useRowMatchFinder, ¶ms.cParams);
+
#ifdef ZSTD_MULTITHREAD
if ((cctx->pledgedSrcSizePlusOne-1) <= ZSTDMT_JOBSIZE_MIN) {
params.nbWorkers = 0; /* do not invoke multi-threading when src size is too small */
else row = compressionLevel;
{ ZSTD_compressionParameters cp = ZSTD_defaultCParameters[tableID][row];
+ DEBUGLOG(5, "ZSTD_getCParams_internal selected tableID: %u row: %u strat: %u", tableID, row, (U32)cp.strategy);
/* acceleration factor */
if (compressionLevel < 0) {
int const clampedCompressionLevel = MAX(ZSTD_minCLevel(), compressionLevel);
} ZSTD_window_t;
typedef struct ZSTD_matchState_t ZSTD_matchState_t;
+
+#define ZSTD_ROW_HASH_CACHE_SIZE 8 /* Size of prefetching hash cache for row-based matchfinder */
+
struct ZSTD_matchState_t {
ZSTD_window_t window; /* State for window round buffer management */
U32 loadedDictEnd; /* index of end of dictionary, within context's referential.
*/
U32 nextToUpdate; /* index from which to continue table update */
U32 hashLog3; /* dispatch table for matches of len==3 : larger == faster, more memory */
+
+ U32 rowHashLog; /* For row-based matchfinder: Hashlog based on nb of rows in the hashTable.*/
+ U16* tagTable; /* For row-based matchFinder: A row-based table containing the hashes and head index. */
+ U32 hashCache[ZSTD_ROW_HASH_CACHE_SIZE]; /* For row-based matchFinder: a cache of hashes to improve speed */
+
U32* hashTable;
U32* hashTable3;
U32* chainTable;
+
int dedicatedDictSearch; /* Indicates whether this matchState is using the
* dedicated dictionary search structure.
*/
/* Block splitting */
int splitBlocks;
+ /* Param for deciding whether to use row-based matchfinder */
+ ZSTD_useRowMatchFinderMode_e useRowMatchFinder;
+
/* Internal use, for createCCtxParams() and freeCCtxParams() only */
ZSTD_customMem customMem;
}; /* typedef'd to ZSTD_CCtx_params within "zstd.h" */
typedef size_t (*ZSTD_blockCompressor) (
ZSTD_matchState_t* bs, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
-ZSTD_blockCompressor ZSTD_selectBlockCompressor(ZSTD_strategy strat, ZSTD_dictMode_e dictMode);
+ZSTD_blockCompressor ZSTD_selectBlockCompressor(ZSTD_strategy strat, ZSTD_useRowMatchFinderMode_e rowMatchfinderMode, ZSTD_dictMode_e dictMode);
MEM_STATIC U32 ZSTD_LLcode(U32 litLength)
}
}
-
-
-/* *********************************
-* Hash Chain
+/***********************************
+* Dedicated dict search
***********************************/
-#define NEXT_IN_CHAIN(d, mask) chainTable[(d) & (mask)]
-
-/* Update chains up to ip (excluded)
- Assumption : always within prefix (i.e. not within extDict) */
-FORCE_INLINE_TEMPLATE U32 ZSTD_insertAndFindFirstIndex_internal(
- ZSTD_matchState_t* ms,
- const ZSTD_compressionParameters* const cParams,
- const BYTE* ip, U32 const mls)
-{
- U32* const hashTable = ms->hashTable;
- const U32 hashLog = cParams->hashLog;
- U32* const chainTable = ms->chainTable;
- const U32 chainMask = (1 << cParams->chainLog) - 1;
- const BYTE* const base = ms->window.base;
- const U32 target = (U32)(ip - base);
- U32 idx = ms->nextToUpdate;
-
- while(idx < target) { /* catch up */
- size_t const h = ZSTD_hashPtr(base+idx, hashLog, mls);
- NEXT_IN_CHAIN(idx, chainMask) = hashTable[h];
- hashTable[h] = idx;
- idx++;
- }
-
- ms->nextToUpdate = target;
- return hashTable[ZSTD_hashPtr(ip, hashLog, mls)];
-}
-
-U32 ZSTD_insertAndFindFirstIndex(ZSTD_matchState_t* ms, const BYTE* ip) {
- const ZSTD_compressionParameters* const cParams = &ms->cParams;
- return ZSTD_insertAndFindFirstIndex_internal(ms, cParams, ip, ms->cParams.minMatch);
-}
void ZSTD_dedicatedDictSearch_lazy_loadDictionary(ZSTD_matchState_t* ms, const BYTE* const ip)
{
U32* const tmpChainTable = hashTable + ((size_t)1 << hashLog);
U32 const tmpChainSize = ((1 << ZSTD_LAZY_DDSS_BUCKET_LOG) - 1) << hashLog;
U32 const tmpMinChain = tmpChainSize < target ? target - tmpChainSize : idx;
-
U32 hashIdx;
assert(ms->cParams.chainLog <= 24);
ms->nextToUpdate = target;
}
+/* Returns the longest match length found in the dedicated dict search structure.
+ * If none are longer than the argument ml, then ml will be returned.
+ */
+FORCE_INLINE_TEMPLATE
+size_t ZSTD_dedicatedDictSearch_lazy_search(size_t* offsetPtr, size_t ml, U32 nbAttempts,
+ const ZSTD_matchState_t* const dms,
+ const BYTE* const ip, const BYTE* const iLimit,
+ const BYTE* const prefixStart, const U32 curr,
+ const U32 dictLimit, const size_t ddsIdx) {
+ const U32 ddsLowestIndex = dms->window.dictLimit;
+ const BYTE* const ddsBase = dms->window.base;
+ const BYTE* const ddsEnd = dms->window.nextSrc;
+ const U32 ddsSize = (U32)(ddsEnd - ddsBase);
+ const U32 ddsIndexDelta = dictLimit - ddsSize;
+ const U32 bucketSize = (1 << ZSTD_LAZY_DDSS_BUCKET_LOG);
+ const U32 bucketLimit = nbAttempts < bucketSize - 1 ? nbAttempts : bucketSize - 1;
+ U32 ddsAttempt;
+ U32 matchIndex;
+
+ for (ddsAttempt = 0; ddsAttempt < bucketSize - 1; ddsAttempt++) {
+ PREFETCH_L1(ddsBase + dms->hashTable[ddsIdx + ddsAttempt]);
+ }
+
+ {
+ U32 const chainPackedPointer = dms->hashTable[ddsIdx + bucketSize - 1];
+ U32 const chainIndex = chainPackedPointer >> 8;
+
+ PREFETCH_L1(&dms->chainTable[chainIndex]);
+ }
+
+ for (ddsAttempt = 0; ddsAttempt < bucketLimit; ddsAttempt++) {
+ size_t currentMl=0;
+ const BYTE* match;
+ matchIndex = dms->hashTable[ddsIdx + ddsAttempt];
+ match = ddsBase + matchIndex;
+
+ if (!matchIndex) {
+ return ml;
+ }
+
+ /* guaranteed by table construction */
+ (void)ddsLowestIndex;
+ assert(matchIndex >= ddsLowestIndex);
+ assert(match+4 <= ddsEnd);
+ if (MEM_read32(match) == MEM_read32(ip)) {
+ /* assumption : matchIndex <= dictLimit-4 (by table construction) */
+ currentMl = ZSTD_count_2segments(ip+4, match+4, iLimit, ddsEnd, prefixStart) + 4;
+ }
+
+ /* save best solution */
+ if (currentMl > ml) {
+ ml = currentMl;
+ *offsetPtr = curr - (matchIndex + ddsIndexDelta) + ZSTD_REP_MOVE;
+ if (ip+currentMl == iLimit) {
+ /* best possible, avoids read overflow on next attempt */
+ return ml;
+ }
+ }
+ }
+
+ {
+ U32 const chainPackedPointer = dms->hashTable[ddsIdx + bucketSize - 1];
+ U32 chainIndex = chainPackedPointer >> 8;
+ U32 const chainLength = chainPackedPointer & 0xFF;
+ U32 const chainAttempts = nbAttempts - ddsAttempt;
+ U32 const chainLimit = chainAttempts > chainLength ? chainLength : chainAttempts;
+ U32 chainAttempt;
+
+ for (chainAttempt = 0 ; chainAttempt < chainLimit; chainAttempt++) {
+ PREFETCH_L1(ddsBase + dms->chainTable[chainIndex + chainAttempt]);
+ }
+
+ for (chainAttempt = 0 ; chainAttempt < chainLimit; chainAttempt++, chainIndex++) {
+ size_t currentMl=0;
+ const BYTE* match;
+ matchIndex = dms->chainTable[chainIndex];
+ match = ddsBase + matchIndex;
+
+ /* guaranteed by table construction */
+ assert(matchIndex >= ddsLowestIndex);
+ assert(match+4 <= ddsEnd);
+ if (MEM_read32(match) == MEM_read32(ip)) {
+ /* assumption : matchIndex <= dictLimit-4 (by table construction) */
+ currentMl = ZSTD_count_2segments(ip+4, match+4, iLimit, ddsEnd, prefixStart) + 4;
+ }
+
+ /* save best solution */
+ if (currentMl > ml) {
+ ml = currentMl;
+ *offsetPtr = curr - (matchIndex + ddsIndexDelta) + ZSTD_REP_MOVE;
+ if (ip+currentMl == iLimit) break; /* best possible, avoids read overflow on next attempt */
+ }
+ }
+ }
+ return ml;
+}
+
+
+/* *********************************
+* Hash Chain
+***********************************/
+#define NEXT_IN_CHAIN(d, mask) chainTable[(d) & (mask)]
+
+/* Update chains up to ip (excluded)
+ Assumption : always within prefix (i.e. not within extDict) */
+FORCE_INLINE_TEMPLATE U32 ZSTD_insertAndFindFirstIndex_internal(
+ ZSTD_matchState_t* ms,
+ const ZSTD_compressionParameters* const cParams,
+ const BYTE* ip, U32 const mls)
+{
+ U32* const hashTable = ms->hashTable;
+ const U32 hashLog = cParams->hashLog;
+ U32* const chainTable = ms->chainTable;
+ const U32 chainMask = (1 << cParams->chainLog) - 1;
+ const BYTE* const base = ms->window.base;
+ const U32 target = (U32)(ip - base);
+ U32 idx = ms->nextToUpdate;
+
+ while(idx < target) { /* catch up */
+ size_t const h = ZSTD_hashPtr(base+idx, hashLog, mls);
+ NEXT_IN_CHAIN(idx, chainMask) = hashTable[h];
+ hashTable[h] = idx;
+ idx++;
+ }
+
+ ms->nextToUpdate = target;
+ return hashTable[ZSTD_hashPtr(ip, hashLog, mls)];
+}
+
+U32 ZSTD_insertAndFindFirstIndex(ZSTD_matchState_t* ms, const BYTE* ip) {
+ const ZSTD_compressionParameters* const cParams = &ms->cParams;
+ return ZSTD_insertAndFindFirstIndex_internal(ms, cParams, ip, ms->cParams.minMatch);
+}
/* inlining is important to hardwire a hot branch (template emulation) */
FORCE_INLINE_TEMPLATE
}
if (dictMode == ZSTD_dedicatedDictSearch) {
- const U32 ddsLowestIndex = dms->window.dictLimit;
- const BYTE* const ddsBase = dms->window.base;
- const BYTE* const ddsEnd = dms->window.nextSrc;
- const U32 ddsSize = (U32)(ddsEnd - ddsBase);
- const U32 ddsIndexDelta = dictLimit - ddsSize;
- const U32 bucketSize = (1 << ZSTD_LAZY_DDSS_BUCKET_LOG);
- const U32 bucketLimit = nbAttempts < bucketSize - 1 ? nbAttempts : bucketSize - 1;
- U32 ddsAttempt;
-
- for (ddsAttempt = 0; ddsAttempt < bucketSize - 1; ddsAttempt++) {
- PREFETCH_L1(ddsBase + dms->hashTable[ddsIdx + ddsAttempt]);
- }
-
- {
- U32 const chainPackedPointer = dms->hashTable[ddsIdx + bucketSize - 1];
- U32 const chainIndex = chainPackedPointer >> 8;
-
- PREFETCH_L1(&dms->chainTable[chainIndex]);
- }
-
- for (ddsAttempt = 0; ddsAttempt < bucketLimit; ddsAttempt++) {
- size_t currentMl=0;
- const BYTE* match;
- matchIndex = dms->hashTable[ddsIdx + ddsAttempt];
- match = ddsBase + matchIndex;
-
- if (!matchIndex) {
- return ml;
- }
-
- /* guaranteed by table construction */
- (void)ddsLowestIndex;
- assert(matchIndex >= ddsLowestIndex);
- assert(match+4 <= ddsEnd);
- if (MEM_read32(match) == MEM_read32(ip)) {
- /* assumption : matchIndex <= dictLimit-4 (by table construction) */
- currentMl = ZSTD_count_2segments(ip+4, match+4, iLimit, ddsEnd, prefixStart) + 4;
- }
-
- /* save best solution */
- if (currentMl > ml) {
- ml = currentMl;
- *offsetPtr = curr - (matchIndex + ddsIndexDelta) + ZSTD_REP_MOVE;
- if (ip+currentMl == iLimit) {
- /* best possible, avoids read overflow on next attempt */
- return ml;
- }
- }
- }
-
- {
- U32 const chainPackedPointer = dms->hashTable[ddsIdx + bucketSize - 1];
- U32 chainIndex = chainPackedPointer >> 8;
- U32 const chainLength = chainPackedPointer & 0xFF;
- U32 const chainAttempts = nbAttempts - ddsAttempt;
- U32 const chainLimit = chainAttempts > chainLength ? chainLength : chainAttempts;
- U32 chainAttempt;
-
- for (chainAttempt = 0 ; chainAttempt < chainLimit; chainAttempt++) {
- PREFETCH_L1(ddsBase + dms->chainTable[chainIndex + chainAttempt]);
- }
-
- for (chainAttempt = 0 ; chainAttempt < chainLimit; chainAttempt++, chainIndex++) {
- size_t currentMl=0;
- const BYTE* match;
- matchIndex = dms->chainTable[chainIndex];
- match = ddsBase + matchIndex;
-
- /* guaranteed by table construction */
- assert(matchIndex >= ddsLowestIndex);
- assert(match+4 <= ddsEnd);
- if (MEM_read32(match) == MEM_read32(ip)) {
- /* assumption : matchIndex <= dictLimit-4 (by table construction) */
- currentMl = ZSTD_count_2segments(ip+4, match+4, iLimit, ddsEnd, prefixStart) + 4;
- }
-
- /* save best solution */
- if (currentMl > ml) {
- ml = currentMl;
- *offsetPtr = curr - (matchIndex + ddsIndexDelta) + ZSTD_REP_MOVE;
- if (ip+currentMl == iLimit) break; /* best possible, avoids read overflow on next attempt */
- }
- }
- }
+ ml = ZSTD_dedicatedDictSearch_lazy_search(offsetPtr, ml, nbAttempts, dms,
+ ip, iLimit, prefixStart, curr, dictLimit, ddsIdx);
} else if (dictMode == ZSTD_dictMatchState) {
const U32* const dmsChainTable = dms->chainTable;
const U32 dmsChainSize = (1 << dms->cParams.chainLog);
}
}
+/* *********************************
+* (SIMD) Row-based matchfinder
+***********************************/
+/* Constants for row-based hash */
+#define ZSTD_ROW_HASH_TAG_OFFSET 1 /* byte offset of hashes in the match state's tagTable from the beginning of a row */
+#define ZSTD_ROW_HASH_TAG_BITS 8 /* nb bits to use for the tag */
+#define ZSTD_ROW_HASH_TAG_MASK ((1u << ZSTD_ROW_HASH_TAG_BITS) - 1)
+
+#define ZSTD_ROW_HASH_CACHE_MASK (ZSTD_ROW_HASH_CACHE_SIZE - 1)
+
+typedef U32 ZSTD_VecMask; /* Clarifies when we are interacting with a U32 representing a mask of matches */
+
+#if !defined(ZSTD_NO_INTRINSICS) && defined(__SSE2__) /* SIMD SSE version */
+
+#include <emmintrin.h>
+typedef __m128i ZSTD_Vec128;
+
+/* Returns a 128-bit container with 128-bits from src */
+static ZSTD_Vec128 ZSTD_Vec128_read(const void* const src) {
+ return _mm_loadu_si128((ZSTD_Vec128 const*)src);
+}
+
+/* Returns a ZSTD_Vec128 with the byte "val" packed 16 times */
+static ZSTD_Vec128 ZSTD_Vec128_set8(BYTE val) {
+ return _mm_set1_epi8((char)val);
+}
+
+/* Do byte-by-byte comparison result of x and y. Then collapse 128-bit resultant mask
+ * into a 32-bit mask that is the MSB of each byte.
+ * */
+static ZSTD_VecMask ZSTD_Vec128_cmpMask8(ZSTD_Vec128 x, ZSTD_Vec128 y) {
+ return (ZSTD_VecMask)_mm_movemask_epi8(_mm_cmpeq_epi8(x, y));
+}
+
+typedef struct {
+ __m128i fst;
+ __m128i snd;
+} ZSTD_Vec256;
+
+static ZSTD_Vec256 ZSTD_Vec256_read(const void* const ptr) {
+ ZSTD_Vec256 v;
+ v.fst = ZSTD_Vec128_read(ptr);
+ v.snd = ZSTD_Vec128_read((ZSTD_Vec128 const*)ptr + 1);
+ return v;
+}
+
+static ZSTD_Vec256 ZSTD_Vec256_set8(BYTE val) {
+ ZSTD_Vec256 v;
+ v.fst = ZSTD_Vec128_set8(val);
+ v.snd = ZSTD_Vec128_set8(val);
+ return v;
+}
+
+static ZSTD_VecMask ZSTD_Vec256_cmpMask8(ZSTD_Vec256 x, ZSTD_Vec256 y) {
+ ZSTD_VecMask fstMask;
+ ZSTD_VecMask sndMask;
+ fstMask = ZSTD_Vec128_cmpMask8(x.fst, y.fst);
+ sndMask = ZSTD_Vec128_cmpMask8(x.snd, y.snd);
+ return fstMask | (sndMask << 16);
+}
+
+#elif !defined(ZSTD_NO_INTRINSICS) && defined(__ARM_NEON) /* SIMD ARM NEON Version */
+
+#include <arm_neon.h>
+typedef uint8x16_t ZSTD_Vec128;
+
+static ZSTD_Vec128 ZSTD_Vec128_read(const void* const src) {
+ return vld1q_u8((const BYTE* const)src);
+}
+
+static ZSTD_Vec128 ZSTD_Vec128_set8(BYTE val) {
+ return vdupq_n_u8(val);
+}
+
+/* Mimics '_mm_movemask_epi8()' from SSE */
+static U32 ZSTD_vmovmaskq_u8(ZSTD_Vec128 val) {
+ /* Shift out everything but the MSB bits in each byte */
+ uint16x8_t highBits = vreinterpretq_u16_u8(vshrq_n_u8(val, 7));
+ /* Merge the even lanes together with vsra (right shift and add) */
+ uint32x4_t paired16 = vreinterpretq_u32_u16(vsraq_n_u16(highBits, highBits, 7));
+ uint64x2_t paired32 = vreinterpretq_u64_u32(vsraq_n_u32(paired16, paired16, 14));
+ uint8x16_t paired64 = vreinterpretq_u8_u64(vsraq_n_u64(paired32, paired32, 28));
+ /* Extract the low 8 bits from each lane, merge */
+ return vgetq_lane_u8(paired64, 0) | ((U32)vgetq_lane_u8(paired64, 8) << 8);
+}
+
+static ZSTD_VecMask ZSTD_Vec128_cmpMask8(ZSTD_Vec128 x, ZSTD_Vec128 y) {
+ return (ZSTD_VecMask)ZSTD_vmovmaskq_u8(vceqq_u8(x, y));
+}
+
+typedef struct {
+ uint8x16_t fst;
+ uint8x16_t snd;
+} ZSTD_Vec256;
+
+static ZSTD_Vec256 ZSTD_Vec256_read(const void* const ptr) {
+ ZSTD_Vec256 v;
+ v.fst = ZSTD_Vec128_read(ptr);
+ v.snd = ZSTD_Vec128_read((ZSTD_Vec128 const*)ptr + 1);
+ return v;
+}
+
+static ZSTD_Vec256 ZSTD_Vec256_set8(BYTE val) {
+ ZSTD_Vec256 v;
+ v.fst = ZSTD_Vec128_set8(val);
+ v.snd = ZSTD_Vec128_set8(val);
+ return v;
+}
+
+static ZSTD_VecMask ZSTD_Vec256_cmpMask8(ZSTD_Vec256 x, ZSTD_Vec256 y) {
+ ZSTD_VecMask fstMask;
+ ZSTD_VecMask sndMask;
+ fstMask = ZSTD_Vec128_cmpMask8(x.fst, y.fst);
+ sndMask = ZSTD_Vec128_cmpMask8(x.snd, y.snd);
+ return fstMask | (sndMask << 16);
+}
+
+#else /* Scalar fallback version */
+
+#define VEC128_NB_SIZE_T (16 / sizeof(size_t))
+typedef struct {
+ size_t vec[VEC128_NB_SIZE_T];
+} ZSTD_Vec128;
+
+static ZSTD_Vec128 ZSTD_Vec128_read(const void* const src) {
+ ZSTD_Vec128 ret;
+ ZSTD_memcpy(ret.vec, src, VEC128_NB_SIZE_T*sizeof(size_t));
+ return ret;
+}
+
+static ZSTD_Vec128 ZSTD_Vec128_set8(BYTE val) {
+ ZSTD_Vec128 ret = { {0} };
+ int startBit = sizeof(size_t) * 8 - 8;
+ for (;startBit >= 0; startBit -= 8) {
+ unsigned j = 0;
+ for (;j < VEC128_NB_SIZE_T; ++j) {
+ ret.vec[j] |= ((size_t)val << startBit);
+ }
+ }
+ return ret;
+}
+
+/* Compare x to y, byte by byte, generating a "matches" bitfield */
+static ZSTD_VecMask ZSTD_Vec128_cmpMask8(ZSTD_Vec128 x, ZSTD_Vec128 y) {
+ ZSTD_VecMask res = 0;
+ unsigned i = 0;
+ unsigned l = 0;
+ for (; i < VEC128_NB_SIZE_T; ++i) {
+ const size_t cmp1 = x.vec[i];
+ const size_t cmp2 = y.vec[i];
+ unsigned j = 0;
+ for (; j < sizeof(size_t); ++j, ++l) {
+ if (((cmp1 >> j*8) & 0xFF) == ((cmp2 >> j*8) & 0xFF)) {
+ res |= ((U32)1 << (j+i*sizeof(size_t)));
+ }
+ }
+ }
+ return res;
+}
+
+#define VEC256_NB_SIZE_T 2*VEC128_NB_SIZE_T
+typedef struct {
+ size_t vec[VEC256_NB_SIZE_T];
+} ZSTD_Vec256;
+
+static ZSTD_Vec256 ZSTD_Vec256_read(const void* const src) {
+ ZSTD_Vec256 ret;
+ ZSTD_memcpy(ret.vec, src, VEC256_NB_SIZE_T*sizeof(size_t));
+ return ret;
+}
+
+static ZSTD_Vec256 ZSTD_Vec256_set8(BYTE val) {
+ ZSTD_Vec256 ret = { {0} };
+ int startBit = sizeof(size_t) * 8 - 8;
+ for (;startBit >= 0; startBit -= 8) {
+ unsigned j = 0;
+ for (;j < VEC256_NB_SIZE_T; ++j) {
+ ret.vec[j] |= ((size_t)val << startBit);
+ }
+ }
+ return ret;
+}
+
+/* Compare x to y, byte by byte, generating a "matches" bitfield */
+static ZSTD_VecMask ZSTD_Vec256_cmpMask8(ZSTD_Vec256 x, ZSTD_Vec256 y) {
+ ZSTD_VecMask res = 0;
+ unsigned i = 0;
+ unsigned l = 0;
+ for (; i < VEC256_NB_SIZE_T; ++i) {
+ const size_t cmp1 = x.vec[i];
+ const size_t cmp2 = y.vec[i];
+ unsigned j = 0;
+ for (; j < sizeof(size_t); ++j, ++l) {
+ if (((cmp1 >> j*8) & 0xFF) == ((cmp2 >> j*8) & 0xFF)) {
+ res |= ((U32)1 << (j+i*sizeof(size_t)));
+ }
+ }
+ }
+ return res;
+}
+
+#endif /* !defined(ZSTD_NO_INTRINSICS) && defined(__SSE2__) */
+
+/* ZSTD_VecMask_next():
+ * Starting from the LSB, returns the idx of the next non-zero bit.
+ * Basically counting the nb of trailing zeroes.
+ */
+static U32 ZSTD_VecMask_next(ZSTD_VecMask val) {
+# if defined(_MSC_VER) /* Visual */
+ unsigned long r=0;
+ return _BitScanForward(&r, val) ? (U32)r : 0;
+# elif defined(__GNUC__) && (__GNUC__ >= 3)
+ return (U32)__builtin_ctz(val);
+# else
+ /* Software ctz version: http://graphics.stanford.edu/~seander/bithacks.html#ZerosOnRightMultLookup */
+ static const U32 multiplyDeBruijnBitPosition[32] =
+ {
+ 0, 1, 28, 2, 29, 14, 24, 3, 30, 22, 20, 15, 25, 17, 4, 8,
+ 31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9
+ };
+ return multiplyDeBruijnBitPosition[((U32)((v & -(int)v) * 0x077CB531U)) >> 27];
+# endif
+}
+
+/* ZSTD_VecMask_rotateRight():
+ * Rotates a bitfield to the right by "rotation" bits.
+ * If the rotation is greater than totalBits, the returned mask is 0.
+ */
+FORCE_INLINE_TEMPLATE ZSTD_VecMask
+ZSTD_VecMask_rotateRight(ZSTD_VecMask mask, U32 const rotation, U32 const totalBits) {
+ if (rotation == 0)
+ return mask;
+ switch (totalBits) {
+ default:
+ assert(0);
+ case 16:
+ return (mask >> rotation) | (U16)(mask << (16 - rotation));
+ case 32:
+ return (mask >> rotation) | (U32)(mask << (32 - rotation));
+ }
+}
+
+/* ZSTD_row_nextIndex():
+ * Returns the next index to insert at within a tagTable row, and updates the "head"
+ * value to reflect the update. Essentially cycles backwards from [0, {entries per row})
+ */
+FORCE_INLINE_TEMPLATE U32 ZSTD_row_nextIndex(BYTE* const tagRow, U32 const rowMask) {
+ U32 const next = (*tagRow - 1) & rowMask;
+ *tagRow = (BYTE)next;
+ return next;
+}
+
+/* ZSTD_isAligned():
+ * Checks that a pointer is aligned to "align" bytes which must be a power of 2.
+ */
+MEM_STATIC int ZSTD_isAligned(void const* ptr, size_t align) {
+ assert((align & (align - 1)) == 0);
+ return (((size_t)ptr) & (align - 1)) == 0;
+}
+
+/* ZSTD_row_prefetch():
+ * Performs prefetching for the hashTable and tagTable at a given row.
+ */
+FORCE_INLINE_TEMPLATE void ZSTD_row_prefetch(U32 const* hashTable, U16 const* tagTable, U32 const relRow, U32 const rowLog) {
+ PREFETCH_L1(hashTable + relRow);
+ if (rowLog == 5) {
+ PREFETCH_L1(hashTable + relRow + 16);
+ }
+ PREFETCH_L1(tagTable + relRow);
+ assert(rowLog == 4 || rowLog == 5);
+ assert(ZSTD_isAligned(hashTable + relRow, 64)); /* prefetched hash row always 64-byte aligned */
+ assert(ZSTD_isAligned(tagTable + relRow, (size_t)1 << rowLog)); /* prefetched tagRow sits on a multiple of 32 or 64 bytes */
+}
+
+/* ZSTD_row_fillHashCache():
+ * Fill up the hash cache starting at idx, prefetching ZSTD_ROW_HASH_CACHE_SIZE entries.
+ */
+static void ZSTD_row_fillHashCache(ZSTD_matchState_t* ms, const BYTE* base,
+ U32 const rowLog, U32 const mls,
+ U32 idx, const BYTE* const iend)
+{
+ U32 const* const hashTable = ms->hashTable;
+ U16 const* const tagTable = ms->tagTable;
+ U32 const hashLog = ms->rowHashLog;
+ U32 const maxElemsToPrefetch = (base + idx) >= iend ? 0 : (U32)(iend - (base + idx));
+ U32 const lim = idx + MIN(ZSTD_ROW_HASH_CACHE_SIZE, maxElemsToPrefetch);
+
+ for (; idx < lim; ++idx) {
+ U32 const hash = (U32)ZSTD_hashPtr(base + idx, hashLog + ZSTD_ROW_HASH_TAG_BITS, mls);
+ U32 const row = (hash >> ZSTD_ROW_HASH_TAG_BITS) << rowLog;
+ ZSTD_row_prefetch(hashTable, tagTable, row, rowLog);
+ ms->hashCache[idx & ZSTD_ROW_HASH_CACHE_MASK] = hash;
+ }
+
+ DEBUGLOG(6, "ZSTD_row_fillHashCache(): [%u %u %u %u %u %u %u %u]", ms->hashCache[0], ms->hashCache[1],
+ ms->hashCache[2], ms->hashCache[3], ms->hashCache[4],
+ ms->hashCache[5], ms->hashCache[6], ms->hashCache[7]);
+}
+
+/* ZSTD_row_nextCachedHash():
+ * Returns the hash of base + idx, and replaces the hash in the hash cache with the byte at
+ * base + idx + ZSTD_ROW_HASH_CACHE_SIZE. Also prefetches the appropriate rows from hashTable and tagTable.
+ */
+FORCE_INLINE_TEMPLATE U32 ZSTD_row_nextCachedHash(U32* cache, U32 const* hashTable,
+ U16 const* tagTable, BYTE const* base,
+ U32 idx, U32 const hashLog,
+ U32 const rowLog, U32 const mls)
+{
+ U32 const newHash = (U32)ZSTD_hashPtr(base+idx+ZSTD_ROW_HASH_CACHE_SIZE, hashLog + ZSTD_ROW_HASH_TAG_BITS, mls);
+ U32 const row = (newHash >> ZSTD_ROW_HASH_TAG_BITS) << rowLog;
+ ZSTD_row_prefetch(hashTable, tagTable, row, rowLog);
+ { U32 const hash = cache[idx & ZSTD_ROW_HASH_CACHE_MASK];
+ cache[idx & ZSTD_ROW_HASH_CACHE_MASK] = newHash;
+ return hash;
+ }
+}
+
+/* ZSTD_row_update_internal():
+ * Inserts the byte at ip into the appropriate position in the hash table.
+ * Determines the relative row, and the position within the {16, 32} entry row to insert at.
+ */
+FORCE_INLINE_TEMPLATE void ZSTD_row_update_internal(ZSTD_matchState_t* ms, const BYTE* ip,
+ U32 const mls, U32 const rowLog,
+ U32 const rowMask, U32 const useCache)
+{
+ U32* const hashTable = ms->hashTable;
+ U16* const tagTable = ms->tagTable;
+ U32 const hashLog = ms->rowHashLog;
+ const BYTE* const base = ms->window.base;
+ const U32 target = (U32)(ip - base);
+ U32 idx = ms->nextToUpdate;
+
+ DEBUGLOG(6, "ZSTD_row_update_internal(): nextToUpdate=%u, current=%u", idx, target);
+ for (; idx < target; ++idx) {
+ U32 const hash = useCache ? ZSTD_row_nextCachedHash(ms->hashCache, hashTable, tagTable, base, idx, hashLog, rowLog, mls)
+ : (U32)ZSTD_hashPtr(base + idx, hashLog + ZSTD_ROW_HASH_TAG_BITS, mls);
+ U32 const relRow = (hash >> ZSTD_ROW_HASH_TAG_BITS) << rowLog;
+ U32* const row = hashTable + relRow;
+ BYTE* tagRow = (BYTE*)(tagTable + relRow); /* Though tagTable is laid out as a table of U16, each tag is only 1 byte.
+ Explicit cast allows us to get exact desired position within each row */
+ U32 const pos = ZSTD_row_nextIndex(tagRow, rowMask);
+
+ assert(hash == ZSTD_hashPtr(base + idx, hashLog + ZSTD_ROW_HASH_TAG_BITS, mls));
+ ((BYTE*)tagRow)[pos + ZSTD_ROW_HASH_TAG_OFFSET] = hash & ZSTD_ROW_HASH_TAG_MASK;
+ row[pos] = idx;
+ }
+ ms->nextToUpdate = target;
+}
+
+/* ZSTD_row_update():
+ * External wrapper for ZSTD_row_update_internal(). Used for filling the hashtable during dictionary
+ * processing.
+ */
+void ZSTD_row_update(ZSTD_matchState_t* const ms, const BYTE* ip) {
+ const U32 rowLog = ms->cParams.searchLog < 5 ? 4 : 5;
+ const U32 rowMask = (1u << rowLog) - 1;
+ const U32 mls = MIN(ms->cParams.minMatch, 6 /* mls caps out at 6 */);
+
+ DEBUGLOG(5, "ZSTD_row_update(), rowLog=%u", rowLog);
+ ZSTD_row_update_internal(ms, ip, mls, rowLog, rowMask, 0 /* dont use cache */);
+}
+
+/* Returns a ZSTD_VecMask (U32) that has the nth bit set to 1 if the newly-computed "tag" matches
+ * the hash at the nth position in a row of the tagTable.
+ */
+FORCE_INLINE_TEMPLATE
+ZSTD_VecMask ZSTD_row_getMatchMask(const BYTE* const tagRow, const BYTE tag, const U32 head, const U32 rowEntries) {
+ ZSTD_VecMask matches = 0;
+ if (rowEntries == 16) {
+ ZSTD_Vec128 hashes = ZSTD_Vec128_read(tagRow + ZSTD_ROW_HASH_TAG_OFFSET);
+ ZSTD_Vec128 expandedTags = ZSTD_Vec128_set8(tag);
+ matches = ZSTD_Vec128_cmpMask8(hashes, expandedTags);
+ } else if (rowEntries == 32) {
+ ZSTD_Vec256 hashes = ZSTD_Vec256_read(tagRow + ZSTD_ROW_HASH_TAG_OFFSET);
+ ZSTD_Vec256 expandedTags = ZSTD_Vec256_set8(tag);
+ matches = ZSTD_Vec256_cmpMask8(hashes, expandedTags);
+ } else {
+ assert(0);
+ }
+ /* Each row is a circular buffer beginning at the value of "head". So we must rotate the "matches" bitfield
+ to match up with the actual layout of the entries within the hashTable */
+ return ZSTD_VecMask_rotateRight(matches, head, rowEntries);
+}
+
+/* The high-level approach of the SIMD row based match finder is as follows:
+ * - Figure out where to insert the new entry:
+ * - Generate a hash from a byte along with an additional 1-byte "short hash". The additional byte is our "tag"
+ * - The hashTable is effectively split into groups or "rows" of 16 or 32 entries of U32, and the hash determines
+ * which row to insert into.
+ * - Determine the correct position within the row to insert the entry into. Each row of 16 or 32 can
+ * be considered as a circular buffer with a "head" index that resides in the tagTable.
+ * - Also insert the "tag" into the equivalent row and position in the tagTable.
+ * - Note: The tagTable has 17 or 33 1-byte entries per row, due to 16 or 32 tags, and 1 "head" entry.
+ * The 17 or 33 entry rows are spaced out to occur every 32 or 64 bytes, respectively,
+ * for alignment/performance reasons, leaving some bytes unused.
+ * - Use SIMD to efficiently compare the tags in the tagTable to the 1-byte "short hash" and
+ * generate a bitfield that we can cycle through to check the collisions in the hash table.
+ * - Pick the longest match.
+ */
+FORCE_INLINE_TEMPLATE
+size_t ZSTD_RowFindBestMatch_generic (
+ ZSTD_matchState_t* ms,
+ const BYTE* const ip, const BYTE* const iLimit,
+ size_t* offsetPtr,
+ const U32 mls, const ZSTD_dictMode_e dictMode,
+ const U32 rowLog)
+{
+ U32* const hashTable = ms->hashTable;
+ U16* const tagTable = ms->tagTable;
+ U32* const hashCache = ms->hashCache;
+ const U32 hashLog = ms->rowHashLog;
+ const ZSTD_compressionParameters* const cParams = &ms->cParams;
+ const BYTE* const base = ms->window.base;
+ const BYTE* const dictBase = ms->window.dictBase;
+ const U32 dictLimit = ms->window.dictLimit;
+ const BYTE* const prefixStart = base + dictLimit;
+ const BYTE* const dictEnd = dictBase + dictLimit;
+ const U32 curr = (U32)(ip-base);
+ const U32 maxDistance = 1U << cParams->windowLog;
+ const U32 lowestValid = ms->window.lowLimit;
+ const U32 withinMaxDistance = (curr - lowestValid > maxDistance) ? curr - maxDistance : lowestValid;
+ const U32 isDictionary = (ms->loadedDictEnd != 0);
+ const U32 lowLimit = isDictionary ? lowestValid : withinMaxDistance;
+ const U32 rowEntries = (1U << rowLog);
+ const U32 rowMask = rowEntries - 1;
+ const U32 cappedSearchLog = MIN(cParams->searchLog, rowLog); /* nb of searches is capped at nb entries per row */
+ U32 nbAttempts = 1U << cappedSearchLog;
+ size_t ml=4-1;
+
+ /* DMS/DDS variables that may be referenced laster */
+ const ZSTD_matchState_t* const dms = ms->dictMatchState;
+ size_t ddsIdx;
+ U32 ddsExtraAttempts; /* cctx hash tables are limited in searches, but allow extra searches into DDS */
+ U32 dmsTag;
+ U32* dmsRow;
+ BYTE* dmsTagRow;
+
+ if (dictMode == ZSTD_dedicatedDictSearch) {
+ const U32 ddsHashLog = dms->cParams.hashLog - ZSTD_LAZY_DDSS_BUCKET_LOG;
+ { /* Prefetch DDS hashtable entry */
+ ddsIdx = ZSTD_hashPtr(ip, ddsHashLog, mls) << ZSTD_LAZY_DDSS_BUCKET_LOG;
+ PREFETCH_L1(&dms->hashTable[ddsIdx]);
+ }
+ ddsExtraAttempts = cParams->searchLog > rowLog ? 1U << (cParams->searchLog - 5) : 0;
+ }
+
+ if (dictMode == ZSTD_dictMatchState) {
+ /* Prefetch DMS rows */
+ U32* const dmsHashTable = dms->hashTable;
+ U16* const dmsTagTable = dms->tagTable;
+ U32 const dmsHash = (U32)ZSTD_hashPtr(ip, dms->rowHashLog + ZSTD_ROW_HASH_TAG_BITS, mls);
+ U32 const dmsRelRow = (dmsHash >> ZSTD_ROW_HASH_TAG_BITS) << rowLog;
+ dmsTag = dmsHash & ZSTD_ROW_HASH_TAG_MASK;
+ dmsTagRow = (BYTE*)(dmsTagTable + dmsRelRow);
+ dmsRow = dmsHashTable + dmsRelRow;
+ ZSTD_row_prefetch(dmsHashTable, dmsTagTable, dmsRelRow, rowLog);
+ }
+
+ /* Update the hashTable and tagTable up to (but not including) ip */
+ ZSTD_row_update_internal(ms, ip, mls, rowLog, rowMask, 1 /* useCache */);
+ { /* Get the hash for ip, compute the appropriate row */
+ U32 const hash = ZSTD_row_nextCachedHash(hashCache, hashTable, tagTable, base, curr, hashLog, rowLog, mls);
+ U32 const relRow = (hash >> ZSTD_ROW_HASH_TAG_BITS) << rowLog;
+ U32 const tag = hash & ZSTD_ROW_HASH_TAG_MASK;
+ U32* const row = hashTable + relRow;
+ BYTE* tagRow = (BYTE*)(tagTable + relRow);
+ U32 const head = *tagRow & rowMask;
+ U32 matchBuffer[32 /* maximum nb entries per row */];
+ size_t numMatches = 0;
+ size_t currMatch = 0;
+ ZSTD_VecMask matches = ZSTD_row_getMatchMask(tagRow, (BYTE)tag, head, rowEntries);
+
+ /* Cycle through the matches and prefetch */
+ for (; (matches > 0) && (nbAttempts > 0); --nbAttempts, matches &= (matches - 1)) {
+ U32 const matchPos = (head + ZSTD_VecMask_next(matches)) & rowMask;
+ U32 const matchIndex = row[matchPos];
+ assert(numMatches < rowEntries);
+ if (matchIndex < lowLimit)
+ break;
+ if ((dictMode != ZSTD_extDict) || matchIndex >= dictLimit) {
+ PREFETCH_L1(base + matchIndex);
+ } else {
+ PREFETCH_L1(dictBase + matchIndex);
+ }
+ matchBuffer[numMatches++] = matchIndex;
+ }
+
+ /* Speed opt: insert current byte into hashtable too. This allows us to avoid one iteration of the loop
+ in ZSTD_row_update_internal() at the next search. */
+ {
+ U32 const pos = ZSTD_row_nextIndex(tagRow, rowMask);
+ tagRow[pos + ZSTD_ROW_HASH_TAG_OFFSET] = (BYTE)tag;
+ row[pos] = ms->nextToUpdate++;
+ }
+
+ /* Return the longest match */
+ for (; currMatch < numMatches; ++currMatch) {
+ U32 const matchIndex = matchBuffer[currMatch];
+ size_t currentMl=0;
+ assert(matchIndex < curr);
+ assert(matchIndex >= lowLimit);
+
+ if ((dictMode != ZSTD_extDict) || matchIndex >= dictLimit) {
+ const BYTE* const match = base + matchIndex;
+ assert(matchIndex >= dictLimit); /* ensures this is true if dictMode != ZSTD_extDict */
+ if (match[ml] == ip[ml]) /* potentially better */
+ currentMl = ZSTD_count(ip, match, iLimit);
+ } else {
+ const BYTE* const match = dictBase + matchIndex;
+ assert(match+4 <= dictEnd);
+ if (MEM_read32(match) == MEM_read32(ip)) /* assumption : matchIndex <= dictLimit-4 (by table construction) */
+ currentMl = ZSTD_count_2segments(ip+4, match+4, iLimit, dictEnd, prefixStart) + 4;
+ }
+
+ /* Save best solution */
+ if (currentMl > ml) {
+ ml = currentMl;
+ *offsetPtr = curr - matchIndex + ZSTD_REP_MOVE;
+ if (ip+currentMl == iLimit) break; /* best possible, avoids read overflow on next attempt */
+ }
+ }
+ }
+
+ if (dictMode == ZSTD_dedicatedDictSearch) {
+ ml = ZSTD_dedicatedDictSearch_lazy_search(offsetPtr, ml, nbAttempts + ddsExtraAttempts, dms,
+ ip, iLimit, prefixStart, curr, dictLimit, ddsIdx);
+ } else if (dictMode == ZSTD_dictMatchState) {
+ /* TODO: Measure and potentially add prefetching to DMS */
+ const U32 dmsLowestIndex = dms->window.dictLimit;
+ const BYTE* const dmsBase = dms->window.base;
+ const BYTE* const dmsEnd = dms->window.nextSrc;
+ const U32 dmsSize = (U32)(dmsEnd - dmsBase);
+ const U32 dmsIndexDelta = dictLimit - dmsSize;
+
+ { /* Get the hash for ip, compute the appropriate row */
+ U32 const head = *dmsTagRow & rowMask;
+ U32 matchBuffer[32 /* maximum nb row entries */];
+ size_t numMatches = 0;
+ size_t currMatch = 0;
+ ZSTD_VecMask matches = ZSTD_row_getMatchMask(dmsTagRow, (BYTE)dmsTag, head, rowEntries);
+
+ for (; (matches > 0) && (nbAttempts > 0); --nbAttempts, matches &= (matches - 1)) {
+ U32 const matchPos = (head + ZSTD_VecMask_next(matches)) & rowMask;
+ U32 const matchIndex = dmsRow[matchPos];
+ if (matchIndex < dmsLowestIndex)
+ break;
+ PREFETCH_L1(dmsBase + matchIndex);
+ matchBuffer[numMatches++] = matchIndex;
+ }
+
+ /* Return the longest match */
+ for (; currMatch < numMatches; ++currMatch) {
+ U32 const matchIndex = matchBuffer[currMatch];
+ size_t currentMl=0;
+ assert(matchIndex >= dmsLowestIndex);
+ assert(matchIndex < curr);
+
+ { const BYTE* const match = dmsBase + matchIndex;
+ assert(match+4 <= dmsEnd);
+ if (MEM_read32(match) == MEM_read32(ip))
+ currentMl = ZSTD_count_2segments(ip+4, match+4, iLimit, dmsEnd, prefixStart) + 4;
+ }
+
+ if (currentMl > ml) {
+ ml = currentMl;
+ *offsetPtr = curr - (matchIndex + dmsIndexDelta) + ZSTD_REP_MOVE;
+ if (ip+currentMl == iLimit) break;
+ }
+ }
+ }
+ }
+ return ml;
+}
+
+/* Inlining is important to hardwire a hot branch (template emulation) */
+FORCE_INLINE_TEMPLATE size_t ZSTD_RowFindBestMatch_selectMLS (
+ ZSTD_matchState_t* ms,
+ const BYTE* ip, const BYTE* const iLimit,
+ const ZSTD_dictMode_e dictMode, size_t* offsetPtr, const U32 rowLog)
+{
+ switch(ms->cParams.minMatch)
+ {
+ default : /* includes case 3 */
+ case 4 : return ZSTD_RowFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 4, dictMode, rowLog);
+ case 5 : return ZSTD_RowFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 5, dictMode, rowLog);
+ case 7 :
+ case 6 : return ZSTD_RowFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 6, dictMode, rowLog);
+ }
+}
+
+FORCE_INLINE_TEMPLATE size_t ZSTD_RowFindBestMatch_selectRowLog (
+ ZSTD_matchState_t* ms,
+ const BYTE* ip, const BYTE* const iLimit,
+ size_t* offsetPtr)
+{
+ const U32 cappedSearchLog = MIN(ms->cParams.searchLog, 5);
+ switch(cappedSearchLog)
+ {
+ default :
+ case 4 : return ZSTD_RowFindBestMatch_selectMLS(ms, ip, iLimit, ZSTD_noDict, offsetPtr, 4);
+ case 5 : return ZSTD_RowFindBestMatch_selectMLS(ms, ip, iLimit, ZSTD_noDict, offsetPtr, 5);
+ }
+}
+
+FORCE_INLINE_TEMPLATE size_t ZSTD_RowFindBestMatch_dictMatchState_selectRowLog(
+ ZSTD_matchState_t* ms,
+ const BYTE* ip, const BYTE* const iLimit,
+ size_t* offsetPtr)
+{
+ const U32 cappedSearchLog = MIN(ms->cParams.searchLog, 5);
+ switch(cappedSearchLog)
+ {
+ default :
+ case 4 : return ZSTD_RowFindBestMatch_selectMLS(ms, ip, iLimit, ZSTD_dictMatchState, offsetPtr, 4);
+ case 5 : return ZSTD_RowFindBestMatch_selectMLS(ms, ip, iLimit, ZSTD_dictMatchState, offsetPtr, 5);
+ }
+}
+
+FORCE_INLINE_TEMPLATE size_t ZSTD_RowFindBestMatch_dedicatedDictSearch_selectRowLog(
+ ZSTD_matchState_t* ms,
+ const BYTE* ip, const BYTE* const iLimit,
+ size_t* offsetPtr)
+{
+ const U32 cappedSearchLog = MIN(ms->cParams.searchLog, 5);
+ switch(cappedSearchLog)
+ {
+ default :
+ case 4 : return ZSTD_RowFindBestMatch_selectMLS(ms, ip, iLimit, ZSTD_dedicatedDictSearch, offsetPtr, 4);
+ case 5 : return ZSTD_RowFindBestMatch_selectMLS(ms, ip, iLimit, ZSTD_dedicatedDictSearch, offsetPtr, 5);
+ }
+}
+
+FORCE_INLINE_TEMPLATE size_t ZSTD_RowFindBestMatch_extDict_selectRowLog (
+ ZSTD_matchState_t* ms,
+ const BYTE* ip, const BYTE* const iLimit,
+ size_t* offsetPtr)
+{
+ const U32 cappedSearchLog = MIN(ms->cParams.searchLog, 5);
+ switch(cappedSearchLog)
+ {
+ default :
+ case 4 : return ZSTD_RowFindBestMatch_selectMLS(ms, ip, iLimit, ZSTD_extDict, offsetPtr, 4);
+ case 5 : return ZSTD_RowFindBestMatch_selectMLS(ms, ip, iLimit, ZSTD_extDict, offsetPtr, 5);
+ }
+}
+
/* *******************************
* Common parser - lazy strategy
*********************************/
-typedef enum { search_hashChain, search_binaryTree } searchMethod_e;
+typedef enum { search_hashChain=0, search_binaryTree=1, search_rowHash=2 } searchMethod_e;
FORCE_INLINE_TEMPLATE size_t
ZSTD_compressBlock_lazy_generic(
const BYTE* ip = istart;
const BYTE* anchor = istart;
const BYTE* const iend = istart + srcSize;
- const BYTE* const ilimit = iend - 8;
+ const BYTE* const ilimit = searchMethod == search_rowHash ? iend - 16 : iend - 8;
const BYTE* const base = ms->window.base;
const U32 prefixLowestIndex = ms->window.dictLimit;
const BYTE* const prefixLowest = base + prefixLowestIndex;
+ const U32 rowLog = ms->cParams.searchLog < 5 ? 4 : 5;
typedef size_t (*searchMax_f)(
ZSTD_matchState_t* ms,
* that should never occur (extDict modes go to the other implementation
* below and there is no DDSS for binary tree search yet).
*/
- const searchMax_f searchFuncs[4][2] = {
+ const searchMax_f searchFuncs[4][3] = {
{
ZSTD_HcFindBestMatch_selectMLS,
- ZSTD_BtFindBestMatch_selectMLS
+ ZSTD_BtFindBestMatch_selectMLS,
+ ZSTD_RowFindBestMatch_selectRowLog
},
{
+ NULL,
NULL,
NULL
},
{
ZSTD_HcFindBestMatch_dictMatchState_selectMLS,
- ZSTD_BtFindBestMatch_dictMatchState_selectMLS
+ ZSTD_BtFindBestMatch_dictMatchState_selectMLS,
+ ZSTD_RowFindBestMatch_dictMatchState_selectRowLog
},
{
ZSTD_HcFindBestMatch_dedicatedDictSearch_selectMLS,
- NULL
+ NULL,
+ ZSTD_RowFindBestMatch_dedicatedDictSearch_selectRowLog
}
};
- searchMax_f const searchMax = searchFuncs[dictMode][searchMethod == search_binaryTree];
+ searchMax_f const searchMax = searchFuncs[dictMode][(int)searchMethod];
U32 offset_1 = rep[0], offset_2 = rep[1], savedOffset=0;
const int isDMS = dictMode == ZSTD_dictMatchState;
assert(searchMax != NULL);
- DEBUGLOG(5, "ZSTD_compressBlock_lazy_generic (dictMode=%u)", (U32)dictMode);
-
- /* init */
+ DEBUGLOG(5, "ZSTD_compressBlock_lazy_generic (dictMode=%u) (searchFunc=%u)", (U32)dictMode, (U32)searchMethod);
ip += (dictAndPrefixLength == 0);
if (dictMode == ZSTD_noDict) {
U32 const curr = (U32)(ip - base);
assert(offset_2 <= dictAndPrefixLength);
}
+ if (searchMethod == search_rowHash) {
+ ZSTD_row_fillHashCache(ms, base, rowLog,
+ MIN(ms->cParams.minMatch, 6 /* mls caps out at 6 */),
+ ms->nextToUpdate, ilimit);
+ }
+
/* Match Loop */
#if defined(__GNUC__) && defined(__x86_64__)
/* I've measured random a 5% speed loss on levels 5 & 6 (greedy) when the
return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 0, ZSTD_dedicatedDictSearch);
}
+/* Row-based matchfinder */
+size_t ZSTD_compressBlock_lazy2_row(
+ ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+ void const* src, size_t srcSize)
+{
+ return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 2, ZSTD_noDict);
+}
+
+size_t ZSTD_compressBlock_lazy_row(
+ ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+ void const* src, size_t srcSize)
+{
+ return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 1, ZSTD_noDict);
+}
+
+size_t ZSTD_compressBlock_greedy_row(
+ ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+ void const* src, size_t srcSize)
+{
+ return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 0, ZSTD_noDict);
+}
+
+size_t ZSTD_compressBlock_lazy2_dictMatchState_row(
+ ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+ void const* src, size_t srcSize)
+{
+ return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 2, ZSTD_dictMatchState);
+}
+
+size_t ZSTD_compressBlock_lazy_dictMatchState_row(
+ ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+ void const* src, size_t srcSize)
+{
+ return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 1, ZSTD_dictMatchState);
+}
+
+size_t ZSTD_compressBlock_greedy_dictMatchState_row(
+ ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+ void const* src, size_t srcSize)
+{
+ return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 0, ZSTD_dictMatchState);
+}
+
+
+size_t ZSTD_compressBlock_lazy2_dedicatedDictSearch_row(
+ ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+ void const* src, size_t srcSize)
+{
+ return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 2, ZSTD_dedicatedDictSearch);
+}
+
+size_t ZSTD_compressBlock_lazy_dedicatedDictSearch_row(
+ ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+ void const* src, size_t srcSize)
+{
+ return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 1, ZSTD_dedicatedDictSearch);
+}
+
+size_t ZSTD_compressBlock_greedy_dedicatedDictSearch_row(
+ ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+ void const* src, size_t srcSize)
+{
+ return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 0, ZSTD_dedicatedDictSearch);
+}
FORCE_INLINE_TEMPLATE
size_t ZSTD_compressBlock_lazy_extDict_generic(
const BYTE* ip = istart;
const BYTE* anchor = istart;
const BYTE* const iend = istart + srcSize;
- const BYTE* const ilimit = iend - 8;
+ const BYTE* const ilimit = searchMethod == search_rowHash ? iend - 16 : iend - 8;
const BYTE* const base = ms->window.base;
const U32 dictLimit = ms->window.dictLimit;
const BYTE* const prefixStart = base + dictLimit;
const BYTE* const dictEnd = dictBase + dictLimit;
const BYTE* const dictStart = dictBase + ms->window.lowLimit;
const U32 windowLog = ms->cParams.windowLog;
+ const U32 rowLog = ms->cParams.searchLog < 5 ? 4 : 5;
typedef size_t (*searchMax_f)(
ZSTD_matchState_t* ms,
const BYTE* ip, const BYTE* iLimit, size_t* offsetPtr);
- searchMax_f searchMax = searchMethod==search_binaryTree ? ZSTD_BtFindBestMatch_extDict_selectMLS : ZSTD_HcFindBestMatch_extDict_selectMLS;
-
+ const searchMax_f searchFuncs[3] = {
+ ZSTD_HcFindBestMatch_extDict_selectMLS,
+ ZSTD_BtFindBestMatch_extDict_selectMLS,
+ ZSTD_RowFindBestMatch_extDict_selectRowLog
+ };
+ searchMax_f searchMax = searchFuncs[(int)searchMethod];
U32 offset_1 = rep[0], offset_2 = rep[1];
- DEBUGLOG(5, "ZSTD_compressBlock_lazy_extDict_generic");
+ DEBUGLOG(5, "ZSTD_compressBlock_lazy_extDict_generic (searchFunc=%u)", (U32)searchMethod);
/* init */
ip += (ip == prefixStart);
+ if (searchMethod == search_rowHash) {
+ ZSTD_row_fillHashCache(ms, base, rowLog,
+ MIN(ms->cParams.minMatch, 6 /* mls caps out at 6 */),
+ ms->nextToUpdate, ilimit);
+ }
/* Match Loop */
#if defined(__GNUC__) && defined(__x86_64__)
{
return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_binaryTree, 2);
}
+
+size_t ZSTD_compressBlock_greedy_extDict_row(
+ ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+ void const* src, size_t srcSize)
+{
+ return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 0);
+}
+
+size_t ZSTD_compressBlock_lazy_extDict_row(
+ ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+ void const* src, size_t srcSize)
+
+{
+ return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 1);
+}
+
+size_t ZSTD_compressBlock_lazy2_extDict_row(
+ ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+ void const* src, size_t srcSize)
+
+{
+ return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 2);
+}
#define ZSTD_LAZY_DDSS_BUCKET_LOG 2
U32 ZSTD_insertAndFindFirstIndex(ZSTD_matchState_t* ms, const BYTE* ip);
+void ZSTD_row_update(ZSTD_matchState_t* const ms, const BYTE* ip);
void ZSTD_dedicatedDictSearch_lazy_loadDictionary(ZSTD_matchState_t* ms, const BYTE* const ip);
size_t ZSTD_compressBlock_greedy(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
+size_t ZSTD_compressBlock_lazy2_row(
+ ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+ void const* src, size_t srcSize);
+size_t ZSTD_compressBlock_lazy_row(
+ ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+ void const* src, size_t srcSize);
+size_t ZSTD_compressBlock_greedy_row(
+ ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+ void const* src, size_t srcSize);
size_t ZSTD_compressBlock_btlazy2_dictMatchState(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
size_t ZSTD_compressBlock_greedy_dictMatchState(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
+size_t ZSTD_compressBlock_lazy2_dictMatchState_row(
+ ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+ void const* src, size_t srcSize);
+size_t ZSTD_compressBlock_lazy_dictMatchState_row(
+ ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+ void const* src, size_t srcSize);
+size_t ZSTD_compressBlock_greedy_dictMatchState_row(
+ ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+ void const* src, size_t srcSize);
size_t ZSTD_compressBlock_lazy2_dedicatedDictSearch(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
size_t ZSTD_compressBlock_greedy_dedicatedDictSearch(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
+size_t ZSTD_compressBlock_lazy2_dedicatedDictSearch_row(
+ ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+ void const* src, size_t srcSize);
+size_t ZSTD_compressBlock_lazy_dedicatedDictSearch_row(
+ ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+ void const* src, size_t srcSize);
+size_t ZSTD_compressBlock_greedy_dedicatedDictSearch_row(
+ ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+ void const* src, size_t srcSize);
size_t ZSTD_compressBlock_greedy_extDict(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
size_t ZSTD_compressBlock_lazy2_extDict(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
+size_t ZSTD_compressBlock_greedy_extDict_row(
+ ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+ void const* src, size_t srcSize);
+size_t ZSTD_compressBlock_lazy_extDict_row(
+ ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+ void const* src, size_t srcSize);
+size_t ZSTD_compressBlock_lazy2_extDict_row(
+ ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+ void const* src, size_t srcSize);
size_t ZSTD_compressBlock_btlazy2_extDict(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
+
#if defined (__cplusplus)
}
size_t ZSTD_ldm_blockCompress(rawSeqStore_t* rawSeqStore,
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+ ZSTD_useRowMatchFinderMode_e useRowMatchFinder,
void const* src, size_t srcSize)
{
const ZSTD_compressionParameters* const cParams = &ms->cParams;
unsigned const minMatch = cParams->minMatch;
ZSTD_blockCompressor const blockCompressor =
- ZSTD_selectBlockCompressor(cParams->strategy, ZSTD_matchState_dictMode(ms));
+ ZSTD_selectBlockCompressor(cParams->strategy, useRowMatchFinder, ZSTD_matchState_dictMode(ms));
/* Input bounds */
BYTE const* const istart = (BYTE const*)src;
BYTE const* const iend = istart + srcSize;
*/
size_t ZSTD_ldm_blockCompress(rawSeqStore_t* rawSeqStore,
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+ ZSTD_useRowMatchFinderMode_e useRowMatchFinder,
void const* src, size_t srcSize);
/**
* ZSTD_c_blockDelimiters
* ZSTD_c_validateSequences
* ZSTD_c_splitBlocks
+ * ZSTD_c_useRowMatchFinder
* Because they are not stable, it's necessary to define ZSTD_STATIC_LINKING_ONLY to access them.
* note : never ever use experimentalParam? names directly;
* also, the enums values themselves are unstable and can still change.
ZSTD_c_experimentalParam10=1007,
ZSTD_c_experimentalParam11=1008,
ZSTD_c_experimentalParam12=1009,
- ZSTD_c_experimentalParam13=1010
+ ZSTD_c_experimentalParam13=1010,
+ ZSTD_c_experimentalParam14=1011
} ZSTD_cParameter;
typedef struct {
ZSTD_lcm_uncompressed = 2 /**< Always emit uncompressed literals. */
} ZSTD_literalCompressionMode_e;
+typedef enum {
+ ZSTD_urm_auto = 0, /* Automatically determine whether or not we use row matchfinder */
+ ZSTD_urm_disableRowMatchFinder = 1, /* Never use row matchfinder */
+ ZSTD_urm_enableRowMatchFinder = 2 /* Always use row matchfinder when applicable */
+} ZSTD_useRowMatchFinderMode_e;
/***************************************
* Frame size functions
*/
#define ZSTD_c_splitBlocks ZSTD_c_experimentalParam13
+/* ZSTD_c_useRowMatchFinder
+ * Default is ZSTD_urm_auto.
+ * Controlled with ZSTD_useRowMatchFinderMode_e enum.
+ *
+ * By default, in ZSTD_urm_auto, when finalizing the compression parameters, the library
+ * will decide at runtime whether to use the row-based matchfinder based on support for SIMD
+ * instructions as well as the windowLog.
+ *
+ * Set to ZSTD_urm_disableRowMatchFinder to never use row-based matchfinder.
+ * Set to ZSTD_urm_enableRowMatchFinder to force usage of row-based matchfinder.
+ */
+#define ZSTD_c_useRowMatchFinder ZSTD_c_experimentalParam14
+
/*! ZSTD_CCtx_getParameter() :
* Get the requested compression parameter value, selected by enum ZSTD_cParameter,
* and store it into int* value.
0, /* ldmHashLog */
0, /* ldmBuckSizeLog */
0, /* ldmHashRateLog */
- ZSTD_lcm_auto /* literalCompressionMode */
+ ZSTD_lcm_auto, /* literalCompressionMode */
+ 0 /* useRowMatchFinder */
};
return res;
}
CHECK_Z(ZSTD_CCtx_setParameter(ctx, ZSTD_c_nbWorkers, adv->nbWorkers));
}
CHECK_Z(ZSTD_CCtx_setParameter(ctx, ZSTD_c_compressionLevel, cLevel));
+ CHECK_Z(ZSTD_CCtx_setParameter(ctx, ZSTD_c_useRowMatchFinder, adv->useRowMatchFinder));
CHECK_Z(ZSTD_CCtx_setParameter(ctx, ZSTD_c_enableLongDistanceMatching, adv->ldmFlag));
CHECK_Z(ZSTD_CCtx_setParameter(ctx, ZSTD_c_ldmMinMatch, adv->ldmMinMatch));
CHECK_Z(ZSTD_CCtx_setParameter(ctx, ZSTD_c_ldmHashLog, adv->ldmHashLog));
int ldmBucketSizeLog;
int ldmHashRateLog;
ZSTD_literalCompressionMode_e literalCompressionMode;
+ int useRowMatchFinder; /* use row-based matchfinder if possible */
} BMK_advancedParams_t;
/* returns default parameters used by nonAdvanced functions */
int blockSize;
int overlapLog;
U32 adaptiveMode;
+ U32 useRowMatchFinder;
int rsyncable;
int minAdaptLevel;
int maxAdaptLevel;
prefs->adaptiveMode = adapt;
}
+void FIO_setUseRowMatchFinder(FIO_prefs_t* const prefs, int useRowMatchFinder) {
+ prefs->useRowMatchFinder = useRowMatchFinder;
+}
+
void FIO_setRsyncable(FIO_prefs_t* const prefs, int rsyncable) {
if ((rsyncable>0) && (prefs->nbWorkers==0))
EXM_THROW(1, "Rsyncable mode is not compatible with single thread mode \n");
if (prefs->ldmHashRateLog != FIO_LDM_PARAM_NOTSET) {
CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_ldmHashRateLog, prefs->ldmHashRateLog) );
}
+ CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_useRowMatchFinder, prefs->useRowMatchFinder));
/* compression parameters */
CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_windowLog, (int)comprParams.windowLog) );
CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_chainLog, (int)comprParams.chainLog) );
void FIO_setAdaptiveMode(FIO_prefs_t* const prefs, unsigned adapt);
void FIO_setAdaptMin(FIO_prefs_t* const prefs, int minCLevel);
void FIO_setAdaptMax(FIO_prefs_t* const prefs, int maxCLevel);
+void FIO_setUseRowMatchFinder(FIO_prefs_t* const prefs, int useRowMatchFinder);
void FIO_setBlockSize(FIO_prefs_t* const prefs, int blockSize);
void FIO_setChecksumFlag(FIO_prefs_t* const prefs, int checksumFlag);
void FIO_setDictIDFlag(FIO_prefs_t* const prefs, int dictIDFlag);
DISPLAYOUT( "--long[=#]: enable long distance matching with given window log (default: %u) \n", g_defaultMaxWindowLog);
DISPLAYOUT( "--fast[=#]: switch to very fast compression levels (default: %u) \n", 1);
DISPLAYOUT( "--adapt : dynamically adapt compression level to I/O conditions \n");
+ DISPLAYOUT( "--[no-]row-match-finder : force enable/disable usage of fast row-based matchfinder for greedy, lazy, and lazy2 strategies \n");
# ifdef ZSTD_MULTITHREAD
DISPLAYOUT( " -T# : spawns # compression threads (default: 1, 0==# cores) \n");
DISPLAYOUT( " -B# : select size of each job (default: 0==automatic) \n");
main_pause = 0,
nbWorkers = 0,
adapt = 0,
+ useRowMatchFinder = 0,
adaptMin = MINCLEVEL,
adaptMax = MAXCLEVEL,
rsyncable = 0,
if (!strcmp(argument, "--content-size")) { contentSize = 1; continue; }
if (!strcmp(argument, "--no-content-size")) { contentSize = 0; continue; }
if (!strcmp(argument, "--adapt")) { adapt = 1; continue; }
+ if (!strcmp(argument, "--no-row-match-finder")) { useRowMatchFinder = 1; continue; }
+ if (!strcmp(argument, "--row-match-finder")) { useRowMatchFinder = 2; continue; }
if (longCommandWArg(&argument, "--adapt=")) { adapt = 1; if (!parseAdaptParameters(argument, &adaptMin, &adaptMax)) { badusage(programName); CLEAN_RETURN(1); } continue; }
if (!strcmp(argument, "--single-thread")) { nbWorkers = 0; singleThread = 1; continue; }
if (!strcmp(argument, "--format=zstd")) { suffix = ZSTD_EXTENSION; FIO_setCompressionType(prefs, FIO_zstdCompression); continue; }
benchParams.ldmFlag = ldmFlag;
benchParams.ldmMinMatch = (int)g_ldmMinMatch;
benchParams.ldmHashLog = (int)g_ldmHashLog;
+ benchParams.useRowMatchFinder = useRowMatchFinder;
if (g_ldmBucketSizeLog != LDM_PARAM_DEFAULT) {
benchParams.ldmBucketSizeLog = (int)g_ldmBucketSizeLog;
}
if (g_ldmBucketSizeLog != LDM_PARAM_DEFAULT) FIO_setLdmBucketSizeLog(prefs, (int)g_ldmBucketSizeLog);
if (g_ldmHashRateLog != LDM_PARAM_DEFAULT) FIO_setLdmHashRateLog(prefs, (int)g_ldmHashRateLog);
FIO_setAdaptiveMode(prefs, (unsigned)adapt);
+ FIO_setUseRowMatchFinder(prefs, useRowMatchFinder);
FIO_setAdaptMin(prefs, adaptMin);
FIO_setAdaptMax(prefs, adaptMax);
FIO_setRsyncable(prefs, rsyncable);
else
operationResult = FIO_compressMultipleFilenames(fCtx, prefs, filenames->fileNames, outMirroredDirName, outDirName, outFileName, suffix, dictFileName, cLevel, compressionParams);
#else
- (void)contentSize; (void)suffix; (void)adapt; (void)rsyncable; (void)ultra; (void)cLevel; (void)ldmFlag; (void)literalCompressionMode; (void)targetCBlockSize; (void)streamSrcSize; (void)srcSizeHint; (void)ZSTD_strategyMap; /* not used when ZSTD_NOCOMPRESS set */
+ (void)contentSize; (void)suffix; (void)adapt; (void)rsyncable; (void)ultra; (void)cLevel; (void)ldmFlag; (void)literalCompressionMode; (void)targetCBlockSize; (void)streamSrcSize; (void)srcSizeHint; (void)ZSTD_strategyMap; (void)useRowMatchFinder; /* not used when ZSTD_NOCOMPRESS set */
DISPLAY("Compression not supported \n");
#endif
} else { /* decompression or test */
fuzz-*.log
rt_lib_*
d_lib_*
+crash-*
# misc
trace
/* Set misc parameters */
setRand(cctx, ZSTD_c_nbWorkers, 0, 2, producer);
setRand(cctx, ZSTD_c_rsyncable, 0, 1, producer);
+ setRand(cctx, ZSTD_c_useRowMatchFinder, 0, 2, producer);
setRand(cctx, ZSTD_c_forceMaxWindow, 0, 1, producer);
setRand(cctx, ZSTD_c_literalCompressionMode, 0, 2, producer);
setRand(cctx, ZSTD_c_forceAttachDict, 0, 2, producer);
size_t const contentSize = 9 KB;
const void* const dict = (const char*)CNBuffer;
const void* const contentStart = (const char*)dict + flatdictSize;
+ /* These upper bounds are generally within a few bytes of the compressed size */
size_t const target_nodict_cSize[22+1] = { 3840, 3770, 3870, 3830, 3770,
3770, 3770, 3770, 3750, 3750,
3742, 3670, 3670, 3660, 3660,
3660, 3660, 3660, 3660, 3660,
3660, 3660, 3660 };
size_t const target_wdict_cSize[22+1] = { 2830, 2890, 2890, 2820, 2940,
- 2950, 2950, 2921, 2900, 2891,
+ 2950, 2950, 2925, 2900, 2891,
2910, 2910, 2910, 2770, 2760,
2750, 2750, 2750, 2750, 2750,
2750, 2750, 2750 };
DISPLAYLEVEL(4, "level %i with dictionary : max expected %u >= reached %u \n",
l, (unsigned)target_wdict_cSize[l], (unsigned)wdict_cSize);
}
+ /* Dict compression with DMS */
+ for ( l=1 ; l <= maxLevel; l++) {
+ size_t wdict_cSize;
+ CHECK_Z( ZSTD_CCtx_loadDictionary(ctxOrig, dict, flatdictSize) );
+ CHECK_Z( ZSTD_CCtx_setParameter(ctxOrig, ZSTD_c_compressionLevel, l) );
+ CHECK_Z( ZSTD_CCtx_setParameter(ctxOrig, ZSTD_c_enableDedicatedDictSearch, 0) );
+ CHECK_Z( ZSTD_CCtx_setParameter(ctxOrig, ZSTD_c_forceAttachDict, ZSTD_dictForceAttach) );
+ wdict_cSize = ZSTD_compress2(ctxOrig, compressedBuffer, compressedBufferSize, contentStart, contentSize);
+ if (wdict_cSize > target_wdict_cSize[l]) {
+ DISPLAYLEVEL(1, "error : compression with dictionary and compress2 at level %i worse than expected (%u > %u) \n",
+ l, (unsigned)wdict_cSize, (unsigned)target_wdict_cSize[l]);
+ goto _output_error;
+ }
+ DISPLAYLEVEL(4, "level %i with dictionary and compress2 : max expected %u >= reached %u \n",
+ l, (unsigned)target_wdict_cSize[l], (unsigned)wdict_cSize);
+ }
DISPLAYLEVEL(4, "compression efficiency tests OK \n");
}