bounds.upperBound = (int)ZSTD_urm_enableRowMatchFinder;
return bounds;
+ case ZSTD_c_deterministicRefPrefix:
+ bounds.lowerBound = 0;
+ bounds.upperBound = 1;
+ return bounds;
+
default:
bounds.error = ERROR(parameter_unsupported);
return bounds;
case ZSTD_c_validateSequences:
case ZSTD_c_splitBlocks:
case ZSTD_c_useRowMatchFinder:
+ case ZSTD_c_deterministicRefPrefix:
default:
return 0;
}
case ZSTD_c_validateSequences:
case ZSTD_c_splitBlocks:
case ZSTD_c_useRowMatchFinder:
+ case ZSTD_c_deterministicRefPrefix:
break;
default: RETURN_ERROR(parameter_unsupported, "unknown parameter");
CCtxParams->useRowMatchFinder = (ZSTD_useRowMatchFinderMode_e)value;
return CCtxParams->useRowMatchFinder;
+ case ZSTD_c_deterministicRefPrefix:
+ BOUNDCHECK(ZSTD_c_deterministicRefPrefix, value);
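+ /* normalize any non-zero value to a strict 0/1 boolean */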
+ CCtxParams->deterministicRefPrefix = !!value;
+ return CCtxParams->deterministicRefPrefix;
+
default: RETURN_ERROR(parameter_unsupported, "unknown parameter");
}
}
case ZSTD_c_useRowMatchFinder :
*value = (int)CCtxParams->useRowMatchFinder;
break;
+ case ZSTD_c_deterministicRefPrefix:
+ *value = (int)CCtxParams->deterministicRefPrefix;
+ break;
default: RETURN_ERROR(parameter_unsupported, "unknown parameter");
}
return 0;
if (!srcSize) return fhSize; /* do not generate an empty block if no input */
- if (!ZSTD_window_update(&ms->window, src, srcSize)) {
+ if (!ZSTD_window_update(&ms->window, src, srcSize, ms->forceNonContiguous)) {
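+ /* the force flag is one-shot: once the update reports a non-contiguous
+ * segment it has done its job, so clear it here */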
+ ms->forceNonContiguous = 0;
ms->nextToUpdate = ms->window.dictLimit;
}
if (cctx->appliedParams.ldmParams.enableLdm) {
- ZSTD_window_update(&cctx->ldmState.window, src, srcSize);
+ ZSTD_window_update(&cctx->ldmState.window, src, srcSize, /* forceNonContiguous */ 0);
}
if (!frame) {
}
DEBUGLOG(4, "ZSTD_loadDictionaryContent(): useRowMatchFinder=%d", (int)params->useRowMatchFinder);
- ZSTD_window_update(&ms->window, src, srcSize);
+ ZSTD_window_update(&ms->window, src, srcSize, /* forceNonContiguous */ 0);
ms->loadedDictEnd = params->forceWindow ? 0 : (U32)(iend - ms->window.base);
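+ /* when deterministicRefPrefix is set, arm forceNonContiguous so that the
+ * next window update (the first block compressed after this dictionary or
+ * prefix load) starts a fresh segment even if the data is adjacent */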
+ ms->forceNonContiguous = params->deterministicRefPrefix;
if (loadLdmDict) {
- ZSTD_window_update(&ls->window, src, srcSize);
+ ZSTD_window_update(&ls->window, src, srcSize, /* forceNonContiguous */ 0);
ls->loadedDictEnd = params->forceWindow ? 0 : (U32)(iend - ls->window.base);
}
U32* hashTable3;
U32* chainTable;
+ U32 forceNonContiguous; /* Non-zero if we should force non-contiguous load for the next window update. */
+
int dedicatedDictSearch; /* Indicates whether this matchState is using the
* dedicated dictionary search structure.
*/
/* Param for deciding whether to use row-based matchfinder */
ZSTD_useRowMatchFinderMode_e useRowMatchFinder;
+ /* Always load a dictionary in ext-dict mode (not prefix mode)? */
+ int deterministicRefPrefix;
+
/* Internal use, for createCCtxParams() and freeCCtxParams() only */
ZSTD_customMem customMem;
}; /* typedef'd to ZSTD_CCtx_params within "zstd.h" */
* Returns non-zero if the segment is contiguous.
*/
MEM_STATIC U32 ZSTD_window_update(ZSTD_window_t* window,
- void const* src, size_t srcSize)
+ void const* src, size_t srcSize,
+ int forceNonContiguous)
{
BYTE const* const ip = (BYTE const*)src;
U32 contiguous = 1;
assert(window->base != NULL);
assert(window->dictBase != NULL);
/* Check if blocks follow each other */
- if (src != window->nextSrc) {
+ if (src != window->nextSrc || forceNonContiguous) {
/* not contiguous */
size_t const distanceFromBase = (size_t)(window->nextSrc - window->base);
DEBUGLOG(5, "Non contiguous blocks, new segment starts at %u", window->dictLimit);
if (dictSize > 0) {
if (dictContentType == ZSTD_dct_rawContent) {
BYTE const* const dictEnd = (const BYTE*)dict + dictSize;
- ZSTD_window_update(&serialState->ldmState.window, dict, dictSize);
+ ZSTD_window_update(&serialState->ldmState.window, dict, dictSize, /* forceNonContiguous */ 0);
ZSTD_ldm_fillHashTable(&serialState->ldmState, (const BYTE*)dict, dictEnd, &params.ldmParams);
serialState->ldmState.loadedDictEnd = params.forceWindow ? 0 : (U32)(dictEnd - serialState->ldmState.window.base);
} else {
assert(seqStore.seq != NULL && seqStore.pos == 0 &&
seqStore.size == 0 && seqStore.capacity > 0);
assert(src.size <= serialState->params.jobSize);
- ZSTD_window_update(&serialState->ldmState.window, src.start, src.size);
+ ZSTD_window_update(&serialState->ldmState.window, src.start, src.size, /* forceNonContiguous */ 0);
error = ZSTD_ldm_generateSequences(
&serialState->ldmState, &seqStore,
&serialState->params.ldmParams, src.start, src.size);
{ size_t const forceWindowError = ZSTD_CCtxParams_setParameter(&jobParams, ZSTD_c_forceMaxWindow, !job->firstJob);
if (ZSTD_isError(forceWindowError)) JOB_ERROR(forceWindowError);
}
+ if (!job->firstJob) {
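+ /* Non-first jobs must load their prefix contiguously into the window:
+ * the repcode invalidation checked below only works without an ext-dict,
+ * so the deterministic-prefix flag is switched off for them. */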
+ size_t const err = ZSTD_CCtxParams_setParameter(&jobParams, ZSTD_c_deterministicRefPrefix, 0);
+ if (ZSTD_isError(err)) JOB_ERROR(err);
+ }
{ size_t const initError = ZSTD_compressBegin_advanced_internal(cctx,
job->prefix.start, job->prefix.size, ZSTD_dct_rawContent, /* load dictionary in "content-only" mode (no header analysis) */
ZSTD_dtlm_fast,
if (ZSTD_isError(cSize)) JOB_ERROR(cSize);
lastCBlockSize = cSize;
} }
+ if (!job->firstJob) {
+ /* Double check that we don't have an ext-dict, because then our
+ * repcode invalidation doesn't work.
+ */
+ assert(!ZSTD_window_hasExtDict(cctx->blockState.matchState.window));
+ }
ZSTD_CCtx_trace(cctx, 0);
_endJob:
ZSTD_c_experimentalParam11=1008,
ZSTD_c_experimentalParam12=1009,
ZSTD_c_experimentalParam13=1010,
- ZSTD_c_experimentalParam14=1011
+ ZSTD_c_experimentalParam14=1011,
+ ZSTD_c_experimentalParam15=1012
} ZSTD_cParameter;
typedef struct {
*/
#define ZSTD_c_useRowMatchFinder ZSTD_c_experimentalParam14
+/* ZSTD_c_deterministicRefPrefix
+ * Default is 0 == disabled. Set to 1 to enable.
+ *
+ * Zstd produces different results for prefix compression when the prefix is
+ * directly adjacent to the data about to be compressed vs. when it isn't.
+ * This is because zstd detects that the two buffers are contiguous and can
+ * use a more efficient match-finding algorithm, which produces different
+ * results than the non-contiguous path. This flag forces zstd
+ * to always load the prefix in non-contiguous mode, even if it happens to be
+ * adjacent to the data, to guarantee determinism.
+ *
+ * If you really care about determinism when using a dictionary or prefix,
+ * like when doing delta compression, you should select this option. It comes
+ * at a speed penalty of roughly 2.5% when the dictionary and data happen to
+ * be contiguous, and is free when they aren't. We don't expect that
+ * intentionally making the dictionary and data contiguous will be worth the
+ * cost to memcpy() the data.
+ */
+#define ZSTD_c_deterministicRefPrefix ZSTD_c_experimentalParam15
+
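/* Minimal usage sketch (editor's addition, not part of the header; error
 * checks omitted). `dst`, `src`, `prefix` and their sizes are placeholders.
 * Experimental parameters require ZSTD_STATIC_LINKING_ONLY. */
#define ZSTD_STATIC_LINKING_ONLY
#include <zstd.h>

size_t compressWithDeterministicPrefix(void* dst, size_t dstCapacity,
                                       const void* src, size_t srcSize,
                                       const void* prefix, size_t prefixSize)
{
    ZSTD_CCtx* const cctx = ZSTD_createCCtx();
    size_t cSize;
    ZSTD_CCtx_setParameter(cctx, ZSTD_c_deterministicRefPrefix, 1);
    ZSTD_CCtx_refPrefix(cctx, prefix, prefixSize);  /* one-shot: cleared after the frame */
    cSize = ZSTD_compress2(cctx, dst, dstCapacity, src, srcSize);
    /* with the flag set, the frame bytes no longer depend on whether
     * (const char*)prefix + prefixSize == (const char*)src */
    ZSTD_freeCCtx(cctx);
    return cSize;
}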
/*! ZSTD_CCtx_getParameter() :
* Get the requested compression parameter value, selected by enum ZSTD_cParameter,
* and store it into int* value.
setRand(cctx, ZSTD_c_literalCompressionMode, 0, 2, producer);
setRand(cctx, ZSTD_c_forceAttachDict, 0, 2, producer);
setRand(cctx, ZSTD_c_splitBlocks, 0, 1, producer);
+ setRand(cctx, ZSTD_c_deterministicRefPrefix, 0, 1, producer);
if (FUZZ_dataProducer_uint32Range(producer, 0, 1) == 0) {
setRand(cctx, ZSTD_c_srcSizeHint, ZSTD_SRCSIZEHINT_MIN, 2 * srcSize, producer);
}
}
DISPLAYLEVEL(3, "OK \n");
+ DISPLAYLEVEL(3, "test%3i : testing dict compression for determinism : ", testNb++);
+ {
+ size_t const testSize = 1024;
+ ZSTD_CCtx* const cctx = ZSTD_createCCtx();
+ ZSTD_DCtx* const dctx = ZSTD_createDCtx();
+ char* dict = (char*)malloc(2 * testSize);
+ int ldmEnabled, level;
+
+ if (!dict) goto _output_error;
+ RDG_genBuffer(dict, testSize, 0.5, 0.5, seed);
+ RDG_genBuffer(CNBuffer, testSize, 0.6, 0.6, seed);
+ memcpy(dict + testSize, CNBuffer, testSize);
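+ /* dict[0..testSize) is random prefix data and dict[testSize..2*testSize)
+ * is an exact copy of CNBuffer: the first compression below reads its input
+ * from CNBuffer (not adjacent to the prefix), the second reads the identical
+ * bytes from dict + testSize (directly adjacent). Both must produce
+ * byte-identical frames. */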
+ for (level = 1; level <= 5; ++level) {
+ for (ldmEnabled = 0; ldmEnabled <= 1; ++ldmEnabled) {
+ size_t cSize0;
+ XXH64_hash_t compressedChecksum0;
+
+ CHECK_Z(ZSTD_CCtx_setParameter(cctx, ZSTD_c_checksumFlag, 1));
+ CHECK_Z(ZSTD_CCtx_setParameter(cctx, ZSTD_c_compressionLevel, level));
+ CHECK_Z(ZSTD_CCtx_setParameter(cctx, ZSTD_c_enableLongDistanceMatching, ldmEnabled));
+ CHECK_Z(ZSTD_CCtx_setParameter(cctx, ZSTD_c_deterministicRefPrefix, 1));
+
+ CHECK_Z(ZSTD_CCtx_refPrefix(cctx, dict, testSize));
+ cSize = ZSTD_compress2(cctx, compressedBuffer, compressedBufferSize, CNBuffer, testSize);
+ CHECK_Z(cSize);
+ CHECK_Z(ZSTD_decompress_usingDict(dctx, decodedBuffer, testSize, compressedBuffer, cSize, dict, testSize));
+
+ cSize0 = cSize;
+ compressedChecksum0 = XXH64(compressedBuffer, cSize, 0);
+
+ CHECK_Z(ZSTD_CCtx_refPrefix(cctx, dict, testSize));
+ cSize = ZSTD_compress2(cctx, compressedBuffer, compressedBufferSize, dict + testSize, testSize);
+ CHECK_Z(cSize);
+
+ if (cSize != cSize0) goto _output_error;
+ if (XXH64(compressedBuffer, cSize, 0) != compressedChecksum0) goto _output_error;
+ }
+ }
+
+ ZSTD_freeCCtx(cctx);
+ ZSTD_freeDCtx(dctx);
+ free(dict);
+ }
+ DISPLAYLEVEL(3, "OK \n");
+
DISPLAYLEVEL(3, "test%3i : LDM + opt parser with small uncompressible block ", testNb++);
{ ZSTD_CCtx* cctx = ZSTD_createCCtx();
ZSTD_DCtx* dctx = ZSTD_createDCtx();
}
if (FUZ_rand(&lseed) & 1) CHECK_Z( setCCtxParameter(zc, cctxParams, ZSTD_c_forceMaxWindow, FUZ_rand(&lseed) & 1, opaqueAPI) );
+ if (FUZ_rand(&lseed) & 1) CHECK_Z( setCCtxParameter(zc, cctxParams, ZSTD_c_deterministicRefPrefix, FUZ_rand(&lseed) & 1, opaqueAPI) );
/* Apply parameters */
if (opaqueAPI) {