if (params.ldmParams.enableLdm) {
/* Adjust long distance matching parameters.
 * The LDM window log is copied from the compression parameters before the
 * remaining LDM parameters are adjusted against them. */
+ params.ldmParams.windowLog = params.cParams.windowLog;
ZSTD_ldm_adjustParameters(&params.ldmParams, &params.cParams);
assert(params.ldmParams.hashLog >= params.ldmParams.bucketSizeLog);
assert(params.ldmParams.hashEveryLog < 32);
ptr = zc->ldmState.hashTable + ldmHSize; /* step past the LDM hash table */
zc->ldmSequences = (rawSeq*)ptr;
ptr = zc->ldmSequences + maxNbLdmSeq; /* step past the LDM sequence storage */
+
+ memset(&zc->ldmState.window, 0, sizeof(zc->ldmState.window)); /* zero the LDM window state */
}
assert(((size_t)ptr & 3) == 0); /* ensure ptr is properly aligned */
ZSTD_reduceTable_internal(table, size, reducerValue, 1);
}
-
-/*! ZSTD_ldm_reduceTable() :
- * reduce table indexes by `reducerValue` */
-static void ZSTD_ldm_reduceTable(ldmEntry_t* const table, U32 const size,
- U32 const reducerValue)
-{
- U32 u;
- for (u = 0; u < size; u++) {
- if (table[u].offset < reducerValue) table[u].offset = 0;
- else table[u].offset -= reducerValue;
- }
-}
-
/*! ZSTD_reduceIndex() :
* rescale all indexes to avoid future overflow (indexes are U32) */
static void ZSTD_reduceIndex (ZSTD_CCtx* zc, const U32 reducerValue)
U32 const h3Size = (U32)1 << ms->hashLog3;
ZSTD_reduceTable(ms->hashTable3, h3Size, reducerValue);
}
-
- if (zc->appliedParams.ldmParams.enableLdm) {
- U32 const ldmHSize = (U32)1 << zc->appliedParams.ldmParams.hashLog;
- ZSTD_ldm_reduceTable(zc->ldmState.hashTable, ldmHSize, reducerValue);
- }
}
if (zc->appliedParams.ldmParams.enableLdm) {
size_t const nbSeq =
ZSTD_ldm_generateSequences(&zc->ldmState, zc->ldmSequences,
- ms, &zc->appliedParams.ldmParams,
+ &zc->appliedParams.ldmParams,
src, srcSize, extDict);
lastLLSize =
ZSTD_ldm_blockCompress(zc->ldmSequences, nbSeq,
if (!ZSTD_window_update(&ms->window, src, srcSize)) {
ms->nextToUpdate = ms->window.dictLimit;
}
+ if (cctx->appliedParams.ldmParams.enableLdm)
+ ZSTD_window_update(&cctx->ldmState.window, src, srcSize);
DEBUGLOG(5, "ZSTD_compressContinue_internal (blockSize=%u)", (U32)cctx->blockSize);
{ size_t const cSize = frame ?
} ldmEntry_t;
typedef struct {
+ ZSTD_window_t window; /* State for the window round buffer management */
ldmEntry_t* hashTable;
BYTE* bucketOffsets; /* Next position in bucket to insert entry */
U64 hashPower; /* Used to compute the rolling hash.
U32 bucketSizeLog; /* Log bucket size for collision resolution, at most 8 */
U32 minMatchLength; /* Minimum match length */
U32 hashEveryLog; /* Log number of entries to skip */
+ U32 windowLog; /* Window log for the LDM */
} ldmParams_t;
struct ZSTD_CCtx_params_s {
}
}
-size_t ZSTD_ldm_generateSequences(
- ldmState_t* ldmState, rawSeq* sequences, ZSTD_matchState_t const* ms,
+static size_t ZSTD_ldm_generateSequences_internal(
+ ldmState_t* ldmState, rawSeq* sequences,
ldmParams_t const* params, void const* src, size_t srcSize,
int const extDict)
{
U32 const hashEveryLog = params->hashEveryLog;
U32 const ldmTagMask = (1U << params->hashEveryLog) - 1;
/* Prefix and extDict parameters */
- U32 const dictLimit = ms->window.dictLimit;
- U32 const lowestIndex = extDict ? ms->window.lowLimit : dictLimit;
- BYTE const* const base = ms->window.base;
- BYTE const* const dictBase = extDict ? ms->window.dictBase : NULL;
+ U32 const dictLimit = ldmState->window.dictLimit;
+ U32 const lowestIndex = extDict ? ldmState->window.lowLimit : dictLimit;
+ BYTE const* const base = ldmState->window.base;
+ BYTE const* const dictBase = extDict ? ldmState->window.dictBase : NULL;
BYTE const* const dictStart = extDict ? dictBase + lowestIndex : NULL;
BYTE const* const dictEnd = extDict ? dictBase + dictLimit : NULL;
BYTE const* const lowPrefixPtr = base + dictLimit;
return sequences - sequencesStart;
}
+/*! ZSTD_ldm_reduceTable() :
+ * reduce table indexes by `reducerValue`.
+ * Visits the first `size` entries of `table`: an entry whose `offset` is
+ * smaller than `reducerValue` is reset to 0, every other `offset` is
+ * decreased by `reducerValue`. */
+static void ZSTD_ldm_reduceTable(ldmEntry_t* const table, U32 const size,
+ U32 const reducerValue)
+{
+ U32 u;
+ for (u = 0; u < size; u++) {
+ if (table[u].offset < reducerValue) table[u].offset = 0; /* clamp rather than subtract past zero */
+ else table[u].offset -= reducerValue;
+ }
+}
+
+/*! ZSTD_ldm_generateSequences() :
+ * Generates long distance matching sequences for `src`, processing it in
+ * chunks of at most kMaxChunkSize bytes. Before each chunk:
+ *  - if the LDM window reports that overflow correction is needed, the window
+ *    is corrected and the LDM hash table is reduced by the same amount;
+ *  - the maximum distance (1 << params->windowLog) is enforced on the window
+ *    up to the end of the chunk.
+ * ZSTD_window_update() must already have been called on `ldmState->window`
+ * for the whole of [src, src + srcSize) (checked by an assert).
+ * @return : total number of sequences written into `sequences`,
+ *           accumulated over all chunks. */
+size_t ZSTD_ldm_generateSequences(
+        ldmState_t* ldmState, rawSeq* sequences,
+        ldmParams_t const* params, void const* src, size_t srcSize,
+        int const extDict)
+{
+    U32 const maxDist = 1U << params->windowLog;
+    BYTE const* const istart = (BYTE const*)src;
+    size_t const kMaxChunkSize = 1 << 20;
+    /* Number of chunks, rounding up so a trailing partial chunk is counted. */
+    size_t const nbChunks = (srcSize / kMaxChunkSize) + ((srcSize % kMaxChunkSize) != 0);
+    size_t nbSeq = 0;
+    size_t chunk;
+
+    assert(ZSTD_CHUNKSIZE_MAX >= kMaxChunkSize);
+    /* Check that ZSTD_window_update() has been called for this chunk prior
+     * to passing it to this function.
+     */
+    assert(ldmState->window.nextSrc >= (BYTE const*)src + srcSize);
+    for (chunk = 0; chunk < nbChunks; ++chunk) {
+        size_t const chunkStart = chunk * kMaxChunkSize;
+        size_t const chunkEnd = MIN(chunkStart + kMaxChunkSize, srcSize);
+        size_t const chunkSize = chunkEnd - chunkStart;
+
+        assert(chunkStart < srcSize);
+        if (ZSTD_window_needOverflowCorrection(ldmState->window)) {
+            U32 const ldmHSize = 1U << params->hashLog;
+            /* Use the start of the chunk about to be processed, not the
+             * start of the whole input, as the current position for the
+             * correction.
+             */
+            U32 const correction = ZSTD_window_correctOverflow(
+                &ldmState->window, /* cycleLog */ 0, maxDist,
+                istart + chunkStart);
+            /* Keep the hash table indexes consistent with the shifted window. */
+            ZSTD_ldm_reduceTable(ldmState->hashTable, ldmHSize, correction);
+        }
+        /* kMaxChunkSize should be small enough that we don't lose too much of
+         * the window through early invalidation.
+         * TODO: * Test the chunk size.
+         *       * Try invalidation after the sequence generation and test
+         *         the offset against maxDist directly.
+         */
+        ZSTD_window_enforceMaxDist(&ldmState->window, istart + chunkEnd,
+                                   maxDist);
+        /* Append this chunk's sequences after those already generated. */
+        nbSeq += ZSTD_ldm_generateSequences_internal(
+            ldmState, sequences + nbSeq, params, istart + chunkStart, chunkSize,
+            extDict);
+    }
+    return nbSeq;
+}
+
#if 0
/**
* If the sequence length is longer than remaining then the sequence is split
*
* Generates the sequences using the long distance match finder.
* The sequences completely parse a prefix of the source, but leave off the last
- * literals. Returns the number of sequences generated into `sequences`.
+ * literals. Returns the number of sequences generated into `sequences`. The
+ * user must have called ZSTD_window_update() for all of the input they have,
+ * even if they pass it to ZSTD_ldm_generateSequences() in chunks.
*
* NOTE: The source may be any size, assuming it doesn't overflow the hash table
* indices, and the output sequences table is large enough.
*/
size_t ZSTD_ldm_generateSequences(
- ldmState_t* ldms, rawSeq* sequences, ZSTD_matchState_t const* ms,
+ ldmState_t* ldms, rawSeq* sequences,
ldmParams_t const* params, void const* src, size_t srcSize,
int const extDict);