(U32)pledgedSrcSize, cctx->appliedParams.fParams.contentSizeFlag);
cctx->stage = ZSTDcs_init;
cctx->dictID = 0;
+ if (params.ldmParams.enableLdm)
+ ZSTD_window_clear(&cctx->ldmState.window);
ZSTD_invalidateMatchState(&cctx->blockState.matchState);
ZSTD_reset_compressedBlockState(cctx->blockState.prevCBlock);
XXH64_reset(&cctx->xxhState, 0);
ptr = zc->ldmState.hashTable + ldmHSize;
zc->ldmSequences = (rawSeq*)ptr;
ptr = zc->ldmSequences + maxNbLdmSeq;
+ zc->maxNbLdmSequences = maxNbLdmSeq;
memset(&zc->ldmState.window, 0, sizeof(zc->ldmState.window));
}
memset(ptr, 0, ldmBucketSize);
zc->ldmState.bucketOffsets = (BYTE*)ptr;
ptr = zc->ldmState.bucketOffsets + ldmBucketSize;
+ ZSTD_window_clear(&zc->ldmState.window);
}
/* buffers */
for (i = 0; i < ZSTD_REP_NUM; ++i)
zc->blockState.nextCBlock->rep[i] = zc->blockState.prevCBlock->rep[i];
}
- if (zc->appliedParams.ldmParams.enableLdm) {
- size_t const nbSeq =
- ZSTD_ldm_generateSequences(&zc->ldmState, zc->ldmSequences,
- &zc->appliedParams.ldmParams,
- src, srcSize, extDict);
+ if (zc->externSeqStore.pos < zc->externSeqStore.size) {
+ assert(!zc->appliedParams.ldmParams.enableLdm);
+ /* Updates ldmSeqStore.pos */
lastLLSize =
- ZSTD_ldm_blockCompress(zc->ldmSequences, nbSeq,
+ ZSTD_ldm_blockCompress(&zc->externSeqStore,
ms, &zc->seqStore,
zc->blockState.nextCBlock->rep,
&zc->appliedParams.cParams,
src, srcSize, extDict);
+ assert(zc->externSeqStore.pos <= zc->externSeqStore.size);
+ } else if (zc->appliedParams.ldmParams.enableLdm) {
+ rawSeqStore_t ldmSeqStore = {NULL, 0, 0, 0};
+
+ ldmSeqStore.seq = zc->ldmSequences;
+ ldmSeqStore.capacity = zc->maxNbLdmSequences;
+ /* Updates ldmSeqStore.size */
+ CHECK_F(ZSTD_ldm_generateSequences(&zc->ldmState, &ldmSeqStore,
+ &zc->appliedParams.ldmParams,
+ src, srcSize));
+ /* Updates ldmSeqStore.pos */
+ lastLLSize =
+ ZSTD_ldm_blockCompress(&ldmSeqStore,
+ ms, &zc->seqStore,
+ zc->blockState.nextCBlock->rep,
+ &zc->appliedParams.cParams,
+ src, srcSize, extDict);
+ assert(ldmSeqStore.pos == ldmSeqStore.size);
} else { /* not long range mode */
ZSTD_blockCompressor const blockCompressor = ZSTD_selectBlockCompressor(zc->appliedParams.cParams.strategy, extDict);
lastLLSize = blockCompressor(ms, &zc->seqStore, zc->blockState.nextCBlock->rep, &zc->appliedParams.cParams, src, srcSize);
}
}
+size_t ZSTD_referenceExternalSequences(ZSTD_CCtx* cctx, rawSeq* seq, size_t nbSeq)
+{
+ if (cctx->stage != ZSTDcs_init)
+ return ERROR(stage_wrong);
+ if (cctx->appliedParams.ldmParams.enableLdm)
+ return ERROR(parameter_unsupported);
+ cctx->externSeqStore.seq = seq;
+ cctx->externSeqStore.size = nbSeq;
+ cctx->externSeqStore.capacity = nbSeq;
+ cctx->externSeqStore.pos = 0;
+ return 0;
+}
+
static size_t ZSTD_compressContinue_internal (ZSTD_CCtx* cctx,
void* dst, size_t dstCapacity,
}
static size_t ZSTD_ldm_generateSequences_internal(
- ldmState_t* ldmState, rawSeq* sequences,
- ldmParams_t const* params, void const* src, size_t srcSize,
- int const extDict)
+ ldmState_t* ldmState, rawSeqStore_t* rawSeqStore,
+ ldmParams_t const* params, void const* src, size_t srcSize)
{
- rawSeq const* const sequencesStart = sequences;
/* LDM parameters */
+ int const extDict = ZSTD_window_hasExtDict(ldmState->window);
U32 const minMatchLength = params->minMatchLength;
U64 const hashPower = ldmState->hashPower;
U32 const hBits = params->hashLog - params->bucketSizeLog;
*/
U32 const matchIndex = bestEntry->offset;
U32 const offset = current - matchIndex;
-
- sequences->litLength = (U32)(ip - anchor);
- sequences->matchLength = (U32)mLength;
- sequences->offset = offset;
- ++sequences;
+ rawSeq* const seq = rawSeqStore->seq + rawSeqStore->size;
+
+ /* Out of sequence storage */
+ if (rawSeqStore->size == rawSeqStore->capacity)
+ return ERROR(dstSize_tooSmall);
+ seq->litLength = (U32)(ip - anchor);
+ seq->matchLength = (U32)mLength;
+ seq->offset = offset;
+ rawSeqStore->size++;
}
/* Insert the current entry into the hash table */
ip += mLength;
anchor = ip;
}
- /* Return the number of sequences generated */
- return sequences - sequencesStart;
+ return iend - anchor;
}
/*! ZSTD_ldm_reduceTable() :
}
size_t ZSTD_ldm_generateSequences(
- ldmState_t* ldmState, rawSeq* sequences,
- ldmParams_t const* params, void const* src, size_t srcSize,
- int const extDict)
+ ldmState_t* ldmState, rawSeqStore_t* sequences,
+ ldmParams_t const* params, void const* src, size_t srcSize)
{
U32 const maxDist = 1U << params->windowLog;
BYTE const* const istart = (BYTE const*)src;
size_t const kMaxChunkSize = 1 << 20;
size_t const nbChunks = (srcSize / kMaxChunkSize) + ((srcSize % kMaxChunkSize) != 0);
- size_t nbSeq = 0;
size_t chunk;
+ size_t leftoverSize = 0;
assert(ZSTD_CHUNKSIZE_MAX >= kMaxChunkSize);
/* Check that ZSTD_window_update() has been called for this chunk prior
* to passing it to this function.
*/
assert(ldmState->window.nextSrc >= (BYTE const*)src + srcSize);
- for (chunk = 0; chunk < nbChunks; ++chunk) {
+ /* The input could be very large (in zstdmt), so it must be broken up into
+ * chunks to enforce the maximmum distance and handle overflow correction.
+ */
+ assert(sequences->pos <= sequences->size);
+ assert(sequences->size <= sequences->capacity);
+ for (chunk = 0; chunk < nbChunks && sequences->size < sequences->capacity; ++chunk) {
size_t const chunkStart = chunk * kMaxChunkSize;
size_t const chunkEnd = MIN(chunkStart + kMaxChunkSize, srcSize);
size_t const chunkSize = chunkEnd - chunkStart;
+ size_t newLeftoverSize;
+ size_t const prevSize = sequences->size;
assert(chunkStart < srcSize);
+ /* 1. Perform overflow correction if necessary. */
if (ZSTD_window_needOverflowCorrection(ldmState->window)) {
U32 const ldmHSize = 1U << params->hashLog;
U32 const correction = ZSTD_window_correctOverflow(
&ldmState->window, /* cycleLog */ 0, maxDist, src);
ZSTD_ldm_reduceTable(ldmState->hashTable, ldmHSize, correction);
}
- /* kMaxChunkSize should be small enough that we don't lose too much of
+ /* 2. We enforce the maximum offset allowed.
+ *
+ * kMaxChunkSize should be small enough that we don't lose too much of
* the window through early invalidation.
* TODO: * Test the chunk size.
* * Try invalidation after the sequence generation and test the
*/
ZSTD_window_enforceMaxDist(&ldmState->window, istart + chunkEnd,
maxDist);
- nbSeq += ZSTD_ldm_generateSequences_internal(
- ldmState, sequences + nbSeq, params, istart + chunkStart, chunkSize,
- extDict);
+ /* 3. Generate the sequences for the chunk, and get newLeftoverSize. */
+ newLeftoverSize = ZSTD_ldm_generateSequences_internal(
+ ldmState, sequences, params, istart + chunkStart,
+ chunkSize);
+ if (ZSTD_isError(newLeftoverSize))
+ return newLeftoverSize;
+ /* 4. We add the leftover literals from previous iterations to the first
+ * newly generated sequence, or add the `newLeftoverSize` if none are
+ * generated.
+ */
+ /* Prepend the leftover literals from the last call */
+ if (prevSize < sequences->size) {
+ sequences->seq[prevSize].litLength += (U32)leftoverSize;
+ leftoverSize = newLeftoverSize;
+ } else {
+ assert(newLeftoverSize == chunkSize);
+ leftoverSize += chunkSize;
+ }
}
- return nbSeq;
+ return 0;
}
-#if 0
/**
* If the sequence length is longer than remaining then the sequence is split
* between this block and the next.
* Returns the current sequence to handle, or if the rest of the block should
* be literals, it returns a sequence with offset == 0.
*/
-static rawSeq maybeSplitSequence(rawSeq* sequences, size_t* nbSeq,
- size_t const seq, size_t const remaining,
- U32 const minMatch)
+static rawSeq maybeSplitSequence(rawSeqStore_t* rawSeqStore,
+ U32 const remaining, U32 const minMatch)
{
- rawSeq sequence = sequences[seq];
+ size_t const pos = rawSeqStore->pos;
+ rawSeq sequence = rawSeqStore->seq[rawSeqStore->pos];
assert(sequence.offset > 0);
/* Handle partial sequences */
if (remaining <= sequence.litLength) {
* They will become the last literals of this block.
* The next block starts off with the remaining literals.
*/
- sequences[seq].litLength -= remaining;
- *nbSeq = seq;
+ rawSeqStore->seq[pos].litLength -= remaining;
sequence.offset = 0;
} else if (remaining < sequence.litLength + sequence.matchLength) {
/* Split the match up into two sequences. One in this block, and one
assert(remaining > sequence.litLength);
assert(matchPrefix < sequence.matchLength);
assert(matchPrefix + matchSuffix == sequence.matchLength);
- /* Update the current sequence */
+ /* Update the first sequence */
sequence.matchLength = matchPrefix;
- /* Update the next sequence when long enough, otherwise omit it. */
+ /* Update the second sequence */
if (matchSuffix >= minMatch) {
- sequences[seq].litLength = 0;
- sequences[seq].matchLength = matchSuffix;
- *nbSeq = seq;
+ /* Update the second sequence, since the suffix is long enough */
+ rawSeqStore->seq[pos].litLength = 0;
+ rawSeqStore->seq[pos].matchLength = matchSuffix;
} else {
- sequences[seq + 1].litLength += matchSuffix;
- *nbSeq = seq + 1;
+ /* Omit the second sequence since the match suffix is too short.
+ * Add to the next sequences literals (if any).
+ */
+ if (pos + 1 < rawSeqStore->size)
+ rawSeqStore->seq[pos + 1].litLength += matchSuffix;
+ rawSeqStore->pos++; /* Consume the sequence */
}
if (sequence.matchLength < minMatch) {
/* Skip the current sequence if it is too short */
sequence.offset = 0;
}
+ } else {
+ /* No partial sequence */
+ rawSeqStore->pos++; /* Consume the sequence */
}
return sequence;
}
-#endif
-size_t ZSTD_ldm_blockCompress(rawSeq const* sequences, size_t nbSeq,
+size_t ZSTD_ldm_blockCompress(rawSeqStore_t* rawSeqStore,
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize,
int const extDict)
{
+ unsigned const minMatch = cParams->searchLength;
ZSTD_blockCompressor const blockCompressor =
ZSTD_selectBlockCompressor(cParams->strategy, extDict);
BYTE const* const base = ms->window.base;
BYTE const* const iend = istart + srcSize;
/* Input positions */
BYTE const* ip = istart;
- size_t seq;
+
+ assert(rawSeqStore->pos <= rawSeqStore->size);
+ assert(rawSeqStore->size <= rawSeqStore->capacity);
/* Loop through each sequence and apply the block compressor to the lits */
- for (seq = 0; seq < nbSeq; ++seq) {
- rawSeq const sequence = sequences[seq];
+ while (rawSeqStore->pos < rawSeqStore->size && ip < iend) {
+ /* maybeSplitSequence updates rawSeqStore->pos */
+ rawSeq const sequence = maybeSplitSequence(rawSeqStore,
+ (U32)(iend - ip), minMatch);
int i;
-
+ /* End signal */
if (sequence.offset == 0)
break;
+ assert(sequence.offset <= (1U << cParams->windowLog));
assert(ip + sequence.litLength + sequence.matchLength <= iend);
/* Fill tables for block compressor */
ip += sequence.matchLength;
}
}
+ /* Fill the tables for the block compressor */
ZSTD_ldm_limitTableUpdate(ms, ip);
ZSTD_ldm_fillFastTables(ms, cParams, ip);
+ /* Compress the last literals */
{
size_t const lastLiterals = blockCompressor(ms, seqStore, rep, cParams,
ip, iend - ip);
* ZSTD_ldm_generateSequences():
*
* Generates the sequences using the long distance match finder.
- * The sequences completely parse a prefix of the source, but leave off the last
- * literals. Returns the number of sequences generated into `sequences`. The
- * user must have called ZSTD_window_update() for all of the input they have,
- * even if they pass it to ZSTD_ldm_generateSequences() in chunks.
+ * Generates long range matching sequences in `sequences`, which parse a prefix
+ * of the source. `sequences` must be large enough to store every sequence,
+ * which can be checked with `ZSTD_ldm_getMaxNbSeq()`.
+ * @returns 0 or an error code.
*
- * NOTE: The source may be any size, assuming it doesn't overflow the hash table
- * indices, and the output sequences table is large enough..
+ * NOTE: The user must have called ZSTD_window_update() for all of the input
+ * they have, even if they pass it to ZSTD_ldm_generateSequences() in chunks.
+ * NOTE: This function returns an error if it runs out of space to store
+ * sequences.
*/
size_t ZSTD_ldm_generateSequences(
- ldmState_t* ldms, rawSeq* sequences,
- ldmParams_t const* params, void const* src, size_t srcSize,
- int const extDict);
+ ldmState_t* ldms, rawSeqStore_t* sequences,
+ ldmParams_t const* params, void const* src, size_t srcSize);
/**
* ZSTD_ldm_blockCompress():
* block compressor. The literals section of every sequence is passed to the
* secondary block compressor, and those sequences are interspersed with the
* predefined sequences. Returns the length of the last literals.
- * `nbSeq` is the number of sequences available in `sequences`.
+ * Updates `rawSeqStore.pos` to indicate how many sequences have been consumed.
+ * `rawSeqStore.seq` may also be updated to split the last sequence between two
+ * blocks.
+ * @return The length of the last literals.
*
* NOTE: The source must be at most the maximum block size, but the predefined
* sequences can be any size, and may be longer than the block. In the case that
* they are longer than the block, the last sequences may need to be split into
- * two. We handle that case correctly, and update `sequences` and `nbSeq`
- * appropriately.
+ * two. We handle that case correctly, and update `rawSeqStore` appropriately.
+ * NOTE: This function does not return any errors.
*/
-size_t ZSTD_ldm_blockCompress(rawSeq const* sequences, size_t nbSeq,
+size_t ZSTD_ldm_blockCompress(rawSeqStore_t* rawSeqStore,
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize,
int const extDict);