/* Cursor into a ZSTD_Sequence array: which sequence (idx) and how far into it (posInSequence) processing has reached. */
typedef struct {
U32 idx; /* Index in array of ZSTD_Sequence */
U32 posInSequence; /* Position within sequence at idx */
- U64 posInSrc; /* Position in src stream */
+ U64 posInSrc; /* Number of bytes given by sequences provided so far */
} ZSTD_sequencePosition;
#if defined(DEBUGLEVEL) && (DEBUGLEVEL >= 6)
}
#endif
-/* Returns offset code, given a raw offset and repcode array */
+/* Returns an offset code, given a sequence's raw offset, the ongoing repcode array, and whether litLength == 0 */
static U32 ZSTD_finalizeOffCode(U32 rawOffset, const U32* const rep, U32 ll0) {
/* Start from the raw-offset encoding; it is replaced below when repCode ends up non-zero */
U32 offCode = rawOffset + ZSTD_REP_MOVE;
/* 0 means no repcode was selected (see the if (repCode) test below) */
U32 repCode = 0;
repCode = 3;
}
if (repCode) {
+ /* ZSTD_storeSeq expects a number in the range [0, 2] to represent a repcode */
offCode = repCode - 1;
}
return offCode;
}
+/* Returns 0 on success, and a ZSTD_error otherwise. This function scans through an array of
+ * ZSTD_Sequence, storing the sequences it finds, until it reaches a block delimiter
+ * (an entry whose matchLength and offset are both 0) or the end of the array.
+ */
static size_t ZSTD_copySequencesToSeqStoreBlockDelim(seqStore_t* seqStore, ZSTD_sequencePosition* seqPos,
const ZSTD_Sequence* const inSeqs, size_t inSeqsSize,
const void* src, size_t blockSize, ZSTD_CCtx* cctx) {
size_t idx = seqPos->idx;
BYTE const* ip = (BYTE const*)(src);
- BYTE const* iend = ip + blockSize;
+ /* Both the pointee and the pointer are const: iend is the fixed end of this block's input */
+ const BYTE* const iend = ip + blockSize;
repcodes_t updatedRepcodes;
U32 dictSize;
U32 litLength;
if (cctx->cdict) {
dictSize = cctx->cdict->dictContentSize;
- } else if (cctx->prefixDict.dictSize) {
+ } else if (cctx->prefixDict.dict) {
dictSize = cctx->prefixDict.dictSize;
} else {
dictSize = 0;
}
-
ZSTD_memcpy(updatedRepcodes.rep, cctx->blockState.prevCBlock->rep, sizeof(repcodes_t));
- for (; (inSeqs[idx].matchLength != 0 || inSeqs[idx].offset != 0) && idx < inSeqsSize; ++idx) {
+ /* Bounds-check idx before dereferencing inSeqs[idx], so a missing delimiter cannot cause an out-of-bounds read */
+ for (; idx < inSeqsSize && (inSeqs[idx].matchLength != 0 || inSeqs[idx].offset != 0); ++idx) {
litLength = inSeqs[idx].litLength;
}
/* Returns the number of bytes to move the current read position back by. Only non-zero
- * if we ended up splitting a sequence.
+ * if we ended up splitting a sequence. Otherwise, it may return a ZSTD error if something
+ * went wrong.
+ *
+ * This function will attempt to scan through blockSize bytes represented by the sequences
+ * in inSeqs, storing any (partial) sequences.
+ *
+ * Occasionally, we may want to change the actual number of bytes we consumed from inSeqs to
+ * avoid splitting a match, or to avoid splitting a match such that it would produce a match
+ * smaller than MINMATCH. In this case, we return the number of bytes that we didn't read from this block.
*/
static size_t ZSTD_copySequencesToSeqStore(seqStore_t* seqStore, ZSTD_sequencePosition* seqPos,
const ZSTD_Sequence* const inSeqs, size_t inSeqsSize,
size_t startPosInSequence = seqPos->posInSequence;
size_t endPosInSequence = seqPos->posInSequence + blockSize;
BYTE const* ip = (BYTE const*)(src);
- BYTE const* iend = ip + blockSize;
+ BYTE const* iend = ip + blockSize; /* May be adjusted if we decide to process fewer than blockSize bytes */
repcodes_t updatedRepcodes;
U32 bytesAdjustment = 0;
U32 finalMatchSplit = 0;
U32 repCode;
if (cctx->cdict) {
- dictSize = ZSTD_sizeof_CDict(cctx->cdict);
- } else if (cctx->prefixDict.dictSize) {
+ dictSize = cctx->cdict->dictContentSize;
+ } else if (cctx->prefixDict.dict) {
dictSize = cctx->prefixDict.dictSize;
- } else if (ZSTD_sizeof_localDict(cctx->localDict)) {
- dictSize = ZSTD_sizeof_localDict(cctx->localDict);
+ } else {
+ dictSize = 0;
}
- DEBUGLOG(5, "ZSTD_copySequencesToSeqStore: idx: %zu PIS: %u blockSize: %zu windowSize: %u", idx, startPosInSequence, blockSize, windowSize);
+ /* startPosInSequence is a size_t, so it is printed with %zu */
+ DEBUGLOG(5, "ZSTD_copySequencesToSeqStore: idx: %zu PIS: %zu blockSize: %zu windowSize: %u", idx, startPosInSequence, blockSize, windowSize);
DEBUGLOG(5, "Start seq: idx: %u (of: %u ml: %u ll: %u)", idx, inSeqs[idx].offset, inSeqs[idx].matchLength, inSeqs[idx].litLength);
startPosInSequence = 0;
idx++;
} else {
- /* This is the final sequence we're adding from inSeqs, and endPosInSequence
+ /* This is the final (partial) sequence we're adding from inSeqs, and endPosInSequence
does not reach the end of the match. So, we have to split the sequence */
- DEBUGLOG(6, "Require a split: diff: %u, idx: %u PIS: %u", currSeq.litLength + currSeq.matchLength - endPosInSequence, idx, endPosInSequence);
+ /* Both the remaining-length expression and endPosInSequence are size_t, hence %zu */
+ DEBUGLOG(6, "Require a split: diff: %zu, idx: %u PIS: %zu",
+ currSeq.litLength + currSeq.matchLength - endPosInSequence, idx, endPosInSequence);
if (endPosInSequence > litLength) {
litLength = startPosInSequence >= litLength ? 0 : litLength - startPosInSequence;
U32 firstHalfMatchLength = endPosInSequence - startPosInSequence - litLength;
} else {
/* Move the position in sequence backwards so that we don't split match, and break to store
* the last literals. We use the original currSeq.litLength as a marker for where endPosInSequence
- * should go.
+ * should go. We prefer to do this whenever it is not necessary to split the match, or if doing so
+ * would cause the first half of the match to be too small
*/
bytesAdjustment = endPosInSequence - currSeq.litLength;
endPosInSequence = currSeq.litLength;
assert(endPosInSequence <= inSeqs[idx].litLength + inSeqs[idx].matchLength);
seqPos->idx = idx;
seqPos->posInSequence = endPosInSequence;
- /* Update repcodes */
ZSTD_memcpy(cctx->blockState.nextCBlock->rep, updatedRepcodes.rep, sizeof(repcodes_t));
iend -= bytesAdjustment;
/* Compress, block-by-block, all of the sequences given.
*
- * Returns the cumulative size of all compressed blocks (including their headers), otherwise a ZSTD error
+ * Returns the cumulative size of all compressed blocks (including their headers), otherwise a ZSTD error.
*/
static size_t ZSTD_compressSequences_internal(void* dst, size_t dstCapacity,
ZSTD_CCtx* cctx,
op += frameHeaderSize;
dstCapacity -= frameHeaderSize;
cSize += frameHeaderSize;
-
- /* Update checksum if requested */
if (cctx->appliedParams.fParams.checksumFlag && srcSize) {
XXH64_update(&cctx->xxhState, src, srcSize);
}
-
/* cSize includes block header size and compressed sequences size */
compressedBlocksSize = ZSTD_compressSequences_internal(op, dstCapacity,
- cctx, inSeqs, inSeqsSize,
- src, srcSize);
+ cctx, inSeqs, inSeqsSize,
+ src, srcSize);
FORWARD_IF_ERROR(compressedBlocksSize, "Compressing blocks failed!");
cSize += compressedBlocksSize;
dstCapacity -= compressedBlocksSize;
/* Test with block delimiters roundtrip */
seqsSize = ZSTD_generateSequences(cctx, seqs, srcSize, src, srcSize);
ZSTD_CCtx_reset(cctx, ZSTD_reset_session_and_parameters);
- compressedSize = ZSTD_compressSequences(cctx, dst, dstSize, seqs, seqsSize, src, srcSize, ZSTD_sf_explicitBlockDelimiters);
+ ZSTD_CCtx_setParameter(cctx, ZSTD_c_blockDelimiters, ZSTD_sf_explicitBlockDelimiters);
+ compressedSize = ZSTD_compressSequences(cctx, dst, dstSize, seqs, seqsSize, src, srcSize);
if (ZSTD_isError(compressedSize)) {
DISPLAY("Error in sequence compression with block delims\n");
goto _output_error;
/* Test with no block delimiters roundtrip */
seqsSize = ZSTD_mergeBlockDelimiters(seqs, seqsSize);
ZSTD_CCtx_reset(cctx, ZSTD_reset_session_and_parameters);
- compressedSize = ZSTD_compressSequences(cctx, dst, dstSize, seqs, seqsSize, src, srcSize, ZSTD_sf_noBlockDelimiters);
+ ZSTD_CCtx_setParameter(cctx, ZSTD_c_blockDelimiters, ZSTD_sf_noBlockDelimiters);
+ compressedSize = ZSTD_compressSequences(cctx, dst, dstSize, seqs, seqsSize, src, srcSize);
if (ZSTD_isError(compressedSize)) {
DISPLAY("Error in sequence compression with no block delims\n");
goto _output_error;