}
typedef struct { /* Read cursor into a caller-supplied array of ZSTD_Sequence */
- U32 idx; /* Index in array of ZSTD_Sequence*/
+ U32 idx; /* Index in array of ZSTD_Sequence */
U32 posInSequence; /* Byte position within the sequence at idx (literals first, then the match) */
+ U64 posInSrc; /* Position in src stream; in the code shown it only advances inside DEBUGLEVEL >= 6 sections */
} ZSTD_sequencePosition;
+#if defined(DEBUGLEVEL) && (DEBUGLEVEL >= 6)
+/* ZSTD_validateSequence() :
+ * Debug-only sanity check of a single sequence before it is stored.
+ * @offCode     : offset code of the sequence (raw offset + ZSTD_REP_MOVE when not a repcode)
+ * @repCode     : non-zero when offCode designates a repcode; the offset bound is then skipped
+ * @matchLength : full match length, must be >= MINMATCH
+ * @posInSrc    : amount of data the decoder would have decoded up to this match
+ * @windowLog   : log2 of the window size
+ * @dictSize    : size of the dictionary content that offsets may reach into
+ * @return      : 0 if the sequence is valid, otherwise a ZSTD error code (corruption_detected)
+ */
+static size_t ZSTD_validateSequence(U32 offCode, U32 repCode, U32 matchLength,
+                                    size_t posInSrc, U32 windowLog, U32 dictSize) {
+    /* Unsigned literal : `1 << 31` would overflow a signed int. */
+    U32 const windowSize = 1u << windowLog;
+    /* posInSrc represents the amount of data the decoder would decode up to this point.
+     * As long as the amount of data decoded is less than or equal to window size, offsets may be
+     * larger than the total length of output decoded in order to reference the dict, even larger than
+     * window size. After output surpasses windowSize, we're limited to windowSize offsets again.
+     * The bound is kept as a size_t so that posInSrc + dictSize is not truncated to 32 bits.
+     */
+    size_t const offsetBound = (posInSrc > windowSize) ? (size_t)windowSize : posInSrc + (size_t)dictSize;
+    RETURN_ERROR_IF(!repCode && offCode - ZSTD_REP_MOVE > offsetBound, corruption_detected, "Offset too large!");
+    RETURN_ERROR_IF(matchLength < MINMATCH, corruption_detected, "Matchlength too small");
+    return 0;
+}
+#endif
+
+/* ZSTD_finalizeOffCode() :
+ * Converts a raw match offset into the offset code stored for the sequence.
+ * @rawOffset : offset as supplied in the input sequence
+ * @rep       : the three current repcodes
+ * @ll0       : 1 when the sequence has no literals, 0 otherwise; shifts which repcode slot matches
+ * @return    : (repcode number - 1) when rawOffset matches a repcode, else rawOffset + ZSTD_REP_MOVE
+ */
+static U32 ZSTD_finalizeOffCode(U32 rawOffset, const U32* const rep, U32 ll0) {
+    /* 1-based repcode number, or 0 when rawOffset matches none of them.
+     * The tests are evaluated top to bottom; the first one that holds wins. */
+    U32 const repNum =
+          (!ll0 && rawOffset == rep[0])    ? 1
+        : (rawOffset == rep[1])            ? 2 - ll0
+        : (rawOffset == rep[2])            ? 3 - ll0
+        : (ll0 && rawOffset == rep[0] - 1) ? 3
+        :                                    0;
+
+    return repNum ? repNum - 1 : rawOffset + ZSTD_REP_MOVE;
+}
+
+/* ZSTD_copySequencesToSeqStoreBlockDelim() :
+ * Copies one block worth of sequences from inSeqs into seqStore, starting at seqPos->idx.
+ * The block ends at the first entry whose matchLength and offset are both 0 (the block
+ * delimiter); that entry's litLength, if any, is stored as the block's last literals.
+ * On success, seqPos->idx is advanced past the delimiter and nextCBlock->rep receives the
+ * updated repcodes.
+ * @return : 0 on success, or a ZSTD error code (corruption_detected) when no delimiter is
+ *           found within inSeqsSize entries, or when the sequences do not consume exactly
+ *           blockSize bytes of src.
+ */
+static size_t ZSTD_copySequencesToSeqStoreBlockDelim(seqStore_t* seqStore, ZSTD_sequencePosition* seqPos,
+                                                     const ZSTD_Sequence* const inSeqs, size_t inSeqsSize,
+                                                     const void* src, size_t blockSize, ZSTD_CCtx* cctx) {
+    size_t idx = seqPos->idx;
+    BYTE const* ip = (BYTE const*)(src);
+    BYTE const* iend = ip + blockSize;
+    repcodes_t updatedRepcodes;
+    U32 dictSize;
+    U32 litLength;
+    U32 matchLength;
+    U32 ll0;
+    U32 offCode;
+
+    if (cctx->cdict) {
+        dictSize = cctx->cdict->dictContentSize;
+    } else if (cctx->prefixDict.dictSize) {
+        dictSize = cctx->prefixDict.dictSize;
+    } else {
+        dictSize = 0;
+    }
+    (void)dictSize;  /* only read when the DEBUGLEVEL >= 6 validation below is compiled in */
+
+    ZSTD_memcpy(updatedRepcodes.rep, cctx->blockState.prevCBlock->rep, sizeof(repcodes_t));
+    /* The bounds check comes first so that inSeqs[idx] is never read past the end of the array. */
+    for (; idx < inSeqsSize && (inSeqs[idx].matchLength != 0 || inSeqs[idx].offset != 0); ++idx) {
+        litLength = inSeqs[idx].litLength;
+        matchLength = inSeqs[idx].matchLength;
+        ll0 = litLength == 0;
+        offCode = ZSTD_finalizeOffCode(inSeqs[idx].offset, updatedRepcodes.rep, ll0);
+        updatedRepcodes = ZSTD_updateRep(updatedRepcodes.rep, offCode, ll0);
+
+        DEBUGLOG(6, "Storing sequence: (of: %u, ml: %u, ll: %u)", offCode, matchLength, litLength);
+#if defined(DEBUGLEVEL) && (DEBUGLEVEL >= 6)
+        {   /* Non-zero when ZSTD_finalizeOffCode() substituted a repcode for the raw offset;
+             * only its truth value is consumed by ZSTD_validateSequence(). */
+            U32 const repCode = (offCode != inSeqs[idx].offset + ZSTD_REP_MOVE);
+            seqPos->posInSrc += litLength;
+            FORWARD_IF_ERROR(ZSTD_validateSequence(offCode, repCode, matchLength,
+                                                   seqPos->posInSrc, cctx->appliedParams.cParams.windowLog,
+                                                   dictSize),
+                                                   "Sequence validation failed");
+            seqPos->posInSrc += matchLength;
+        }
+#endif
+        ZSTD_storeSeq(seqStore, litLength, ip, iend, offCode, matchLength - MINMATCH);
+        ip += matchLength + litLength;
+    }
+    ZSTD_memcpy(cctx->blockState.nextCBlock->rep, updatedRepcodes.rep, sizeof(repcodes_t));
+
+    /* The loop also stops when the array is exhausted; without a delimiter entry there is
+     * nothing valid to read at inSeqs[idx]. */
+    RETURN_ERROR_IF(idx >= inSeqsSize, corruption_detected, "Block delimiter not found in sequences!");
+
+    if (inSeqs[idx].litLength) {
+        DEBUGLOG(6, "Storing last literals of size: %u", inSeqs[idx].litLength);
+        ZSTD_storeLastLiterals(seqStore, ip, inSeqs[idx].litLength);
+        ip += inSeqs[idx].litLength;
+#if defined(DEBUGLEVEL) && (DEBUGLEVEL >= 6)
+        seqPos->posInSrc += inSeqs[idx].litLength;
+#endif
+    }
+    RETURN_ERROR_IF(ip != iend, corruption_detected, "Blocksize doesn't agree with block delimiter!");
+    /* Skip over the delimiter so the next call starts on the following block. */
+    seqPos->idx = (U32)(idx+1);
+    return 0;
+}
+
+/* Copies up to blockSize bytes worth of sequences from inSeqs into seqStore, starting at the
+ * position recorded in seqPos. The caller shown in this file uses it when the input carries
+ * no block delimiters (ZSTD_sf_noBlockDelimiters).
+ * Returns the number of bytes to move the current read position back by. Only non-zero
+ * if we ended up splitting a sequence.
+ * NOTE(review): in the lines shown, bytesAdjustment is only assigned on the path that moves
+ * endPosInSequence back to AVOID splitting a match - confirm the wording above.
+ */
static size_t ZSTD_copySequencesToSeqStore(seqStore_t* seqStore, ZSTD_sequencePosition* seqPos,
const ZSTD_Sequence* const inSeqs, size_t inSeqsSize,
const void* src, size_t blockSize, ZSTD_CCtx* cctx) {
size_t startPosInSequence = seqPos->posInSequence;
size_t endPosInSequence = seqPos->posInSequence + blockSize;
BYTE const* ip = (BYTE const*)(src);
- const BYTE* const iend = ip + blockSize;
- U32 windowSize = 1 << cctx->appliedParams.cParams.windowLog;
+ BYTE const* iend = ip + blockSize; /* not const: reduced by bytesAdjustment before the last literals are stored */
repcodes_t updatedRepcodes;
U32 bytesAdjustment = 0;
- U32 bytesread = 0;
-
- DEBUGLOG(3, "ZSTD_copySequencesToSeqStore: idx: %zu PIS: %u blockSize: %zu windowSize: %u", idx, startPosInSequence, blockSize, windowSize);
- DEBUGLOG(3, "start seq: idx: %u (of: %u ml: %u ll: %u)", idx, inSeqs[idx].offset, inSeqs[idx].matchLength, inSeqs[idx].litLength);
+ U32 finalMatchSplit = 0;
+ U32 dictSize;
+ U32 litLength;
+ U32 matchLength;
+ U32 rawOffset;
+ U32 offCode;
+ U32 repCode;
+
+ if (cctx->cdict) {
+ dictSize = ZSTD_sizeof_CDict(cctx->cdict); /* NOTE(review): the block-delimiter variant reads cctx->cdict->dictContentSize here - confirm which size is intended */
+ } else if (cctx->prefixDict.dictSize) {
+ dictSize = cctx->prefixDict.dictSize;
+ } else if (ZSTD_sizeof_localDict(cctx->localDict)) {
+ dictSize = ZSTD_sizeof_localDict(cctx->localDict);
+ } /* NOTE(review): no final else - dictSize stays uninitialized when none of the branches is taken, yet it is passed to ZSTD_validateSequence() below */
+ DEBUGLOG(5, "ZSTD_copySequencesToSeqStore: idx: %zu PIS: %u blockSize: %zu windowSize: %u", idx, startPosInSequence, blockSize, windowSize); /* NOTE(review): windowSize is referenced but its declaration is removed above, and no declaration of idx is visible in this excerpt - confirm */
+ DEBUGLOG(5, "Start seq: idx: %u (of: %u ml: %u ll: %u)", idx, inSeqs[idx].offset, inSeqs[idx].matchLength, inSeqs[idx].litLength);
ZSTD_memcpy(updatedRepcodes.rep, cctx->blockState.prevCBlock->rep, sizeof(repcodes_t));
- while (endPosInSequence && idx < inSeqsSize) {
- ZSTD_Sequence currSeq = inSeqs[idx];
- U32 litLength = currSeq.litLength;
- U32 matchLength = currSeq.matchLength;
- U32 rawOffset = currSeq.offset;
- U32 offCode = rawOffset + ZSTD_REP_MOVE;
- U32 repCode = cctx->calculateRepcodes ? 0 : currSeq.rep;
+ while (endPosInSequence && idx < inSeqsSize && !finalMatchSplit) {
+ const ZSTD_Sequence currSeq = inSeqs[idx];
+ litLength = currSeq.litLength;
+ matchLength = currSeq.matchLength;
+ rawOffset = currSeq.offset;
+ repCode = 0; /* NOTE(review): never set to anything else in the lines shown, so ZSTD_validateSequence() applies its offset bound to repcode offCodes too - confirm */
/* Modify the sequence depending on where endPosInSequence lies */
if (endPosInSequence >= currSeq.litLength + currSeq.matchLength) {
if (startPosInSequence >= litLength) {
- /* Start pos is within the match */
startPosInSequence -= litLength;
litLength = 0;
matchLength -= startPosInSequence;
idx++;
} else {
/* This is the final sequence we're adding from inSeqs, and endPosInSequence
- does not reach the end of the match. So, we have to split something */
- DEBUGLOG(3, "Require a split: diff: %u, idx: %u PIS: %u", currSeq.litLength + currSeq.matchLength - endPosInSequence, idx, endPosInSequence);
- DEBUGLOG(3, "(of: %u ml: %u ll: %u)", rawOffset, matchLength, litLength);
+ does not reach the end of the match. So, we have to split the sequence */
+ DEBUGLOG(6, "Require a split: diff: %u, idx: %u PIS: %u", currSeq.litLength + currSeq.matchLength - endPosInSequence, idx, endPosInSequence);
if (endPosInSequence > litLength) {
- DEBUGLOG(2, "hard case");
- /* This sequence ends inside the match, may need to split match */
- U32 firstHalfMatchLength = endPosInSequence - litLength;
+ litLength = startPosInSequence >= litLength ? 0 : litLength - startPosInSequence;
+ U32 firstHalfMatchLength = endPosInSequence - startPosInSequence - litLength; /* NOTE(review): declaration follows a statement, which C89/C90 does not allow - confirm the language level this file targets */
if (matchLength > blockSize && firstHalfMatchLength >= MINMATCH) {
- U32 secondHalfMatchLength = matchLength - firstHalfMatchLength;
+ /* Only ever split the match if it is larger than the block size */
+ U32 secondHalfMatchLength = currSeq.matchLength + currSeq.litLength - endPosInSequence;
if (secondHalfMatchLength < MINMATCH) {
/* Move the endPosInSequence backward so that it creates match of MINMATCH length */
endPosInSequence -= MINMATCH - secondHalfMatchLength;
firstHalfMatchLength -= bytesAdjustment; /* NOTE(review): bytesAdjustment is not assigned on this path in the lines shown, so this subtracts 0 rather than the amount endPosInSequence was just moved - confirm */
}
matchLength = firstHalfMatchLength;
+ /* Flag that we split the last match - after storing the sequence, exit the loop,
+ but keep the value of endPosInSequence */
+ finalMatchSplit = 1;
} else {
- /* Move the position in sequence backwards so that we don't split match, and store
- the last literals */
- DEBUGLOG(2, "MOVING SEQ BACKWARDS");
- bytesAdjustment = endPosInSequence - litLength;
- endPosInSequence = litLength;
+ /* Move the position in sequence backwards so that we don't split match, and break to store
+ * the last literals. We use the original currSeq.litLength as a marker for where endPosInSequence
+ * should go.
+ */
+ bytesAdjustment = endPosInSequence - currSeq.litLength;
+ endPosInSequence = currSeq.litLength;
break;
}
} else {
- /* This sequence ends inside the literals, store the last literals */
- litLength = startPosInSequence >= litLength ? 0 : endPosInSequence - startPosInSequence;
+ /* This sequence ends inside the literals, break to store the last literals */
break;
}
}
-
- if (matchLength < MINMATCH) {
- DEBUGLOG(2, "match too small");
- DEBUGLOG(2, "%u (of: %u ml: %u ll: %u)", idx, rawOffset, matchLength, litLength);
- RETURN_ERROR_IF(1, corruption_detected, "match");
- }
- if (cctx->calculateRepcodes == ZSTD_sf_calculateRepcodes) {
- U32 ll0 = (litLength == 0);
- /* Check if current offset matches anything in the repcode table */
- if (!ll0 && rawOffset == updatedRepcodes.rep[0]) {
- repCode = 1;
- } else if (rawOffset == updatedRepcodes.rep[1]) {
- repCode = 2 - ll0;
- } else if (rawOffset == updatedRepcodes.rep[2]) {
- repCode = 3 - ll0;
- } else if (ll0 && rawOffset == updatedRepcodes.rep[0] - 1) {
- repCode = 3;
- }
- if (repCode) {
- offCode = repCode - 1;
- }
+ /* Check if this offset can be represented with a repcode */
+ { U32 ll0 = (litLength == 0);
+ offCode = ZSTD_finalizeOffCode(rawOffset, updatedRepcodes.rep, ll0);
updatedRepcodes = ZSTD_updateRep(updatedRepcodes.rep, offCode, ll0);
- } else {
- offCode = repCode ? repCode-1 : offCode;
}
- DEBUGLOG(6, "Storing: idx: %zu (of: %u, ml: %u, ll: %u)", idx, offCode, matchLength, litLength);
- /* Validate the offCode */
+
+#if defined(DEBUGLEVEL) && (DEBUGLEVEL >= 6)
+ seqPos->posInSrc += litLength;
+ FORWARD_IF_ERROR(ZSTD_validateSequence(offCode, repCode,
+ matchLength, seqPos->posInSrc,
+ cctx->appliedParams.cParams.windowLog, dictSize),
+ "Sequence validation failed");
+ seqPos->posInSrc += matchLength;
+#endif
+ DEBUGLOG(6, "Storing sequence: (of: %u, ml: %u, ll: %u)", offCode, matchLength, litLength);
ZSTD_storeSeq(seqStore, litLength, ip, iend, offCode, matchLength - MINMATCH);
ip += matchLength + litLength;
- bytesread += matchLength + litLength;
}
assert(endPosInSequence <= inSeqs[idx].litLength + inSeqs[idx].matchLength); /* NOTE(review): reads inSeqs[idx] even if the loop stopped because idx reached inSeqsSize - confirm the array has a trailing entry */
seqPos->idx = idx;
/* Update repcodes */
ZSTD_memcpy(cctx->blockState.nextCBlock->rep, updatedRepcodes.rep, sizeof(repcodes_t));
- /* Store any last literals for ZSTD_sf_noBlockDelimiters mode */
- if (cctx->blockDelimiters == ZSTD_sf_noBlockDelimiters && ip != iend) {
+ iend -= bytesAdjustment; /* bytes reported back through the return value are excluded from the last literals */
+ if (ip != iend) {
+ /* Store any last literals */
U32 lastLLSize = (U32)(iend - ip);
assert(ip <= iend);
- DEBUGLOG(2, "Storing last literals of size: %u", lastLLSize);
+ DEBUGLOG(6, "Storing last literals of size: %u", lastLLSize);
ZSTD_storeLastLiterals(seqStore, ip, lastLLSize);
+#if defined(DEBUGLEVEL) && (DEBUGLEVEL >= 6)
+ seqPos->posInSrc += lastLLSize;
+#endif
}
+
return bytesAdjustment;
}
U32 blockSize;
U32 compressedSeqsSize;
size_t remaining = srcSize;
- ZSTD_sequencePosition seqPos = {0, 0};
+ ZSTD_sequencePosition seqPos = {0, 0, 0};
seqStore_t blockSeqStore;
BYTE const* ip = (BYTE const*)src;
blockSeqStore = cctx->seqStore;
ZSTD_resetSeqStore(&blockSeqStore);
DEBUGLOG(4, "Working on new block. Blocksize: %u", blockSize);
-
- additionalByteAdjustment = ZSTD_copySequencesToSeqStore(&blockSeqStore, &seqPos, inSeqs, inSeqsSize, ip, blockSize, cctx);
+ if (cctx->appliedParams.blockDelimiters == ZSTD_sf_noBlockDelimiters) {
+ additionalByteAdjustment = ZSTD_copySequencesToSeqStore(&blockSeqStore, &seqPos,
+ inSeqs, inSeqsSize,
+ ip, blockSize, cctx);
+ } else {
+ additionalByteAdjustment = ZSTD_copySequencesToSeqStoreBlockDelim(&blockSeqStore, &seqPos,
+ inSeqs, inSeqsSize,
+ ip, blockSize, cctx);
+ }
FORWARD_IF_ERROR(additionalByteAdjustment, "Bad sequence copy");
blockSize -= additionalByteAdjustment;
/* If blocks are too small, emit as a nocompress block */
DEBUGLOG(3, "ZSTD_compressSequences()");
assert(cctx != NULL);
FORWARD_IF_ERROR(ZSTD_CCtx_init_compressStream2(cctx, ZSTD_e_end, srcSize), "CCtx initialization failed");
-
/* Begin writing output, starting with frame header */
frameHeaderSize = ZSTD_writeFrameHeader(op, dstCapacity, &cctx->appliedParams, srcSize, cctx->dictID);
op += frameHeaderSize;