entropyWkspSize -= (MaxSeq + 1) * sizeof(*count);
DEBUGLOG(4, "ZSTD_compressSequences_internal (nbSeq=%zu)", nbSeq);
- DEBUGLOG(4, "First seqs:", nbSeq);
+ /*DEBUGLOG(4, "First seqs:", nbSeq);
for (int i = 0; i < 5; ++i) {
DEBUGLOG(4, "(of: %u ml: %u ll: %u)", seqStorePtr->sequencesStart[i].offset, seqStorePtr->sequencesStart[i].matchLength, seqStorePtr->sequencesStart[i].litLength);
}
DEBUGLOG(4, "Final seqs:", nbSeq);
for (int i = 1; i < 5; ++i) {
DEBUGLOG(4, "(of: %u ml: %u ll: %u)", seqStorePtr->sequencesStart[nbSeq-i].offset, seqStorePtr->sequencesStart[nbSeq-i].matchLength, seqStorePtr->sequencesStart[nbSeq-i].litLength);
- }
+ }*/
ZSTD_STATIC_ASSERT(HUF_WORKSPACE_SIZE >= (1<<MAX(MLFSELog,LLFSELog)));
assert(entropyWkspSize >= HUF_WORKSPACE_SIZE);
U32 endPosInSequence; /* Position within sequence at index 'endIdx' where range ends */
} ZSTD_sequenceRange;
-
-static void ZSTD_updateSequenceRange(ZSTD_sequenceRange* sequenceRange, size_t nbBytes,
- const ZSTD_Sequence* const inSeqs, size_t inSeqsSize) {
+/* Returns the number of additional bytes consumed, after blockSize. Can be negative */
+static int ZSTD_updateSequenceRange(ZSTD_sequenceRange* sequenceRange, size_t blockSize,
+ const ZSTD_Sequence* const inSeqs, size_t inSeqsSize,
+ ZSTD_sequenceFormat_e format) {
U32 idx = sequenceRange->endIdx;
- U32 endPosInSequence = sequenceRange->endPosInSequence + nbBytes;
- DEBUGLOG(4, "ZSTD_updateSequenceRange: startidx %u startpos: %u endidx: %u endpos: %u",
+ U32 endPosInSequence = sequenceRange->endPosInSequence + blockSize;
+ int bytesAdjustment = 0;
+ DEBUGLOG(4, "ZSTD_updateSequenceRange: %u bytes, startidx %u startpos: %u endidx: %u endpos: %u", blockSize,
sequenceRange->startIdx, sequenceRange->startPosInSequence, sequenceRange->endIdx, sequenceRange->endPosInSequence);
+ DEBUGLOG(4, "endPosInSequence begin val: %u", endPosInSequence);
while (endPosInSequence && idx < inSeqsSize) {
ZSTD_Sequence currSeq = inSeqs[idx];
+ DEBUGLOG(4, "curr Seq: idx: %u ll: %u ml: %u, of: %u", idx, currSeq.litLength, currSeq.matchLength, currSeq.offset);
if (endPosInSequence >= currSeq.litLength + currSeq.matchLength) {
endPosInSequence -= currSeq.litLength + currSeq.matchLength;
idx++;
}
}
- if (idx == inSeqsSize) {
- endPosInSequence = 0;
- }
- if (endPosInSequence == inSeqs[idx].litLength + inSeqs[idx].matchLength) {
- endPosInSequence = 0;
- idx++;
+ if (format == ZSTD_sf_noBlockDelimiters) {
+ assert(endPosInSequence <= inSeqs[idx].litLength + inSeqs[idx].matchLength);
+ DEBUGLOG(4, "idx: %u", idx);
+ if (idx != inSeqsSize && endPosInSequence > inSeqs[idx].litLength) {
+ DEBUGLOG(4, "Endpos is in the match");
+ if (inSeqs[idx].matchLength >= blockSize) {
+ /* Only split the match if it's too large */
+ U32 firstHalfMatchLength = endPosInSequence - inSeqs[idx].litLength;
+ U32 secondHalfMatchLength = inSeqs[idx].litLength - firstHalfMatchLength;
+ assert(firstHalfMatchLength >= MINMATCH || secondHalfMatchLength >= MINMATCH);
+ if (firstHalfMatchLength < MINMATCH && firstHalfMatchLength != 0) {
+ /* Move the endPos forward so that it creates match of at least MINMATCH length */
+ endPosInSequence = MINMATCH + inSeqs[idx].litLength;
+ bytesAdjustment = MINMATCH - firstHalfMatchLength;
+ } else if (secondHalfMatchLength < MINMATCH && secondHalfMatchLength != 0) {
+ /* Move the endPos backward so that it creates match of at least MINMATCH length */
+ endPosInSequence -= MINMATCH - secondHalfMatchLength;
+ bytesAdjustment = secondHalfMatchLength - MINMATCH;
+ }
+ } else {
+ /* If we can, prefer to simply move endPos to the end of the literals */
+ bytesAdjustment = (int)inSeqs[idx].litLength - (int)endPosInSequence;
+ endPosInSequence = inSeqs[idx].litLength;
+ }
+ }
}
sequenceRange->startIdx = sequenceRange->endIdx;
DEBUGLOG(4, "endidx: (of: %u ml: %u ll: %u)", inSeqs[sequenceRange->endIdx].offset, inSeqs[sequenceRange->endIdx].matchLength, inSeqs[sequenceRange->endIdx].litLength);
DEBUGLOG(4, "finished update: startidx %u startpos: %u endidx: %u endpos: %u",
sequenceRange->startIdx, sequenceRange->startPosInSequence, sequenceRange->endIdx, sequenceRange->endPosInSequence);
+ DEBUGLOG(4, "final PIS was adjusted by: %d bytes", bytesAdjustment);
+ return bytesAdjustment;
}
/* Returns size of sequences range copied, otherwise ZSTD error code */
static size_t ZSTD_copySequencesToSeqStore(seqStore_t* seqStore, const ZSTD_sequenceRange* seqRange,
const ZSTD_Sequence* const inSeqs, size_t inSeqsSize,
- const void* src, size_t srcSize) {
+ const void* src, size_t srcSize, ZSTD_sequenceFormat_e format) {
DEBUGLOG(4, "ZSTD_copySequencesToSeqStore: numSeqs: %zu srcSize: %zu", inSeqsSize, srcSize);
size_t idx = seqRange->startIdx;
BYTE const* istart = (BYTE const*)src;
U32 offCode = inSeqs[idx].offset + ZSTD_REP_MOVE;
/* Adjust litLength and matchLength for the sequence at startIdx */
- if (idx == seqRange->startIdx) {
+ if (seqRange->startIdx == seqRange->endIdx) {
+ U32 seqLength = seqRange->endPosInSequence - seqRange->startPosInSequence;
+ RETURN_ERROR_IF(seqLength > litLength + matchLength, corruption_detected, "SeqLength cannot be bigger than sequence length!");
+ if (seqLength <= litLength) {
+ /* Sequence is entirely literals, break and store last literals */
+ break;
+ } else if (seqLength <= litLength + matchLength) {
+ matchLength = seqLength - litLength;
+ }
+ } else if (idx == seqRange->startIdx) {
U32 posInSequence = seqRange->startPosInSequence;
DEBUGLOG(4, "At startIdx: idx: %u PIS: %u", idx, posInSequence);
assert(posInSequence <= litLength + matchLength);
if (posInSequence >= litLength) {
- litLength = 0;
+ /* position is within the match */
posInSequence -= litLength;
+ litLength = 0;
+ matchLength -= posInSequence;
} else {
+ /* position is within the literals */
litLength -= posInSequence;
}
- matchLength -= posInSequence;
if (matchLength <= MINMATCH && offCode != ZSTD_REP_MOVE /* dont trigger this in lastLL case */) {
DEBUGLOG(4, "start idx: %zu, seq: (ll: %u, ml: %u, of: %u)", idx, litLength, matchLength, offCode);
RETURN_ERROR_IF(matchLength < MINMATCH, corruption_detected, "Matchlength too small! Start Idx");
}
DEBUGLOG(4, "startIdx seq finalized: ll: %u ml: %u", litLength, matchLength);
- }
-
- /* Adjust litLength and matchLength for the sequence at endIdx */
- if (idx == seqRange->endIdx) {
+ } else if (idx == seqRange->endIdx) {
U32 posInSequence = seqRange->endPosInSequence;
DEBUGLOG(4, "Reached endIdx. idx: %u PIS: %u", idx, posInSequence);
if (posInSequence == 0) {
corruption_detected, "Contract violation");
}
assert(posInSequence <= litLength + matchLength);
- if (posInSequence < litLength) {
- litLength = posInSequence;
- matchLength = 0;
+ if (posInSequence <= litLength) {
+ /* position is within the last literals, break and store last literals */
+ break;
} else {
+ /* position is within the match */
matchLength = posInSequence - litLength;
+ /* ZSTD_updateSequenceRange should never give us a position such that we generate a match too small */
if (matchLength <= MINMATCH && offCode != ZSTD_REP_MOVE) {
DEBUGLOG(4, "start idx: %zu, seq: (ll: %u, ml: %u, of: %u)", idx, litLength, matchLength, offCode);
RETURN_ERROR_IF(matchLength < MINMATCH, corruption_detected, "Matchlength too small! Start Idx");
}
}
- DEBUGLOG(4, "final ll:%u ml: %u", litLength, matchLength);
+ DEBUGLOG(4, "endIdx seq finalized: ll:%u ml: %u", litLength, matchLength);
}
- /* ML == 0 and Offset == 0 (or offCode == ZSTD_REP_MOVE) implies we are ending a block */
+ /* ML == 0 and Offset == 0 (or offCode == ZSTD_REP_MOVE) signals block delimiters */
if (matchLength == 0 && offCode == ZSTD_REP_MOVE) {
+ RETURN_ERROR_IF(format == ZSTD_sf_noBlockDelimiters, corruption_detected, "No block delimiters allowed!");
/* Handle last literals case */
if (litLength > 0) {
DEBUGLOG(4, "Storing last literals: %u bytes, idx: %u", litLength, idx);
continue;
}
- //DEBUGLOG(4, "Storing in actual seqStore idx: %zu, seq: (ll: %u, ml: %u, of: %u), rep: %u", idx, litLength, matchLength - MINMATCH, offCode, inSeqs[idx].rep);
+ DEBUGLOG(4, "Storing in actual seqStore idx: %zu, seq: (ll: %u, ml: %u, of: %u), rep: %u", idx, litLength, matchLength - MINMATCH, offCode, inSeqs[idx].rep);
RETURN_ERROR_IF(matchLength < MINMATCH, corruption_detected, "Matchlength too small! of: %u ml: %u ll: %u", offCode, matchLength, litLength);
if (inSeqs[idx].rep) {
ZSTD_storeSeq(seqStore, litLength, ip, iend, inSeqs[idx].rep - 1, matchLength - MINMATCH);
ip += matchLength + litLength;
}
+ if (format == ZSTD_sf_noBlockDelimiters && ip != iend) {
+ assert(ip <= iend);
+ U32 lastLLSize = (U32)(iend - ip);
+ DEBUGLOG(4, "Storing last literals of size: %u", lastLLSize);
+ ZSTD_storeLastLiterals(seqStore, ip, lastLLSize);
+ }
+
DEBUGLOG(4, "ZSTD_copySequencesToSeqStore: done");
return 0;
}
while (remaining) {
U32 cBlockSize;
+ int additionalByteAdjustment;
lastBlock = remaining <= cctx->blockSize;
blockSize = lastBlock ? (U32)remaining : (U32)cctx->blockSize;
blockSeqStore = cctx->seqStore;
ZSTD_resetSeqStore(&blockSeqStore);
+ additionalByteAdjustment = ZSTD_updateSequenceRange(&seqRange, blockSize, inSeqs, inSeqsSize, format);
+ blockSize += additionalByteAdjustment;
DEBUGLOG(4, "Working on new block. Blocksize: %u", blockSize);
- ZSTD_updateSequenceRange(&seqRange, blockSize, inSeqs, inSeqsSize);
/* Skip over uncompressible blocks */
if (blockSize < MIN_CBLOCK_SIZE+ZSTD_blockHeaderSize+1) {
continue;
}
- ZSTD_copySequencesToSeqStore(&blockSeqStore, &seqRange, inSeqs, inSeqsSize, ip, blockSize);
+ ZSTD_copySequencesToSeqStore(&blockSeqStore, &seqRange, inSeqs, inSeqsSize, ip, blockSize, format);
compressedSeqsSize = ZSTD_compressSequences(&blockSeqStore,
&cctx->blockState.prevCBlock->entropy, &cctx->blockState.nextCBlock->entropy,
&cctx->appliedParams,
const ZSTD_Sequence* inSeqs, size_t inSeqsSize,
const void* src, size_t srcSize, int compressionLevel,
ZSTD_sequenceFormat_e format) {
- DEBUGLOG(4, "ZSTD_compressSequences_ext()");
BYTE* op = (BYTE*)dst;
ZSTD_CCtx* const cctx = ZSTD_createCCtx();
size_t cSize = 0;
size_t compressedBlocksSize = 0;
size_t frameHeaderSize = 0;
+ DEBUGLOG(4, "ZSTD_compressSequences_ext()");
ZSTD_CCtx_reset(cctx, ZSTD_reset_session_and_parameters);
ZSTD_CCtx_setParameter(cctx, ZSTD_c_checksumFlag, 1);
/* Initialization stage */
ZSTDb_buffered) , "");
assert(cctx->appliedParams.nbWorkers == 0);
}
- DEBUGLOG(4, "CCtx blockSize: %zu\n", cctx->blockSize);
+ DEBUGLOG(4, "CCtx blockSize: %zu", cctx->blockSize);
if (dstCapacity < ZSTD_compressBound(srcSize))
RETURN_ERROR(dstSize_tooSmall, "Destination buffer too small!");
dstCapacity -= frameHeaderSize;
cSize += frameHeaderSize;
- if (cctx->appliedParams.ldmParams.enableLdm) {
- ZSTD_window_update(&cctx->ldmState.window, src, srcSize);
- }
if (cctx->appliedParams.fParams.checksumFlag && srcSize) {
XXH64_update(&cctx->xxhState, src, srcSize);
}
}
cSize += compressedBlocksSize;
dstCapacity -= compressedBlocksSize;
- DEBUGLOG(4, "cSize after compressSequences_internal: %zu\n", cSize);
+ DEBUGLOG(4, "cSize after compressSequences_internal: %zu", cSize);
if (cctx->appliedParams.fParams.checksumFlag) {
U32 const checksum = (U32) XXH64_digest(&cctx->xxhState);
cSize += 4;
}
- DEBUGLOG(4, "Final compressed size: %zu\n", cSize);
+ DEBUGLOG(4, "Final compressed size: %zu", cSize);
ZSTD_freeCCtx(cctx);
return cSize;
}