From: senhuang42
Date: Mon, 2 Nov 2020 18:29:56 +0000 (-0500)
Subject: Add initial support for new ZSTD_Sequence mode
X-Git-Tag: v1.4.7~30^2~29
X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=c86151f53ce4916eb8e2512e7055f1418e86ce69;p=thirdparty%2Fzstd.git

Add initial support for new ZSTD_Sequence mode
---

diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c
index 0c3983503..57825ca24 100644
--- a/lib/compress/zstd_compress.c
+++ b/lib/compress/zstd_compress.c
@@ -2148,14 +2148,14 @@ ZSTD_compressSequences_internal(seqStore_t* seqStorePtr,
     entropyWkspSize -= (MaxSeq + 1) * sizeof(*count);
 
     DEBUGLOG(4, "ZSTD_compressSequences_internal (nbSeq=%zu)", nbSeq);
-    DEBUGLOG(4, "First seqs:", nbSeq);
+    /*DEBUGLOG(4, "First seqs:", nbSeq);
     for (int i = 0; i < 5; ++i) {
         DEBUGLOG(4, "(of: %u ml: %u ll: %u)", seqStorePtr->sequencesStart[i].offset, seqStorePtr->sequencesStart[i].matchLength, seqStorePtr->sequencesStart[i].litLength);
     }
     DEBUGLOG(4, "Final seqs:", nbSeq);
     for (int i = 1; i < 5; ++i) {
         DEBUGLOG(4, "(of: %u ml: %u ll: %u)", seqStorePtr->sequencesStart[nbSeq-i].offset, seqStorePtr->sequencesStart[nbSeq-i].matchLength, seqStorePtr->sequencesStart[nbSeq-i].litLength);
-    }
+    }*/
 
     ZSTD_STATIC_ASSERT(HUF_WORKSPACE_SIZE >= (1<<MAX(MLFSELog,LLFSELog)));
     assert(entropyWkspSize >= HUF_WORKSPACE_SIZE);
@@ -4508,15 +4508,19 @@ typedef struct {
     U32 endPosInSequence;    /* Position within sequence at index 'endIdx' where range ends */
 } ZSTD_sequenceRange;
 
-
-static void ZSTD_updateSequenceRange(ZSTD_sequenceRange* sequenceRange, size_t nbBytes,
-                                     const ZSTD_Sequence* const inSeqs, size_t inSeqsSize) {
+/* Returns the number of additional bytes consumed, after blockSize. Can be negative */
+static int ZSTD_updateSequenceRange(ZSTD_sequenceRange* sequenceRange, size_t blockSize,
+                                    const ZSTD_Sequence* const inSeqs, size_t inSeqsSize,
+                                    ZSTD_sequenceFormat_e format) {
     U32 idx = sequenceRange->endIdx;
-    U32 endPosInSequence = sequenceRange->endPosInSequence + nbBytes;
-    DEBUGLOG(4, "ZSTD_updateSequenceRange: startidx %u startpos: %u endidx: %u endpos: %u",
+    U32 endPosInSequence = sequenceRange->endPosInSequence + blockSize;
+    int bytesAdjustment = 0;
+    DEBUGLOG(4, "ZSTD_updateSequenceRange: %u bytes, startidx %u startpos: %u endidx: %u endpos: %u", blockSize,
             sequenceRange->startIdx, sequenceRange->startPosInSequence, sequenceRange->endIdx, sequenceRange->endPosInSequence);
+    DEBUGLOG(4, "endPosInSequence begin val: %u", endPosInSequence);
     while (endPosInSequence && idx < inSeqsSize) {
         ZSTD_Sequence currSeq = inSeqs[idx];
+        DEBUGLOG(4, "curr Seq: idx: %u ll: %u ml: %u, of: %u", idx, currSeq.litLength, currSeq.matchLength, currSeq.offset);
         if (endPosInSequence >= currSeq.litLength + currSeq.matchLength) {
             endPosInSequence -= currSeq.litLength + currSeq.matchLength;
             idx++;
@@ -4525,12 +4529,31 @@ static void ZSTD_updateSequenceRange(ZSTD_sequenceRange* sequenceRange, size_t n
         }
     }
 
-    if (idx == inSeqsSize) {
-        endPosInSequence = 0;
-    }
-    if (endPosInSequence == inSeqs[idx].litLength + inSeqs[idx].matchLength) {
-        endPosInSequence = 0;
-        idx++;
+    if (format == ZSTD_sf_noBlockDelimiters) {
+        assert(endPosInSequence <= inSeqs[idx].litLength + inSeqs[idx].matchLength);
+        DEBUGLOG(4, "idx: %u", idx);
+        if (idx != inSeqsSize && endPosInSequence > inSeqs[idx].litLength) {
+            DEBUGLOG(4, "Endpos is in the match");
+            if (inSeqs[idx].matchLength >= blockSize) {
+                /* Only split the match if it's too large */
+                U32 firstHalfMatchLength = endPosInSequence - inSeqs[idx].litLength;
+                U32 secondHalfMatchLength = inSeqs[idx].matchLength - firstHalfMatchLength;
+                assert(firstHalfMatchLength >= MINMATCH || secondHalfMatchLength >= MINMATCH);
+                if (firstHalfMatchLength < MINMATCH && firstHalfMatchLength != 0) {
+                    /* Move the endPos forward so that it creates match of at least MINMATCH length */
+                    endPosInSequence = MINMATCH + inSeqs[idx].litLength;
+                    bytesAdjustment = MINMATCH - firstHalfMatchLength;
+                } else if (secondHalfMatchLength < MINMATCH && secondHalfMatchLength != 0) {
+                    /* Move the endPos backward so that it creates match of at least MINMATCH length */
+                    endPosInSequence -= MINMATCH - secondHalfMatchLength;
+                    bytesAdjustment = secondHalfMatchLength - MINMATCH;
+                }
+            } else {
+                /* If we can, prefer to simply move endPos to the end of the literals */
+                bytesAdjustment = (int)inSeqs[idx].litLength - (int)endPosInSequence;
+                endPosInSequence = inSeqs[idx].litLength;
+            }
+        }
     }
 
     sequenceRange->startIdx = sequenceRange->endIdx;
@@ -4541,12 +4564,14 @@ static void ZSTD_updateSequenceRange(ZSTD_sequenceRange* sequenceRange, size_t n
     DEBUGLOG(4, "endidx: (of: %u ml: %u ll: %u)", inSeqs[sequenceRange->endIdx].offset, inSeqs[sequenceRange->endIdx].matchLength, inSeqs[sequenceRange->endIdx].litLength);
     DEBUGLOG(4, "finished update: startidx %u startpos: %u endidx: %u endpos: %u",
             sequenceRange->startIdx, sequenceRange->startPosInSequence, sequenceRange->endIdx, sequenceRange->endPosInSequence);
+    DEBUGLOG(4, "final PIS was adjusted by: %d bytes", bytesAdjustment);
+    return bytesAdjustment;
 }
 
 /* Returns size of sequences range copied, otherwise ZSTD error code */
 static size_t ZSTD_copySequencesToSeqStore(seqStore_t* seqStore, const ZSTD_sequenceRange* seqRange,
                                            const ZSTD_Sequence* const inSeqs, size_t inSeqsSize,
-                                           const void* src, size_t srcSize) {
+                                           const void* src, size_t srcSize, ZSTD_sequenceFormat_e format) {
     DEBUGLOG(4, "ZSTD_copySequencesToSeqStore: numSeqs: %zu srcSize: %zu", inSeqsSize, srcSize);
     size_t idx = seqRange->startIdx;
     BYTE const* istart = (BYTE const*)src;
@@ -4559,26 +4584,34 @@ static size_t ZSTD_copySequencesToSeqStore(seqStore_t* seqStore, const ZSTD_sequ
         U32 offCode = inSeqs[idx].offset + ZSTD_REP_MOVE;
 
         /* Adjust litLength and matchLength for the sequence at startIdx */
-        if (idx == seqRange->startIdx) {
+        if (seqRange->startIdx == seqRange->endIdx) {
+            U32 seqLength = seqRange->endPosInSequence - seqRange->startPosInSequence;
+            RETURN_ERROR_IF(seqLength > litLength + matchLength, corruption_detected, "SeqLength cannot be bigger than sequence length!");
+            if (seqLength <= litLength) {
+                /* Sequence is entirely literals, break and store last literals */
+                break;
+            } else if (seqLength <= litLength + matchLength) {
+                matchLength = seqLength - litLength;
+            }
+        } else if (idx == seqRange->startIdx) {
            U32 posInSequence = seqRange->startPosInSequence;
            DEBUGLOG(4, "At startIdx: idx: %u PIS: %u", idx, posInSequence);
            assert(posInSequence <= litLength + matchLength);
            if (posInSequence >= litLength) {
-               litLength = 0;
+               /* position is within the match */
                posInSequence -= litLength;
+               litLength = 0;
+               matchLength -= posInSequence;
            } else {
+               /* position is within the literals */
                litLength -= posInSequence;
            }
-           matchLength -= posInSequence;
            if (matchLength <= MINMATCH && offCode != ZSTD_REP_MOVE /* dont trigger this in lastLL case */) {
                DEBUGLOG(4, "start idx: %zu, seq: (ll: %u, ml: %u, of: %u)", idx, litLength, matchLength, offCode);
                RETURN_ERROR_IF(matchLength < MINMATCH, corruption_detected, "Matchlength too small! Start Idx");
Start Idx"); } DEBUGLOG(4, "startIdx seq finalized: ll: %u ml: %u", litLength, matchLength); - } - - /* Adjust litLength and matchLength for the sequence at endIdx */ - if (idx == seqRange->endIdx) { + } else if (idx == seqRange->endIdx) { U32 posInSequence = seqRange->endPosInSequence; DEBUGLOG(4, "Reached endIdx. idx: %u PIS: %u", idx, posInSequence); if (posInSequence == 0) { @@ -4586,21 +4619,24 @@ static size_t ZSTD_copySequencesToSeqStore(seqStore_t* seqStore, const ZSTD_sequ corruption_detected, "Contract violation"); } assert(posInSequence <= litLength + matchLength); - if (posInSequence < litLength) { - litLength = posInSequence; - matchLength = 0; + if (posInSequence <= litLength) { + /* position is within the last literals, break and store last literals */ + break; } else { + /* position is within the match */ matchLength = posInSequence - litLength; + /* ZSTD_updateSequenceRange should never give us a position such that we generate a match too small */ if (matchLength <= MINMATCH && offCode != ZSTD_REP_MOVE) { DEBUGLOG(4, "start idx: %zu, seq: (ll: %u, ml: %u, of: %u)", idx, litLength, matchLength, offCode); RETURN_ERROR_IF(matchLength < MINMATCH, corruption_detected, "Matchlength too small! Start Idx"); } } - DEBUGLOG(4, "final ll:%u ml: %u", litLength, matchLength); + DEBUGLOG(4, "endIdx seq finalized: ll:%u ml: %u", litLength, matchLength); } - /* ML == 0 and Offset == 0 (or offCode == ZSTD_REP_MOVE) implies we are ending a block */ + /* ML == 0 and Offset == 0 (or offCode == ZSTD_REP_MOVE) signals block delimiters */ if (matchLength == 0 && offCode == ZSTD_REP_MOVE) { + RETURN_ERROR_IF(format == ZSTD_sf_noBlockDelimiters, corruption_detected, "No block delimiters allowed!"); /* Handle last literals case */ if (litLength > 0) { DEBUGLOG(4, "Storing last literals: %u bytes, idx: %u", litLength, idx); @@ -4610,7 +4646,7 @@ static size_t ZSTD_copySequencesToSeqStore(seqStore_t* seqStore, const ZSTD_sequ continue; } - //DEBUGLOG(4, "Storing in actual seqStore idx: %zu, seq: (ll: %u, ml: %u, of: %u), rep: %u", idx, litLength, matchLength - MINMATCH, offCode, inSeqs[idx].rep); + DEBUGLOG(4, "Storing in actual seqStore idx: %zu, seq: (ll: %u, ml: %u, of: %u), rep: %u", idx, litLength, matchLength - MINMATCH, offCode, inSeqs[idx].rep); RETURN_ERROR_IF(matchLength < MINMATCH, corruption_detected, "Matchlength too small! of: %u ml: %u ll: %u", offCode, matchLength, litLength); if (inSeqs[idx].rep) { ZSTD_storeSeq(seqStore, litLength, ip, iend, inSeqs[idx].rep - 1, matchLength - MINMATCH); @@ -4620,6 +4656,13 @@ static size_t ZSTD_copySequencesToSeqStore(seqStore_t* seqStore, const ZSTD_sequ ip += matchLength + litLength; } + if (format == ZSTD_sf_noBlockDelimiters && ip != iend) { + assert(ip <= iend); + U32 lastLLSize = (U32)(iend - ip); + DEBUGLOG(4, "Storing last literals of size: %u", lastLLSize); + ZSTD_storeLastLiterals(seqStore, ip, lastLLSize); + } + DEBUGLOG(4, "ZSTD_copySequencesToSeqStore: done"); return 0; } @@ -4647,12 +4690,14 @@ size_t ZSTD_compressSequences_ext_internal(void* dst, size_t dstCapacity, while (remaining) { U32 cBlockSize; + int additionalByteAdjustment; lastBlock = remaining <= cctx->blockSize; blockSize = lastBlock ? (U32)remaining : (U32)cctx->blockSize; blockSeqStore = cctx->seqStore; ZSTD_resetSeqStore(&blockSeqStore); + additionalByteAdjustment = ZSTD_updateSequenceRange(&seqRange, blockSize, inSeqs, inSeqsSize, format); + blockSize += additionalByteAdjustment; DEBUGLOG(4, "Working on new block. 
Blocksize: %u", blockSize); - ZSTD_updateSequenceRange(&seqRange, blockSize, inSeqs, inSeqsSize); /* Skip over uncompressible blocks */ if (blockSize < MIN_CBLOCK_SIZE+ZSTD_blockHeaderSize+1) { @@ -4667,7 +4712,7 @@ size_t ZSTD_compressSequences_ext_internal(void* dst, size_t dstCapacity, continue; } - ZSTD_copySequencesToSeqStore(&blockSeqStore, &seqRange, inSeqs, inSeqsSize, ip, blockSize); + ZSTD_copySequencesToSeqStore(&blockSeqStore, &seqRange, inSeqs, inSeqsSize, ip, blockSize, format); compressedSeqsSize = ZSTD_compressSequences(&blockSeqStore, &cctx->blockState.prevCBlock->entropy, &cctx->blockState.nextCBlock->entropy, &cctx->appliedParams, @@ -4720,13 +4765,13 @@ size_t ZSTD_compressSequences_ext(void* dst, size_t dstCapacity, const ZSTD_Sequence* inSeqs, size_t inSeqsSize, const void* src, size_t srcSize, int compressionLevel, ZSTD_sequenceFormat_e format) { - DEBUGLOG(4, "ZSTD_compressSequences_ext()"); BYTE* op = (BYTE*)dst; ZSTD_CCtx* const cctx = ZSTD_createCCtx(); size_t cSize = 0; size_t compressedBlocksSize = 0; size_t frameHeaderSize = 0; + DEBUGLOG(4, "ZSTD_compressSequences_ext()"); ZSTD_CCtx_reset(cctx, ZSTD_reset_session_and_parameters); ZSTD_CCtx_setParameter(cctx, ZSTD_c_checksumFlag, 1); /* Initialization stage */ @@ -4758,7 +4803,7 @@ size_t ZSTD_compressSequences_ext(void* dst, size_t dstCapacity, ZSTDb_buffered) , ""); assert(cctx->appliedParams.nbWorkers == 0); } - DEBUGLOG(4, "CCtx blockSize: %zu\n", cctx->blockSize); + DEBUGLOG(4, "CCtx blockSize: %zu", cctx->blockSize); if (dstCapacity < ZSTD_compressBound(srcSize)) RETURN_ERROR(dstSize_tooSmall, "Destination buffer too small!"); @@ -4767,9 +4812,6 @@ size_t ZSTD_compressSequences_ext(void* dst, size_t dstCapacity, dstCapacity -= frameHeaderSize; cSize += frameHeaderSize; - if (cctx->appliedParams.ldmParams.enableLdm) { - ZSTD_window_update(&cctx->ldmState.window, src, srcSize); - } if (cctx->appliedParams.fParams.checksumFlag && srcSize) { XXH64_update(&cctx->xxhState, src, srcSize); } @@ -4783,7 +4825,7 @@ size_t ZSTD_compressSequences_ext(void* dst, size_t dstCapacity, } cSize += compressedBlocksSize; dstCapacity -= compressedBlocksSize; - DEBUGLOG(4, "cSize after compressSequences_internal: %zu\n", cSize); + DEBUGLOG(4, "cSize after compressSequences_internal: %zu", cSize); if (cctx->appliedParams.fParams.checksumFlag) { U32 const checksum = (U32) XXH64_digest(&cctx->xxhState); @@ -4793,7 +4835,7 @@ size_t ZSTD_compressSequences_ext(void* dst, size_t dstCapacity, cSize += 4; } - DEBUGLOG(4, "Final compressed size: %zu\n", cSize); + DEBUGLOG(4, "Final compressed size: %zu", cSize); ZSTD_freeCCtx(cctx); return cSize; }