From: Nick Terrell
Date: Wed, 21 Feb 2018 03:34:43 +0000 (-0800)
Subject: Split block compressor out of long range matcher
X-Git-Tag: v1.3.4~1^2~48^2~1
X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=af866b3a5854aa7a22853b8a2e545e2d61dfa6c3;p=thirdparty%2Fzstd.git

Split block compressor out of long range matcher

* `ZSTD_ldm_generateSequences()` generates the LDM sequences and stores
  them in a table. It should work with any chunk size, but is currently
  only called one block at a time.

* `ZSTD_ldm_blockCompress()` emits the pre-defined sequences, and instead
  of encoding the literals directly, it passes them to a secondary block
  compressor. The code to handle chunk sizes greater than the block size
  is currently commented out, since it is unused. The next PR will
  uncomment and exercise this code.

* During optimal parsing, ensure the LDM `minMatchLength` is at least
  `targetLength`. Also don't emit repcode matches in the LDM block
  compressor. Enabling LDM with the optimal parser now actually improves
  the compression ratio.

* The compression ratio is very close to before; it differs very slightly
  because the repcode handling differs slightly. If I remove immediate
  repcode checking in both branches, the compressed size is exactly the
  same.

* The speed looks to be the same or better than before.

Up Next (in a separate PR)
--------------------------

Allow sequence generation to happen prior to compression, and produce more
than a block's worth of sequences. Expose some API for zstdmt to consume.
This will test out some currently untested code in
`ZSTD_ldm_blockCompress()`. (Illustrative sketches of the new interface
appear after the diff.)
---

diff --git a/lib/common/zstd_internal.h b/lib/common/zstd_internal.h
index 65c08a825..1c6841a32 100644
--- a/lib/common/zstd_internal.h
+++ b/lib/common/zstd_internal.h
@@ -213,6 +213,12 @@ MEM_STATIC void ZSTD_wildcopy_e(void* dst, const void* src, void* dstEnd)   /* s
 /*-*******************************************
 *  Private declarations
 *********************************************/
+typedef struct rawSeq_s {
+    U32 offset;
+    U32 litLength;
+    U32 matchLength;
+} rawSeq;
+
 typedef struct seqDef_s {
     U32 offset;
     U16 litLength;
diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c
index 80b1d3662..74a51c25e 100644
--- a/lib/compress/zstd_compress.c
+++ b/lib/compress/zstd_compress.c
@@ -718,12 +718,11 @@ size_t ZSTD_estimateCCtxSize_usingCCtxParams(const ZSTD_CCtx_params* params)
         size_t const blockStateSpace = 2 * sizeof(ZSTD_compressedBlockState_t);
         size_t const matchStateSize = ZSTD_sizeof_matchState(&params->cParams, /* forCCtx */ 1);
-        size_t const ldmSpace = params->ldmParams.enableLdm ?
-                ZSTD_ldm_getTableSize(params->ldmParams.hashLog,
-                                      params->ldmParams.bucketSizeLog) : 0;
+        size_t const ldmSpace = ZSTD_ldm_getTableSize(params->ldmParams);
+        size_t const ldmSeqSpace = ZSTD_ldm_getMaxNbSeq(params->ldmParams, blockSize) * sizeof(rawSeq);
         size_t const neededSpace = entropySpace + blockStateSpace + tokenSpace +
-                                   matchStateSize + ldmSpace;
+                                   matchStateSize + ldmSpace + ldmSeqSpace;
 
         DEBUGLOG(5, "sizeof(ZSTD_CCtx) : %u", (U32)sizeof(ZSTD_CCtx));
         DEBUGLOG(5, "estimate workSpace : %u", (U32)neededSpace);
@@ -990,7 +989,7 @@ static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc,
     if (params.ldmParams.enableLdm) {
         /* Adjust long distance matching parameters */
-        ZSTD_ldm_adjustParameters(&params.ldmParams, params.cParams.windowLog);
+        ZSTD_ldm_adjustParameters(&params.ldmParams, &params.cParams);
         assert(params.ldmParams.hashLog >= params.ldmParams.bucketSizeLog);
         assert(params.ldmParams.hashEveryLog < 32);
         zc->ldmState.hashPower =
@@ -1005,17 +1004,19 @@ static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc,
         size_t const buffOutSize = (zbuff==ZSTDb_buffered) ? ZSTD_compressBound(blockSize)+1 : 0;
         size_t const buffInSize = (zbuff==ZSTDb_buffered) ? windowSize + blockSize : 0;
         size_t const matchStateSize = ZSTD_sizeof_matchState(&params.cParams, /* forCCtx */ 1);
+        size_t const maxNbLdmSeq = ZSTD_ldm_getMaxNbSeq(params.ldmParams, blockSize);
         void* ptr;
 
         /* Check if workSpace is large enough, alloc a new one if needed */
         {   size_t const entropySpace = HUF_WORKSPACE_SIZE;
             size_t const blockStateSpace = 2 * sizeof(ZSTD_compressedBlockState_t);
             size_t const bufferSpace = buffInSize + buffOutSize;
-            size_t const ldmSpace = params.ldmParams.enableLdm
-                                  ? ZSTD_ldm_getTableSize(params.ldmParams.hashLog, params.ldmParams.bucketSizeLog)
-                                  : 0;
+            size_t const ldmSpace = ZSTD_ldm_getTableSize(params.ldmParams);
+            size_t const ldmSeqSpace = maxNbLdmSeq * sizeof(rawSeq);
+
             size_t const neededSpace = entropySpace + blockStateSpace + ldmSpace +
-                                       matchStateSize + tokenSpace + bufferSpace;
+                                       ldmSeqSpace + matchStateSize + tokenSpace +
+                                       bufferSpace;
             DEBUGLOG(4, "Need %uKB workspace, including %uKB for match state, and %uKB for buffers",
                         (U32)(neededSpace>>10), (U32)(matchStateSize>>10), (U32)(bufferSpace>>10));
             DEBUGLOG(4, "windowSize: %u - blockSize: %u", (U32)windowSize, (U32)blockSize);
@@ -1070,7 +1071,10 @@ static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc,
             assert(((size_t)ptr & 3) == 0);  /* ensure ptr is properly aligned */
             zc->ldmState.hashTable = (ldmEntry_t*)ptr;
             ptr = zc->ldmState.hashTable + ldmHSize;
+            zc->ldmSequences = (rawSeq*)ptr;
+            ptr = zc->ldmSequences + maxNbLdmSeq;
         }
+        assert(((size_t)ptr & 3) == 0);  /* ensure ptr is properly aligned */
 
         ptr = ZSTD_reset_matchState(&zc->blockState.matchState, ptr, &params.cParams, crp, /* forCCtx */ 1);
@@ -1820,34 +1824,39 @@ static size_t ZSTD_compressBlock_internal(ZSTD_CCtx* zc,
                                         void* dst, size_t dstCapacity,
                                         const void* src, size_t srcSize)
 {
+    ZSTD_matchState_t* const ms = &zc->blockState.matchState;
     DEBUGLOG(5, "ZSTD_compressBlock_internal (dstCapacity=%u, dictLimit=%u, nextToUpdate=%u)",
-                (U32)dstCapacity, zc->blockState.matchState.dictLimit, zc->blockState.matchState.nextToUpdate);
+                (U32)dstCapacity, ms->dictLimit, ms->nextToUpdate);
     if (srcSize < MIN_CBLOCK_SIZE+ZSTD_blockHeaderSize+1)
         return 0;   /* don't even attempt compression below a certain srcSize */
     ZSTD_resetSeqStore(&(zc->seqStore));
 
     /* limited update after a very long match */
-    {   const BYTE* const base = zc->blockState.matchState.base;
+    {   const BYTE* const base = ms->base;
         const BYTE* const istart = (const BYTE*)src;
         const U32 current = (U32)(istart-base);
-        if (current > zc->blockState.matchState.nextToUpdate + 384)
-            zc->blockState.matchState.nextToUpdate = current - MIN(192, (U32)(current - zc->blockState.matchState.nextToUpdate - 384));
+        if (current > ms->nextToUpdate + 384)
+            ms->nextToUpdate = current - MIN(192, (U32)(current - ms->nextToUpdate - 384));
     }
 
     /* select and store sequences */
-    {   U32 const extDict = zc->blockState.matchState.lowLimit < zc->blockState.matchState.dictLimit;
+    {   U32 const extDict = ms->lowLimit < ms->dictLimit;
         size_t lastLLSize;
         { int i; for (i = 0; i < ZSTD_REP_NUM; ++i) zc->blockState.nextCBlock->rep[i] = zc->blockState.prevCBlock->rep[i]; }
         if (zc->appliedParams.ldmParams.enableLdm) {
-            typedef size_t (*ZSTD_ldmBlockCompressor)(
-                    ldmState_t* ldms, ZSTD_matchState_t* ms, seqStore_t* seqStore,
-                    U32 rep[ZSTD_REP_NUM], ZSTD_CCtx_params const* params,
-                    void const* src, size_t srcSize);
-            ZSTD_ldmBlockCompressor const ldmBlockCompressor = extDict ? ZSTD_compressBlock_ldm_extDict : ZSTD_compressBlock_ldm;
-            lastLLSize = ldmBlockCompressor(&zc->ldmState, &zc->blockState.matchState, &zc->seqStore, zc->blockState.nextCBlock->rep, &zc->appliedParams, src, srcSize);
+            size_t const nbSeq =
+                ZSTD_ldm_generateSequences(&zc->ldmState, zc->ldmSequences,
+                                           ms, &zc->appliedParams.ldmParams,
+                                           src, srcSize, extDict);
+            lastLLSize =
+                ZSTD_ldm_blockCompress(zc->ldmSequences, nbSeq,
+                                       ms, &zc->seqStore,
+                                       zc->blockState.nextCBlock->rep,
+                                       &zc->appliedParams.cParams,
+                                       src, srcSize, extDict);
         } else {   /* not long range mode */
             ZSTD_blockCompressor const blockCompressor = ZSTD_selectBlockCompressor(zc->appliedParams.cParams.strategy, extDict);
-            lastLLSize = blockCompressor(&zc->blockState.matchState, &zc->seqStore, zc->blockState.nextCBlock->rep, &zc->appliedParams.cParams, src, srcSize);
+            lastLLSize = blockCompressor(ms, &zc->seqStore, zc->blockState.nextCBlock->rep, &zc->appliedParams.cParams, src, srcSize);
         }
         {   const BYTE* const lastLiterals = (const BYTE*)src + srcSize - lastLLSize;
             ZSTD_storeLastLiterals(&zc->seqStore, lastLiterals, lastLLSize);
diff --git a/lib/compress/zstd_compress_internal.h b/lib/compress/zstd_compress_internal.h
index 67c1da6ef..8a02f764e 100644
--- a/lib/compress/zstd_compress_internal.h
+++ b/lib/compress/zstd_compress_internal.h
@@ -186,6 +186,7 @@ struct ZSTD_CCtx_s {
     seqStore_t seqStore;    /* sequences storage ptrs */
     ldmState_t ldmState;    /* long distance matching state */
+    rawSeq* ldmSequences;   /* Storage for the ldm output sequences */
     ZSTD_blockState_t blockState;
     U32* entropyWorkspace;  /* entropy workspace of HUF_WORKSPACE_SIZE bytes */
diff --git a/lib/compress/zstd_ldm.c b/lib/compress/zstd_ldm.c
index cc71de2e0..c18d68c8a 100644
--- a/lib/compress/zstd_ldm.c
+++ b/lib/compress/zstd_ldm.c
@@ -28,8 +28,17 @@ size_t ZSTD_ldm_initializeParameters(ldmParams_t* params, U32 enableLdm)
     return 0;
 }
 
-void ZSTD_ldm_adjustParameters(ldmParams_t* params, U32 windowLog)
+void ZSTD_ldm_adjustParameters(ldmParams_t* params,
+                               ZSTD_compressionParameters const* cParams)
 {
+    U32 const windowLog = cParams->windowLog;
+    if (cParams->strategy >= ZSTD_btopt) {
+        /* Get out of the way of the optimal parser */
+        U32 const minMatch = MAX(cParams->targetLength, params->minMatchLength);
+        assert(minMatch >= ZSTD_LDM_MINMATCH_MIN);
+        assert(minMatch <= ZSTD_LDM_MINMATCH_MAX);
+        params->minMatchLength = minMatch;
+    }
     if (params->hashLog == 0) {
         params->hashLog = MAX(ZSTD_HASHLOG_MIN, windowLog - LDM_HASH_RLOG);
         assert(params->hashLog <= ZSTD_HASHLOG_MAX);
@@ -41,12 +50,19 @@ void ZSTD_ldm_adjustParameters(ldmParams_t* params, U32 windowLog)
     params->bucketSizeLog = MIN(params->bucketSizeLog, params->hashLog);
 }
 
-size_t ZSTD_ldm_getTableSize(U32 hashLog, U32 bucketSizeLog) {
-    size_t const ldmHSize = ((size_t)1) << hashLog;
-    size_t const ldmBucketSizeLog = MIN(bucketSizeLog, hashLog);
+size_t ZSTD_ldm_getTableSize(ldmParams_t params)
+{
+    size_t const ldmHSize = ((size_t)1) << params.hashLog;
+    size_t const ldmBucketSizeLog = MIN(params.bucketSizeLog, params.hashLog);
     size_t const ldmBucketSize =
-        ((size_t)1) << (hashLog - ldmBucketSizeLog);
-    return ldmBucketSize + (ldmHSize * (sizeof(ldmEntry_t)));
+        ((size_t)1) << (params.hashLog - ldmBucketSizeLog);
+    size_t const totalSize = ldmBucketSize + ldmHSize * sizeof(ldmEntry_t);
+    return params.enableLdm ? totalSize : 0;
+}
+
+size_t ZSTD_ldm_getMaxNbSeq(ldmParams_t params, size_t maxChunkSize)
+{
+    return params.enableLdm ? (maxChunkSize / params.minMatchLength) : 0;
 }
 
 /** ZSTD_ldm_getSmallHash() :
@@ -278,50 +294,54 @@ static void ZSTD_ldm_limitTableUpdate(ZSTD_matchState_t* ms, const BYTE* anchor)
     }
 }
 
-size_t ZSTD_compressBlock_ldm(
-        ldmState_t* ldmState, ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
-        ZSTD_CCtx_params const* params, void const* src, size_t srcSize)
-
+size_t ZSTD_ldm_generateSequences(
+        ldmState_t* ldmState, rawSeq* sequences, ZSTD_matchState_t const* ms,
+        ldmParams_t const* params, void const* src, size_t srcSize,
+        int const extDict)
 {
-    ZSTD_compressionParameters const* cParams = &params->cParams;
-    const ldmParams_t ldmParams = params->ldmParams;
-    const U64 hashPower = ldmState->hashPower;
-    const U32 hBits = ldmParams.hashLog - ldmParams.bucketSizeLog;
-    const U32 ldmBucketSize = ((U32)1 << ldmParams.bucketSizeLog);
-    const U32 ldmTagMask = ((U32)1 << ldmParams.hashEveryLog) - 1;
-    const BYTE* const base = ms->base;
-    const BYTE* const istart = (const BYTE*)src;
-    const BYTE* ip = istart;
-    const BYTE* anchor = istart;
-    const U32 lowestIndex = ms->dictLimit;
-    const BYTE* const lowest = base + lowestIndex;
-    const BYTE* const iend = istart + srcSize;
-    const BYTE* const ilimit = iend - MAX(ldmParams.minMatchLength, HASH_READ_SIZE);
-
-    const ZSTD_blockCompressor blockCompressor =
-        ZSTD_selectBlockCompressor(cParams->strategy, 0);
+    rawSeq const* const sequencesStart = sequences;
+    /* LDM parameters */
+    U32 const minMatchLength = params->minMatchLength;
+    U64 const hashPower = ldmState->hashPower;
+    U32 const hBits = params->hashLog - params->bucketSizeLog;
+    U32 const ldmBucketSize = 1U << params->bucketSizeLog;
+    U32 const hashEveryLog = params->hashEveryLog;
+    U32 const ldmTagMask = (1U << params->hashEveryLog) - 1;
+    /* Prefix and extDict parameters */
+    U32 const dictLimit = ms->dictLimit;
+    U32 const lowestIndex = extDict ? ms->lowLimit : dictLimit;
+    BYTE const* const base = ms->base;
+    BYTE const* const dictBase = extDict ? ms->dictBase : NULL;
+    BYTE const* const dictStart = extDict ? dictBase + lowestIndex : NULL;
+    BYTE const* const dictEnd = extDict ? dictBase + dictLimit : NULL;
+    BYTE const* const lowPrefixPtr = base + dictLimit;
+    /* Input bounds */
+    BYTE const* const istart = (BYTE const*)src;
+    BYTE const* const iend = istart + srcSize;
+    BYTE const* const ilimit = iend - MAX(minMatchLength, HASH_READ_SIZE);
+    /* Input positions */
+    BYTE const* anchor = istart;
+    BYTE const* ip = istart;
+    /* Rolling hash */
+    BYTE const* lastHashed = NULL;
     U64 rollingHash = 0;
-    const BYTE* lastHashed = NULL;
-    size_t i, lastLiterals;
 
-    /* Main Search Loop */
-    while (ip < ilimit) {   /* < instead of <=, because repcode check at (ip+1) */
+    while (ip <= ilimit) {
         size_t mLength;
         U32 const current = (U32)(ip - base);
         size_t forwardMatchLength = 0, backwardMatchLength = 0;
         ldmEntry_t* bestEntry = NULL;
         if (ip != istart) {
             rollingHash = ZSTD_ldm_updateHash(rollingHash, lastHashed[0],
-                                              lastHashed[ldmParams.minMatchLength],
+                                              lastHashed[minMatchLength],
                                               hashPower);
         } else {
-            rollingHash = ZSTD_ldm_getRollingHash(ip, ldmParams.minMatchLength);
+            rollingHash = ZSTD_ldm_getRollingHash(ip, minMatchLength);
         }
         lastHashed = ip;
 
         /* Do not insert and do not look for a match */
-        if (ZSTD_ldm_getTag(rollingHash, hBits, ldmParams.hashEveryLog) !=
-                ldmTagMask) {
+        if (ZSTD_ldm_getTag(rollingHash, hBits, hashEveryLog) != ldmTagMask) {
             ip++;
             continue;
         }
@@ -331,27 +351,49 @@ size_t ZSTD_compressBlock_ldm(
             ldmEntry_t* const bucket =
                 ZSTD_ldm_getBucket(ldmState,
                                    ZSTD_ldm_getSmallHash(rollingHash, hBits),
-                                   ldmParams);
+                                   *params);
             ldmEntry_t* cur;
             size_t bestMatchLength = 0;
             U32 const checksum = ZSTD_ldm_getChecksum(rollingHash, hBits);
 
             for (cur = bucket; cur < bucket + ldmBucketSize; ++cur) {
-                const BYTE* const pMatch = cur->offset + base;
                 size_t curForwardMatchLength, curBackwardMatchLength,
                        curTotalMatchLength;
                 if (cur->checksum != checksum || cur->offset <= lowestIndex) {
                     continue;
                 }
-
-                curForwardMatchLength = ZSTD_count(ip, pMatch, iend);
-                if (curForwardMatchLength < ldmParams.minMatchLength) {
-                    continue;
+                if (extDict) {
+                    BYTE const* const curMatchBase =
+                        cur->offset < dictLimit ? dictBase : base;
+                    BYTE const* const pMatch = curMatchBase + cur->offset;
+                    BYTE const* const matchEnd =
+                        cur->offset < dictLimit ? dictEnd : iend;
+                    BYTE const* const lowMatchPtr =
+                        cur->offset < dictLimit ? dictStart : lowPrefixPtr;
+
+                    curForwardMatchLength = ZSTD_count_2segments(
+                        ip, pMatch, iend,
+                        matchEnd, lowPrefixPtr);
+                    if (curForwardMatchLength < minMatchLength) {
+                        continue;
+                    }
+                    curBackwardMatchLength =
+                        ZSTD_ldm_countBackwardsMatch(ip, anchor, pMatch,
+                                                     lowMatchPtr);
+                    curTotalMatchLength = curForwardMatchLength +
+                                          curBackwardMatchLength;
+                } else { /* !extDict */
+                    BYTE const* const pMatch = base + cur->offset;
+                    curForwardMatchLength = ZSTD_count(ip, pMatch, iend);
+                    if (curForwardMatchLength < minMatchLength) {
+                        continue;
+                    }
+                    curBackwardMatchLength =
+                        ZSTD_ldm_countBackwardsMatch(ip, anchor, pMatch,
+                                                     lowPrefixPtr);
+                    curTotalMatchLength = curForwardMatchLength +
+                                          curBackwardMatchLength;
                 }
-                curBackwardMatchLength = ZSTD_ldm_countBackwardsMatch(
-                    ip, anchor, pMatch, lowest);
-                curTotalMatchLength = curForwardMatchLength +
-                                      curBackwardMatchLength;
 
                 if (curTotalMatchLength > bestMatchLength) {
                     bestMatchLength = curTotalMatchLength;
@@ -366,7 +408,7 @@ size_t ZSTD_compressBlock_ldm(
         if (bestEntry == NULL) {
             ZSTD_ldm_makeEntryAndInsertByTag(ldmState, rollingHash, hBits, current,
-                                             ldmParams);
+                                             *params);
             ip++;
             continue;
         }
@@ -375,280 +417,217 @@ size_t ZSTD_compressBlock_ldm(
         mLength = forwardMatchLength + backwardMatchLength;
         ip -= backwardMatchLength;
 
-        /* Call the block compressor on the remaining literals */
         {
+            /* Store the sequence:
+             *   ip = current - backwardMatchLength
+             *   The match is at (bestEntry->offset - backwardMatchLength)
+             */
             U32 const matchIndex = bestEntry->offset;
-            const BYTE* const match = base + matchIndex - backwardMatchLength;
-            U32 const offset = (U32)(ip - match);
-
-            /* Fill tables for block compressor */
-            ZSTD_ldm_limitTableUpdate(ms, anchor);
-            ZSTD_ldm_fillFastTables(ms, cParams, anchor);
-
-            /* Call block compressor and get remaining literals */
-            lastLiterals = blockCompressor(ms, seqStore, rep, cParams, anchor, ip - anchor);
-            ms->nextToUpdate = (U32)(ip - base);
-
-            /* Update repToConfirm with the new offset */
-            for (i = ZSTD_REP_NUM - 1; i > 0; i--)
-                rep[i] = rep[i-1];
-            rep[0] = offset;
+            U32 const offset = current - matchIndex;
 
-            /* Store the sequence with the leftover literals */
-            ZSTD_storeSeq(seqStore, lastLiterals, ip - lastLiterals,
-                          offset + ZSTD_REP_MOVE, mLength - MINMATCH);
+            sequences->litLength = (U32)(ip - anchor);
+            sequences->matchLength = (U32)mLength;
+            sequences->offset = offset;
+            ++sequences;
         }
 
         /* Insert the current entry into the hash table */
         ZSTD_ldm_makeEntryAndInsertByTag(ldmState, rollingHash, hBits,
                                          (U32)(lastHashed - base),
-                                         ldmParams);
+                                         *params);
 
         assert(ip + backwardMatchLength == lastHashed);
 
         /* Fill the hash table from lastHashed+1 to ip+mLength*/
         /* Heuristic: don't need to fill the entire table at end of block */
-        if (ip + mLength < ilimit) {
+        if (ip + mLength <= ilimit) {
             rollingHash = ZSTD_ldm_fillLdmHashTable(
                 ldmState, rollingHash, lastHashed,
-                ip + mLength, base, hBits, ldmParams);
+                ip + mLength, base, hBits, *params);
             lastHashed = ip + mLength - 1;
         }
         ip += mLength;
         anchor = ip;
-
-        /* Check immediate repcode */
-        while ( (ip < ilimit)
-             && ( (rep[1] > 0) && (rep[1] <= (U32)(ip-lowest))
-             && (MEM_read32(ip) == MEM_read32(ip - rep[1])) )) {
-
-            size_t const rLength = ZSTD_count(ip+4, ip+4-rep[1],
-                                              iend) + 4;
-            /* Swap repToConfirm[1] <=> repToConfirm[0] */
-            {
-                U32 const tmpOff = rep[1];
-                rep[1] = rep[0];
-                rep[0] = tmpOff;
-            }
-
-            ZSTD_storeSeq(seqStore, 0, anchor, 0, rLength-MINMATCH);
-
-            /* Fill the hash table from lastHashed+1 to ip+rLength*/
-            if (ip + rLength < ilimit) {
-                rollingHash = ZSTD_ldm_fillLdmHashTable(
-                    ldmState, rollingHash, lastHashed,
-                    ip + rLength, base, hBits, ldmParams);
-                lastHashed = ip + rLength - 1;
-            }
-            ip += rLength;
-            anchor = ip;
-        }
     }
-
-    ZSTD_ldm_limitTableUpdate(ms, anchor);
-    ZSTD_ldm_fillFastTables(ms, cParams, anchor);
-
-    lastLiterals = blockCompressor(ms, seqStore, rep, cParams, anchor, iend - anchor);
-    ms->nextToUpdate = (U32)(iend - base);
-
-    /* Return the last literals size */
-    return lastLiterals;
+    /* Return the number of sequences generated */
+    return sequences - sequencesStart;
 }
 
-size_t ZSTD_compressBlock_ldm_extDict(
-        ldmState_t* ldmState, ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
-        ZSTD_CCtx_params const* params, void const* src, size_t srcSize)
+#if 0
+/**
+ * If the sequence length is longer than remaining then the sequence is split
+ * between this block and the next.
+ *
+ * Returns the current sequence to handle, or if the rest of the block should
+ * be literals, it returns a sequence with offset == 0.
+ */
+static rawSeq maybeSplitSequence(rawSeq* sequences, size_t* nbSeq,
+                                 size_t const seq, size_t const remaining,
+                                 U32 const minMatch)
 {
-    const ldmParams_t ldmParams = params->ldmParams;
-    ZSTD_compressionParameters const* cParams = &params->cParams;
-    const U64 hashPower = ldmState->hashPower;
-    const U32 hBits = ldmParams.hashLog - ldmParams.bucketSizeLog;
-    const U32 ldmBucketSize = ((U32)1 << ldmParams.bucketSizeLog);
-    const U32 ldmTagMask = ((U32)1 << ldmParams.hashEveryLog) - 1;
-    const BYTE* const base = ms->base;
-    const BYTE* const dictBase = ms->dictBase;
-    const BYTE* const istart = (const BYTE*)src;
-    const BYTE* ip = istart;
-    const BYTE* anchor = istart;
-    const U32 lowestIndex = ms->lowLimit;
-    const BYTE* const dictStart = dictBase + lowestIndex;
-    const U32 dictLimit = ms->dictLimit;
-    const BYTE* const lowPrefixPtr = base + dictLimit;
-    const BYTE* const dictEnd = dictBase + dictLimit;
-    const BYTE* const iend = istart + srcSize;
-    const BYTE* const ilimit = iend - MAX(ldmParams.minMatchLength, HASH_READ_SIZE);
-
-    const ZSTD_blockCompressor blockCompressor =
-        ZSTD_selectBlockCompressor(cParams->strategy, 1);
-    U64 rollingHash = 0;
-    const BYTE* lastHashed = NULL;
-    size_t i, lastLiterals;
-
-    /* Search Loop */
-    while (ip < ilimit) {  /* < instead of <=, because (ip+1) */
-        size_t mLength;
-        const U32 current = (U32)(ip-base);
-        size_t forwardMatchLength = 0, backwardMatchLength = 0;
-        ldmEntry_t* bestEntry = NULL;
-        if (ip != istart) {
-            rollingHash = ZSTD_ldm_updateHash(rollingHash, lastHashed[0],
-                                              lastHashed[ldmParams.minMatchLength],
-                                              hashPower);
+    rawSeq sequence = sequences[seq];
+    assert(sequence.offset > 0);
+    /* Handle partial sequences */
+    if (remaining <= sequence.litLength) {
+        /* Split the literals that we have out of the sequence.
+         * They will become the last literals of this block.
+         * The next block starts off with the remaining literals.
+         */
+        sequences[seq].litLength -= remaining;
+        *nbSeq = seq;
+        sequence.offset = 0;
+    } else if (remaining < sequence.litLength + sequence.matchLength) {
+        /* Split the match up into two sequences. One in this block, and one
+         * in the next with no literals. If either match would be shorter
+         * than searchLength we omit it.
+         */
+        U32 const matchPrefix = remaining - sequence.litLength;
+        U32 const matchSuffix = sequence.matchLength - matchPrefix;
+
+        assert(remaining > sequence.litLength);
+        assert(matchPrefix < sequence.matchLength);
+        assert(matchPrefix + matchSuffix == sequence.matchLength);
+        /* Update the current sequence */
+        sequence.matchLength = matchPrefix;
+        /* Update the next sequence when long enough, otherwise omit it. */
+        if (matchSuffix >= minMatch) {
+            sequences[seq].litLength = 0;
+            sequences[seq].matchLength = matchSuffix;
+            *nbSeq = seq;
         } else {
-            rollingHash = ZSTD_ldm_getRollingHash(ip, ldmParams.minMatchLength);
+            sequences[seq + 1].litLength += matchSuffix;
+            *nbSeq = seq + 1;
         }
-        lastHashed = ip;
-
-        if (ZSTD_ldm_getTag(rollingHash, hBits, ldmParams.hashEveryLog) !=
-                ldmTagMask) {
-            /* Don't insert and don't look for a match */
-            ip++;
-            continue;
-        }
-
-        /* Get the best entry and compute the match lengths */
-        {
-            ldmEntry_t* const bucket =
-                ZSTD_ldm_getBucket(ldmState,
-                                   ZSTD_ldm_getSmallHash(rollingHash, hBits),
-                                   ldmParams);
-            ldmEntry_t* cur;
-            size_t bestMatchLength = 0;
-            U32 const checksum = ZSTD_ldm_getChecksum(rollingHash, hBits);
-
-            for (cur = bucket; cur < bucket + ldmBucketSize; ++cur) {
-                const BYTE* const curMatchBase =
-                    cur->offset < dictLimit ? dictBase : base;
-                const BYTE* const pMatch = curMatchBase + cur->offset;
-                const BYTE* const matchEnd =
-                    cur->offset < dictLimit ? dictEnd : iend;
-                const BYTE* const lowMatchPtr =
-                    cur->offset < dictLimit ? dictStart : lowPrefixPtr;
-                size_t curForwardMatchLength, curBackwardMatchLength,
-                       curTotalMatchLength;
-
-                if (cur->checksum != checksum || cur->offset <= lowestIndex) {
-                    continue;
-                }
-
-                curForwardMatchLength = ZSTD_count_2segments(
-                    ip, pMatch, iend,
-                    matchEnd, lowPrefixPtr);
-                if (curForwardMatchLength < ldmParams.minMatchLength) {
-                    continue;
-                }
-                curBackwardMatchLength = ZSTD_ldm_countBackwardsMatch(
-                    ip, anchor, pMatch, lowMatchPtr);
-                curTotalMatchLength = curForwardMatchLength +
-                                      curBackwardMatchLength;
-
-                if (curTotalMatchLength > bestMatchLength) {
-                    bestMatchLength = curTotalMatchLength;
-                    forwardMatchLength = curForwardMatchLength;
-                    backwardMatchLength = curBackwardMatchLength;
-                    bestEntry = cur;
-                }
-            }
+        if (sequence.matchLength < minMatch) {
+            /* Skip the current sequence if it is too short */
+            sequence.offset = 0;
         }
+    }
+    return sequence;
+}
+#endif
 
-        /* No match found -- continue searching */
-        if (bestEntry == NULL) {
-            ZSTD_ldm_makeEntryAndInsertByTag(ldmState, rollingHash, hBits,
-                                             (U32)(lastHashed - base),
-                                             ldmParams);
-            ip++;
-            continue;
-        }
+size_t ZSTD_ldm_blockCompress(rawSeq const* sequences, size_t nbSeq,
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize,
+        int const extDict)
+{
+    ZSTD_blockCompressor const blockCompressor =
+        ZSTD_selectBlockCompressor(cParams->strategy, extDict);
+    int const doImmediateRepCheck = cParams->strategy < ZSTD_btopt;
+    /* Prefix and extDict parameters */
+    U32 const dictLimit = ms->dictLimit;
+    U32 const lowestIndex = extDict ? ms->lowLimit : dictLimit;
+    BYTE const* const base = ms->base;
+    BYTE const* const dictBase = extDict ? ms->dictBase : NULL;
+    BYTE const* const dictEnd = extDict ? dictBase + dictLimit : NULL;
+    BYTE const* const lowPrefixPtr = base + dictLimit;
+    /* Input bounds */
+    BYTE const* const istart = (BYTE const*)src;
+    BYTE const* const iend = istart + srcSize;
+    /* Input positions */
+    BYTE const* ip = istart;
+    size_t seq;
+    /* Loop through each sequence and apply the block compressor to the lits */
+    for (seq = 0; seq < nbSeq; ++seq) {
+        rawSeq const sequence = sequences[seq];
+        int i;
+
+        if (sequence.offset == 0)
+            break;
 
-        /* Match found */
-        mLength = forwardMatchLength + backwardMatchLength;
-        ip -= backwardMatchLength;
+        assert(ip + sequence.litLength + sequence.matchLength <= iend);
 
-        /* Call the block compressor on the remaining literals */
+        /* Fill tables for block compressor */
+        ZSTD_ldm_limitTableUpdate(ms, ip);
+        ZSTD_ldm_fillFastTables(ms, cParams, ip);
+        /* Run the block compressor */
         {
-            /* ip = current - backwardMatchLength
-             * The match is at (bestEntry->offset - backwardMatchLength) */
-            U32 const matchIndex = bestEntry->offset;
-            U32 const offset = current - matchIndex;
-
-            /* Fill the hash table for the block compressor */
-            ZSTD_ldm_limitTableUpdate(ms, anchor);
-            ZSTD_ldm_fillFastTables(ms, cParams, anchor);
-
-            /* Call block compressor and get remaining literals */
-            lastLiterals = blockCompressor(ms, seqStore, rep, cParams, anchor, ip - anchor);
+            size_t const newLitLength =
+                blockCompressor(ms, seqStore, rep, cParams, ip,
+                                sequence.litLength);
+            ip += sequence.litLength;
             ms->nextToUpdate = (U32)(ip - base);
-
-            /* Update repToConfirm with the new offset */
+            /* Update the repcodes */
             for (i = ZSTD_REP_NUM - 1; i > 0; i--)
                 rep[i] = rep[i-1];
-            rep[0] = offset;
-
-            /* Store the sequence with the leftover literals */
-            ZSTD_storeSeq(seqStore, lastLiterals, ip - lastLiterals,
-                          offset + ZSTD_REP_MOVE, mLength - MINMATCH);
+            rep[0] = sequence.offset;
+            /* Store the sequence */
+            ZSTD_storeSeq(seqStore, newLitLength, ip - newLitLength,
+                          sequence.offset + ZSTD_REP_MOVE,
+                          sequence.matchLength - MINMATCH);
+            ip += sequence.matchLength;
         }
-
-        /* Insert the current entry into the hash table */
-        ZSTD_ldm_makeEntryAndInsertByTag(ldmState, rollingHash, hBits,
-                                         (U32)(lastHashed - base),
-                                         ldmParams);
-
-        /* Fill the hash table from lastHashed+1 to ip+mLength */
-        assert(ip + backwardMatchLength == lastHashed);
-        if (ip + mLength < ilimit) {
-            rollingHash = ZSTD_ldm_fillLdmHashTable(
-                ldmState, rollingHash, lastHashed,
-                ip + mLength, base, hBits,
-                ldmParams);
-            lastHashed = ip + mLength - 1;
-        }
-        ip += mLength;
-        anchor = ip;
-
-        /* check immediate repcode */
-        while (ip < ilimit) {
-            U32 const current2 = (U32)(ip-base);
-            U32 const repIndex2 = current2 - rep[1];
-            const BYTE* repMatch2 = repIndex2 < dictLimit ?
-                dictBase + repIndex2 : base + repIndex2;
-            if ( (((U32)((dictLimit-1) - repIndex2) >= 3) &
-                  (repIndex2 > lowestIndex))  /* intentional overflow */
-               && (MEM_read32(repMatch2) == MEM_read32(ip)) ) {
-                const BYTE* const repEnd2 = repIndex2 < dictLimit ?
-                    dictEnd : iend;
-                size_t const repLength2 =
-                    ZSTD_count_2segments(ip+4, repMatch2+4, iend,
-                                         repEnd2, lowPrefixPtr) + 4;
-
-                U32 tmpOffset = rep[1];
-                rep[1] = rep[0];
-                rep[0] = tmpOffset;
-
-                ZSTD_storeSeq(seqStore, 0, anchor, 0, repLength2-MINMATCH);
-
-                /* Fill the hash table from lastHashed+1 to ip+repLength2*/
-                if (ip + repLength2 < ilimit) {
-                    rollingHash = ZSTD_ldm_fillLdmHashTable(
-                        ldmState, rollingHash, lastHashed,
-                        ip + repLength2, base, hBits,
-                        ldmParams);
-                    lastHashed = ip + repLength2 - 1;
+        /* Check immediate repcode */
+        if (doImmediateRepCheck) {
+            rawSeq* nextSeq = (seq + 1 < nbSeq) ? &sequences[seq + 1] : NULL;
+            /* Allow repcodes up to the next predefined sequence */
+            BYTE const* const repCheckEnd = nextSeq
+                ? ip + nextSeq->litLength
+                : iend;
+            BYTE const* const repCheckLimit = repCheckEnd - HASH_READ_SIZE;
+
+            assert(repCheckEnd <= iend);
+
+            if (extDict) {
+                while (ip < repCheckLimit) {
+                    U32 const current = (U32)(ip-base);
+                    U32 const repIndex = current - rep[1];
+                    const BYTE* repMatch = repIndex < dictLimit ?
+                        dictBase + repIndex : base + repIndex;
+                    if ( (((U32)((dictLimit-1) - repIndex) >= 3) &
+                          (repIndex > lowestIndex))  /* intentional overflow */
+                       && (MEM_read32(repMatch) == MEM_read32(ip)) ) {
+                        const BYTE* const repEnd = repIndex < dictLimit ?
+                            dictEnd : repCheckEnd;
+                        size_t const rLength = ZSTD_count_2segments(
+                            ip+4, repMatch+4, repCheckEnd, repEnd,
+                            lowPrefixPtr) + 4;
+                        /* Swap rep[1] <=> rep[0] */
+                        U32 const tmpOffset = rep[1];
+                        rep[1] = rep[0];
+                        rep[0] = tmpOffset;
+
+                        ZSTD_storeSeq(seqStore, 0, ip, 0, rLength-MINMATCH);
+                        ip += rLength;
+                        if (nextSeq) {
+                            assert(nextSeq->litLength >= rLength);
+                            nextSeq->litLength -= rLength;
+                        }
+                        continue;
+                    }
+                    break;
+                }
+            } else {
+                while ( (ip < repCheckLimit)
+                     && ( (rep[1] > 0) && (rep[1] <= (U32)(ip - lowPrefixPtr))
+                     && (MEM_read32(ip) == MEM_read32(ip - rep[1])) )) {
+
+                    size_t const rLength = ZSTD_count(ip+4, ip+4-rep[1],
+                                                      repCheckEnd) + 4;
+                    /* Swap rep[1] <=> rep[0] */
+                    {
+                        U32 const tmpOff = rep[1];
+                        rep[1] = rep[0];
+                        rep[0] = tmpOff;
+                    }
+
+                    ZSTD_storeSeq(seqStore, 0, ip, 0, rLength-MINMATCH);
+                    ip += rLength;
+                    if (nextSeq) {
+                        assert(nextSeq->litLength >= rLength);
+                        nextSeq->litLength -= rLength;
+                    }
                 }
-                ip += repLength2;
-                anchor = ip;
-                continue;
             }
-            break;
         }
     }
-
-    ZSTD_ldm_limitTableUpdate(ms, anchor);
-    ZSTD_ldm_fillFastTables(ms, cParams, anchor);
-
-    /* Call the block compressor one last time on the last literals */
-    lastLiterals = blockCompressor(ms, seqStore, rep, cParams, anchor, iend - anchor);
-    ms->nextToUpdate = (U32)(iend - base);
-
-    /* Return the last literals size */
-    return lastLiterals;
+    ZSTD_ldm_limitTableUpdate(ms, ip);
+    ZSTD_ldm_fillFastTables(ms, cParams, ip);
+    {
+        size_t const lastLiterals = blockCompressor(ms, seqStore, rep, cParams,
+                                                    ip, iend - ip);
+        ms->nextToUpdate = (U32)(iend - base);
+        return lastLiterals;
+    }
 }
diff --git a/lib/compress/zstd_ldm.h b/lib/compress/zstd_ldm.h
index 7041c65a0..a836033a6 100644
--- a/lib/compress/zstd_ldm.h
+++ b/lib/compress/zstd_ldm.h
@@ -24,34 +24,57 @@ extern "C" {
 #define ZSTD_LDM_DEFAULT_WINDOW_LOG ZSTD_WINDOWLOG_DEFAULTMAX
 #define ZSTD_LDM_HASHEVERYLOG_NOTSET 9999
 
-/** ZSTD_compressBlock_ldm_generic() :
+/**
+ * ZSTD_ldm_generateSequences():
  *
- * This is a block compressor intended for long distance matching.
+ * Generates the sequences using the long distance match finder.
+ * The sequences completely parse a prefix of the source, but leave off the
+ * last literals.
+ * Returns the number of sequences generated into `sequences`.
  *
- * The function searches for matches of length at least
- * ldmParams.minMatchLength using a hash table in cctx->ldmState.
- * Matches can be at a distance of up to cParams.windowLog.
+ * NOTE: The source may be any size, assuming it doesn't overflow the hash
+ * table indices, and the output sequences table is large enough.
+ */
+size_t ZSTD_ldm_generateSequences(
+        ldmState_t* ldms, rawSeq* sequences, ZSTD_matchState_t const* ms,
+        ldmParams_t const* params, void const* src, size_t srcSize,
+        int const extDict);
+
+/**
+ * ZSTD_ldm_blockCompress():
+ *
+ * Compresses a block using the predefined sequences, along with a secondary
+ * block compressor. The literals section of every sequence is passed to the
+ * secondary block compressor, and those sequences are interspersed with the
+ * predefined sequences. Returns the length of the last literals.
+ * `nbSeq` is the number of sequences available in `sequences`.
  *
- * Upon finding a match, the unmatched literals are compressed using a
- * ZSTD_blockCompressor (depending on the strategy in the compression
- * parameters), which stores the matched sequences. The "long distance"
- * match is then stored with the remaining literals from the
- * ZSTD_blockCompressor. */
-size_t ZSTD_compressBlock_ldm(
-        ldmState_t* ldms, ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
-        ZSTD_CCtx_params const* params, void const* src, size_t srcSize);
+ * NOTE: The source must be at most the maximum block size, but the
+ * predefined sequences can be any size, and may be longer than the block.
+ * In the case that they are longer than the block, the last sequences may
+ * need to be split into two. We handle that case correctly, and update
+ * `sequences` and `nbSeq` appropriately.
+ */
+size_t ZSTD_ldm_blockCompress(rawSeq const* sequences, size_t nbSeq,
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize,
+        int const extDict);
 
-size_t ZSTD_compressBlock_ldm_extDict(
-        ldmState_t* ldms, ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
-        ZSTD_CCtx_params const* params, void const* src, size_t srcSize);
 
 /** ZSTD_ldm_initializeParameters() :
  *  Initialize the long distance matching parameters to their default values. */
 size_t ZSTD_ldm_initializeParameters(ldmParams_t* params, U32 enableLdm);
 
 /** ZSTD_ldm_getTableSize() :
- *  Estimate the space needed for long distance matching tables. */
-size_t ZSTD_ldm_getTableSize(U32 hashLog, U32 bucketSizeLog);
+ *  Estimate the space needed for long distance matching tables, or 0 if LDM
+ *  is disabled.
+ */
+size_t ZSTD_ldm_getTableSize(ldmParams_t params);
+
+/** ZSTD_ldm_getMaxNbSeq() :
+ *  Return an upper bound on the number of sequences that can be produced by
+ *  the long distance matcher, or 0 if LDM is disabled.
+ */
+size_t ZSTD_ldm_getMaxNbSeq(ldmParams_t params, size_t maxChunkSize);
 
 /** ZSTD_ldm_getTableSize() :
  *  Return prime8bytes^(minMatchLength-1) */
@@ -62,8 +85,12 @@ U64 ZSTD_ldm_getHashPower(U32 minMatchLength);
  *  windowLog and params->hashLog.
  *
  *  Ensures that params->bucketSizeLog is <= params->hashLog (setting it to
- *  params->hashLog if it is not). */
-void ZSTD_ldm_adjustParameters(ldmParams_t* params, U32 windowLog);
+ *  params->hashLog if it is not).
+ *
+ *  Ensures that minMatchLength >= targetLength during optimal parsing.
+ */
+void ZSTD_ldm_adjustParameters(ldmParams_t* params,
+                               ZSTD_compressionParameters const* cParams);
 
 #if defined (__cplusplus)
 }
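
Illustrative sketch: the rawSeq contract
----------------------------------------

A minimal, self-contained illustration of the contract between
`ZSTD_ldm_generateSequences()` and `ZSTD_ldm_blockCompress()` described
above: the generated sequences parse a prefix of the source in order, and
whatever is left over is the "last literals" that the secondary block
compressor handles. The `rawSeq` struct mirrors the one added to
`lib/common/zstd_internal.h`; the checker function and the example values
are hypothetical, not part of zstd.

    #include <assert.h>
    #include <stddef.h>

    typedef unsigned int U32;

    /* Mirrors the rawSeq added to lib/common/zstd_internal.h above. */
    typedef struct {
        U32 offset;       /* match offset; 0 is reserved for "rest is literals" */
        U32 litLength;    /* literals preceding the match */
        U32 matchLength;  /* length of the long-distance match */
    } rawSeq;

    /* Each sequence consumes litLength + matchLength source bytes, in order.
     * The bytes remaining after the last sequence are the last literals,
     * whose size ZSTD_ldm_blockCompress() returns to the caller. */
    static size_t lastLiteralsSize(rawSeq const* seqs, size_t nbSeq, size_t srcSize)
    {
        size_t pos = 0;
        size_t i;
        for (i = 0; i < nbSeq; ++i) {
            assert(seqs[i].offset > 0);                      /* a real match */
            pos += seqs[i].litLength + seqs[i].matchLength;
            assert(pos <= srcSize);                          /* prefix parse */
        }
        return srcSize - pos;
    }

    int main(void)
    {
        rawSeq const seqs[2] = { { 100000, 5, 40 }, { 250000, 8, 32 } };
        /* 100 - (5+40) - (8+32) = 15 bytes of trailing literals */
        assert(lastLiteralsSize(seqs, 2, 100) == 15);
        return 0;
    }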
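
Illustrative sketch: splitting a sequence at a block boundary
-------------------------------------------------------------

A worked example of the split rule in the currently commented-out
`maybeSplitSequence()`, which the next PR will exercise. All numbers are
invented for illustration, and this is a standalone restatement of the
arithmetic rather than zstd code. When a match straddles the block
boundary, it is split into a prefix emitted in this block and a
literal-less suffix for the next block; a half shorter than the minimum
match length is demoted to literals instead.

    #include <assert.h>

    int main(void)
    {
        unsigned const litLength   = 10;   /* literals before the match */
        unsigned const matchLength = 50;   /* total match length */
        unsigned const remaining   = 30;   /* source bytes left in this block */
        unsigned const minMatch    = 16;   /* LDM minMatchLength */

        /* The match itself crosses the boundary: */
        assert(litLength < remaining && remaining < litLength + matchLength);

        {   unsigned const matchPrefix = remaining - litLength;      /* 20 */
            unsigned const matchSuffix = matchLength - matchPrefix;  /* 30 */
            assert(matchPrefix + matchSuffix == matchLength);
            /* Both halves reach minMatch, so both are kept: 20 bytes of
             * match end this block, and a sequence with no literals and a
             * 30-byte match starts the next one. */
            assert(matchPrefix >= minMatch && matchSuffix >= minMatch);
        }
        return 0;
    }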