From: inikep Date: Thu, 7 Apr 2016 17:14:09 +0000 (+0200) Subject: only repcode 0 for lazy, lazy2 and btlazy2 strategies X-Git-Tag: v0.6.1^2~39^2~31 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=64d7bcb0dc597ede1a33848477b23f00ca8839b6;p=thirdparty%2Fzstd.git only repcode 0 for lazy, lazy2 and btlazy2 strategies --- diff --git a/lib/zstd_compress.c b/lib/zstd_compress.c index d353c3936..ab2a2d908 100644 --- a/lib/zstd_compress.c +++ b/lib/zstd_compress.c @@ -1245,165 +1245,6 @@ static void ZSTD_compressBlock_fast_extDict(ZSTD_CCtx* ctx, -/* *********************** -* Hash Chain -*************************/ - -#define NEXT_IN_CHAIN(d, mask) chainTable[(d) & mask] - -/* Update chains up to ip (excluded) - Assumption : always within prefix (ie. not within extDict) */ -FORCE_INLINE -U32 ZSTD_insertAndFindFirstIndex (ZSTD_CCtx* zc, const BYTE* ip, U32 mls) -{ - U32* const hashTable = zc->hashTable; - const U32 hashLog = zc->params.cParams.hashLog; - U32* const chainTable = zc->chainTable; - const U32 chainMask = (1 << zc->params.cParams.chainLog) - 1; - const BYTE* const base = zc->base; - const U32 target = (U32)(ip - base); - U32 idx = zc->nextToUpdate; - - while(idx < target) { - size_t const h = ZSTD_hashPtr(base+idx, hashLog, mls); - NEXT_IN_CHAIN(idx, chainMask) = hashTable[h]; - hashTable[h] = idx; - idx++; - } - - zc->nextToUpdate = target; - return hashTable[ZSTD_hashPtr(ip, hashLog, mls)]; -} - - -/* Update hashTable3 up to ip (excluded) - Assumption : always within prefix (ie. not within extDict) */ -FORCE_INLINE -U32 ZSTD_insertAndFindFirstIndexHash3 (ZSTD_CCtx* zc, const BYTE* ip) -{ - U32* const hashTable3 = zc->hashTable3; - U32 const hashLog3 = zc->hashLog3; - const BYTE* const base = zc->base; - U32 idx = zc->nextToUpdate3; - const U32 target = zc->nextToUpdate3 = (U32)(ip - base); - const size_t hash3 = ZSTD_hash3Ptr(ip, hashLog3); - - while(idx < target) { - hashTable3[ZSTD_hash3Ptr(base+idx, hashLog3)] = idx; - idx++; - } - - return hashTable3[hash3]; -} - - -FORCE_INLINE /* inlining is important to hardwire a hot branch (template emulation) */ -size_t ZSTD_HcFindBestMatch_generic ( - ZSTD_CCtx* zc, /* Index table will be updated */ - const BYTE* const ip, const BYTE* const iLimit, - size_t* offsetPtr, - const U32 maxNbAttempts, const U32 mls, const U32 extDict) -{ - U32* const chainTable = zc->chainTable; - const U32 chainSize = (1 << zc->params.cParams.chainLog); - const U32 chainMask = chainSize-1; - const BYTE* const base = zc->base; - const BYTE* const dictBase = zc->dictBase; - const U32 dictLimit = zc->dictLimit; - const BYTE* const prefixStart = base + dictLimit; - const BYTE* const dictEnd = dictBase + dictLimit; - const U32 lowLimit = zc->lowLimit; - const U32 current = (U32)(ip-base); - const U32 minChain = current > chainSize ? current - chainSize : 0; - int nbAttempts=maxNbAttempts; - const U32 minMatch = (mls == 3) ? 
3 : 4; - size_t ml=minMatch-1; - -#if 0 - if (minMatch == 3) { /* HC3 match finder */ - U32 const matchIndex3 = ZSTD_insertAndFindFirstIndexHash3 (zc, ip); - if (matchIndex3>lowLimit && current - matchIndex3<(1<<18)) { - const BYTE* match; - size_t currentMl=0; - if ((!extDict) || matchIndex3 >= dictLimit) { - match = base + matchIndex3; - if (match[ml] == ip[ml]) currentMl = ZSTD_count(ip, match, iLimit); /* potentially better */ - } else { - match = dictBase + matchIndex3; - if (MEM_readMINMATCH(match, MINMATCH) == MEM_readMINMATCH(ip, MINMATCH)) /* assumption : matchIndex3 <= dictLimit-4 (by table construction) */ - currentMl = ZSTD_count_2segments(ip+MINMATCH, match+MINMATCH, iLimit, dictEnd, prefixStart) + MINMATCH; - } - - /* save best solution */ - if (currentMl > ml) { - ml = currentMl; *offsetPtr = ZSTD_REP_MOVE + current - matchIndex3; - if (ip+currentMl == iLimit) return (ml>=MINMATCH) ? ml : 0; /* best possible, and avoid read overflow*/ - } } - } -#endif - - /* HC4 match finder */ - U32 matchIndex = ZSTD_insertAndFindFirstIndex (zc, ip, mls); - - for ( ; (matchIndex>lowLimit) && (nbAttempts) ; nbAttempts--) { - const BYTE* match; - size_t currentMl=0; - if ((!extDict) || matchIndex >= dictLimit) { - match = base + matchIndex; - if (match[ml] == ip[ml]) /* potentially better */ - currentMl = ZSTD_count(ip, match, iLimit); - } else { - match = dictBase + matchIndex; - if (MEM_readMINMATCH(match, minMatch) == MEM_readMINMATCH(ip, minMatch)) /* assumption : matchIndex <= dictLimit-4 (by table construction) */ - currentMl = ZSTD_count_2segments(ip+minMatch, match+minMatch, iLimit, dictEnd, prefixStart) + minMatch; - } - - /* save best solution */ - if (currentMl > ml) { ml = currentMl; *offsetPtr = ZSTD_REP_MOVE + current - matchIndex; if (ip+currentMl == iLimit) break; /* best possible, and avoid read overflow*/ } - - if (matchIndex <= minChain) break; - matchIndex = NEXT_IN_CHAIN(matchIndex, chainMask); - } - - return (ml>=minMatch) ? 
ml : 0; -} - - -FORCE_INLINE size_t ZSTD_HcFindBestMatch_selectMLS ( - ZSTD_CCtx* zc, - const BYTE* ip, const BYTE* const iLimit, - size_t* offsetPtr, - const U32 maxNbAttempts, const U32 matchLengthSearch) -{ - switch(matchLengthSearch) - { - case 3 : return ZSTD_HcFindBestMatch_generic(zc, ip, iLimit, offsetPtr, maxNbAttempts, 3, 0); - default : - case 4 : return ZSTD_HcFindBestMatch_generic(zc, ip, iLimit, offsetPtr, maxNbAttempts, 4, 0); - case 5 : return ZSTD_HcFindBestMatch_generic(zc, ip, iLimit, offsetPtr, maxNbAttempts, 5, 0); - case 6 : return ZSTD_HcFindBestMatch_generic(zc, ip, iLimit, offsetPtr, maxNbAttempts, 6, 0); - } -} - - -FORCE_INLINE size_t ZSTD_HcFindBestMatch_extDict_selectMLS ( - ZSTD_CCtx* zc, - const BYTE* ip, const BYTE* const iLimit, - size_t* offsetPtr, - const U32 maxNbAttempts, const U32 matchLengthSearch) -{ - switch(matchLengthSearch) - { - case 3 : return ZSTD_HcFindBestMatch_generic(zc, ip, iLimit, offsetPtr, maxNbAttempts, 3, 1); - default : - case 4 : return ZSTD_HcFindBestMatch_generic(zc, ip, iLimit, offsetPtr, maxNbAttempts, 4, 1); - case 5 : return ZSTD_HcFindBestMatch_generic(zc, ip, iLimit, offsetPtr, maxNbAttempts, 5, 1); - case 6 : return ZSTD_HcFindBestMatch_generic(zc, ip, iLimit, offsetPtr, maxNbAttempts, 6, 1); - } -} - - - /*-************************************* * Binary Tree search ***************************************/ @@ -1535,29 +1376,7 @@ static size_t ZSTD_insertBtAndFindBestMatch ( U32* largerPtr = bt + 2*(current&btMask) + 1; U32 matchEndIdx = current+8; U32 dummy32; /* to be nullified at the end */ - const U32 minMatch = (mls == 3) ? 3 : 4; - size_t bestLength = minMatch-1; - - if (minMatch == 3) { /* HC3 match finder */ - U32 const matchIndex3 = ZSTD_insertAndFindFirstIndexHash3 (zc, ip); - if (matchIndex3>windowLow && (current - matchIndex3 < (1<<18))) { - const BYTE* match; - size_t currentMl=0; - if ((!extDict) || matchIndex3 >= dictLimit) { - match = base + matchIndex3; - if (match[bestLength] == ip[bestLength]) currentMl = ZSTD_count(ip, match, iend); - } else { - match = dictBase + matchIndex3; - if (MEM_readMINMATCH(match, MINMATCH) == MEM_readMINMATCH(ip, MINMATCH)) /* assumption : matchIndex3 <= dictLimit-4 (by table construction) */ - currentMl = ZSTD_count_2segments(ip+MINMATCH, match+MINMATCH, iend, dictEnd, prefixStart) + MINMATCH; - } - - /* save best solution */ - if (currentMl > bestLength) { - bestLength = currentMl, *offsetPtr = ZSTD_REP_MOVE + current - matchIndex3; - if (ip+currentMl == iend) goto update; /* best possible, and avoid read overflow*/ - } - } } + size_t bestLength = 0; hashTable[h] = current; /* Update Hash Table */ @@ -1604,9 +1423,8 @@ static size_t ZSTD_insertBtAndFindBestMatch ( *smallerPtr = *largerPtr = 0; -update: zc->nextToUpdate = (matchEndIdx > current + 8) ? matchEndIdx - 8 : current+1; - return (bestLength>=minMatch) ? 
bestLength : 0; + return bestLength; } @@ -1641,7 +1459,6 @@ static size_t ZSTD_BtFindBestMatch_selectMLS ( { switch(matchLengthSearch) { - case 3 : return ZSTD_BtFindBestMatch(zc, ip, iLimit, offsetPtr, maxNbAttempts, 3); default : case 4 : return ZSTD_BtFindBestMatch(zc, ip, iLimit, offsetPtr, maxNbAttempts, 4); case 5 : return ZSTD_BtFindBestMatch(zc, ip, iLimit, offsetPtr, maxNbAttempts, 5); @@ -1659,6 +1476,8 @@ static void ZSTD_updateTree_extDict(ZSTD_CCtx* zc, const BYTE* const ip, const B while (idx < target) idx += ZSTD_insertBt1(zc, base+idx, mls, iend, nbCompares, 1); } + + /** Tree updater, providing best match */ static size_t ZSTD_BtFindBestMatch_extDict ( ZSTD_CCtx* zc, @@ -1680,7 +1499,6 @@ static size_t ZSTD_BtFindBestMatch_selectMLS_extDict ( { switch(matchLengthSearch) { - case 3 : return ZSTD_BtFindBestMatch_extDict(zc, ip, iLimit, offsetPtr, maxNbAttempts, 3); default : case 4 : return ZSTD_BtFindBestMatch_extDict(zc, ip, iLimit, offsetPtr, maxNbAttempts, 4); case 5 : return ZSTD_BtFindBestMatch_extDict(zc, ip, iLimit, offsetPtr, maxNbAttempts, 5); @@ -1690,192 +1508,120 @@ static size_t ZSTD_BtFindBestMatch_selectMLS_extDict ( -/* ******************************* -* Greedy parser -*********************************/ -FORCE_INLINE -void ZSTD_compressBlock_greedy_generic(ZSTD_CCtx* ctx, - const void* src, size_t srcSize) -{ - seqStore_t* seqStorePtr = &(ctx->seqStore); - const BYTE* const istart = (const BYTE*)src; - const BYTE* ip = istart; - const BYTE* anchor = istart; - const BYTE* const iend = istart + srcSize; - const BYTE* const ilimit = iend - 8; - const BYTE* const base = ctx->base + ctx->dictLimit; - - const U32 maxSearches = 1 << ctx->params.cParams.searchLog; - const U32 mls = ctx->params.cParams.searchLength; - /* init */ - U32 rep[ZSTD_REP_INIT]; - for (U32 i=0; i matchLength) - matchLength = ml2, start = ip, offset=offsetFound; - } - - if (matchLength < EQUAL_READ32) { - ip += ((ip-anchor) >> g_searchStrength) + 1; /* jump faster over incompressible sections */ - continue; - } +/* *********************** +* Hash Chain +*************************/ - /* catch up */ - while ((start>anchor) && (start>base+offset-ZSTD_REP_MOVE) && (start[-1] == start[-1-offset+ZSTD_REP_MOVE])) /* only search for offset within prefix */ - { start--; matchLength++; } - rep[1] = rep[0]; rep[0] = (U32)(offset - ZSTD_REP_MOVE); +#define NEXT_IN_CHAIN(d, mask) chainTable[(d) & mask] -_storeSequence: - /* store sequence */ - { size_t const litLength = start - anchor; - ZSTD_storeSeq(seqStorePtr, litLength, anchor, offset, matchLength-MINMATCH); - anchor = ip = start + matchLength; - } - - /* check immediate repcode */ - while ( (ip <= ilimit) - && (MEM_read32(ip) == MEM_read32(ip - rep[1])) ) { - /* store sequence */ - matchLength = ZSTD_count(ip+EQUAL_READ32, ip+EQUAL_READ32-rep[1], iend) + EQUAL_READ32; - offset = rep[1]; rep[1] = rep[0]; rep[0] = (U32)offset; /* swap offset history */ - ZSTD_storeSeq(seqStorePtr, 0, anchor, 0, matchLength-MINMATCH); - ip += matchLength; - anchor = ip; - continue; /* faster when present ... (?) */ - } } +/* Update chains up to ip (excluded) + Assumption : always within prefix (ie. 
not within extDict) */ +FORCE_INLINE +U32 ZSTD_insertAndFindFirstIndex (ZSTD_CCtx* zc, const BYTE* ip, U32 mls) +{ + U32* const hashTable = zc->hashTable; + const U32 hashLog = zc->params.cParams.hashLog; + U32* const chainTable = zc->chainTable; + const U32 chainMask = (1 << zc->params.cParams.chainLog) - 1; + const BYTE* const base = zc->base; + const U32 target = (U32)(ip - base); + U32 idx = zc->nextToUpdate; - /* Last Literals */ - { size_t const lastLLSize = iend - anchor; - memcpy(seqStorePtr->lit, anchor, lastLLSize); - seqStorePtr->lit += lastLLSize; - ZSTD_statsUpdatePrices(&seqStorePtr->stats, lastLLSize, anchor, 0, 0); + while(idx < target) { + size_t const h = ZSTD_hashPtr(base+idx, hashLog, mls); + NEXT_IN_CHAIN(idx, chainMask) = hashTable[h]; + hashTable[h] = idx; + idx++; } + + zc->nextToUpdate = target; + return hashTable[ZSTD_hashPtr(ip, hashLog, mls)]; } -FORCE_INLINE -void ZSTD_compressBlock_greedy_extDict_generic(ZSTD_CCtx* ctx, - const void* src, size_t srcSize) + +FORCE_INLINE /* inlining is important to hardwire a hot branch (template emulation) */ +size_t ZSTD_HcFindBestMatch_generic ( + ZSTD_CCtx* zc, /* Index table will be updated */ + const BYTE* const ip, const BYTE* const iLimit, + size_t* offsetPtr, + const U32 maxNbAttempts, const U32 mls, const U32 extDict) { - seqStore_t* seqStorePtr = &(ctx->seqStore); - const BYTE* const istart = (const BYTE*)src; - const BYTE* ip = istart; - const BYTE* anchor = istart; - const BYTE* const iend = istart + srcSize; - const BYTE* const ilimit = iend - 8; - const BYTE* const base = ctx->base; - const U32 dictLimit = ctx->dictLimit; + U32* const chainTable = zc->chainTable; + const U32 chainSize = (1 << zc->params.cParams.chainLog); + const U32 chainMask = chainSize-1; + const BYTE* const base = zc->base; + const BYTE* const dictBase = zc->dictBase; + const U32 dictLimit = zc->dictLimit; const BYTE* const prefixStart = base + dictLimit; - const BYTE* const dictBase = ctx->dictBase; - const BYTE* const dictEnd = dictBase + dictLimit; - const BYTE* const dictStart = dictBase + ctx->lowLimit; - - const U32 maxSearches = 1 << ctx->params.cParams.searchLog; - const U32 mls = ctx->params.cParams.searchLength; - - /* init */ - U32 rep[ZSTD_REP_INIT]; - for (U32 i=0; ilowLimit; + const U32 current = (U32)(ip-base); + const U32 minChain = current > chainSize ? current - chainSize : 0; + int nbAttempts=maxNbAttempts; + size_t ml=EQUAL_READ32-1; - ZSTD_resetSeqStore(seqStorePtr); - if ((ip - prefixStart) < REPCODE_STARTVALUE) ip += REPCODE_STARTVALUE; + /* HC4 match finder */ + U32 matchIndex = ZSTD_insertAndFindFirstIndex (zc, ip, mls); - /* Match Loop */ - while (ip < ilimit) { - size_t matchLength=0; - size_t offset=0; - const BYTE* start=ip+1; - U32 current = (U32)(start-base); + for ( ; (matchIndex>lowLimit) && (nbAttempts) ; nbAttempts--) { + const BYTE* match; + size_t currentMl=0; + if ((!extDict) || matchIndex >= dictLimit) { + match = base + matchIndex; + if (match[ml] == ip[ml]) /* potentially better */ + currentMl = ZSTD_count(ip, match, iLimit); + } else { + match = dictBase + matchIndex; + if (MEM_read32(match) == MEM_read32(ip)) /* assumption : matchIndex <= dictLimit-4 (by table construction) */ + currentMl = ZSTD_count_2segments(ip+EQUAL_READ32, match+EQUAL_READ32, iLimit, dictEnd, prefixStart) + EQUAL_READ32; + } - /* check repCode */ - { - const U32 repIndex = (U32)(current - rep[0]); - const BYTE* const repBase = repIndex < dictLimit ? 
dictBase : base; - const BYTE* const repMatch = repBase + repIndex; - if ((U32)((dictLimit-1) - repIndex) >= 3) /* intentional overflow */ - if (MEM_read32(start) == MEM_read32(repMatch)) { - /* repcode detected we should take it */ - const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend; - matchLength = ZSTD_count_2segments(start+EQUAL_READ32, repMatch+EQUAL_READ32, iend, repEnd, prefixStart) + EQUAL_READ32; - goto _storeSequence; - } } + /* save best solution */ + if (currentMl > ml) { ml = currentMl; *offsetPtr = ZSTD_REP_MOVE + current - matchIndex; if (ip+currentMl == iLimit) break; /* best possible, and avoid read overflow*/ } - /* first search (depth 0) */ - { size_t offsetFound = 99999999; - size_t const ml2 = ZSTD_HcFindBestMatch_extDict_selectMLS(ctx, ip, iend, &offsetFound, maxSearches, mls); - if (ml2 > matchLength) - matchLength = ml2, start = ip, offset=offsetFound; - } + if (matchIndex <= minChain) break; + matchIndex = NEXT_IN_CHAIN(matchIndex, chainMask); + } - if (matchLength < EQUAL_READ32) { - ip += ((ip-anchor) >> g_searchStrength) + 1; /* jump faster over incompressible sections */ - continue; - } + return ml; +} - /* catch up */ - if (offset >= ZSTD_REP_NUM) { - U32 matchIndex = (U32)((start-base) - (offset - ZSTD_REP_MOVE)); - const BYTE* match = (matchIndex < dictLimit) ? dictBase + matchIndex : base + matchIndex; - const BYTE* const mStart = (matchIndex < dictLimit) ? dictStart : prefixStart; - while ((start>anchor) && (match>mStart) && (start[-1] == match[-1])) { start--; match--; matchLength++; } - rep[1] = rep[0]; rep[0] = (U32)(offset - ZSTD_REP_MOVE); - } -_storeSequence: - /* store sequence */ - { size_t const litLength = start - anchor; - ZSTD_storeSeq(seqStorePtr, litLength, anchor, offset, matchLength-MINMATCH); - anchor = ip = start + matchLength; - } +FORCE_INLINE size_t ZSTD_HcFindBestMatch_selectMLS ( + ZSTD_CCtx* zc, + const BYTE* ip, const BYTE* const iLimit, + size_t* offsetPtr, + const U32 maxNbAttempts, const U32 matchLengthSearch) +{ + switch(matchLengthSearch) + { + default : + case 4 : return ZSTD_HcFindBestMatch_generic(zc, ip, iLimit, offsetPtr, maxNbAttempts, 4, 0); + case 5 : return ZSTD_HcFindBestMatch_generic(zc, ip, iLimit, offsetPtr, maxNbAttempts, 5, 0); + case 6 : return ZSTD_HcFindBestMatch_generic(zc, ip, iLimit, offsetPtr, maxNbAttempts, 6, 0); + } +} - /* check immediate repcode */ - while (ip <= ilimit) { - const U32 repIndex = (U32)((ip-base) - rep[1]); - const BYTE* const repBase = repIndex < dictLimit ? dictBase : base; - const BYTE* const repMatch = repBase + repIndex; - if ((U32)((dictLimit-1) - repIndex) >= 3) /* intentional overflow */ - if (MEM_read32(ip) == MEM_read32(repMatch)) { - /* repcode detected we should take it */ - const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend; - matchLength = ZSTD_count_2segments(ip+EQUAL_READ32, repMatch+EQUAL_READ32, iend, repEnd, prefixStart) + EQUAL_READ32; - offset = rep[1]; rep[1] = rep[0]; rep[0] = (U32)offset; /* swap offset history */ - ZSTD_storeSeq(seqStorePtr, 0, anchor, 0, matchLength-MINMATCH); - ip += matchLength; - anchor = ip; - continue; /* faster when present ... (?) 
*/ - } - break; - } } - /* Last Literals */ - { size_t const lastLLSize = iend - anchor; - memcpy(seqStorePtr->lit, anchor, lastLLSize); - seqStorePtr->lit += lastLLSize; +FORCE_INLINE size_t ZSTD_HcFindBestMatch_extDict_selectMLS ( + ZSTD_CCtx* zc, + const BYTE* ip, const BYTE* const iLimit, + size_t* offsetPtr, + const U32 maxNbAttempts, const U32 matchLengthSearch) +{ + switch(matchLengthSearch) + { + default : + case 4 : return ZSTD_HcFindBestMatch_generic(zc, ip, iLimit, offsetPtr, maxNbAttempts, 4, 1); + case 5 : return ZSTD_HcFindBestMatch_generic(zc, ip, iLimit, offsetPtr, maxNbAttempts, 5, 1); + case 6 : return ZSTD_HcFindBestMatch_generic(zc, ip, iLimit, offsetPtr, maxNbAttempts, 6, 1); } } +/* The optimal parser */ +#include "zstd_opt.h" /* ******************************* @@ -1896,7 +1642,6 @@ void ZSTD_compressBlock_lazy_generic(ZSTD_CCtx* ctx, const U32 maxSearches = 1 << ctx->params.cParams.searchLog; const U32 mls = ctx->params.cParams.searchLength; - const U32 minMatch = (mls == 3) ? 3 : 4; typedef size_t (*searchMax_f)(ZSTD_CCtx* zc, const BYTE* ip, const BYTE* iLimit, size_t* offsetPtr, @@ -1916,22 +1661,13 @@ void ZSTD_compressBlock_lazy_generic(ZSTD_CCtx* ctx, while (ip < ilimit) { size_t matchLength=0; size_t offset=0; - const BYTE* start=ip; + const BYTE* start=ip+1; /* check repCode */ - for (U32 i=0; i gain1) - matchLength = mlRep, offset = i; - } + matchLength = ZSTD_count(ip+1+EQUAL_READ32, ip+1+EQUAL_READ32-rep[0], iend) + EQUAL_READ32; + if (depth==0) goto _storeSequence; } /* first search (depth 0) */ @@ -1941,7 +1677,7 @@ void ZSTD_compressBlock_lazy_generic(ZSTD_CCtx* ctx, matchLength = ml2, start = ip, offset=offsetFound; } - if (matchLength < MINMATCH) { + if (matchLength < EQUAL_READ32) { ip += ((ip-anchor) >> g_searchStrength) + 1; /* jump faster over incompressible sections */ continue; } @@ -1950,19 +1686,18 @@ void ZSTD_compressBlock_lazy_generic(ZSTD_CCtx* ctx, if (depth>=1) while (ip= MINMATCH) && (gain2 > gain1)) - matchLength = mlRep, offset = i, start = ip; + int const gain1 = (int)(matchLength*3 - ZSTD_highbit((U32)offset+1) + 1); + if ((mlRep >= EQUAL_READ32) && (gain2 > gain1)) + matchLength = mlRep, offset = 0, start = ip; } { size_t offset2=99999999; size_t const ml2 = searchMax(ctx, ip, iend, &offset2, maxSearches, mls); int const gain2 = (int)(ml2*4 - ZSTD_highbit((U32)offset2+1)); /* raw approx */ int const gain1 = (int)(matchLength*4 - ZSTD_highbit((U32)offset+1) + 4); - if ((ml2 >= MINMATCH) && (gain2 > gain1)) { + if ((ml2 >= EQUAL_READ32) && (gain2 > gain1)) { matchLength = ml2, offset = offset2, start = ip; continue; /* search a better one */ } } @@ -1970,19 +1705,18 @@ void ZSTD_compressBlock_lazy_generic(ZSTD_CCtx* ctx, /* let's find an even better one */ if ((depth==2) && (ip= MINMATCH) && (gain2 > gain1)) - matchLength = ml2, offset = i, start = ip; + int const gain1 = (int)(matchLength*4 - ZSTD_highbit((U32)offset+1) + 1); + if ((ml2 >= EQUAL_READ32) && (gain2 > gain1)) + matchLength = ml2, offset = 0, start = ip; } { size_t offset2=99999999; size_t const ml2 = searchMax(ctx, ip, iend, &offset2, maxSearches, mls); int const gain2 = (int)(ml2*4 - ZSTD_highbit((U32)offset2+1)); /* raw approx */ int const gain1 = (int)(matchLength*4 - ZSTD_highbit((U32)offset+1) + 7); - if ((ml2 >= MINMATCH) && (gain2 > gain1)) { + if ((ml2 >= EQUAL_READ32) && (gain2 > gain1)) { matchLength = ml2, offset = offset2, start = ip; continue; } } } @@ -1990,34 +1724,30 @@ void ZSTD_compressBlock_lazy_generic(ZSTD_CCtx* ctx, } /* catch up */ - if (offset 
>= ZSTD_REP_NUM) { + if (offset) { while ((start>anchor) && (start>base+offset-ZSTD_REP_MOVE) && (start[-1] == start[-1-offset+ZSTD_REP_MOVE])) /* only search for offset within prefix */ { start--; matchLength++; } + rep[1] = rep[0]; rep[0] = (U32)(offset - ZSTD_REP_MOVE); } /* store sequence */ - { - if (offset >= ZSTD_REP_NUM) { - rep[2] = rep[1]; - rep[1] = rep[0]; - rep[0] = (U32)(offset - ZSTD_REP_MOVE); - } else { - if (offset != 0) { - U32 temp = rep[offset]; - if (offset > 1) rep[2] = rep[1]; - if (offset > 0) rep[1] = rep[0]; - rep[0] = temp; - } - - if (offset<=1 && start==anchor) offset = 1-offset; - } - - size_t const litLength = start - anchor; - +_storeSequence: + { size_t const litLength = start - anchor; ZSTD_storeSeq(seqStorePtr, litLength, anchor, offset, matchLength-MINMATCH); anchor = ip = start + matchLength; } - } + + /* check immediate repcode */ + while ( (ip <= ilimit) + && (MEM_read32(ip) == MEM_read32(ip - rep[1])) ) { + /* store sequence */ + matchLength = ZSTD_count(ip+EQUAL_READ32, ip+EQUAL_READ32-rep[1], iend) + EQUAL_READ32; + offset = rep[1]; rep[1] = rep[0]; rep[0] = (U32)offset; /* swap repcodes */ + ZSTD_storeSeq(seqStorePtr, 0, anchor, 0, matchLength-MINMATCH); + ip += matchLength; + anchor = ip; + continue; /* faster when present ... (?) */ + } } /* Last Literals */ { size_t const lastLLSize = iend - anchor; @@ -2028,9 +1758,6 @@ void ZSTD_compressBlock_lazy_generic(ZSTD_CCtx* ctx, } -/* The optimal parser */ -#include "zstd_opt.h" - static void ZSTD_compressBlock_btopt(ZSTD_CCtx* ctx, const void* src, size_t srcSize) { @@ -2054,7 +1781,7 @@ static void ZSTD_compressBlock_lazy(ZSTD_CCtx* ctx, const void* src, size_t srcS static void ZSTD_compressBlock_greedy(ZSTD_CCtx* ctx, const void* src, size_t srcSize) { - ZSTD_compressBlock_greedy_generic(ctx, src, srcSize); + ZSTD_compressBlock_lazy_generic(ctx, src, srcSize, 0, 0); } @@ -2078,7 +1805,6 @@ void ZSTD_compressBlock_lazy_extDict_generic(ZSTD_CCtx* ctx, const U32 maxSearches = 1 << ctx->params.cParams.searchLog; const U32 mls = ctx->params.cParams.searchLength; - const U32 minMatch = (mls == 3) ? 3 : 4; typedef size_t (*searchMax_f)(ZSTD_CCtx* zc, const BYTE* ip, const BYTE* iLimit, size_t* offsetPtr, @@ -2098,28 +1824,20 @@ void ZSTD_compressBlock_lazy_extDict_generic(ZSTD_CCtx* ctx, while (ip < ilimit) { size_t matchLength=0; size_t offset=0; - const BYTE* start=ip; + const BYTE* start=ip+1; U32 current = (U32)(ip-base); /* check repCode */ - for (U32 i=0; i= 3) /* intentional overflow */ - if (MEM_readMINMATCH(ip, minMatch) == MEM_readMINMATCH(repMatch, minMatch)) { + if (MEM_read32(ip+1) == MEM_read32(repMatch)) { /* repcode detected we should take it */ const BYTE* const repEnd = repIndex < dictLimit ? 
dictEnd : iend; - if (matchLength==0) { - offset = i; - matchLength = ZSTD_count_2segments(ip+minMatch, repMatch+minMatch, iend, repEnd, prefixStart) + minMatch; - } else { - size_t mlRep = ZSTD_count_2segments(ip+minMatch, repMatch+minMatch, iend, repEnd, prefixStart) + minMatch; - int gain2 = (int)(mlRep * 3 /*- ZSTD_highbit((U32)i+1)*/ + (i==1)); - int gain1 = (int)(matchLength*3 - /*ZSTD_highbit((U32)offset+1)*/ + 1 + (offset==1)); - if (gain2 > gain1) - matchLength = mlRep, offset = i; - } + matchLength = ZSTD_count_2segments(ip+1+EQUAL_READ32, repMatch+EQUAL_READ32, iend, repEnd, prefixStart) + EQUAL_READ32; + if (depth==0) goto _storeSequence; } } /* first search (depth 0) */ @@ -2129,7 +1847,7 @@ void ZSTD_compressBlock_lazy_extDict_generic(ZSTD_CCtx* ctx, matchLength = ml2, start = ip, offset=offsetFound; } - if (matchLength < MINMATCH) { + if (matchLength < EQUAL_READ32) { ip += ((ip-anchor) >> g_searchStrength) + 1; /* jump faster over incompressible sections */ continue; } @@ -2140,19 +1858,19 @@ void ZSTD_compressBlock_lazy_extDict_generic(ZSTD_CCtx* ctx, ip ++; current++; /* check repCode */ - for (U32 i=0; i= 3) /* intentional overflow */ - if (MEM_readMINMATCH(ip, minMatch) == MEM_readMINMATCH(repMatch, minMatch)) { + if (MEM_read32(ip) == MEM_read32(repMatch)) { /* repcode detected */ const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend; - size_t const repLength = ZSTD_count_2segments(ip+minMatch, repMatch+minMatch, iend, repEnd, prefixStart) + minMatch; + size_t const repLength = ZSTD_count_2segments(ip+EQUAL_READ32, repMatch+EQUAL_READ32, iend, repEnd, prefixStart) + EQUAL_READ32; int const gain2 = (int)(repLength * 3); - int const gain1 = (int)(matchLength*3 - ZSTD_highbit((U32)offset+1) + 1 + (offset= MINMATCH) && (gain2 > gain1)) - matchLength = repLength, offset = i, start = ip; + int const gain1 = (int)(matchLength*3 - ZSTD_highbit((U32)offset+1) + 1); + if ((repLength >= EQUAL_READ32) && (gain2 > gain1)) + matchLength = repLength, offset = 0, start = ip; } } /* search match, depth 1 */ @@ -2160,7 +1878,7 @@ void ZSTD_compressBlock_lazy_extDict_generic(ZSTD_CCtx* ctx, size_t const ml2 = searchMax(ctx, ip, iend, &offset2, maxSearches, mls); int const gain2 = (int)(ml2*4 - ZSTD_highbit((U32)offset2+1)); /* raw approx */ int const gain1 = (int)(matchLength*4 - ZSTD_highbit((U32)offset+1) + 4); - if ((ml2 >= MINMATCH) && (gain2 > gain1)) { + if ((ml2 >= EQUAL_READ32) && (gain2 > gain1)) { matchLength = ml2, offset = offset2, start = ip; continue; /* search a better one */ } } @@ -2170,19 +1888,19 @@ void ZSTD_compressBlock_lazy_extDict_generic(ZSTD_CCtx* ctx, ip ++; current++; /* check repCode */ - for (U32 i=0; i= 3) /* intentional overflow */ - if (MEM_readMINMATCH(ip, minMatch) == MEM_readMINMATCH(repMatch, minMatch)) { + if (MEM_read32(ip) == MEM_read32(repMatch)) { /* repcode detected */ const BYTE* const repEnd = repIndex < dictLimit ? 
dictEnd : iend; - size_t const repLength = ZSTD_count_2segments(ip+minMatch, repMatch+minMatch, iend, repEnd, prefixStart) + minMatch; - int const gain2 = (int)(repLength * 4); - int const gain1 = (int)(matchLength*4 - ZSTD_highbit((U32)offset+1) + 1 + (offset= MINMATCH) && (gain2 > gain1)) - matchLength = repLength, offset = i, start = ip; + size_t repLength = ZSTD_count_2segments(ip+EQUAL_READ32, repMatch+EQUAL_READ32, iend, repEnd, prefixStart) + EQUAL_READ32; + int gain2 = (int)(repLength * 4); + int gain1 = (int)(matchLength*4 - ZSTD_highbit((U32)offset+1) + 1); + if ((repLength >= EQUAL_READ32) && (gain2 > gain1)) + matchLength = repLength, offset = 0, start = ip; } } /* search match, depth 2 */ @@ -2190,7 +1908,7 @@ void ZSTD_compressBlock_lazy_extDict_generic(ZSTD_CCtx* ctx, size_t const ml2 = searchMax(ctx, ip, iend, &offset2, maxSearches, mls); int const gain2 = (int)(ml2*4 - ZSTD_highbit((U32)offset2+1)); /* raw approx */ int const gain1 = (int)(matchLength*4 - ZSTD_highbit((U32)offset+1) + 7); - if ((ml2 >= MINMATCH) && (gain2 > gain1)) { + if ((ml2 >= EQUAL_READ32) && (gain2 > gain1)) { matchLength = ml2, offset = offset2, start = ip; continue; } } } @@ -2198,33 +1916,38 @@ void ZSTD_compressBlock_lazy_extDict_generic(ZSTD_CCtx* ctx, } /* catch up */ - if (offset >= ZSTD_REP_NUM) { + if (offset) { U32 matchIndex = (U32)((start-base) - (offset - ZSTD_REP_MOVE)); const BYTE* match = (matchIndex < dictLimit) ? dictBase + matchIndex : base + matchIndex; const BYTE* const mStart = (matchIndex < dictLimit) ? dictStart : prefixStart; while ((start>anchor) && (match>mStart) && (start[-1] == match[-1])) { start--; match--; matchLength++; } /* catch up */ + rep[1] = rep[0]; rep[0] = (U32)(offset - ZSTD_REP_MOVE); } /* store sequence */ - { - if (offset >= ZSTD_REP_NUM) { - rep[2] = rep[1]; - rep[1] = rep[0]; - rep[0] = (U32)(offset - ZSTD_REP_MOVE); - } else { - if (offset != 0) { - U32 temp = rep[offset]; - if (offset > 1) rep[2] = rep[1]; - if (offset > 0) rep[1] = rep[0]; - rep[0] = temp; - } - - if (offset<=1 && start==anchor) offset = 1-offset; - } - - size_t const litLength = start - anchor; +_storeSequence: + { size_t const litLength = start - anchor; ZSTD_storeSeq(seqStorePtr, litLength, anchor, offset, matchLength-MINMATCH); anchor = ip = start + matchLength; + } + + /* check immediate repcode */ + while (ip <= ilimit) { + const U32 repIndex = (U32)((ip-base) - rep[1]); + const BYTE* const repBase = repIndex < dictLimit ? dictBase : base; + const BYTE* const repMatch = repBase + repIndex; + if ((U32)((dictLimit-1) - repIndex) >= 3) /* intentional overflow */ + if (MEM_read32(ip) == MEM_read32(repMatch)) { + /* repcode detected we should take it */ + const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend; + matchLength = ZSTD_count_2segments(ip+EQUAL_READ32, repMatch+EQUAL_READ32, iend, repEnd, prefixStart) + EQUAL_READ32; + offset = rep[1]; rep[1] = rep[0]; rep[0] = (U32)offset; /* swap offset history */ + ZSTD_storeSeq(seqStorePtr, 0, anchor, 0, matchLength-MINMATCH); + ip += matchLength; + anchor = ip; + continue; /* faster when present ... (?) 
*/ + } + break; } } /* Last Literals */ @@ -2234,9 +1957,10 @@ void ZSTD_compressBlock_lazy_extDict_generic(ZSTD_CCtx* ctx, } } + void ZSTD_compressBlock_greedy_extDict(ZSTD_CCtx* ctx, const void* src, size_t srcSize) { - ZSTD_compressBlock_greedy_extDict_generic(ctx, src, srcSize); + ZSTD_compressBlock_lazy_extDict_generic(ctx, src, srcSize, 0, 0); } static void ZSTD_compressBlock_lazy_extDict(ZSTD_CCtx* ctx, const void* src, size_t srcSize) @@ -2265,7 +1989,7 @@ typedef void (*ZSTD_blockCompressor) (ZSTD_CCtx* ctx, const void* src, size_t sr static ZSTD_blockCompressor ZSTD_selectBlockCompressor(ZSTD_strategy strat, int extDict) { static const ZSTD_blockCompressor blockCompressor[2][6] = { -#if 1 +#if 0 { ZSTD_compressBlock_fast, ZSTD_compressBlock_greedy, ZSTD_compressBlock_lazy, ZSTD_compressBlock_lazy2, ZSTD_compressBlock_btlazy2, ZSTD_compressBlock_btopt }, #else { ZSTD_compressBlock_fast_extDict, ZSTD_compressBlock_greedy_extDict, ZSTD_compressBlock_lazy_extDict,ZSTD_compressBlock_lazy2_extDict, ZSTD_compressBlock_btlazy2_extDict, ZSTD_compressBlock_btopt_extDict }, @@ -2799,21 +2523,12 @@ static const ZSTD_compressionParameters ZSTD_defaultCParameters[4][ZSTD_MAX_CLEV { 14, 14, 14, 1, 4, 4, ZSTD_fast }, /* level 1 */ { 14, 14, 15, 1, 4, 4, ZSTD_fast }, /* level 2 */ { 14, 14, 14, 4, 4, 4, ZSTD_greedy }, /* level 3.*/ -#if 0 - { 14, 14, 14, 3, 3, 4, ZSTD_lazy }, /* level 4.*/ - { 14, 14, 14, 4, 3, 4, ZSTD_lazy2 }, /* level 5 */ - { 14, 14, 14, 5, 3, 4, ZSTD_lazy2 }, /* level 6 */ - { 14, 14, 14, 6, 3, 4, ZSTD_lazy2 }, /* level 7.*/ - { 14, 14, 14, 7, 3, 4, ZSTD_lazy2 }, /* level 8.*/ - { 14, 15, 14, 6, 3, 4, ZSTD_btlazy2 }, /* level 9.*/ -#else { 14, 14, 14, 3, 4, 4, ZSTD_lazy }, /* level 4.*/ { 14, 14, 14, 4, 4, 4, ZSTD_lazy2 }, /* level 5 */ { 14, 14, 14, 5, 4, 4, ZSTD_lazy2 }, /* level 6 */ { 14, 14, 14, 6, 4, 4, ZSTD_lazy2 }, /* level 7.*/ { 14, 14, 14, 7, 4, 4, ZSTD_lazy2 }, /* level 8.*/ { 14, 15, 14, 6, 4, 4, ZSTD_btlazy2 }, /* level 9.*/ -#endif { 14, 15, 14, 3, 3, 6, ZSTD_btopt }, /* level 10.*/ { 14, 15, 14, 6, 3, 8, ZSTD_btopt }, /* level 11.*/ { 14, 15, 14, 6, 3, 16, ZSTD_btopt }, /* level 12.*/ diff --git a/lib/zstd_opt.h b/lib/zstd_opt.h index 2c7df9f59..30b81e6f1 100644 --- a/lib/zstd_opt.h +++ b/lib/zstd_opt.h @@ -212,6 +212,28 @@ MEM_STATIC void ZSTD_updatePrice(seqStore_t* seqStorePtr, U32 litLength, const B + +/* Update hashTable3 up to ip (excluded) + Assumption : always within prefix (ie. 
not within extDict) */ +FORCE_INLINE +U32 ZSTD_insertAndFindFirstIndexHash3 (ZSTD_CCtx* zc, const BYTE* ip) +{ + U32* const hashTable3 = zc->hashTable3; + U32 const hashLog3 = zc->hashLog3; + const BYTE* const base = zc->base; + U32 idx = zc->nextToUpdate3; + const U32 target = zc->nextToUpdate3 = (U32)(ip - base); + const size_t hash3 = ZSTD_hash3Ptr(ip, hashLog3); + + while(idx < target) { + hashTable3[ZSTD_hash3Ptr(base+idx, hashLog3)] = idx; + idx++; + } + + return hashTable3[hash3]; +} + + /*-************************************* * Binary Tree search ***************************************/ diff --git a/programs/Makefile b/programs/Makefile index 47621caa7..f5d5adb94 100644 --- a/programs/Makefile +++ b/programs/Makefile @@ -84,10 +84,6 @@ default: zstd all: zstd zstd32 fullbench fullbench32 fuzzer fuzzer32 zbufftest zbufftest32 paramgrill datagen -zstd_clang: $(ZSTD_FILES) $(ZSTD_FILES_LEGACY) $(ZSTDDIR)/zbuff.c $(ZSTDDIR)/zdict.c $(ZSTDDIR)/divsufsort.c \ - zstdcli.c fileio.c bench.c xxhash.c datagen.c dibio.c - clang.exe --target=x86_64-w64-mingw32 -Werror -Wconversion -Wno-sign-conversion $(FLAGS) -DZSTD_LEGACY_SUPPORT=$(ZSTD_LEGACY_SUPPORT) $^ -o $@$(EXT) - zstd : $(ZSTD_FILES) $(ZSTD_FILES_LEGACY) $(ZSTDDIR)/zbuff.c $(ZSTDDIR)/zdict.c $(ZSTDDIR)/divsufsort.c \ zstdcli.c fileio.c bench.c xxhash.c datagen.c dibio.c $(CC) $(FLAGS) -DZSTD_LEGACY_SUPPORT=$(ZSTD_LEGACY_SUPPORT) $^ -o $@$(EXT)
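

Summary of the change: the lazy, lazy2 and btlazy2 parsers (and greedy, which is now routed through ZSTD_compressBlock_lazy_generic() with depth 0) no longer loop over the full repcode history; they test only repcode 0 (rep[0]). A repcode-0 match is signalled with offset == 0, while matches returned by the search carry ZSTD_REP_MOVE + distance, so catch-up and the history rotation (rep[1] = rep[0]; rep[0] = offset - ZSTD_REP_MOVE;) run only when offset != 0. The 3-byte match-finder paths (mls == 3) are dropped from the hash-chain and binary-tree searches, the "case 3" entries disappear from the _selectMLS switches, and ZSTD_insertAndFindFirstIndexHash3() moves to lib/zstd_opt.h, where only the optimal parser still uses it.

Below is a minimal, self-contained C sketch of the repcode-0-only pattern, for illustration only. It is not zstd code: toy_parse() and count_match() are invented names, and the fixed 4-byte probe stands in for MEM_read32()/EQUAL_READ32.

    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    /* Count how many bytes match between p and match, never reading past iend. */
    static size_t count_match(const uint8_t* p, const uint8_t* match, const uint8_t* iend)
    {
        size_t len = 0;
        while (p + len < iend && p[len] == match[len]) len++;
        return len;
    }

    /* Toy lazy-style loop: check only repcode 0, as in this patch.
       rep[0]/rep[1] mimic the two-slot offset history; a real parser would
       hand each match to ZSTD_storeSeq() instead of printing it. */
    static void toy_parse(const uint8_t* src, size_t srcSize)
    {
        const uint8_t* ip = src;
        const uint8_t* const iend = src + srcSize;
        uint32_t rep[2] = { 4, 8 };   /* arbitrary initial history for the demo */

        while (ip + 4 < iend) {
            /* repcode-0 check: compare 4 bytes at ip against ip - rep[0] only */
            if ((size_t)(ip - src) >= rep[0]
                && memcmp(ip, ip - rep[0], 4) == 0) {
                size_t const mlen = count_match(ip, ip - rep[0], iend);
                printf("rep0 match, offset=%u, length=%zu\n", (unsigned)rep[0], mlen);
                ip += mlen;
                continue;
            }
            /* ...a real parser would call its match finder here and, on success,
               rotate the history: rep[1] = rep[0]; rep[0] = newOffset; ... */
            ip++;
        }
    }

    int main(void)
    {
        const uint8_t data[] = "abcdabcdabcdabcdxyzwxyzwxyzw";
        toy_parse(data, sizeof(data) - 1);
        return 0;
    }

In the actual patch the fallback branch is the selected match finder (ZSTD_HcFindBestMatch_selectMLS or ZSTD_BtFindBestMatch_selectMLS), and the emitted sequence goes through ZSTD_storeSeq(), exactly as ZSTD_compressBlock_lazy_generic() shows in the diff above.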