From: Yann Collet Date: Tue, 3 Nov 2015 08:49:30 +0000 (+0100) Subject: level tuning X-Git-Tag: zstd-0.3.3^2~12 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=050efba81b862f790670f72fdd92aa6d98539b14;p=thirdparty%2Fzstd.git level tuning --- diff --git a/lib/zstdhc.c b/lib/zstdhc.c index e6de28d4d..693c713b2 100644 --- a/lib/zstdhc.c +++ b/lib/zstdhc.c @@ -184,7 +184,7 @@ static size_t ZSTD_HC_hashPtr(const void* p, U32 h, U32 mls) } } -#define NEXT_IN_CHAIN(d) chainTable[(d) & chainMask] /* flexible, CHAINSIZE dependent */ +#define NEXT_IN_CHAIN(d, mask) chainTable[(d) & mask] /* ************************************* @@ -204,7 +204,7 @@ static U32 ZSTD_HC_insertAndFindFirstIndex (ZSTD_HC_CCtx* zc, const BYTE* ip, U while(idx < target) { size_t h = ZSTD_HC_hashPtr(base+idx, hashLog, mls); - NEXT_IN_CHAIN(idx) = hashTable[h]; + NEXT_IN_CHAIN(idx, chainMask) = hashTable[h]; hashTable[h] = idx; idx++; } @@ -217,8 +217,8 @@ static U32 ZSTD_HC_insertAndFindFirstIndex (ZSTD_HC_CCtx* zc, const BYTE* ip, U FORCE_INLINE /* inlining is important to hardwire a hot branch (template emulation) */ size_t ZSTD_HC_insertAndFindBestMatch ( ZSTD_HC_CCtx* zc, /* Index table will be updated */ - const BYTE* ip, const BYTE* const iLimit, - const BYTE** matchpos, + const BYTE* const ip, const BYTE* const iLimit, + size_t* offsetPtr, const U32 maxNbAttempts, const U32 matchLengthSearch) { U32* const chainTable = zc->chainTable; @@ -237,17 +237,22 @@ size_t ZSTD_HC_insertAndFindBestMatch ( /* HC4 match finder */ matchIndex = ZSTD_HC_insertAndFindFirstIndex (zc, ip, matchLengthSearch); - while ((matchIndex>=lowLimit) && (nbAttempts)) + while ((matchIndex>lowLimit) && (nbAttempts)) { nbAttempts--; if (matchIndex >= dictLimit) { match = base + matchIndex; - if (*(match+ml) == *(ip+ml) - && (MEM_read32(match) == MEM_read32(ip))) + if ( (match[ml] == ip[ml]) + && (MEM_read32(match) == MEM_read32(ip)) ) /* ensures minimum match of 4 */ { const size_t mlt = ZSTD_count(ip+MINMATCH, match+MINMATCH, iLimit) + MINMATCH; - if (mlt > ml) { ml = mlt; *matchpos = match; if (ip+ml >= iLimit) break; } + if (mlt > ml) + //if (((int)(4*mlt) - (int)ZSTD_highbit((U32)(ip-match)+1)) > ((int)(4*ml) - (int)ZSTD_highbit((U32)((*offsetPtr)+1)))) + { + ml = mlt; *offsetPtr = ip-match; + if (ip+ml >= iLimit) break; + } } } else @@ -261,12 +266,12 @@ size_t ZSTD_HC_insertAndFindBestMatch ( mlt = ZSTD_count(ip+MINMATCH, match+MINMATCH, vLimit) + MINMATCH; if ((ip+mlt == vLimit) && (vLimit < iLimit)) mlt += ZSTD_count(ip+mlt, base+dictLimit, iLimit); - if (mlt > ml) { ml = mlt; *matchpos = base + matchIndex; } /* virtual matchpos */ + if (mlt > ml) { ml = mlt; *offsetPtr = (ip-base) - matchIndex; } } } if (base + matchIndex <= ip - chainSize) break; - matchIndex = NEXT_IN_CHAIN(matchIndex); + matchIndex = NEXT_IN_CHAIN(matchIndex, chainMask); } return ml; @@ -276,15 +281,15 @@ size_t ZSTD_HC_insertAndFindBestMatch ( FORCE_INLINE size_t ZSTD_HC_insertAndFindBestMatch_selectMLS ( ZSTD_HC_CCtx* zc, /* Index table will be updated */ const BYTE* ip, const BYTE* const iLimit, - const BYTE** matchpos, + size_t* offsetPtr, const U32 maxNbAttempts, const U32 matchLengthSearch) { switch(matchLengthSearch) { default : - case 4 : return ZSTD_HC_insertAndFindBestMatch(zc, ip, iLimit, matchpos, maxNbAttempts, 4); - case 5 : return ZSTD_HC_insertAndFindBestMatch(zc, ip, iLimit, matchpos, maxNbAttempts, 5); - case 6 : return ZSTD_HC_insertAndFindBestMatch(zc, ip, iLimit, matchpos, maxNbAttempts, 6); + case 4 : return ZSTD_HC_insertAndFindBestMatch(zc, ip, iLimit, offsetPtr, maxNbAttempts, 4); + case 5 : return ZSTD_HC_insertAndFindBestMatch(zc, ip, iLimit, offsetPtr, maxNbAttempts, 5); + case 6 : return ZSTD_HC_insertAndFindBestMatch(zc, ip, iLimit, offsetPtr, maxNbAttempts, 6); } } @@ -297,7 +302,6 @@ size_t ZSTD_HC_compressBlock_lazydeep(ZSTD_HC_CCtx* ctx, void* dst, size_t maxDs const BYTE* anchor = istart; const BYTE* const iend = istart + srcSize; const BYTE* const ilimit = iend - 8; - const BYTE* match = istart; size_t offset_2=REPCODE_STARTVALUE, offset_1=REPCODE_STARTVALUE; const U32 maxSearches = 1 << ctx->params.searchLog; @@ -311,7 +315,7 @@ size_t ZSTD_HC_compressBlock_lazydeep(ZSTD_HC_CCtx* ctx, void* dst, size_t maxDs while (ip <= ilimit) { size_t matchLength; - size_t offset; + size_t offset=999999; const BYTE* start; /* try to find a first match */ @@ -330,11 +334,10 @@ size_t ZSTD_HC_compressBlock_lazydeep(ZSTD_HC_CCtx* ctx, void* dst, size_t maxDs } offset_2 = offset_1; - matchLength = ZSTD_HC_insertAndFindBestMatch_selectMLS(ctx, ip, iend, &match, maxSearches, mls); + matchLength = ZSTD_HC_insertAndFindBestMatch_selectMLS(ctx, ip, iend, &offset, maxSearches, mls); if (!matchLength) { ip++; continue; } /* let's try to find a better solution */ - offset = ip - match; start = ip; while (ip gain1) @@ -360,6 +363,7 @@ size_t ZSTD_HC_compressBlock_lazydeep(ZSTD_HC_CCtx* ctx, void* dst, size_t maxDs } } + /* let's find an even better one */ if (ip gain1) matchLength = ml2, offset = 0, start = ip; } { - size_t ml2 = ZSTD_HC_insertAndFindBestMatch_selectMLS(ctx, ip, iend, &match, maxSearches, mls); - size_t offset2 = ip - match; + size_t offset2=999999; + size_t ml2 = ZSTD_HC_insertAndFindBestMatch_selectMLS(ctx, ip, iend, &offset2, maxSearches, mls); int gain2 = (int)(ml2*4 - ZSTD_highbit((U32)offset2+1)); /* raw approx */ - int gain1 = (int)(matchLength*4 - ZSTD_highbit((U32)offset+1) + 8); + int gain1 = (int)(matchLength*4 - ZSTD_highbit((U32)offset+1) + 7); if (gain2 > gain1) { matchLength = ml2, offset = offset2, start = ip; @@ -383,7 +387,7 @@ size_t ZSTD_HC_compressBlock_lazydeep(ZSTD_HC_CCtx* ctx, void* dst, size_t maxDs } } } - break; /* nothing found : store previous one */ + break; /* nothing found : store previous solution */ } /* store sequence */ @@ -418,7 +422,6 @@ size_t ZSTD_HC_compressBlock_lazy(ZSTD_HC_CCtx* ctx, void* dst, size_t maxDstSiz const BYTE* anchor = istart; const BYTE* const iend = istart + srcSize; const BYTE* const ilimit = iend - 8; - const BYTE* match = istart; size_t offset_2=REPCODE_STARTVALUE, offset_1=REPCODE_STARTVALUE; const U32 maxSearches = 1 << ctx->params.searchLog; @@ -432,7 +435,7 @@ size_t ZSTD_HC_compressBlock_lazy(ZSTD_HC_CCtx* ctx, void* dst, size_t maxDstSiz while (ip <= ilimit) { size_t matchLength; - size_t offset; + size_t offset=0; const BYTE* start; /* try to find a first match */ @@ -451,11 +454,10 @@ size_t ZSTD_HC_compressBlock_lazy(ZSTD_HC_CCtx* ctx, void* dst, size_t maxDstSiz } offset_2 = offset_1; - matchLength = ZSTD_HC_insertAndFindBestMatch_selectMLS(ctx, ip, iend, &match, maxSearches, mls); + matchLength = ZSTD_HC_insertAndFindBestMatch_selectMLS(ctx, ip, iend, &offset, maxSearches, mls); if (!matchLength) { ip++; continue; } /* let's try to find a better solution */ - offset = ip - match; start = ip; while (ip gain1) @@ -519,7 +521,6 @@ size_t ZSTD_HC_compressBlock_greedy(ZSTD_HC_CCtx* ctx, void* dst, size_t maxDstS const BYTE* anchor = istart; const BYTE* const iend = istart + srcSize; const BYTE* const ilimit = iend - 8; - const BYTE* match = istart; size_t offset_2=REPCODE_STARTVALUE, offset_1=REPCODE_STARTVALUE; const U32 maxSearches = 1 << ctx->params.searchLog; @@ -562,12 +563,13 @@ size_t ZSTD_HC_compressBlock_greedy(ZSTD_HC_CCtx* ctx, void* dst, size_t maxDstS /* search */ { - size_t matchLength = ZSTD_HC_insertAndFindBestMatch_selectMLS(ctx, ip, iend, &match, maxSearches, mls); + size_t offset=999999; + size_t matchLength = ZSTD_HC_insertAndFindBestMatch_selectMLS(ctx, ip, iend, &offset, maxSearches, mls); if (!matchLength) { ip++; continue; } /* store sequence */ { size_t litLength = ip-anchor; - offset_1 = ip-match; + offset_1 = offset; ZSTD_storeSeq(seqStorePtr, litLength, anchor, offset_1, matchLength-MINMATCH); ip += matchLength; anchor = ip; @@ -591,9 +593,17 @@ size_t ZSTD_HC_compressBlock_greedy(ZSTD_HC_CCtx* ctx, void* dst, size_t maxDstS size_t ZSTD_HC_compressBlock(ZSTD_HC_CCtx* ctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize) { - if (ctx->params.strategy == ZSTD_HC_greedy) - return ZSTD_HC_compressBlock_greedy(ctx, dst, maxDstSize, src, srcSize); - return ZSTD_HC_compressBlock_lazy(ctx, dst, maxDstSize, src, srcSize); + switch(ctx->params.strategy) + { + case ZSTD_HC_greedy: + return ZSTD_HC_compressBlock_greedy(ctx, dst, maxDstSize, src, srcSize); + case ZSTD_HC_lazy: + return ZSTD_HC_compressBlock_lazy(ctx, dst, maxDstSize, src, srcSize); + case ZSTD_HC_lazydeep: + return ZSTD_HC_compressBlock_lazydeep(ctx, dst, maxDstSize, src, srcSize); + default : + return ERROR(GENERIC); /* unknown block compressor */ + } } diff --git a/lib/zstdhc_static.h b/lib/zstdhc_static.h index 40d0b60c8..db185b400 100644 --- a/lib/zstdhc_static.h +++ b/lib/zstdhc_static.h @@ -53,7 +53,7 @@ typedef struct U32 hashLog; /* dispatch table : larger == more memory, faster*/ U32 searchLog; /* nb of searches : larger == more compression, slower*/ U32 searchLength; /* size of matches : larger == faster decompression */ - ZSTD_HC_strategy strategy; /* greedy, lazy (stronger, slower) */ + ZSTD_HC_strategy strategy; /* greedy, lazy, lazydeep */ } ZSTD_HC_parameters; /* parameters boundaries */ @@ -73,7 +73,7 @@ typedef struct * Advanced function ***************************************/ /** ZSTD_HC_compress_advanced -* Same as ZSTD_HC_compressCCtx(), but can fine-tune each compression parameter */ +* Same as ZSTD_HC_compressCCtx(), with fine-tune control of each compression parameter */ size_t ZSTD_HC_compress_advanced (ZSTD_HC_CCtx* ctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize, @@ -94,35 +94,36 @@ size_t ZSTD_HC_compressEnd(ZSTD_HC_CCtx* ctx, void* dst, size_t maxDstSize); #define ZSTD_HC_MAX_CLEVEL 26 static const ZSTD_HC_parameters ZSTD_HC_defaultParameters[ZSTD_HC_MAX_CLEVEL+1] = { /* W, C, H, S, L, strat */ - { 18, 12, 14, 1, 4, ZSTD_HC_greedy }, /* level 0 - never used */ - { 18, 12, 14, 1, 4, ZSTD_HC_greedy }, /* level 1 - in fact redirected towards zstd fast */ - { 18, 12, 15, 2, 4, ZSTD_HC_greedy }, /* level 2 */ - { 19, 13, 17, 3, 5, ZSTD_HC_greedy }, /* level 3 */ - { 20, 18, 19, 2, 5, ZSTD_HC_greedy }, /* level 4 */ - { 20, 19, 19, 3, 5, ZSTD_HC_greedy }, /* level 5 */ - { 20, 18, 20, 3, 5, ZSTD_HC_lazy }, /* level 6 */ - { 20, 18, 20, 4, 5, ZSTD_HC_lazy }, /* level 7 */ - { 21, 19, 20, 4, 5, ZSTD_HC_lazy }, /* level 8 */ - { 21, 19, 20, 5, 5, ZSTD_HC_lazy }, /* level 9 */ - { 21, 20, 20, 5, 5, ZSTD_HC_lazy }, /* level 10 */ - { 21, 21, 20, 5, 5, ZSTD_HC_lazy }, /* level 11 */ - { 22, 20, 22, 6, 5, ZSTD_HC_lazy }, /* level 12 */ - { 22, 21, 22, 6, 5, ZSTD_HC_lazy }, /* level 13 */ - { 23, 21, 22, 6, 5, ZSTD_HC_lazy }, /* level 14 */ - { 23, 21, 23, 7, 5, ZSTD_HC_lazy }, /* level 15 */ - { 23, 22, 22, 6, 5, ZSTD_HC_lazy }, /* level 16 */ - { 23, 22, 22, 7, 5, ZSTD_HC_lazy }, /* level 17 */ - { 23, 23, 22, 7, 5, ZSTD_HC_lazy }, /* level 18 */ - { 23, 22, 23, 8, 5, ZSTD_HC_lazy }, /* level 19 */ - { 23, 23, 23, 8, 5, ZSTD_HC_lazy }, /* level 20 */ - { 23, 23, 23, 8, 5, ZSTD_HC_lazy }, /* level 21 */ - { 24, 24, 24, 8, 5, ZSTD_HC_lazy }, /* level 22 */ - { 24, 23, 23, 9, 5, ZSTD_HC_lazy }, /* level 23 */ - { 24, 24, 24, 9, 5, ZSTD_HC_lazy }, /* level 24 */ - { 24, 24, 24, 9, 5, ZSTD_HC_lazy }, /* level 25 */ - { 24, 24, 24, 10, 5, ZSTD_HC_lazy }, /* level 26 */ /* ZSTD_HC_MAX_CLEVEL */ + { 18, 12, 14, 1, 4, ZSTD_HC_greedy }, /* level 0 - never used */ + { 18, 12, 14, 1, 4, ZSTD_HC_greedy }, /* level 1 - in fact redirected towards zstd fast */ + { 18, 12, 15, 2, 4, ZSTD_HC_greedy }, /* level 2 */ + { 19, 13, 17, 3, 5, ZSTD_HC_greedy }, /* level 3 */ + { 20, 18, 19, 2, 5, ZSTD_HC_greedy }, /* level 4 */ + { 20, 18, 19, 2, 5, ZSTD_HC_lazy }, /* level 5 */ + { 20, 18, 20, 3, 5, ZSTD_HC_lazy }, /* level 6 */ + { 20, 18, 20, 4, 5, ZSTD_HC_lazy }, /* level 7 */ + { 21, 19, 20, 4, 5, ZSTD_HC_lazy }, /* level 8 */ + { 21, 19, 20, 5, 5, ZSTD_HC_lazy }, /* level 9 */ + { 21, 20, 20, 5, 5, ZSTD_HC_lazy }, /* level 10 */ + { 21, 20, 20, 5, 5, ZSTD_HC_lazydeep }, /* level 11 */ + { 22, 20, 22, 5, 5, ZSTD_HC_lazydeep }, /* level 12 */ + { 22, 20, 22, 6, 5, ZSTD_HC_lazydeep }, /* level 13 */ + { 21, 21, 22, 6, 5, ZSTD_HC_lazydeep }, /* level 14 */ + { 22, 21, 22, 6, 5, ZSTD_HC_lazydeep }, /* level 15 */ + { 22, 21, 23, 7, 5, ZSTD_HC_lazydeep }, /* level 16 */ + { 23, 21, 23, 7, 5, ZSTD_HC_lazydeep }, /* level 17 */ + { 23, 22, 23, 7, 5, ZSTD_HC_lazydeep }, /* level 18 */ + { 23, 22, 23, 7, 5, ZSTD_HC_lazydeep }, /* level 19 */ + { 23, 22, 23, 8, 5, ZSTD_HC_lazydeep }, /* level 20 */ + { 23, 22, 23, 8, 5, ZSTD_HC_lazydeep }, /* level 21 */ + { 23, 23, 24, 8, 5, ZSTD_HC_lazydeep }, /* level 22 */ + { 24, 24, 24, 8, 5, ZSTD_HC_lazydeep }, /* level 23 */ + { 23, 23, 23, 9, 5, ZSTD_HC_lazydeep }, /* level 24 */ + { 24, 23, 23, 9, 5, ZSTD_HC_lazydeep }, /* level 25 */ + { 24, 24, 24, 9, 5, ZSTD_HC_lazydeep }, /* level 26 */ }; + #if defined (__cplusplus) } #endif diff --git a/programs/bench.c b/programs/bench.c index 85c3700b4..8dab71265 100644 --- a/programs/bench.c +++ b/programs/bench.c @@ -83,7 +83,7 @@ #define MB *(1 <<20) #define GB *(1U<<30) -#define MAX_MEM (2 GB - 64 MB) +static const size_t maxMemory = sizeof(size_t)==4 ? (2 GB - 64 MB) : 8ULL GB; #define DEFAULT_CHUNKSIZE (4 MB) static U32 g_compressibilityDefault = 50; @@ -401,7 +401,7 @@ static size_t BMK_findMaxMem(U64 requiredMem) requiredMem = (((requiredMem >> 26) + 1) << 26); requiredMem += 2 * step; - if (requiredMem > MAX_MEM) requiredMem = MAX_MEM; + if (requiredMem > maxMemory) requiredMem = maxMemory; while (!testmem) {