From 9b11b46f8aa1b3d09f4ab98bf9af9183d7080d76 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Sun, 1 Nov 2015 12:40:22 +0100 Subject: [PATCH] Fixed speed regression --- lib/zstdhc.c | 186 ++++++++++++++++++++++-------------------- programs/paramgrill.c | 21 +++-- 2 files changed, 112 insertions(+), 95 deletions(-) diff --git a/lib/zstdhc.c b/lib/zstdhc.c index 008820766..58615f8cc 100644 --- a/lib/zstdhc.c +++ b/lib/zstdhc.c @@ -191,7 +191,7 @@ static size_t ZSTD_HC_hashPtr(const void* p, U32 h, U32 mls) * HC Compression ***************************************/ /* Update chains up to ip (excluded) */ -static U32 ZSTD_HC_insertAndFindFirstIndex (ZSTD_HC_CCtx* zc, const BYTE* ip, U32 mls) +static U32 ZSTD_HC_insertAndFindFirstIndex (ZSTD_HC_CCtx* zc, const BYTE* ip, U32 mls) { U32* const hashTable = zc->hashTable; const U32 hashLog = zc->params.hashLog; @@ -210,7 +210,7 @@ static U32 ZSTD_HC_insertAndFindFirstIndex (ZSTD_HC_CCtx* zc, const BYTE* ip, U3 } zc->nextToUpdate = target; - return hashTable[ZSTD_HC_hashPtr(ip, hashLog, mls)]; + return hashTable[ZSTD_HC_hashPtr(ip, hashLog, mls)]; } @@ -235,7 +235,7 @@ size_t ZSTD_HC_insertAndFindBestMatch ( size_t ml=0; /* HC4 match finder */ - matchIndex = ZSTD_HC_insertAndFindFirstIndex(zc, ip, matchLengthSearch); + matchIndex = ZSTD_HC_insertAndFindFirstIndex (zc, ip, matchLengthSearch); while ((matchIndex>=lowLimit) && (nbAttempts)) { @@ -273,7 +273,7 @@ size_t ZSTD_HC_insertAndFindBestMatch ( } -static size_t ZSTD_HC_insertAndFindBestMatch_selectMLS ( +FORCE_INLINE size_t ZSTD_HC_insertAndFindBestMatch_selectMLS ( ZSTD_HC_CCtx* zc, /* Index table will be updated */ const BYTE* ip, const BYTE* const iLimit, const BYTE** matchpos, @@ -289,7 +289,7 @@ static size_t ZSTD_HC_insertAndFindBestMatch_selectMLS ( } -size_t ZSTD_HC_compressBlock_greedy(ZSTD_HC_CCtx* ctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize) +size_t ZSTD_HC_compressBlock_lazy(ZSTD_HC_CCtx* ctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize) { seqStore_t* seqStorePtr = &(ctx->seqStore); const BYTE* const istart = (const BYTE*)src; @@ -308,49 +308,73 @@ size_t ZSTD_HC_compressBlock_greedy(ZSTD_HC_CCtx* ctx, void* dst, size_t maxDstS if (((ip-ctx->base) - ctx->dictLimit) < REPCODE_STARTVALUE) ip += REPCODE_STARTVALUE; /* Match Loop */ - while (ip < ilimit) + while (ip <= ilimit) { - /* repcode */ + size_t matchLength; + size_t offset; + const BYTE* start; + + /* try to find a first match */ if (MEM_read32(ip) == MEM_read32(ip - offset_2)) { - /* store sequence */ - size_t matchLength = ZSTD_count(ip+MINMATCH, ip+MINMATCH-offset_2, iend); - size_t litLength = ip-anchor; - size_t offset = offset_2; + /* repcode : we take it*/ + size_t offtmp = offset_2; + size_t litLength = ip - anchor; + matchLength = ZSTD_count(ip+MINMATCH, ip+MINMATCH-offset_2, iend); offset_2 = offset_1; - offset_1 = offset; + offset_1 = offtmp; ZSTD_storeSeq(seqStorePtr, litLength, anchor, 0, matchLength); ip += matchLength+MINMATCH; anchor = ip; continue; } - offset_2 = offset_1; /* failed once : necessarily offset_1 now */ + offset_2 = offset_1; + matchLength = ZSTD_HC_insertAndFindBestMatch_selectMLS(ctx, ip, iend, &match, maxSearches, mls); + if (!matchLength) { ip++; continue; } - /* repcode at ip+1 */ - if (MEM_read32(ip+1) == MEM_read32(ip+1 - offset_1)) - { - size_t matchLength = ZSTD_count(ip+1+MINMATCH, ip+1+MINMATCH-offset_1, iend); - size_t litLength = ip+1-anchor; - ZSTD_storeSeq(seqStorePtr, litLength, anchor, 0, matchLength); - ip += 1+matchLength+MINMATCH; - anchor = ip; - continue; - } + /* let's try to find a better solution */ + offset = ip - match; + start = ip; - /* search */ + while (ip gain1) + { + matchLength = ml2, offset = 0, start = ip; + break; + } } + { + size_t ml2 = ZSTD_HC_insertAndFindBestMatch_selectMLS(ctx, ip, iend, &match, maxSearches, mls); + size_t offset2 = ip - match; + int gain2 = (int)(ml2*5 - ZSTD_highbit((U32)offset2)); /* raw approx */ + int gain1 = (int)(matchLength*5 - ZSTD_highbit((U32)offset)); + if (gain2 > gain1) + { + matchLength = ml2, offset = offset2, start = ip; + continue; /* search a better one */ + } + } + + break; /* nothing found : store previous one */ } + + /* store sequence */ + { + size_t litLength = start - anchor; + if (offset) offset_1 = offset; + ZSTD_storeSeq(seqStorePtr, litLength, anchor, offset, matchLength-MINMATCH); + ip = start + matchLength; + anchor = ip; + } + } /* Last Literals */ @@ -365,7 +389,8 @@ size_t ZSTD_HC_compressBlock_greedy(ZSTD_HC_CCtx* ctx, void* dst, size_t maxDstS seqStorePtr, srcSize); } -size_t ZSTD_HC_compressBlock_lazy(ZSTD_HC_CCtx* ctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize) + +size_t ZSTD_HC_compressBlock_greedy(ZSTD_HC_CCtx* ctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize) { seqStore_t* seqStorePtr = &(ctx->seqStore); const BYTE* const istart = (const BYTE*)src; @@ -384,72 +409,49 @@ size_t ZSTD_HC_compressBlock_lazy(ZSTD_HC_CCtx* ctx, void* dst, size_t maxDstSiz if (((ip-ctx->base) - ctx->dictLimit) < REPCODE_STARTVALUE) ip += REPCODE_STARTVALUE; /* Match Loop */ - while (ip <= ilimit) + while (ip < ilimit) { - size_t matchLength; - size_t offset; - const BYTE* start; - - /* try to find a first match */ + /* repcode */ if (MEM_read32(ip) == MEM_read32(ip - offset_2)) { - /* repcode : we take it*/ - size_t offtmp = offset_2; - size_t litLength = ip - anchor; - matchLength = ZSTD_count(ip+MINMATCH, ip+MINMATCH-offset_2, iend); + /* store sequence */ + size_t matchLength = ZSTD_count(ip+MINMATCH, ip+MINMATCH-offset_2, iend); + size_t litLength = ip-anchor; + size_t offset = offset_2; offset_2 = offset_1; - offset_1 = offtmp; + offset_1 = offset; ZSTD_storeSeq(seqStorePtr, litLength, anchor, 0, matchLength); ip += matchLength+MINMATCH; anchor = ip; continue; } - offset_2 = offset_1; - matchLength = ZSTD_HC_insertAndFindBestMatch_selectMLS(ctx, ip, iend, &match, maxSearches, mls); - if (!matchLength) { ip++; continue; } - - /* let's try to find a better solution */ - offset = ip - match; - start = ip; + offset_2 = offset_1; /* failed once : necessarily offset_1 now */ - while (ip gain1) - { - matchLength = ml2, offset = 0, start = ip; - break; - } - } - { - size_t ml2 = ZSTD_HC_insertAndFindBestMatch_selectMLS(ctx, ip, iend, &match, maxSearches, mls); - size_t offset2 = ip - match; - int gain2 = ml2 - (ZSTD_highbit((U32)offset2) / 4); /* raw approx */ - int gain1 = matchLength - (ZSTD_highbit((U32)offset) / 4); - if (gain2 > gain1) - { - matchLength = ml2, offset = offset2, start = ip; - continue; /* search a better one */ - } - } - - break; /* nothing found : store previous one */ + size_t matchLength = ZSTD_count(ip+1+MINMATCH, ip+1+MINMATCH-offset_1, iend); + size_t litLength = ip+1-anchor; + ZSTD_storeSeq(seqStorePtr, litLength, anchor, 0, matchLength); + ip += 1+matchLength+MINMATCH; + anchor = ip; + continue; } - /* store sequence */ + /* search */ { - size_t litLength = start - anchor; - if (offset) offset_1 = offset; - ZSTD_storeSeq(seqStorePtr, litLength, anchor, offset, matchLength-MINMATCH); - ip = start + matchLength; - anchor = ip; + size_t matchLength = ZSTD_HC_insertAndFindBestMatch_selectMLS(ctx, ip, iend, &match, maxSearches, mls); + if (!matchLength) { ip++; continue; } + /* store sequence */ + { + size_t litLength = ip-anchor; + offset_1 = ip-match; + ZSTD_storeSeq(seqStorePtr, litLength, anchor, offset_1, matchLength-MINMATCH); + ip += matchLength; + anchor = ip; + } } - } /* Last Literals */ @@ -461,15 +463,16 @@ size_t ZSTD_HC_compressBlock_lazy(ZSTD_HC_CCtx* ctx, void* dst, size_t maxDstSiz /* Final compression stage */ return ZSTD_compressSequences((BYTE*)dst, maxDstSize, + seqStorePtr, srcSize); } size_t ZSTD_HC_compressBlock(ZSTD_HC_CCtx* ctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize) { - if (ctx->params.strategy==ZSTD_HC_greedy) - return ZSTD_HC_compressBlock_greedy(ctx, dst, maxDstSize, src, srcSize); - return ZSTD_HC_compressBlock_lazy(ctx, dst, maxDstSize, src, srcSize); + if (ctx->params.strategy == ZSTD_HC_greedy) + return ZSTD_HC_compressBlock_greedy(ctx, dst, maxDstSize, src, srcSize); + return ZSTD_HC_compressBlock_lazy(ctx, dst, maxDstSize, src, srcSize); } @@ -483,10 +486,17 @@ static size_t ZSTD_HC_compress_generic (ZSTD_HC_CCtx* ctxPtr, BYTE* const ostart = (BYTE*)dst; BYTE* op = ostart; BYTE* const oend = op + maxDstSize; + size_t (*blockCompressor) (ZSTD_HC_CCtx* ctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize); + + if (ctxPtr->params.strategy == ZSTD_HC_greedy) + blockCompressor = ZSTD_HC_compressBlock_greedy; + else + blockCompressor = ZSTD_HC_compressBlock_lazy; + while (remaining > blockSize) { - size_t cSize = ZSTD_HC_compressBlock(ctxPtr, op+3, oend-op, ip, blockSize); + size_t cSize = blockCompressor(ctxPtr, op+3, oend-op, ip, blockSize); if (cSize == 0) { @@ -509,7 +519,7 @@ static size_t ZSTD_HC_compress_generic (ZSTD_HC_CCtx* ctxPtr, /* last block */ { - size_t cSize = ZSTD_HC_compressBlock(ctxPtr, op+3, oend-op, ip, remaining); + size_t cSize = blockCompressor(ctxPtr, op+3, oend-op, ip, remaining); if (cSize == 0) { diff --git a/programs/paramgrill.c b/programs/paramgrill.c index e0dc6970f..acbb1650a 100644 --- a/programs/paramgrill.c +++ b/programs/paramgrill.c @@ -122,7 +122,8 @@ static U32 g_rand = 1; static U32 g_singleRun = 0; static U32 g_target = 0; static U32 g_noSeed = 0; -static ZSTD_HC_parameters g_params = { 0, 0, 0, 0, 0, 0 }; +static const ZSTD_HC_parameters* g_seedParams = ZSTD_HC_defaultParameters; +static ZSTD_HC_parameters g_params = { 0, 0, 0, 0, 0, ZSTD_HC_greedy }; void BMK_SetNbIterations(int nbLoops) { @@ -623,9 +624,9 @@ static void playAround(FILE* f, winnerInfo_t* winners, case 9: p.searchLength--; break; case 10: - p.strategy++; break; + p.strategy = ZSTD_HC_lazy; break; case 11: - p.strategy--; break; + p.strategy = ZSTD_HC_greedy; break; } } @@ -674,7 +675,7 @@ static void BMK_selectRandomStart( p.searchLog = FUZ_rand(&g_rand) % (ZSTD_HC_SEARCHLOG_MAX+1 - ZSTD_HC_SEARCHLOG_MIN) + ZSTD_HC_SEARCHLOG_MIN; p.windowLog = FUZ_rand(&g_rand) % (ZSTD_HC_WINDOWLOG_MAX+1 - ZSTD_HC_WINDOWLOG_MIN) + ZSTD_HC_WINDOWLOG_MIN; p.searchLength=FUZ_rand(&g_rand) % (ZSTD_HC_SEARCHLENGTH_MAX+1 - ZSTD_HC_SEARCHLENGTH_MIN) + ZSTD_HC_SEARCHLENGTH_MIN; - p.strategy = FUZ_rand(&g_rand) & 1; + p.strategy = (ZSTD_HC_strategy) (FUZ_rand(&g_rand) & 1); playAround(f, winners, p, srcBuffer, srcSize, ctx); } else @@ -682,8 +683,6 @@ static void BMK_selectRandomStart( } -static const ZSTD_HC_parameters* g_seedParams = ZSTD_HC_defaultParameters; - static void BMK_benchMem(void* srcBuffer, size_t srcSize) { ZSTD_HC_CCtx* ctx = ZSTD_HC_createCCtx(); @@ -968,12 +967,20 @@ int main(int argc, char** argv) while ((*argument>= '0') && (*argument<='9')) g_params.searchLog *= 10, g_params.searchLog += *argument++ - '0'; continue; - case 'l': + case 'l': /* search length */ g_params.searchLength = 0; argument++; while ((*argument>= '0') && (*argument<='9')) g_params.searchLength *= 10, g_params.searchLength += *argument++ - '0'; continue; + case 't': /* strategy */ + g_params.strategy = ZSTD_HC_greedy; + argument++; + while ((*argument>= '0') && (*argument<='9')) + { + if (*argument++) g_params.strategy = ZSTD_HC_lazy; + } + continue; case 'L': { int cLevel = 0; -- 2.47.2