From: Yann Collet Date: Mon, 2 Nov 2015 15:14:46 +0000 (+0100) Subject: lazydeep X-Git-Tag: zstd-0.3.3^2~13 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=3564487614491f5a1e799e64616f7c50d8ab5746;p=thirdparty%2Fzstd.git lazydeep --- diff --git a/Makefile b/Makefile index 7f90fd295..34d425e09 100644 --- a/Makefile +++ b/Makefile @@ -32,7 +32,7 @@ # ################################################################ # Version number -export VERSION := 0.3.2 +export VERSION := 0.3.3 PRGDIR = programs ZSTDDIR = lib diff --git a/NEWS b/NEWS index 9693d7793..ee8c47bea 100644 --- a/NEWS +++ b/NEWS @@ -1,3 +1,6 @@ +v0.3.3 +Small compression ratio improvement + v0.3.2 Fixed Visual Studio diff --git a/lib/zstd.h b/lib/zstd.h index d17bc4a33..d79410d15 100644 --- a/lib/zstd.h +++ b/lib/zstd.h @@ -48,7 +48,7 @@ extern "C" { ***************************************/ #define ZSTD_VERSION_MAJOR 0 /* for breaking interface changes */ #define ZSTD_VERSION_MINOR 3 /* for new (non-breaking) interface capabilities */ -#define ZSTD_VERSION_RELEASE 2 /* for tweaks, bug-fixes, or development */ +#define ZSTD_VERSION_RELEASE 3 /* for tweaks, bug-fixes, or development */ #define ZSTD_VERSION_NUMBER (ZSTD_VERSION_MAJOR *100*100 + ZSTD_VERSION_MINOR *100 + ZSTD_VERSION_RELEASE) unsigned ZSTD_versionNumber (void); diff --git a/lib/zstdhc.c b/lib/zstdhc.c index 58615f8cc..e6de28d4d 100644 --- a/lib/zstdhc.c +++ b/lib/zstdhc.c @@ -289,6 +289,127 @@ FORCE_INLINE size_t ZSTD_HC_insertAndFindBestMatch_selectMLS ( } +size_t ZSTD_HC_compressBlock_lazydeep(ZSTD_HC_CCtx* ctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize) +{ + seqStore_t* seqStorePtr = &(ctx->seqStore); + const BYTE* const istart = (const BYTE*)src; + const BYTE* ip = istart; + const BYTE* anchor = istart; + const BYTE* const iend = istart + srcSize; + const BYTE* const ilimit = iend - 8; + const BYTE* match = istart; + + size_t offset_2=REPCODE_STARTVALUE, offset_1=REPCODE_STARTVALUE; + const U32 maxSearches = 1 << ctx->params.searchLog; + const U32 mls = ctx->params.searchLength; + + /* init */ + ZSTD_resetSeqStore(seqStorePtr); + if (((ip-ctx->base) - ctx->dictLimit) < REPCODE_STARTVALUE) ip += REPCODE_STARTVALUE; + + /* Match Loop */ + while (ip <= ilimit) + { + size_t matchLength; + size_t offset; + const BYTE* start; + + /* try to find a first match */ + if (MEM_read32(ip) == MEM_read32(ip - offset_2)) + { + /* repcode : we take it*/ + size_t offtmp = offset_2; + size_t litLength = ip - anchor; + matchLength = ZSTD_count(ip+MINMATCH, ip+MINMATCH-offset_2, iend); + offset_2 = offset_1; + offset_1 = offtmp; + ZSTD_storeSeq(seqStorePtr, litLength, anchor, 0, matchLength); + ip += matchLength+MINMATCH; + anchor = ip; + continue; + } + + offset_2 = offset_1; + matchLength = ZSTD_HC_insertAndFindBestMatch_selectMLS(ctx, ip, iend, &match, maxSearches, mls); + if (!matchLength) { ip++; continue; } + + /* let's try to find a better solution */ + offset = ip - match; + start = ip; + + while (ip gain1) + matchLength = ml2, offset = 0, start = ip; + } + { + size_t ml2 = ZSTD_HC_insertAndFindBestMatch_selectMLS(ctx, ip, iend, &match, maxSearches, mls); + size_t offset2 = ip - match; + int gain2 = (int)(ml2*4 - ZSTD_highbit((U32)offset2+1)); /* raw approx */ + int gain1 = (int)(matchLength*4 - ZSTD_highbit((U32)offset+1) + 4); + if (gain2 > gain1) + { + matchLength = ml2, offset = offset2, start = ip; + continue; /* search a better one */ + } + } + + if (ip gain1) + matchLength = ml2, offset = 0, start = ip; + } + { + size_t ml2 = ZSTD_HC_insertAndFindBestMatch_selectMLS(ctx, ip, iend, &match, maxSearches, mls); + size_t offset2 = ip - match; + int gain2 = (int)(ml2*4 - ZSTD_highbit((U32)offset2+1)); /* raw approx */ + int gain1 = (int)(matchLength*4 - ZSTD_highbit((U32)offset+1) + 8); + if (gain2 > gain1) + { + matchLength = ml2, offset = offset2, start = ip; + continue; + } + } + } + break; /* nothing found : store previous one */ + } + + /* store sequence */ + { + size_t litLength = start - anchor; + if (offset) offset_1 = offset; + ZSTD_storeSeq(seqStorePtr, litLength, anchor, offset, matchLength-MINMATCH); + ip = start + matchLength; + anchor = ip; + } + + } + + /* Last Literals */ + { + size_t lastLLSize = iend - anchor; + memcpy(seqStorePtr->lit, anchor, lastLLSize); + seqStorePtr->lit += lastLLSize; + } + + /* Final compression stage */ + return ZSTD_compressSequences((BYTE*)dst, maxDstSize, + seqStorePtr, srcSize); +} + + size_t ZSTD_HC_compressBlock_lazy(ZSTD_HC_CCtx* ctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize) { seqStore_t* seqStorePtr = &(ctx->seqStore); @@ -343,19 +464,19 @@ size_t ZSTD_HC_compressBlock_lazy(ZSTD_HC_CCtx* ctx, void* dst, size_t maxDstSiz if (MEM_read32(ip) == MEM_read32(ip - offset_1)) { size_t ml2 = ZSTD_count(ip+MINMATCH, ip+MINMATCH-offset_1, iend) + MINMATCH; - int gain2 = (int)(ml2 * 4); - int gain1 = (int)(matchLength*4 - ZSTD_highbit((U32)offset)); + int gain2 = (int)(ml2 * 3); + int gain1 = (int)(matchLength*3 - ZSTD_highbit((U32)offset+1) + 1); if (gain2 > gain1) { matchLength = ml2, offset = 0, start = ip; - break; + } } { size_t ml2 = ZSTD_HC_insertAndFindBestMatch_selectMLS(ctx, ip, iend, &match, maxSearches, mls); size_t offset2 = ip - match; - int gain2 = (int)(ml2*5 - ZSTD_highbit((U32)offset2)); /* raw approx */ - int gain1 = (int)(matchLength*5 - ZSTD_highbit((U32)offset)); + int gain2 = (int)(ml2*3 - ZSTD_highbit((U32)offset2+1)); /* raw approx */ + int gain1 = (int)(matchLength*3 - ZSTD_highbit((U32)offset+1) + 3); if (gain2 > gain1) { matchLength = ml2, offset = offset2, start = ip; @@ -488,11 +609,20 @@ static size_t ZSTD_HC_compress_generic (ZSTD_HC_CCtx* ctxPtr, BYTE* const oend = op + maxDstSize; size_t (*blockCompressor) (ZSTD_HC_CCtx* ctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize); - if (ctxPtr->params.strategy == ZSTD_HC_greedy) - blockCompressor = ZSTD_HC_compressBlock_greedy; - else - blockCompressor = ZSTD_HC_compressBlock_lazy; - + switch(ctxPtr->params.strategy) + { + case ZSTD_HC_greedy: + blockCompressor = ZSTD_HC_compressBlock_greedy; + break; + case ZSTD_HC_lazy: + blockCompressor = ZSTD_HC_compressBlock_lazy; + break; + case ZSTD_HC_lazydeep: + blockCompressor = ZSTD_HC_compressBlock_lazydeep; + break; + default : + return ERROR(GENERIC); /* unknown block compressor */ + } while (remaining > blockSize) { diff --git a/lib/zstdhc_static.h b/lib/zstdhc_static.h index cbc938540..40d0b60c8 100644 --- a/lib/zstdhc_static.h +++ b/lib/zstdhc_static.h @@ -45,7 +45,7 @@ extern "C" { /* ************************************* * Types ***************************************/ -typedef enum { ZSTD_HC_greedy, ZSTD_HC_lazy } ZSTD_HC_strategy; +typedef enum { ZSTD_HC_greedy, ZSTD_HC_lazy, ZSTD_HC_lazydeep } ZSTD_HC_strategy; typedef struct { U32 windowLog; /* largest match distance : impact decompression buffer size */ diff --git a/programs/Makefile b/programs/Makefile index 82125c994..8c7003232 100644 --- a/programs/Makefile +++ b/programs/Makefile @@ -30,7 +30,7 @@ # fullbench32: Same as fullbench, but forced to compile in 32-bits mode # ########################################################################## -VERSION?= 0.3.2 +VERSION?= 0.3.3 DESTDIR?= PREFIX ?= /usr/local diff --git a/programs/paramgrill.c b/programs/paramgrill.c index 4e84d80ae..d27a22b5f 100644 --- a/programs/paramgrill.c +++ b/programs/paramgrill.c @@ -425,16 +425,16 @@ static size_t BMK_benchParam(BMK_result_t* resultPtr, return 0; } -const char* g_stratName[2] = { "ZSTD_HC_greedy", "ZSTD_HC_lazy " }; +const char* g_stratName[] = { "ZSTD_HC_greedy ", "ZSTD_HC_lazy ", "ZSTD_HC_lazydeep" }; static void BMK_printWinner(FILE* f, U32 cLevel, BMK_result_t result, ZSTD_HC_parameters params, size_t srcSize) { DISPLAY("\r%79s\r", ""); - fprintf(f," {%3u,%3u,%3u,%3u,%3u, %s }, ", + fprintf(f," {%3u,%3u,%3u,%3u,%3u, %s }, ", params.windowLog, params.chainLog, params.hashLog, params.searchLog, params.searchLength, g_stratName[params.strategy]); fprintf(f, - "/* level %2u */ /* R:%5.3f at %5.1f MB/s - %5.1f MB/s */ \n", + "/* level %2u */ /* R:%5.3f at %5.1f MB/s - %5.1f MB/s */\n", cLevel, (double)srcSize / result.cSize, (double)result.cSpeed / 1000., (double)result.dSpeed / 1000.); } @@ -576,7 +576,7 @@ static BYTE g_alreadyTested[ZSTD_HC_WINDOWLOG_MAX+1-ZSTD_HC_WINDOWLOG_MIN] [ZSTD_HC_HASHLOG_MAX+1-ZSTD_HC_HASHLOG_MIN] [ZSTD_HC_SEARCHLOG_MAX+1-ZSTD_HC_SEARCHLOG_MIN] [ZSTD_HC_SEARCHLENGTH_MAX+1-ZSTD_HC_SEARCHLENGTH_MIN] - [2 /* strategy */ ] = {}; /* init to zero */ + [3 /* strategy */ ] = {}; /* init to zero */ #define NB_TESTS_PLAYED(p) \ g_alreadyTested[p.windowLog-ZSTD_HC_WINDOWLOG_MIN] \ @@ -628,9 +628,9 @@ static void playAround(FILE* f, winnerInfo_t* winners, case 9: p.searchLength--; break; case 10: - p.strategy = ZSTD_HC_lazy; break; + p.strategy = (ZSTD_HC_strategy)(((U32)p.strategy)+1); break; case 11: - p.strategy = ZSTD_HC_greedy; break; + p.strategy = (ZSTD_HC_strategy)(((U32)p.strategy)-1); break; } } @@ -647,7 +647,7 @@ static void playAround(FILE* f, winnerInfo_t* winners, if (p.searchLength > ZSTD_HC_SEARCHLENGTH_MAX) continue; if (p.searchLength < ZSTD_HC_SEARCHLENGTH_MIN) continue; if (p.strategy < ZSTD_HC_greedy) continue; - if (p.strategy > ZSTD_HC_lazy) continue; + if (p.strategy > ZSTD_HC_lazydeep) continue; /* exclude faster if already played params */ if (FUZ_rand(&g_rand) & ((1 << NB_TESTS_PLAYED(p))-1)) @@ -680,7 +680,7 @@ static void BMK_selectRandomStart( p.searchLog = FUZ_rand(&g_rand) % (ZSTD_HC_SEARCHLOG_MAX+1 - ZSTD_HC_SEARCHLOG_MIN) + ZSTD_HC_SEARCHLOG_MIN; p.windowLog = FUZ_rand(&g_rand) % (ZSTD_HC_WINDOWLOG_MAX+1 - ZSTD_HC_WINDOWLOG_MIN) + ZSTD_HC_WINDOWLOG_MIN; p.searchLength=FUZ_rand(&g_rand) % (ZSTD_HC_SEARCHLENGTH_MAX+1 - ZSTD_HC_SEARCHLENGTH_MIN) + ZSTD_HC_SEARCHLENGTH_MIN; - p.strategy = (ZSTD_HC_strategy) (FUZ_rand(&g_rand) & 1); + p.strategy = (ZSTD_HC_strategy) (FUZ_rand(&g_rand) % 3); playAround(f, winners, p, srcBuffer, srcSize, ctx); } else diff --git a/programs/zstdcli.c b/programs/zstdcli.c index 5bfe25b62..b22095983 100644 --- a/programs/zstdcli.c +++ b/programs/zstdcli.c @@ -70,7 +70,7 @@ **************************************/ #define COMPRESSOR_NAME "zstd command line interface" #ifndef ZSTD_VERSION -# define ZSTD_VERSION "v0.3.0" +# define ZSTD_VERSION "v0.3.3" #endif #define AUTHOR "Yann Collet" #define WELCOME_MESSAGE "*** %s %i-bits %s, by %s (%s) ***\n", COMPRESSOR_NAME, (int)(sizeof(void*)*8), ZSTD_VERSION, AUTHOR, __DATE__