From: inikep Date: Tue, 22 Mar 2016 10:56:22 +0000 (+0100) Subject: improved 4reps in ZSTD_compressBlock_lazy_generic X-Git-Tag: v0.6.0^2~17^2~16^2~8^2~7 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=2b942881ac258727cbc9870e27c7d60206350a17;p=thirdparty%2Fzstd.git improved 4reps in ZSTD_compressBlock_lazy_generic kSlotNew = 0 --- diff --git a/lib/zstd_compress.c b/lib/zstd_compress.c index 397e76189..e9b9d4a6e 100644 --- a/lib/zstd_compress.c +++ b/lib/zstd_compress.c @@ -1617,8 +1617,8 @@ void ZSTD_compressBlock_lazy_generic(ZSTD_CCtx* ctx, if (depth==0) goto _storeSequence; } else { size_t mlRep = ZSTD_count(ip+MINMATCH, ip+MINMATCH-rep[i], iend) + MINMATCH; - int gain2 = (int)(mlRep * 3 /*- ZSTD_highbit((U32)i+1)*/); - int gain1 = (int)(matchLength*3 - /*ZSTD_highbit((U32)offset+1)*/ + 1); + int gain2 = (int)(mlRep * 3 /*- ZSTD_highbit((U32)i+1)*/ + (i==1)); + int gain1 = (int)(matchLength*3 - /*ZSTD_highbit((U32)offset+1)*/ + 1 + (offset==1)); if (gain2 > gain1) matchLength = mlRep, offset = i; } @@ -1642,10 +1642,10 @@ void ZSTD_compressBlock_lazy_generic(ZSTD_CCtx* ctx, while (ip= ZSTD_REP_NUM) && (MEM_read32(ip) == MEM_read32(ip - rep[i]))) { + if (MEM_read32(ip) == MEM_read32(ip - rep[i])) { size_t mlRep = ZSTD_count(ip+MINMATCH, ip+MINMATCH-rep[i], iend) + MINMATCH; int gain2 = (int)(mlRep * 3); - int gain1 = (int)(matchLength*3 - ZSTD_highbit((U32)offset+1) + 1); + int gain1 = (int)(matchLength*3 - ZSTD_highbit((U32)offset+1) + 1 + (offset= MINMATCH) && (gain2 > gain1)) matchLength = mlRep, offset = i, start = ip; } @@ -1663,10 +1663,10 @@ void ZSTD_compressBlock_lazy_generic(ZSTD_CCtx* ctx, if ((depth==2) && (ip= ZSTD_REP_NUM) && (MEM_read32(ip) == MEM_read32(ip - rep[i]))) { + if (MEM_read32(ip) == MEM_read32(ip - rep[i])) { size_t ml2 = ZSTD_count(ip+MINMATCH, ip+MINMATCH-rep[i], iend) + MINMATCH; int gain2 = (int)(ml2 * 4); - int gain1 = (int)(matchLength*4 - ZSTD_highbit((U32)offset+1) + 1); + int gain1 = 
(int)(matchLength*4 - ZSTD_highbit((U32)offset+1) + 1 + (offset= MINMATCH) && (gain2 > gain1)) matchLength = ml2, offset = i, start = ip; } @@ -1693,7 +1693,7 @@ _storeSequence: { #if ZSTD_REP_NUM == 4 if (offset >= ZSTD_REP_NUM) { -#if 0 +#if 1 rep[3] = rep[2]; rep[2] = rep[1]; rep[1] = rep[0]; diff --git a/lib/zstd_decompress.c b/lib/zstd_decompress.c index f0475bf25..651087c4d 100644 --- a/lib/zstd_decompress.c +++ b/lib/zstd_decompress.c @@ -691,19 +691,26 @@ static void ZSTD_decodeSequence(seq_t* seq, seqState_t* seqState, const U32 mls) if (offset == 3) seqState->prevOffset[3] = seqState->prevOffset[2]; seqState->prevOffset[2] = seqState->prevOffset[1]; } - offset = temp; seqState->prevOffset[1] = seqState->prevOffset[0]; - seqState->prevOffset[0] = offset; + seqState->prevOffset[0] = offset = temp; + } else { offset = seqState->prevOffset[0]; } } else { offset -= ZSTD_REP_MOVE; +#if 1 // faster without kSlotNew + seqState->prevOffset[3] = seqState->prevOffset[2]; + seqState->prevOffset[2] = seqState->prevOffset[1]; + seqState->prevOffset[1] = seqState->prevOffset[0]; + seqState->prevOffset[0] = offset; +#else if (kSlotNew < 3) seqState->prevOffset[3] = seqState->prevOffset[2]; if (kSlotNew < 2) seqState->prevOffset[2] = seqState->prevOffset[1]; if (kSlotNew < 1) seqState->prevOffset[1] = seqState->prevOffset[0]; seqState->prevOffset[kSlotNew] = offset; +#endif } #else // ZSTD_REP_NUM == 1 #if 0 diff --git a/lib/zstd_internal.h b/lib/zstd_internal.h index 0abdcdfa9..5809f8754 100644 --- a/lib/zstd_internal.h +++ b/lib/zstd_internal.h @@ -71,7 +71,7 @@ #else #define ZSTD_REP_NUM 1 #define ZSTD_REP_INIT 2 - #define ZSTD_REP_MOVE 0//(ZSTD_REP_NUM-1) + #define ZSTD_REP_MOVE 0 //(ZSTD_REP_NUM-1) #endif #define KB *(1 <<10) diff --git a/programs/Makefile b/programs/Makefile index ca2ba99fc..cd432f790 100644 --- a/programs/Makefile +++ b/programs/Makefile @@ -46,7 +46,7 @@ DESTDIR?= PREFIX ?= /usr/local CPPFLAGS= -I../lib -DZSTD_VERSION=\"$(VERSION)\" CFLAGS ?= -O3 
# -falign-loops=32 # not always beneficial -CFLAGS += -std=c99 -Wall -Wextra -Wcast-qual -Wcast-align -Wshadow -Wstrict-aliasing=1 -Wswitch-enum -Wstrict-prototypes -Wundef +CFLAGS += -std=gnu99 -Wall -Wextra -Wcast-qual -Wcast-align -Wshadow -Wstrict-aliasing=1 -Wswitch-enum -Wstrict-prototypes -Wundef FLAGS = $(CPPFLAGS) $(CFLAGS) $(LDFLAGS) $(MOREFLAGS) BINDIR = $(PREFIX)/bin diff --git a/programs/bench.c b/programs/bench.c index 5808c2ecc..20f2ee0ab 100644 --- a/programs/bench.c +++ b/programs/bench.c @@ -52,14 +52,14 @@ #include <stdio.h> /* fprintf, fopen, ftello64 */ #include <sys/types.h> /* stat64 */ #include <sys/stat.h> /* stat64 */ -#include <time.h> /* clock_t, clock, CLOCKS_PER_SEC */ +#include <time.h> /* clock_t, clock, nanosleep, CLOCKS_PER_SEC */ #ifdef WINDOWS - #define mili_sleep(mili) Sleep(mili) + #define mili_sleep(mili) Sleep(mili) #else - #define mili_sleep(mili) usleep(mili*1000) #include <sys/resource.h> /* setpriority */ + #define mili_sleep(mili) { struct timespec t; t.tv_sec=0; t.tv_nsec=mili*1000000L; nanosleep(&t, NULL); } #endif - + /* sleep : posix - windows - others */ #if !defined(_WIN32) && (defined(__unix__) || defined(__unix) || (defined(__APPLE__) && defined(__MACH__))) # include <unistd.h> @@ -379,11 +379,11 @@ _findError: if (crcOrig == crcCheck) { - DISPLAYLEVEL(2, "%2i-%-17.17s :%10i ->%10i (%5.3f),%6.1f MB/s ,%6.1f MB/s \n", cLevel, displayName, (int)srcSize, (int)cSize, ratio, (double)srcSize / fastestC / 1000., (double)srcSize / fastestD / 1000.); result->ratio = ratio; result->cSize = cSize; - result->cSpeed = (double)srcSize / fastestC / 1000.; - result->dSpeed = (double)srcSize / fastestD / 1000.; + result->cSpeed = (double)srcSize / 1000000. / (fastestC / CLOCKS_PER_SEC); + result->dSpeed = (double)srcSize / 1000000.
/ (fastestD / CLOCKS_PER_SEC); + DISPLAYLEVEL(2, "%2i-%-17.17s :%10i ->%10i (%5.3f),%6.1f MB/s ,%6.1f MB/s \n", cLevel, displayName, (int)srcSize, (int)cSize, ratio, result->cSpeed, result->dSpeed); } else DISPLAYLEVEL(2, "%2i-\n", cLevel); @@ -449,7 +449,7 @@ static void BMK_benchCLevel(void* srcBuffer, size_t benchedSize, dictBuffer, dictBufferSize, &result); if (g_displayLevel == 1) { if (additionalParam) - DISPLAY("%-3i%11i (%5.3f) %6.1f MB/s %6.1f MB/s %s (p=%d)\n", -l, (int)result.cSize, result.ratio, result.cSpeed, result.dSpeed, displayName, additionalParam); + DISPLAY("%-3i%11i (%5.3f) %6.1f MB/s %6.1f MB/s %s (param=%d)\n", -l, (int)result.cSize, result.ratio, result.cSpeed, result.dSpeed, displayName, additionalParam); else DISPLAY("%-3i%11i (%5.3f) %6.1f MB/s %6.1f MB/s %s\n", -l, (int)result.cSize, result.ratio, result.cSpeed, result.dSpeed, displayName); total.cSize += result.cSize;