From: Yann Collet
Date: Mon, 21 Oct 2024 21:56:43 +0000 (-0700)
Subject: added a faster block splitter variant
X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=refs%2Fheads%2Fsplit5;p=thirdparty%2Fzstd.git

added a faster block splitter variant

that samples 1 in 5 positions.
This variant is fast enough for lazy2 and btlazy2,
but it combines less well with the post-splitter at higher levels (>= btopt).
---

diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c
index fcef55bf1..e7a07a484 100644
--- a/lib/compress/zstd_compress.c
+++ b/lib/compress/zstd_compress.c
@@ -4500,8 +4500,10 @@ static size_t ZSTD_optimalBlockSize(ZSTD_CCtx* cctx, const void* src, size_t src
         return MIN(srcSize, blockSizeMax);
     /* dynamic splitting has a cpu cost for analysis,
      * due to that cost it's only used for btlazy2+ strategies */
-    if (strat >= ZSTD_btlazy2)
-        return ZSTD_splitBlock_4k(src, srcSize, blockSizeMax, cctx->tmpWorkspace, cctx->tmpWkspSize);
+    if (strat >= ZSTD_btopt)
+        return ZSTD_splitBlock(src, srcSize, blockSizeMax, split_lvl2, cctx->tmpWorkspace, cctx->tmpWkspSize);
+    if (strat >= ZSTD_lazy2)
+        return ZSTD_splitBlock(src, srcSize, blockSizeMax, split_lvl1, cctx->tmpWorkspace, cctx->tmpWkspSize);
     /* blind split strategy
      * no cpu cost, but can over-split homegeneous data.
      * heuristic, tested as being "generally better".
diff --git a/lib/compress/zstd_preSplit.c b/lib/compress/zstd_preSplit.c
index f831dabde..721a52deb 100644
--- a/lib/compress/zstd_preSplit.c
+++ b/lib/compress/zstd_preSplit.c
@@ -46,22 +46,36 @@ static void initStats(FPStats* fpstats)
     ZSTD_memset(fpstats, 0, sizeof(FPStats));
 }
 
-static void addToFingerprint(FingerPrint* fp, const void* src, size_t s)
+FORCE_INLINE_TEMPLATE void addEvents_generic(FingerPrint* fp, const void* src, size_t srcSize, size_t samplingRate)
 {
     const char* p = (const char*)src;
-    size_t limit = s - HASHLENGTH + 1;
+    size_t limit = srcSize - HASHLENGTH + 1;
     size_t n;
-    assert(s >= HASHLENGTH);
-    for (n = 0; n < limit; n++) {
-        fp->events[hash2(p++)]++;
+    assert(srcSize >= HASHLENGTH);
+    for (n = 0; n < limit; n+=samplingRate) {
+        fp->events[hash2(p+n)]++;
     }
-    fp->nbEvents += limit;
+    fp->nbEvents += limit/samplingRate;
 }
 
-static void recordFingerprint(FingerPrint* fp, const void* src, size_t s)
+#define ADDEVENTS_RATE(_rate) ZSTD_addEvents_##_rate
+
+#define ZSTD_GEN_ADDEVENTS_SAMPLE(_rate) \
+    static void ADDEVENTS_RATE(_rate)(FingerPrint* fp, const void* src, size_t srcSize) \
+    { \
+        return addEvents_generic(fp, src, srcSize, _rate); \
+    }
+
+ZSTD_GEN_ADDEVENTS_SAMPLE(1);
+ZSTD_GEN_ADDEVENTS_SAMPLE(5);
+
+
+typedef void (*addEvents_f)(FingerPrint* fp, const void* src, size_t srcSize);
+
+static void recordFingerprint(FingerPrint* fp, const void* src, size_t s, addEvents_f addEvents)
 {
     ZSTD_memset(fp, 0, sizeof(*fp));
-    addToFingerprint(fp, src, s);
+    addEvents(fp, src, s);
 }
 
 static S64 abs64(S64 i) { return (i < 0) ? -i : i; }
@@ -124,8 +138,8 @@ static void removeEvents(FingerPrint* acc, const FingerPrint* slice)
 
 #define CHUNKSIZE (8 << 10)
 /* Note: technically, we use CHUNKSIZE, so that's 8 KB */
-size_t ZSTD_splitBlock_4k(const void* src, size_t srcSize,
-                size_t blockSizeMax,
+static size_t ZSTD_splitBlock_byChunks(const void* src, size_t srcSize,
+                size_t blockSizeMax, addEvents_f f,
                 void* workspace, size_t wkspSize)
 {
     FPStats* const fpstats = (FPStats*)workspace;
@@ -140,18 +154,28 @@ size_t ZSTD_splitBlock_4k(const void* src, size_t srcSize,
     assert(wkspSize >= sizeof(FPStats)); (void)wkspSize;
     initStats(fpstats);
-    recordFingerprint(&fpstats->pastEvents, p, CHUNKSIZE);
+    recordFingerprint(&fpstats->pastEvents, p, CHUNKSIZE, f);
     for (pos = CHUNKSIZE; pos < blockSizeMax; pos += CHUNKSIZE) {
         assert(pos <= blockSizeMax - CHUNKSIZE);
-        recordFingerprint(&fpstats->newEvents, p + pos, CHUNKSIZE);
+        recordFingerprint(&fpstats->newEvents, p + pos, CHUNKSIZE, f);
         if (compareFingerprints(&fpstats->pastEvents, &fpstats->newEvents, penalty)) {
             return pos;
         } else {
             mergeEvents(&fpstats->pastEvents, &fpstats->newEvents);
-            ZSTD_memset(&fpstats->newEvents, 0, sizeof(fpstats->newEvents));
             penalty = penalty - 1 + (penalty == 0);
         }
     }
     return blockSizeMax;
     (void)flushEvents; (void)removeEvents;
 }
+
+size_t ZSTD_splitBlock(const void* src, size_t srcSize,
+                size_t blockSizeMax, ZSTD_SplitBlock_strategy_e splitStrat,
+                void* workspace, size_t wkspSize)
+{
+    if (splitStrat == split_lvl2)
+        return ZSTD_splitBlock_byChunks(src, srcSize, blockSizeMax, ADDEVENTS_RATE(1), workspace, wkspSize);
+
+    assert(splitStrat == split_lvl1);
+    return ZSTD_splitBlock_byChunks(src, srcSize, blockSizeMax, ADDEVENTS_RATE(5), workspace, wkspSize);
+}
diff --git a/lib/compress/zstd_preSplit.h b/lib/compress/zstd_preSplit.h
index 7b6aadd0b..2c87d34a6 100644
--- a/lib/compress/zstd_preSplit.h
+++ b/lib/compress/zstd_preSplit.h
@@ -17,6 +17,8 @@ extern "C" {
 
 #endif
 
+typedef enum { split_lvl1, split_lvl2 } ZSTD_SplitBlock_strategy_e;
+
 #define ZSTD_SLIPBLOCK_WORKSPACESIZE 8208
 
 /* note:
@@ -27,7 +29,9 @@ extern "C" {
  * therefore @blockSizeMax must be == 128 KB.
  * This could be extended to smaller sizes in the future.
  */
-size_t ZSTD_splitBlock_4k(const void* src, size_t srcSize, size_t blockSizeMax, void* workspace, size_t wkspSize);
+size_t ZSTD_splitBlock(const void* src, size_t srcSize,
+                size_t blockSizeMax, ZSTD_SplitBlock_strategy_e splitStrat,
+                void* workspace, size_t wkspSize);
 
 #if defined (__cplusplus)
 }
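
For readers less familiar with the specialization pattern used in this patch,
here is a standalone sketch of the same idea: one generic scanning loop,
per-rate wrappers stamped out by a macro so the compiler can specialize the
loop for each constant sampling rate, and run-time selection through a
function pointer. This is not zstd code: Histogram, SAMPLER, GEN_SAMPLER and
the toy hash2() below are illustrative stand-ins, and plain static inline
stands in for zstd's FORCE_INLINE_TEMPLATE.

    #include <stdio.h>
    #include <string.h>

    #define HASHLENGTH 2   /* mirrors the 2-byte window implied by hash2() */

    typedef struct { unsigned events[256]; size_t nbEvents; } Histogram;

    /* toy stand-in for hash2(): folds two consecutive bytes into 8 bits */
    static unsigned hash2(const char* p)
    {
        return ((unsigned char)p[0] * 251u + (unsigned char)p[1]) & 255u;
    }

    /* generic body: with a compile-time constant samplingRate, the compiler
     * can specialize this loop per instantiation
     * (rate 1 degenerates into a plain dense scan) */
    static inline void addEvents_generic(Histogram* h, const void* src,
                                         size_t srcSize, size_t samplingRate)
    {
        const char* p = (const char*)src;
        size_t limit = srcSize - HASHLENGTH + 1;
        size_t n;
        for (n = 0; n < limit; n += samplingRate)
            h->events[hash2(p + n)]++;
        h->nbEvents += limit / samplingRate;
    }

    /* stamp out one fixed-rate wrapper per sampling rate */
    #define SAMPLER(_rate) addEvents_rate##_rate
    #define GEN_SAMPLER(_rate) \
        static void SAMPLER(_rate)(Histogram* h, const void* src, size_t srcSize) \
        { addEvents_generic(h, src, srcSize, _rate); }

    GEN_SAMPLER(1)
    GEN_SAMPLER(5)

    /* run-time dispatch, as ZSTD_splitBlock does via split_lvl1/split_lvl2 */
    typedef void (*sampler_f)(Histogram*, const void*, size_t);

    int main(void)
    {
        static const char data[] = "an example buffer to fingerprint";
        sampler_f const samplers[2] = { SAMPLER(1), SAMPLER(5) };
        int i;
        for (i = 0; i < 2; i++) {
            Histogram h;
            memset(&h, 0, sizeof(h));
            samplers[i](&h, data, sizeof(data) - 1);
            printf("sampler %d recorded %zu events\n", i, h.nbEvents);
        }
        return 0;
    }

In the patch above, rate 1 corresponds to split_lvl2 (the exhaustive scan
kept for btopt and above), while rate 5 corresponds to split_lvl1, which
hashes only one position in five and thus makes the splitting analysis cheap
enough to enable for lazy2 and btlazy2.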