From: Yann Collet
Date: Thu, 24 Oct 2024 21:37:00 +0000 (-0700)
Subject: add faster block splitting heuristic, suitable for dfast strategy
X-Git-Tag: v1.5.7^2~70^2~1
X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=da2c0dffd8c3e6306fcf2e8e5bd5f8ef97d7a999;p=thirdparty%2Fzstd.git

add faster block splitting heuristic, suitable for dfast strategy
---

diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c
index e78de2ba8..ee939f227 100644
--- a/lib/compress/zstd_compress.c
+++ b/lib/compress/zstd_compress.c
@@ -4507,11 +4507,13 @@ static size_t ZSTD_optimalBlockSize(ZSTD_CCtx* cctx, const void* src, size_t src
     /* dynamic splitting has a cpu cost for analysis,
      * due to that cost it's only used for higher levels */
     if (strat >= ZSTD_btopt)
-        return ZSTD_splitBlock(src, blockSizeMax, split_lvl3, cctx->tmpWorkspace, cctx->tmpWkspSize);
+        return ZSTD_splitBlock(src, blockSizeMax, 3, cctx->tmpWorkspace, cctx->tmpWkspSize);
     if (strat >= ZSTD_lazy2)
-        return ZSTD_splitBlock(src, blockSizeMax, split_lvl2, cctx->tmpWorkspace, cctx->tmpWkspSize);
+        return ZSTD_splitBlock(src, blockSizeMax, 2, cctx->tmpWorkspace, cctx->tmpWkspSize);
     if (strat >= ZSTD_greedy)
-        return ZSTD_splitBlock(src, blockSizeMax, split_lvl1, cctx->tmpWorkspace, cctx->tmpWkspSize);
+        return ZSTD_splitBlock(src, blockSizeMax, 1, cctx->tmpWorkspace, cctx->tmpWkspSize);
+    if (strat >= ZSTD_dfast)
+        return ZSTD_splitBlock(src, blockSizeMax, 0, cctx->tmpWorkspace, cctx->tmpWkspSize);
     /* blind split strategy
      * heuristic value, tested as being "generally better".
      * no cpu cost, but can over-split homegeneous data.
diff --git a/lib/compress/zstd_preSplit.c b/lib/compress/zstd_preSplit.c
index 77f4534dc..3f6edb612 100644
--- a/lib/compress/zstd_preSplit.c
+++ b/lib/compress/zstd_preSplit.c
@@ -26,8 +26,13 @@
 #define HASHMASK (HASHTABLESIZE - 1)
 #define KNUTH 0x9e3779b9
 
+/* for hashLog > 8, hash 2 bytes.
+ * for hashLog == 8, just take the byte, no hashing.
+ * The speed of this method relies on compile-time constant propagation */
 FORCE_INLINE_TEMPLATE unsigned hash2(const void *p, unsigned hashLog)
 {
+    assert(hashLog >= 8);
+    if (hashLog == 8) return (U32)((const BYTE*)p)[0];
     assert(hashLog <= HASHLOG_MAX);
     return (U32)(MEM_read16(p)) * KNUTH >> (32 - hashLog);
 }
@@ -81,6 +86,7 @@ typedef void (*RecordEvents_f)(Fingerprint* fp, const void* src, size_t srcSize)
 ZSTD_GEN_RECORD_FINGERPRINT(1, 10)
 ZSTD_GEN_RECORD_FINGERPRINT(5, 10)
 ZSTD_GEN_RECORD_FINGERPRINT(11, 9)
+ZSTD_GEN_RECORD_FINGERPRINT(43, 8)
 
 static U64 abs64(S64 s64) { return (U64)((s64 < 0) ? -s64 : s64); }
 
@@ -145,14 +151,14 @@ static void removeEvents(Fingerprint* acc, const Fingerprint* slice)
 
 #define CHUNKSIZE (8 << 10)
 static size_t ZSTD_splitBlock_byChunks(const void* blockStart, size_t blockSize,
-                        ZSTD_SplitBlock_strategy_e splitStrat,
+                        int level,
                         void* workspace, size_t wkspSize)
 {
     static const RecordEvents_f records_fs[] = {
-        FP_RECORD(11), FP_RECORD(5), FP_RECORD(1)
+        FP_RECORD(43), FP_RECORD(11), FP_RECORD(5), FP_RECORD(1)
     };
-    static const unsigned hashParams[] = { 9, 10, 10 };
-    const RecordEvents_f record_f = (assert(splitStrat<=split_lvl3), records_fs[splitStrat]);
+    static const unsigned hashParams[] = { 8, 9, 10, 10 };
+    const RecordEvents_f record_f = (assert(0<=level && level<=3), records_fs[level]);
     FPStats* const fpstats = (FPStats*)workspace;
     const char* p = (const char*)blockStart;
     int penalty = THRESHOLD_PENALTY;
@@ -167,7 +173,7 @@ static size_t ZSTD_splitBlock_byChunks(const void* blockStart, size_t blockSize,
     record_f(&fpstats->pastEvents, p, CHUNKSIZE);
     for (pos = CHUNKSIZE; pos <= blockSize - CHUNKSIZE; pos += CHUNKSIZE) {
         record_f(&fpstats->newEvents, p + pos, CHUNKSIZE);
-        if (compareFingerprints(&fpstats->pastEvents, &fpstats->newEvents, penalty, hashParams[splitStrat])) {
+        if (compareFingerprints(&fpstats->pastEvents, &fpstats->newEvents, penalty, hashParams[level])) {
             return pos;
         } else {
             mergeEvents(&fpstats->pastEvents, &fpstats->newEvents);
@@ -180,9 +186,9 @@ static size_t ZSTD_splitBlock_byChunks(const void* blockStart, size_t blockSize,
 }
 
 size_t ZSTD_splitBlock(const void* blockStart, size_t blockSize,
-                    ZSTD_SplitBlock_strategy_e splitStrat,
+                    int level,
                     void* workspace, size_t wkspSize)
 {
-    assert(splitStrat <= split_lvl3);
-    return ZSTD_splitBlock_byChunks(blockStart, blockSize, splitStrat, workspace, wkspSize);
+    assert(0<=level && level<=3);
+    return ZSTD_splitBlock_byChunks(blockStart, blockSize, level, workspace, wkspSize);
 }
diff --git a/lib/compress/zstd_preSplit.h b/lib/compress/zstd_preSplit.h
index cf829ecee..07d7d4ef5 100644
--- a/lib/compress/zstd_preSplit.h
+++ b/lib/compress/zstd_preSplit.h
@@ -17,11 +17,10 @@ extern "C" {
 #endif
 
-typedef enum { split_lvl1, split_lvl2, split_lvl3 } ZSTD_SplitBlock_strategy_e;
-
 #define ZSTD_SLIPBLOCK_WORKSPACESIZE 8208
 
-/* note:
+/* @level must be a value between 0 and 3.
+ * higher levels spend more energy to find block boundaries
  * @workspace must be aligned on 8-bytes boundaries
  * @wkspSize must be at least >= ZSTD_SLIPBLOCK_WORKSPACESIZE
  * note2:
@@ -30,7 +29,7 @@ typedef enum { split_lvl1, split_lvl2, split_lvl3 } ZSTD_SplitBlock_strategy_e;
  * This could be extended to smaller sizes in the future.
  */
 size_t ZSTD_splitBlock(const void* blockStart, size_t blockSize,
-                    ZSTD_SplitBlock_strategy_e splitStrat,
+                    int level,
                     void* workspace, size_t wkspSize);
 
 #if defined (__cplusplus)
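
Why the new FP_RECORD(43) variant with hashLog == 8 is the cheap tier: hash2() degenerates to a single byte load, and because the sampling rate and hash size reach the recorder as compile-time constants, the compiler can specialize the whole sampling loop. Below is a minimal sketch of such a generic recorder, assuming a Fingerprint that holds an events[] histogram and an nbEvents counter; the field names and exact loop bounds are assumptions for illustration, not the verbatim zstd_preSplit.c code:

    /* sketch only: generic recorder, specialized via constant propagation
     * when instantiated by ZSTD_GEN_RECORD_FINGERPRINT(samplingRate, hashLog) */
    FORCE_INLINE_TEMPLATE
    void recordFingerprint_generic(Fingerprint* fp,
                                   const void* src, size_t srcSize,
                                   size_t samplingRate, unsigned hashLog)
    {
        const char* const p = (const char*)src;
        size_t n;
        for (n = 0; n + 2 <= srcSize; n += samplingRate) {
            /* with hashLog == 8, hash2() is a plain byte load: no multiply */
            fp->events[hash2(p + n, hashLog)]++;
        }
        fp->nbEvents += srcSize / samplingRate;
    }

A sampling rate of 43 visits roughly 190 positions per 8 KB chunk, versus ~745 for FP_RECORD(11), which is what makes level 0 affordable for the dfast tier.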
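A usage sketch for the revised ZSTD_splitBlock() API (hypothetical caller, not part of this patch): level 0 is the new cheap heuristic that ZSTD_optimalBlockSize() now enables from ZSTD_dfast upward; the workspace must be 8-byte aligned and at least ZSTD_SLIPBLOCK_WORKSPACESIZE bytes. On divergence the function returns the position of the first detected boundary within the block:

    #include "zstd_preSplit.h"

    /* hypothetical helper: propose a split point for one input block */
    static size_t firstSplit(const void* block, size_t blockSize)
    {
        /* the union guarantees the 8-byte alignment the API requires */
        union { unsigned long long align8;
                char bytes[ZSTD_SLIPBLOCK_WORKSPACESIZE]; } wksp;
        return ZSTD_splitBlock(block, blockSize,
                               0 /* level: cheapest analysis, dfast tier */,
                               wksp.bytes, sizeof(wksp));
    }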