git.ipfire.org Git - thirdparty/zstd.git/commitdiff
added a faster block splitter variant split5
author Yann Collet <cyan@fb.com>
Mon, 21 Oct 2024 21:56:43 +0000 (14:56 -0700)
committer Yann Collet <cyan@fb.com>
Mon, 21 Oct 2024 21:56:43 +0000 (14:56 -0700)
that samples 1 in 5 positions.

This variant is fast enough for lazy2 and btlazy2,
but it is less effective in combination with the post-splitter at higher levels (>= btopt).

lib/compress/zstd_compress.c
lib/compress/zstd_preSplit.c
lib/compress/zstd_preSplit.h

index fcef55bf18e61a8185efd4a7254347fae8b2c714..e7a07a48415968d3520e964ca041a58d74a65fd6 100644 (file)
@@ -4500,8 +4500,10 @@ static size_t ZSTD_optimalBlockSize(ZSTD_CCtx* cctx, const void* src, size_t src
         return MIN(srcSize, blockSizeMax);
     /* dynamic splitting has a cpu cost for analysis,
      * due to that cost it's only used for btlazy2+ strategies */
-    if (strat >= ZSTD_btlazy2)
-        return ZSTD_splitBlock_4k(src, srcSize, blockSizeMax, cctx->tmpWorkspace, cctx->tmpWkspSize);
+    if (strat >= ZSTD_btopt)
+        return ZSTD_splitBlock(src, srcSize, blockSizeMax, split_lvl2, cctx->tmpWorkspace, cctx->tmpWkspSize);
+    if (strat >= ZSTD_lazy2)
+        return ZSTD_splitBlock(src, srcSize, blockSizeMax, split_lvl1, cctx->tmpWorkspace, cctx->tmpWkspSize);
     /* blind split strategy
      * no cpu cost, but can over-split homogeneous data.
      * heuristic, tested as being "generally better".
index f831dabdec3125c0c3756a128e635ee0edb96601..721a52debcb45828588965f6ff2b3a96df2502ee 100644 (file)
@@ -46,22 +46,36 @@ static void initStats(FPStats* fpstats)
     ZSTD_memset(fpstats, 0, sizeof(FPStats));
 }
 
-static void addToFingerprint(FingerPrint* fp, const void* src, size_t s)
+FORCE_INLINE_TEMPLATE void addEvents_generic(FingerPrint* fp, const void* src, size_t srcSize, size_t samplingRate)
 {
     const char* p = (const char*)src;
-    size_t limit = s - HASHLENGTH + 1;
+    size_t limit = srcSize - HASHLENGTH + 1;
     size_t n;
-    assert(s >= HASHLENGTH);
-    for (n = 0; n < limit; n++) {
-        fp->events[hash2(p++)]++;
+    assert(srcSize >= HASHLENGTH);
+    for (n = 0; n < limit; n+=samplingRate) {
+        fp->events[hash2(p+n)]++;
     }
-    fp->nbEvents += limit;
+    fp->nbEvents += limit/samplingRate;
 }
 
-static void recordFingerprint(FingerPrint* fp, const void* src, size_t s)
+#define ADDEVENTS_RATE(_rate) ZSTD_addEvents_##_rate
+
+#define ZSTD_GEN_ADDEVENTS_SAMPLE(_rate)                                                \
+    static void ADDEVENTS_RATE(_rate)(FingerPrint* fp, const void* src, size_t srcSize) \
+    {                                                                                   \
+        return addEvents_generic(fp, src, srcSize, _rate);                              \
+    }
+
+ZSTD_GEN_ADDEVENTS_SAMPLE(1);
+ZSTD_GEN_ADDEVENTS_SAMPLE(5);
+
+
+typedef void (*addEvents_f)(FingerPrint* fp, const void* src, size_t srcSize);
+
+static void recordFingerprint(FingerPrint* fp, const void* src, size_t s, addEvents_f addEvents)
 {
     ZSTD_memset(fp, 0, sizeof(*fp));
-    addToFingerprint(fp, src, s);
+    addEvents(fp, src, s);
 }
 
 static S64 abs64(S64 i) { return (i < 0) ? -i : i; }
@@ -124,8 +138,8 @@ static void removeEvents(FingerPrint* acc, const FingerPrint* slice)
 
 #define CHUNKSIZE (8 << 10)
 /* Note: technically, we use CHUNKSIZE, so that's 8 KB */
-size_t ZSTD_splitBlock_4k(const void* src, size_t srcSize,
-                        size_t blockSizeMax,
+static size_t ZSTD_splitBlock_byChunks(const void* src, size_t srcSize,
+                        size_t blockSizeMax, addEvents_f f,
                         void* workspace, size_t wkspSize)
 {
     FPStats* const fpstats = (FPStats*)workspace;
@@ -140,18 +154,28 @@ size_t ZSTD_splitBlock_4k(const void* src, size_t srcSize,
     assert(wkspSize >= sizeof(FPStats)); (void)wkspSize;
 
     initStats(fpstats);
-    recordFingerprint(&fpstats->pastEvents, p, CHUNKSIZE);
+    recordFingerprint(&fpstats->pastEvents, p, CHUNKSIZE, f);
     for (pos = CHUNKSIZE; pos < blockSizeMax; pos += CHUNKSIZE) {
         assert(pos <= blockSizeMax - CHUNKSIZE);
-        recordFingerprint(&fpstats->newEvents, p + pos, CHUNKSIZE);
+        recordFingerprint(&fpstats->newEvents, p + pos, CHUNKSIZE, f);
         if (compareFingerprints(&fpstats->pastEvents, &fpstats->newEvents, penalty)) {
             return pos;
         } else {
             mergeEvents(&fpstats->pastEvents, &fpstats->newEvents);
-            ZSTD_memset(&fpstats->newEvents, 0, sizeof(fpstats->newEvents));
             penalty = penalty - 1 + (penalty == 0);
         }
     }
     return blockSizeMax;
     (void)flushEvents; (void)removeEvents;
 }
+
+size_t ZSTD_splitBlock(const void* src, size_t srcSize,
+                    size_t blockSizeMax, ZSTD_SplitBlock_strategy_e splitStrat,
+                    void* workspace, size_t wkspSize)
+{
+    if (splitStrat == split_lvl2)
+        return ZSTD_splitBlock_byChunks(src, srcSize, blockSizeMax, ADDEVENTS_RATE(1), workspace, wkspSize);
+
+    assert(splitStrat == split_lvl1);
+    return ZSTD_splitBlock_byChunks(src, srcSize, blockSizeMax, ADDEVENTS_RATE(5), workspace, wkspSize);
+}
index 7b6aadd0b19565f740ba4542c37ef7699ed32ad7..2c87d34a698ed4910ac0108681a109ecb1b944f4 100644 (file)
@@ -17,6 +17,8 @@
 extern "C" {
 #endif
 
+typedef enum { split_lvl1, split_lvl2 } ZSTD_SplitBlock_strategy_e;
+
 #define ZSTD_SLIPBLOCK_WORKSPACESIZE 8208
 
 /* note:
@@ -27,7 +29,9 @@ extern "C" {
  * therefore @blockSizeMax must be == 128 KB.
  * This could be extended to smaller sizes in the future.
  */
-size_t ZSTD_splitBlock_4k(const void* src, size_t srcSize, size_t blockSizeMax, void* workspace, size_t wkspSize);
+size_t ZSTD_splitBlock(const void* src, size_t srcSize,
+                    size_t blockSizeMax, ZSTD_SplitBlock_strategy_e splitStrat,
+                    void* workspace, size_t wkspSize);
 
 #if defined (__cplusplus)
 }