git.ipfire.org Git - thirdparty/zstd.git/commitdiff
new block splitting variant _fromBorders
author Yann Collet <cyan@fb.com>
Thu, 24 Oct 2024 22:57:29 +0000 (15:57 -0700)
committer Yann Collet <cyan@fb.com>
Fri, 25 Oct 2024 23:13:55 +0000 (16:13 -0700)
less precise but still suitable for `fast` strategy.

lib/compress/hist.c
lib/compress/hist.h
lib/compress/zstd_compress.c
lib/compress/zstd_preSplit.c

diff --git a/lib/compress/hist.c b/lib/compress/hist.c
index e2fb431f03ab527ebf0ee8cd86b9f0a408fe63a8..4ccf9a90a9ead81993135c13ee23a0a2d7cb9bf5 100644 (file)
@@ -26,6 +26,16 @@ unsigned HIST_isError(size_t code) { return ERR_isError(code); }
 /*-**************************************************************
  *  Histogram functions
  ****************************************************************/
+void HIST_add(unsigned* count, const void* src, size_t srcSize)
+{
+    const BYTE* ip = (const BYTE*)src;
+    const BYTE* const end = ip + srcSize;
+
+    while (ip<end) {
+        count[*ip++]++;
+    }
+}
+
 unsigned HIST_count_simple(unsigned* count, unsigned* maxSymbolValuePtr,
                            const void* src, size_t srcSize)
 {
diff --git a/lib/compress/hist.h b/lib/compress/hist.h
index 887896b813bc3b977d13ff95c57ce973da2e42fd..e6e39d34890aa99373777e6d4cebdfceee5fb60d 100644 (file)
@@ -73,3 +73,10 @@ size_t HIST_countFast_wksp(unsigned* count, unsigned* maxSymbolValuePtr,
  */
 unsigned HIST_count_simple(unsigned* count, unsigned* maxSymbolValuePtr,
                            const void* src, size_t srcSize);
+
+/*! HIST_add() :
+ *  Lowest level: just add nb of occurrences of characters from @src into @count.
+ *  @count is not reset. @count array is presumed large enough (i.e. 1 KB).
+ *  This function does not need any additional stack memory.
+ */
+void HIST_add(unsigned* count, const void* src, size_t srcSize);
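
For reference, a minimal usage sketch of the new entry point (standalone, not part of the patch; the 256-entry table matches the "1 KB" sizing note above, and the caller is responsible for zero-initializing it):

    unsigned count[256] = {0};                    /* 256 * sizeof(unsigned) == 1 KB, zeroed by the caller */
    const char sample[] = "abracadabra";
    HIST_add(count, sample, sizeof(sample) - 1);  /* accumulates into count; never resets it */
    /* count['a'] == 5, count['b'] == 2, count['r'] == 2, count['c'] == 1, count['d'] == 1 */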
diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c
index ee939f22740a658b921132139f590c521ba9f445..5f8f5aba4885ed097a9495c8e535384aed636704 100644 (file)
@@ -4493,6 +4493,8 @@ static void ZSTD_overflowCorrectIfNeeded(ZSTD_matchState_t* ms,
 
 static size_t ZSTD_optimalBlockSize(ZSTD_CCtx* cctx, const void* src, size_t srcSize, size_t blockSizeMax, ZSTD_strategy strat, S64 savings)
 {
+    /* split level based on compression strategy, from `fast` to `btultra2` */
+    static const int splitLevels[] = { 0, 0, 1, 2, 2, 3, 3, 4, 4, 4 };
     /* note: conservatively only split full blocks (128 KB) currently.
      * While it's possible to go lower, let's keep it simple for a first implementation.
      * Besides, benefits of splitting are reduced when blocks are already small.
@@ -4500,25 +4502,13 @@ static size_t ZSTD_optimalBlockSize(ZSTD_CCtx* cctx, const void* src, size_t src
     if (srcSize < 128 KB || blockSizeMax < 128 KB)
         return MIN(srcSize, blockSizeMax);
     /* do not split incompressible data though:
-     * ensure a 3 bytes per full block overhead limit.
-     * Note: as a consequence, the first full block skips the splitting detector.
+     * require verified savings to allow pre-splitting.
+     * Note: as a consequence, the first full block is not split.
      */
     if (savings < 3) return 128 KB;
     /* dynamic splitting has a cpu cost for analysis,
-     * due to that cost it's only used for higher levels */
-    if (strat >= ZSTD_btopt)
-        return ZSTD_splitBlock(src, blockSizeMax, 3, cctx->tmpWorkspace, cctx->tmpWkspSize);
-    if (strat >= ZSTD_lazy2)
-        return ZSTD_splitBlock(src, blockSizeMax, 2, cctx->tmpWorkspace, cctx->tmpWkspSize);
-    if (strat >= ZSTD_greedy)
-        return ZSTD_splitBlock(src, blockSizeMax, 1, cctx->tmpWorkspace, cctx->tmpWkspSize);
-    if (strat >= ZSTD_dfast)
-        return ZSTD_splitBlock(src, blockSizeMax, 0, cctx->tmpWorkspace, cctx->tmpWkspSize);
-    /* blind split strategy
-     * heuristic value, tested as being "generally better".
-     * no cpu cost, but can over-split homogeneous data.
-     */
-    return 92 KB;
+     * select a variant among multiple gradual speed/accuracy tradeoffs */
+    return ZSTD_splitBlock(src, blockSizeMax, splitLevels[strat], cctx->tmpWorkspace, cctx->tmpWkspSize);
 }
 
 /*! ZSTD_compress_frameChunk() :
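
For reference, the splitLevels table above folds the removed if-chain into a single lookup: dfast and above keep their previous chunk levels, while ZSTD_fast trades the old blind 92 KB split for the new _fromBorders heuristic. A small hedged cross-check (hypothetical helper, not part of the patch; strategy values taken from zstd.h, where ZSTD_fast == 1 and ZSTD_btultra2 == 9):

    #include <assert.h>
    #include "zstd.h"   /* ZSTD_strategy enum */

    static void checkSplitLevelMapping(void)
    {
        static const int splitLevels[] = { 0, 0, 1, 2, 2, 3, 3, 4, 4, 4 };
        assert(splitLevels[ZSTD_fast]     == 0);  /* level 0 -> ZSTD_splitBlock_fromBorders (new) */
        assert(splitLevels[ZSTD_dfast]    == 1);  /* -> ZSTD_splitBlock_byChunks, chunk level 0   */
        assert(splitLevels[ZSTD_lazy2]    == 3);  /* -> ZSTD_splitBlock_byChunks, chunk level 2   */
        assert(splitLevels[ZSTD_btultra2] == 4);  /* -> ZSTD_splitBlock_byChunks, chunk level 3   */
    }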
diff --git a/lib/compress/zstd_preSplit.c b/lib/compress/zstd_preSplit.c
index 3f6edb612f3605944c8d63a3259f3d6f3b4c6b53..d25773950b9a1d283e195e99e2301bad129e8e7d 100644 (file)
@@ -12,6 +12,7 @@
 #include "../common/mem.h" /* S64 */
 #include "../common/zstd_deps.h" /* ZSTD_memset */
 #include "../common/zstd_internal.h" /* ZSTD_STATIC_ASSERT */
+#include "hist.h" /* HIST_add */
 #include "zstd_preSplit.h"
 
 
@@ -77,10 +78,10 @@ typedef void (*RecordEvents_f)(Fingerprint* fp, const void* src, size_t srcSize)
 
 #define FP_RECORD(_rate) ZSTD_recordFingerprint_##_rate
 
-#define ZSTD_GEN_RECORD_FINGERPRINT(_rate, _hSize)                                      \
+#define ZSTD_GEN_RECORD_FINGERPRINT(_rate, _hSize)                                 \
     static void FP_RECORD(_rate)(Fingerprint* fp, const void* src, size_t srcSize) \
-    {                                                                                   \
-        recordFingerprint_generic(fp, src, srcSize, _rate, _hSize);                     \
+    {                                                                              \
+        recordFingerprint_generic(fp, src, srcSize, _rate, _hSize);                \
     }
 
 ZSTD_GEN_RECORD_FINGERPRINT(1, 10)
@@ -185,10 +186,52 @@ static size_t ZSTD_splitBlock_byChunks(const void* blockStart, size_t blockSize,
     (void)flushEvents; (void)removeEvents;
 }
 
+/* ZSTD_splitBlock_fromBorders(): very fast strategy:
+ * compare fingerprints from the beginning and the end of the block,
+ * and derive from their difference whether it's preferable to split in the middle.
+ * The process is repeated a second time for a finer-grained decision;
+ * a third pass did not bring improvements, so I stopped at 2.
+ * Benefits are good enough for a cheap heuristic.
+ * More accurate splitting saves more, but its speed impact is also more perceptible.
+ * For better accuracy, use the more elaborate *_byChunks variant.
+ */
+static size_t ZSTD_splitBlock_fromBorders(const void* blockStart, size_t blockSize,
+                        void* workspace, size_t wkspSize)
+{
+#define SEGMENT_SIZE 512
+    FPStats* const fpstats = (FPStats*)workspace;
+    Fingerprint* middleEvents = (Fingerprint*)(void*)((char*)workspace + 512 * sizeof(unsigned));
+    assert(blockSize == (128 << 10));
+    assert(workspace != NULL);
+    assert((size_t)workspace % ZSTD_ALIGNOF(FPStats) == 0);
+    ZSTD_STATIC_ASSERT(ZSTD_SLIPBLOCK_WORKSPACESIZE >= sizeof(FPStats));
+    assert(wkspSize >= sizeof(FPStats)); (void)wkspSize;
+
+    initStats(fpstats);
+    HIST_add(fpstats->pastEvents.events, blockStart, SEGMENT_SIZE);
+    HIST_add(fpstats->newEvents.events, (const char*)blockStart + blockSize - SEGMENT_SIZE, SEGMENT_SIZE);
+    fpstats->pastEvents.nbEvents = fpstats->newEvents.nbEvents = SEGMENT_SIZE;
+    if (!compareFingerprints(&fpstats->pastEvents, &fpstats->newEvents, 0, 8))
+        return blockSize;
+
+    HIST_add(middleEvents->events, (const char*)blockStart + blockSize/2 - SEGMENT_SIZE/2, SEGMENT_SIZE);
+    middleEvents->nbEvents = SEGMENT_SIZE;
+    {   U64 const distFromBegin = fpDistance(&fpstats->pastEvents, middleEvents, 8);
+        U64 const distFromEnd = fpDistance(&fpstats->newEvents, middleEvents, 8);
+        U64 const minDistance = SEGMENT_SIZE * SEGMENT_SIZE / 3;
+        if (abs64((S64)distFromBegin - (S64)distFromEnd) < minDistance)
+            return 64 KB;
+        return (distFromBegin > distFromEnd) ? 32 KB : 96 KB;
+    }
+}
+
 size_t ZSTD_splitBlock(const void* blockStart, size_t blockSize,
                     int level,
                     void* workspace, size_t wkspSize)
 {
-    assert(0<=level && level<=3);
-    return ZSTD_splitBlock_byChunks(blockStart, blockSize, level, workspace, wkspSize);
+    assert(0<=level && level<=4);
+    if (level == 0)
+        return ZSTD_splitBlock_fromBorders(blockStart, blockSize, workspace, wkspSize);
+    /* level >= 1 */
+    return ZSTD_splitBlock_byChunks(blockStart, blockSize, level-1, workspace, wkspSize);
 }
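
To close the loop, a hedged usage sketch of the new level-0 path (hypothetical caller, not part of the patch; it assumes ZSTD_splitBlock() and ZSTD_SLIPBLOCK_WORKSPACESIZE are exposed by zstd_preSplit.h, as the static assert in _fromBorders suggests). Note that with SEGMENT_SIZE == 512, the minDistance threshold above evaluates to 512*512/3 == 87381.

    #include <stdlib.h>
    #include "zstd_preSplit.h"  /* ZSTD_splitBlock, ZSTD_SLIPBLOCK_WORKSPACESIZE (assumed declared here) */

    /* hypothetical helper: propose a split point for one full 128 KB block */
    static size_t splitFirstBlock(const void* block)
    {
        size_t splitPoint;
        void* const wksp = malloc(ZSTD_SLIPBLOCK_WORKSPACESIZE);
        if (wksp == NULL) return 128 * 1024;   /* no split on allocation failure */
        splitPoint = ZSTD_splitBlock(block, 128 * 1024,
                                     0,        /* level 0 -> ZSTD_splitBlock_fromBorders */
                                     wksp, ZSTD_SLIPBLOCK_WORKSPACESIZE);
        /* 128 KB => both borders look alike, no split;
         * otherwise 32 KB, 64 KB or 96 KB, depending on which border the middle resembles */
        free(wksp);
        return splitPoint;
    }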