split all full 128 KB blocks

author Yann Collet <cyan@fb.com>

Wed, 23 Oct 2024 21:11:49 +0000 (14:11 -0700)

committer Yann Collet <cyan@fb.com>

Wed, 23 Oct 2024 21:18:48 +0000 (14:18 -0700)
author Yann Collet <cyan@fb.com>
Wed, 23 Oct 2024 21:11:49 +0000 (14:11 -0700)
committer Yann Collet <cyan@fb.com>
Wed, 23 Oct 2024 21:18:48 +0000 (14:18 -0700)
diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c

index b064e382ce794648d5dd4df75ceb4c24d8a39f29..9de92cef25e3719a2464b8718961bb517ce7d62b 100644 (file)
--- a/lib/compress/zstd_compress.c
+++ b/lib/compress/zstd_compress.c
@@ -4493,20 +4493,21 @@ static void ZSTD_overflowCorrectIfNeeded(ZSTD_matchState_t* ms,
  
  static size_t ZSTD_optimalBlockSize(ZSTD_CCtx* cctx, const void* src, size_t srcSize, size_t blockSizeMax, ZSTD_strategy strat, S64 savings)
  {
-    /* note: conservatively only split full blocks (128 KB) currently,
-     * and even then only if there is more than 128 KB input remaining.
+    /* note: conservatively only split full blocks (128 KB) currently.
+     * While it's possible to go lower, let's keep it simple for a first implementation.
+     * Besides, benefits of splitting are reduced when blocks are already small.
       */
-    if (srcSize <= 128 KB || blockSizeMax < 128 KB)
+    if (srcSize < 128 KB || blockSizeMax < 128 KB)
          return MIN(srcSize, blockSizeMax);
      /* dynamic splitting has a cpu cost for analysis,
-     * due to that cost it's only used for btlazy2+ strategies */
+     * due to that cost it's only used for higher levels */
      if (strat >= ZSTD_btopt)
          return ZSTD_splitBlock(src, srcSize, blockSizeMax, split_lvl2, cctx->tmpWorkspace, cctx->tmpWkspSize);
      if (strat >= ZSTD_lazy2)
          return ZSTD_splitBlock(src, srcSize, blockSizeMax, split_lvl1, cctx->tmpWorkspace, cctx->tmpWkspSize);
      /* blind split strategy
-     * no cpu cost, but can over-split homegeneous data.
       * heuristic, tested as being "generally better".
+     * no cpu cost, but can over-split homogeneous data.
       * do not split incompressible data though: respect the 3 bytes per block overhead limit.
       */
      return savings ? 92 KB : 128 KB;
author	Yann Collet <cyan@fb.com>
	Wed, 23 Oct 2024 21:11:49 +0000 (14:11 -0700)
committer	Yann Collet <cyan@fb.com>
	Wed, 23 Oct 2024 21:18:48 +0000 (14:18 -0700)