From: Yann Collet <cyan@fb.com>
Date: Mon, 26 Feb 2024 22:31:12 +0000 (-0800)
Subject: sizeBlockSequences() also tracks uncompressed size
X-Git-Tag: v1.5.6^2~60^2
X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=refs%2Fpull%2F3917%2Fhead;p=thirdparty%2Fzstd.git

sizeBlockSequences() also tracks uncompressed size

and only defines a sub-block boundary when
it estimates that the resulting sub-block is compressible.

It's effectively an optimization,
avoiding a compression cycle to reach the same conclusion.
---

diff --git a/lib/compress/zstd_compress_superblock.c b/lib/compress/zstd_compress_superblock.c
index 295ccf304..f5430eccb 100644
--- a/lib/compress/zstd_compress_superblock.c
+++ b/lib/compress/zstd_compress_superblock.c
@@ -443,21 +443,29 @@ static size_t sizeBlockSequences(const seqDef* sp, size_t nbSeqs,
                 size_t targetBudget, size_t avgLitCost, size_t avgSeqCost,
                 int firstSubBlock)
 {
-    size_t n, budget = 0;
+    size_t n, budget = 0, inSize=0;
     /* entropy headers */
-    if (firstSubBlock) {
-        budget += 120 * BYTESCALE; /* generous estimate */
-    }
+    size_t const headerSize = (size_t)firstSubBlock * 120 * BYTESCALE; /* generous estimate */
+    assert(firstSubBlock==0 || firstSubBlock==1);
+    budget += headerSize;
+
     /* first sequence => at least one sequence*/
     budget += sp[0].litLength * avgLitCost + avgSeqCost;
     if (budget > targetBudget) return 1;
+    inSize = sp[0].litLength + (sp[0].mlBase+MINMATCH);
 
     /* loop over sequences */
     for (n=1; n<nbSeqs; n++) {
         size_t currentCost = sp[n].litLength * avgLitCost + avgSeqCost;
-        if (budget + currentCost > targetBudget) break;
         budget += currentCost;
+        inSize += sp[n].litLength + (sp[n].mlBase+MINMATCH);
+        /* stop when sub-block budget is reached */
+        if ( (budget > targetBudget)
+            /* though continue to expand until the sub-block is deemed compressible */
+          && (budget < inSize * BYTESCALE) )
+            break;
     }
+
     return n;
 }