git.ipfire.org Git - thirdparty/zstd.git/commitdiff
sizeBlockSequences() also tracks uncompressed size 3917/head
author Yann Collet <cyan@fb.com>
Mon, 26 Feb 2024 22:31:12 +0000 (14:31 -0800)
committer Yann Collet <cyan@fb.com>
Mon, 26 Feb 2024 22:31:12 +0000 (14:31 -0800)
and only defines a sub-block boundary when
it believes that it is compressible.

It's effectively an optimization,
avoiding a compression cycle to reach the same conclusion.

lib/compress/zstd_compress_superblock.c

index 295ccf3049806ea9ccb3f2b35c8a0e2cc45013d9..f5430eccb0e47faebb417fcb2ddb27cbd040977e 100644 (file)
@@ -443,21 +443,29 @@ static size_t sizeBlockSequences(const seqDef* sp, size_t nbSeqs,
                 size_t targetBudget, size_t avgLitCost, size_t avgSeqCost,
                 int firstSubBlock)
 {
-    size_t n, budget = 0;
+    size_t n, budget = 0, inSize=0;
     /* entropy headers */
-    if (firstSubBlock) {
-        budget += 120 * BYTESCALE; /* generous estimate */
-    }
+    size_t const headerSize = (size_t)firstSubBlock * 120 * BYTESCALE; /* generous estimate */
+    assert(firstSubBlock==0 || firstSubBlock==1);
+    budget += headerSize;
+
     /* first sequence => at least one sequence*/
     budget += sp[0].litLength * avgLitCost + avgSeqCost;
     if (budget > targetBudget) return 1;
+    inSize = sp[0].litLength + (sp[0].mlBase+MINMATCH);
 
     /* loop over sequences */
     for (n=1; n<nbSeqs; n++) {
         size_t currentCost = sp[n].litLength * avgLitCost + avgSeqCost;
-        if (budget + currentCost > targetBudget) break;
         budget += currentCost;
+        inSize += sp[n].litLength + (sp[n].mlBase+MINMATCH);
+        /* stop when sub-block budget is reached */
+        if ( (budget > targetBudget)
+            /* though continue to expand until the sub-block is deemed compressible */
+          && (budget < inSize * BYTESCALE) )
+            break;
     }
+
     return n;
 }