From 8d31e8ec42a736bf7cc70f9f21e9c1afc920c148 Mon Sep 17 00:00:00 2001
From: Yann Collet
Date: Mon, 26 Feb 2024 14:31:12 -0800
Subject: [PATCH] sizeBlockSequences() also tracks uncompressed size

and only defines a sub-block boundary when
it believes that it is compressible.

It's effectively an optimization,
avoiding a compression cycle to reach the same conclusion.
---
 lib/compress/zstd_compress_superblock.c | 18 +++++++++++++-----
 1 file changed, 13 insertions(+), 5 deletions(-)

diff --git a/lib/compress/zstd_compress_superblock.c b/lib/compress/zstd_compress_superblock.c
index 295ccf304..f5430eccb 100644
--- a/lib/compress/zstd_compress_superblock.c
+++ b/lib/compress/zstd_compress_superblock.c
@@ -443,21 +443,29 @@ static size_t sizeBlockSequences(const seqDef* sp, size_t nbSeqs,
                 size_t targetBudget, size_t avgLitCost, size_t avgSeqCost,
                 int firstSubBlock)
 {
-    size_t n, budget = 0;
+    size_t n, budget = 0, inSize=0;
     /* entropy headers */
-    if (firstSubBlock) {
-        budget += 120 * BYTESCALE; /* generous estimate */
-    }
+    size_t const headerSize = (size_t)firstSubBlock * 120 * BYTESCALE; /* generous estimate */
+    assert(firstSubBlock==0 || firstSubBlock==1);
+    budget += headerSize;
+
     /* first sequence => at least one sequence*/
     budget += sp[0].litLength * avgLitCost + avgSeqCost;
     if (budget > targetBudget) return 1;
+    inSize = sp[0].litLength + (sp[0].mlBase+MINMATCH);
 
     /* loop over sequences */
     for (n=1; n<nbSeqs; n++) {
         size_t currentCost = sp[n].litLength * avgLitCost + avgSeqCost;
-        if (budget + currentCost > targetBudget) break;
         budget += currentCost;
+        inSize += sp[n].litLength + (sp[n].mlBase+MINMATCH);
+        /* stop when sub-block budget is reached */
+        if ( (budget > targetBudget)
+            /* though continue to expand until the sub-block is deemed compressible */
+          && (budget < inSize * BYTESCALE) )
+            break;
     }
+
     return n;
 }
 
-- 
2.47.2