From: Yann Collet
Date: Mon, 26 Feb 2024 22:31:12 +0000 (-0800)
Subject: sizeBlockSequences() also tracks uncompressed size
X-Git-Tag: v1.5.6^2~60^2
X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=refs%2Fpull%2F3917%2Fhead;p=thirdparty%2Fzstd.git

sizeBlockSequences() also tracks uncompressed size

and only defines a sub-block boundary when it believes that it is compressible.

It's effectively an optimization,
avoiding a compression cycle to reach the same conclusion.
---

diff --git a/lib/compress/zstd_compress_superblock.c b/lib/compress/zstd_compress_superblock.c
index 295ccf304..f5430eccb 100644
--- a/lib/compress/zstd_compress_superblock.c
+++ b/lib/compress/zstd_compress_superblock.c
@@ -443,21 +443,29 @@ static size_t sizeBlockSequences(const seqDef* sp, size_t nbSeqs,
                 size_t targetBudget, size_t avgLitCost, size_t avgSeqCost,
                 int firstSubBlock)
 {
-    size_t n, budget = 0;
+    size_t n, budget = 0, inSize=0;
     /* entropy headers */
-    if (firstSubBlock) {
-        budget += 120 * BYTESCALE; /* generous estimate */
-    }
+    size_t const headerSize = (size_t)firstSubBlock * 120 * BYTESCALE; /* generous estimate */
+    assert(firstSubBlock==0 || firstSubBlock==1);
+    budget += headerSize;
+
     /* first sequence => at least one sequence*/
     budget += sp[0].litLength * avgLitCost + avgSeqCost;
     if (budget > targetBudget) return 1;
+    inSize = sp[0].litLength + (sp[0].mlBase+MINMATCH);
 
     /* loop over sequences */
     for (n=1; n<nbSeqs; n++) {
         size_t currentCost = sp[n].litLength * avgLitCost + avgSeqCost;
-        if (budget + currentCost > targetBudget) break;
         budget += currentCost;
+        inSize += sp[n].litLength + (sp[n].mlBase+MINMATCH);
+        /* stop when sub-block budget is reached */
+        if ( (budget > targetBudget)
+            /* though continue to expand until the sub-block is deemed compressible */
+          && (budget < inSize * BYTESCALE) )
+            break;
     }
+
     return n;
 }
 