From: Yann Collet Date: Fri, 21 Jan 2022 05:24:33 +0000 (-0800) Subject: improved compression of literals in specific corner cases X-Git-Tag: v1.5.4^2~260^2~7 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=51da2d2ff245be6bcd719ee1f3d4c23225d4e653;p=thirdparty%2Fzstd.git improved compression of literals in specific corner cases In rare cases, the default huffman depth selector is a bit too harsh, requiring brutal adaptations to the tree, resulting is some loss of compression ratio. This new heuristic avoids the worse cases, favoring compression ratio. As an example, compression of a specific distribution of 771 literals is now improved to 441 bytes, from 601 bytes before. --- diff --git a/lib/compress/huf_compress.c b/lib/compress/huf_compress.c index e9652058d..e61b39f60 100644 --- a/lib/compress/huf_compress.c +++ b/lib/compress/huf_compress.c @@ -705,6 +705,18 @@ static void HUF_buildCTableFromTree(HUF_CElt* CTable, nodeElt const* huffNode, i CTable[0] = maxNbBits; } +static size_t +HUF_nbSymbolsTooLarge(const nodeElt* hnodes, U32 maxSymbolValue, U32 maxNbBits) +{ + size_t nbSTL = 0; + int s = (int)maxSymbolValue; + for ( ; s > 0; s-- ) { + if (hnodes[s].nbBits > maxNbBits) nbSTL++; + else break; + } + return nbSTL; +} + size_t HUF_buildCTable_wksp(HUF_CElt* CTable, const unsigned* count, U32 maxSymbolValue, U32 maxNbBits, void* workSpace, size_t wkspSize) @@ -733,6 +745,16 @@ HUF_buildCTable_wksp(HUF_CElt* CTable, const unsigned* count, U32 maxSymbolValue nonNullRank = HUF_buildTree(huffNode, maxSymbolValue); /* determine and enforce maxTableLog */ + /* Loosen target when maxNbBits is within limits. + * A harsh rebalancing can be bad for compression ratio + * while a mild one tends to be better */ + while (maxNbBits < HUF_TABLELOG_MAX) { + size_t const nbNodes = HUF_nbSymbolsTooLarge(huffNode, maxSymbolValue, maxNbBits); + #define HUF_NB_NODES_TO_FIX_MAX 8 + if (nbNodes < HUF_NB_NODES_TO_FIX_MAX) /* heuristic */ + break; + maxNbBits++; + } maxNbBits = HUF_setMaxHeight(huffNode, (U32)nonNullRank, maxNbBits); if (maxNbBits > HUF_TABLELOG_MAX) return ERROR(GENERIC); /* check fit into table */