From 51da2d2ff245be6bcd719ee1f3d4c23225d4e653 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Thu, 20 Jan 2022 21:24:33 -0800 Subject: [PATCH] improved compression of literals in specific corner cases In rare cases, the default huffman depth selector is a bit too harsh, requiring brutal adaptations to the tree, resulting is some loss of compression ratio. This new heuristic avoids the worse cases, favoring compression ratio. As an example, compression of a specific distribution of 771 literals is now improved to 441 bytes, from 601 bytes before. --- lib/compress/huf_compress.c | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/lib/compress/huf_compress.c b/lib/compress/huf_compress.c index e9652058d..e61b39f60 100644 --- a/lib/compress/huf_compress.c +++ b/lib/compress/huf_compress.c @@ -705,6 +705,18 @@ static void HUF_buildCTableFromTree(HUF_CElt* CTable, nodeElt const* huffNode, i CTable[0] = maxNbBits; } +static size_t +HUF_nbSymbolsTooLarge(const nodeElt* hnodes, U32 maxSymbolValue, U32 maxNbBits) +{ + size_t nbSTL = 0; + int s = (int)maxSymbolValue; + for ( ; s > 0; s-- ) { + if (hnodes[s].nbBits > maxNbBits) nbSTL++; + else break; + } + return nbSTL; +} + size_t HUF_buildCTable_wksp(HUF_CElt* CTable, const unsigned* count, U32 maxSymbolValue, U32 maxNbBits, void* workSpace, size_t wkspSize) @@ -733,6 +745,16 @@ HUF_buildCTable_wksp(HUF_CElt* CTable, const unsigned* count, U32 maxSymbolValue nonNullRank = HUF_buildTree(huffNode, maxSymbolValue); /* determine and enforce maxTableLog */ + /* Loosen target when maxNbBits is within limits. + * A harsh rebalancing can be bad for compression ratio + * while a mild one tends to be better */ + while (maxNbBits < HUF_TABLELOG_MAX) { + size_t const nbNodes = HUF_nbSymbolsTooLarge(huffNode, maxSymbolValue, maxNbBits); + #define HUF_NB_NODES_TO_FIX_MAX 8 + if (nbNodes < HUF_NB_NODES_TO_FIX_MAX) /* heuristic */ + break; + maxNbBits++; + } maxNbBits = HUF_setMaxHeight(huffNode, (U32)nonNullRank, maxNbBits); if (maxNbBits > HUF_TABLELOG_MAX) return ERROR(GENERIC); /* check fit into table */ -- 2.47.2