From: Danielle Rozenblit Date: Fri, 14 Oct 2022 17:37:35 +0000 (-0700) Subject: Additional ratio optimizations X-Git-Tag: v1.5.4^2~66^2~12 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=e60cae33cf88b035380aa6d953e08c01f5cbff96;p=thirdparty%2Fzstd.git Additional ratio optimizations --- diff --git a/lib/common/huf.h b/lib/common/huf.h index 72ffbcdff..5f33d6550 100644 --- a/lib/common/huf.h +++ b/lib/common/huf.h @@ -173,7 +173,7 @@ size_t HUF_decompress4X2_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, /* **************************************** * HUF detailed API * ****************************************/ -#define HUF_OPTIMAL_DEPTH_THRESHOLD ZSTD_btlazy2 +#define HUF_OPTIMAL_DEPTH_THRESHOLD 1 typedef enum { HUF_depth_fast, /** Use heuristic to find the table depth**/ HUF_depth_optimal /** Test possible table depths to find the one that produces the smallest header + encoded size**/ @@ -191,6 +191,7 @@ typedef enum { * or to save and regenerate 'CTable' using external methods. */ unsigned HUF_minTableLog(size_t srcSize, unsigned maxSymbolValue); +unsigned HUF_cardinality(const unsigned* count, unsigned maxSymbolValue); unsigned HUF_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue, void* workSpace, size_t wkspSize, HUF_CElt* table, const unsigned* count, HUF_depth_mode depthMode); size_t HUF_buildCTable (HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue, unsigned maxNbBits); /* @return : maxNbBits; CTable and count can overlap. In which case, CTable will overwrite count content */ size_t HUF_writeCTable (void* dst, size_t maxDstSize, const HUF_CElt* CTable, unsigned maxSymbolValue, unsigned huffLog); diff --git a/lib/compress/huf_compress.c b/lib/compress/huf_compress.c index b738abfe1..f2686f210 100644 --- a/lib/compress/huf_compress.c +++ b/lib/compress/huf_compress.c @@ -1235,12 +1235,23 @@ typedef struct { #define SUSPECT_INCOMPRESSIBLE_SAMPLE_SIZE 4096 #define SUSPECT_INCOMPRESSIBLE_SAMPLE_RATIO 10 /* Must be >= 2 */ +unsigned HUF_cardinality(const unsigned* count, unsigned maxSymbolValue) +{ + unsigned cardinality = 0; + unsigned i; + + for (i = 0; i < maxSymbolValue + 1; i++) { + if (count[i] != 0) cardinality += 1; + } + + return cardinality; +} + unsigned HUF_minTableLog(size_t srcSize, unsigned maxSymbolValue) { U32 minBitsSrc = ZSTD_highbit32((U32)(srcSize)) + 1; - U32 minBitsSymbols = ZSTD_highbit32(maxSymbolValue) + 2; + U32 minBitsSymbols = ZSTD_highbit32(maxSymbolValue) + 1; U32 minBits = minBitsSrc < minBitsSymbols ? minBitsSrc : minBitsSymbols; - if (minBits < FSE_MIN_TABLELOG) minBits = FSE_MIN_TABLELOG; assert(srcSize > 1); /* Not supported, RLE should be used instead */ return minBits; } @@ -1255,10 +1266,11 @@ unsigned HUF_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxS size_t optSize = ((size_t) ~0); unsigned huffLog; size_t maxBits, hSize, newSize; + unsigned cardinality = HUF_cardinality(count, maxSymbolValue); if (wkspSize < sizeof(HUF_buildCTable_wksp_tables)) return optLog; - for (huffLog = HUF_minTableLog(srcSize, maxSymbolValue); huffLog <= maxTableLog; huffLog++) { + for (huffLog = HUF_minTableLog(srcSize, cardinality); huffLog <= maxTableLog; huffLog++) { maxBits = HUF_buildCTable_wksp(table, count, maxSymbolValue, huffLog, workSpace, wkspSize);