From: Yann Collet
Date: Mon, 13 Sep 2021 02:18:21 +0000 (-0700)
Subject: improve initial literals stats
X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=0b6fa7a11cbdf9504eb40c458be7be0e30118708;p=thirdparty%2Fzstd.git

improve initial literals stats
---

diff --git a/lib/compress/zstd_compress_literals.c b/lib/compress/zstd_compress_literals.c
index 52b0a8059..438aee80b 100644
--- a/lib/compress/zstd_compress_literals.c
+++ b/lib/compress/zstd_compress_literals.c
@@ -93,7 +93,6 @@ size_t ZSTD_compressLiterals (ZSTD_hufCTables_t const* prevHuf,
         return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize);
 
     /* small ? don't even attempt compression (speed opt) */
-#   define COMPRESS_LITERALS_SIZE_MIN 63
     {   size_t const minLitSize = (prevHuf->repeatMode == HUF_repeat_valid) ? 6 : COMPRESS_LITERALS_SIZE_MIN;
         if (srcSize <= minLitSize) return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize);
     }
@@ -136,7 +135,7 @@ size_t ZSTD_compressLiterals (ZSTD_hufCTables_t const* prevHuf,
     switch(lhSize)
     {
     case 3: /* 2 - 2 - 10 - 10 */
-        {   U32 const lhc = hType + ((!singleStream) << 2) + ((U32)srcSize<<4) + ((U32)cLitSize<<14);
+        {   U32 const lhc = hType + (((U32)!singleStream) << 2) + ((U32)srcSize<<4) + ((U32)cLitSize<<14);
             MEM_writeLE24(ostart, lhc);
             break;
         }
diff --git a/lib/compress/zstd_compress_literals.h b/lib/compress/zstd_compress_literals.h
index 9775fb97c..94e06d9e3 100644
--- a/lib/compress/zstd_compress_literals.h
+++ b/lib/compress/zstd_compress_literals.h
@@ -14,6 +14,11 @@
 
 #include "zstd_compress_internal.h" /* ZSTD_hufCTables_t, ZSTD_minGain() */
 
+/* Below this limit, the heuristic does not even attempt compression of literals,
+ * as the cost of the headers is expected to outweigh the benefits.
+ * This limit is not applicable when reusing statistics from a dictionary or a previous block. */
+#define COMPRESS_LITERALS_SIZE_MIN 63
+
 size_t ZSTD_noCompressLiterals (void* dst, size_t dstCapacity, const void* src, size_t srcSize);
 
 size_t ZSTD_compressRleLiteralsBlock (void* dst, size_t dstCapacity, const void* src, size_t srcSize);
diff --git a/lib/compress/zstd_opt.c b/lib/compress/zstd_opt.c
index 532edd5bc..6345f81d2 100644
--- a/lib/compress/zstd_opt.c
+++ b/lib/compress/zstd_opt.c
@@ -1253,6 +1253,31 @@ size_t ZSTD_compressBlock_btopt(
 }
 
 
+#include "zstd_compress_literals.h" /* COMPRESS_LITERALS_SIZE_MIN */
+
+static void transfer_LiteralStats(optState_t* opt, const seqStore_t* seqStore)
+{
+    size_t const nbLiterals = (size_t)(seqStore->lit - seqStore->litStart);
+    assert(seqStore->lit >= seqStore->litStart);
+
+    /* test if literals are compressible (debug aid, disabled):
+     * char block[ZSTD_BLOCKSIZE_MAX];
+     * size_t const cLitSize = HUF_compress(block, sizeof(block), seqStore->litStart, nbLiterals);
+     * printf("nb literals = %zu => %zu huf_compress\n", nbLiterals, cLitSize); */
+
+    if (nbLiterals < COMPRESS_LITERALS_SIZE_MIN) {
+        /* literals won't be compressed anyway: give them a flat cost */
+        /* note : it would be better if this category could also be extended
+         * to non-compressible literals which are more numerous than the threshold */
+        unsigned u;
+        for (u=0; u<=MaxLit; u++) opt->litFreq[u] = 2;
+    } else {
+        unsigned maxlit = MaxLit;
+        HIST_count_simple(opt->litFreq, &maxlit, seqStore->litStart, nbLiterals);
+    }
+    opt->litSum = ZSTD_downscaleStats(opt->litFreq, MaxLit, 0);   /* flatten stats, by providing at least 1 to every symbol */
+}
+
 #include "zstd_lazy.h"
 
 /* ZSTD_initStats_greedy():
@@ -1275,16 +1300,16 @@ ZSTD_initStats_greedy(ZSTD_matchState_t* ms,
     assert(ms->window.dictLimit == ms->window.lowLimit);   /* no dictionary */
     assert(ms->window.dictLimit - ms->nextToUpdate <= 1);  /* no prefix (note: intentional overflow, defined as 2-complement) */
 
-    ZSTD_compressBlock_greedy(ms, seqStore, tmpRep, src, srcSize);   /* generate stats into seqstore */
-
-    /* transfer stats into ms-opt */
-    /* literals stats */
-    {   unsigned maxlit = MaxLit;
-        assert(seqStore->lit >= seqStore->litStart);
-        HIST_count_simple(ms->opt.litFreq, &maxlit, seqStore->litStart, (size_t)(seqStore->lit - seqStore->litStart));
-        ms->opt.litSum = ZSTD_downscaleStats(ms->opt.litFreq, MaxLit, 0);   /* flatten stats, by providing at least 1 to every symbol */
+    {   size_t const lastLits = ZSTD_compressBlock_greedy(ms, seqStore, tmpRep, src, srcSize);   /* generate stats into seqstore */
+        /* add the last literals into the literals buffer, for proper accounting */
+        assert(lastLits <= srcSize);
+        ZSTD_memcpy(seqStore->lit, (const char*)src + srcSize - lastLits, lastLits);
+        seqStore->lit += lastLits;
     }
 
+    /* transfer stats from seqStore into ms->opt */
+    transfer_LiteralStats(&ms->opt, seqStore);
+
     /* seqStats */
     assert(seqStore->sequences >= seqStore->sequencesStart);
     {   U32 const nbSeq = (U32)(seqStore->sequences - seqStore->sequencesStart);
@@ -1367,6 +1392,7 @@ ZSTD_initStats_ultra(ZSTD_matchState_t* ms,
                      U32 rep[ZSTD_REP_NUM],
                const void* src, size_t srcSize)
 {
+    size_t lastLits;
     U32 tmpRep[ZSTD_REP_NUM];  /* updated rep codes will sink here */
     ZSTD_memcpy(tmpRep, rep, sizeof(tmpRep));
 
@@ -1378,13 +1404,19 @@ ZSTD_initStats_ultra(ZSTD_matchState_t* ms,
 
     if (srcSize <= 16 KB) {
         /* raw btultra, initialized by default starting stats */
-        ZSTD_compressBlock_opt_generic(ms, seqStore, tmpRep, src, srcSize, 2 /*optLevel*/, ZSTD_noDict);   /* generate stats into ms->opt*/
+        lastLits = ZSTD_compressBlock_opt_generic(ms, seqStore, tmpRep, src, srcSize, 2 /*optLevel*/, ZSTD_noDict);   /* generate stats into ms->opt*/
     } else {
         /* in this mode, btultra is initialized greedy;
          * measured better for larger blocks, but not for small ones */
-        ZSTD_compressBlock_btultra(ms, seqStore, tmpRep, src, srcSize);   /* generate stats into ms->opt*/
+        lastLits = ZSTD_compressBlock_btultra(ms, seqStore, tmpRep, src, srcSize);   /* generate stats into ms->opt*/
     }
 
+    /* transfer stats from seqStore into ms->opt */
+    assert(lastLits <= srcSize);
+    ZSTD_memcpy(seqStore->lit, (const char*)src + srcSize - lastLits, lastLits);
+    seqStore->lit += lastLits;
+    transfer_LiteralStats(&ms->opt, seqStore);
+
     /* invalidate first scan from history */
     ZSTD_resetSeqStore(seqStore);
     ms->window.base -= srcSize;
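
For illustration, a minimal standalone sketch of the seeding heuristic that
transfer_LiteralStats() implements follows. The names seed_literal_stats, LIT_MAX
and SIZE_MIN are stand-ins rather than zstd API, and the downscaling step is
simplified to the at-least-one-per-symbol guarantee (the real ZSTD_downscaleStats()
additionally right-shifts large counts):

    #include <stdio.h>
    #include <string.h>

    #define LIT_MAX   255   /* stand-in for MaxLit */
    #define SIZE_MIN   63   /* stand-in for COMPRESS_LITERALS_SIZE_MIN */

    /* Fill freq[0..LIT_MAX] and return the total, i.e. the litSum equivalent. */
    static unsigned seed_literal_stats(unsigned freq[LIT_MAX+1],
                                       const unsigned char* lits, size_t nbLits)
    {
        unsigned sum = 0;
        int s;
        if (nbLits < SIZE_MIN) {
            /* too few literals to be worth entropy-compressing:
             * give every symbol the same flat, non-zero cost */
            for (s = 0; s <= LIT_MAX; s++) freq[s] = 2;
        } else {
            size_t i;
            memset(freq, 0, (LIT_MAX+1) * sizeof(unsigned));
            for (i = 0; i < nbLits; i++) freq[lits[i]]++;    /* histogram */
            /* at least 1 per symbol, so no literal gets an infinite price */
            for (s = 0; s <= LIT_MAX; s++) if (freq[s] == 0) freq[s] = 1;
        }
        for (s = 0; s <= LIT_MAX; s++) sum += freq[s];
        return sum;
    }

    int main(void)
    {
        unsigned freq[LIT_MAX+1];
        const unsigned char few[] = "under sixty-three bytes";
        printf("flat seeding: litSum = %u\n",
               seed_literal_stats(freq, few, sizeof(few)-1));   /* 2*256 = 512 */
        return 0;
    }

A flat, non-zero seed keeps every literal price finite while reflecting that such a
short literal run will be emitted uncompressed anyway.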
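
The other half of the commit fixes the input to that seeding: the first-pass block
compressors (ZSTD_compressBlock_greedy(), ZSTD_compressBlock_btultra(), ...) return
the number of trailing bytes left uncovered by any sequence, and those bytes never
reached the literal buffer of the stats pass, so the histogram under-counted them.
A sketch of that accounting step, with a hypothetical LitBuffer standing in for the
relevant seqStore_t fields:

    #include <assert.h>
    #include <string.h>

    typedef struct {
        unsigned char* litStart;   /* beginning of the literal buffer */
        unsigned char* lit;        /* write cursor, one past the last literal */
    } LitBuffer;

    /* 'lastLits' is the value returned by the first-pass block compressor:
     * the number of trailing source bytes not covered by any sequence. */
    static void append_last_literals(LitBuffer* lb,
                                     const unsigned char* src, size_t srcSize,
                                     size_t lastLits)
    {
        assert(lastLits <= srcSize);
        memcpy(lb->lit, src + srcSize - lastLits, lastLits);
        lb->lit += lastLits;
        /* only now does (lb->lit - lb->litStart) count every literal of the
         * block, so the histogram taken afterwards sees the trailing bytes too */
    }

Only after this copy does the histogram in transfer_LiteralStats() observe the whole
block, which is what "for proper accounting" refers to in the diff above.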