From: inikep Date: Fri, 25 Mar 2016 09:52:25 +0000 (+0100) Subject: created zstd_stats.h X-Git-Tag: v0.6.0^2~17^2~17^2~12 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=5cc4efdaf8531399242d37d8a2ebc3df1e9364f6;p=thirdparty%2Fzstd.git created zstd_stats.h --- diff --git a/lib/zstd_compress.c b/lib/zstd_compress.c index 43e6e663a..ddb816b18 100644 --- a/lib/zstd_compress.c +++ b/lib/zstd_compress.c @@ -97,11 +97,11 @@ struct ZSTD_CCtx_s U32 nextToUpdate; /* index from which to continue dictionary update */ U32 nextToUpdate3; /* index from which to continue dictionary update */ U32 hashLog3; /* dispatch table : larger == faster, more memory */ - U32 targetSrcSize; /* optimize compression for this source size */ U32 loadedDictEnd; U32 stage; ZSTD_parameters params; void* workSpace; + size_t targetSrcSize; /* optimize compression for this source size */ size_t workSpaceSize; size_t blockSize; size_t hbSize; @@ -781,12 +781,7 @@ MEM_STATIC void ZSTD_storeSeq(seqStore_t* seqStorePtr, size_t litLength, const B printf("pos %6u : %3u literals & match %3u bytes at distance %6u \n", (U32)(literals - g_start), (U32)litLength, (U32)matchCode+MINMATCH, (U32)offsetCode); #endif -#if ZSTD_OPT_DEBUG == 3 - if (offsetCode == 0) seqStorePtr->realRepSum++; - seqStorePtr->realSeqSum++; - seqStorePtr->realMatchSum += matchCode; - seqStorePtr->realLitSum += litLength; -#endif + ZSTD_statsUpdatePrices(&seqStorePtr->stats, litLength, literals, offsetCode, matchCode); /* copy Literals */ ZSTD_wildcopy(seqStorePtr->lit, literals, litLength); @@ -1696,6 +1691,7 @@ _storeSequence: { size_t const lastLLSize = iend - anchor; memcpy(seqStorePtr->lit, anchor, lastLLSize); seqStorePtr->lit += lastLLSize; + ZSTD_statsUpdatePrices(&seqStorePtr->stats, lastLLSize, anchor, 0, 0); } } @@ -1942,6 +1938,8 @@ static size_t ZSTD_compressBlock_internal(ZSTD_CCtx* zc, void* dst, size_t dstCa } + + static size_t ZSTD_compress_generic (ZSTD_CCtx* zc, void* dst, size_t dstCapacity, const 
void* src, size_t srcSize) @@ -1952,15 +1950,13 @@ static size_t ZSTD_compress_generic (ZSTD_CCtx* zc, BYTE* const ostart = (BYTE*)dst; BYTE* op = ostart; const U32 maxDist = 1 << zc->params.windowLog; -#if ZSTD_OPT_DEBUG == 3 - seqStore_t* ssPtr = &zc->seqStore; - static U32 priceFunc = 0; - ssPtr->realMatchSum = ssPtr->realLitSum = ssPtr->realSeqSum = ssPtr->realRepSum = 1; - ssPtr->priceFunc = priceFunc; -#endif + ZSTD_stats_t* stats = &zc->seqStore.stats; + + ZSTD_statsInit(stats); while (remaining) { size_t cSize; + ZSTD_statsResetFreqs(stats); if (dstCapacity < ZSTD_blockHeaderSize + MIN_CBLOCK_SIZE) return ERROR(dstSize_tooSmall); /* not enough space to store compressed block */ if (remaining < blockSize) blockSize = remaining; @@ -1992,12 +1988,7 @@ static size_t ZSTD_compress_generic (ZSTD_CCtx* zc, op += cSize; } -#if ZSTD_OPT_DEBUG == 3 - ssPtr->realMatchSum += ssPtr->realSeqSum * ((zc->params.searchLength == 3) ? 3 : 4); - printf("avgMatchL=%.2f avgLitL=%.2f match=%.1f%% lit=%.1f%% reps=%d seq=%d priceFunc=%d\n", (float)ssPtr->realMatchSum/ssPtr->realSeqSum, (float)ssPtr->realLitSum/ssPtr->realSeqSum, 100.0*ssPtr->realMatchSum/(ssPtr->realMatchSum+ssPtr->realLitSum), 100.0*ssPtr->realLitSum/(ssPtr->realMatchSum+ssPtr->realLitSum), ssPtr->realRepSum, ssPtr->realSeqSum, ssPtr->priceFunc); - priceFunc++; -#endif - + ZSTD_statsPrint(stats, zc->params.searchLength); return op-ostart; } @@ -2466,9 +2457,6 @@ ZSTD_parameters ZSTD_getParams(int compressionLevel, U64 srcSize) int tableID = ((srcSize-1) <= 256 KB) + ((srcSize-1) <= 128 KB) + ((srcSize-1) <= 16 KB); /* intentional underflow for srcSizeHint == 0 */ if (compressionLevel<=0) compressionLevel = 1; if (compressionLevel > ZSTD_MAX_CLEVEL) compressionLevel = ZSTD_MAX_CLEVEL; -#if ZSTD_OPT_DEBUG >= 1 - tableID=0; -#endif result = ZSTD_defaultParameters[tableID][compressionLevel]; result.srcSize = srcSize; return result; diff --git a/lib/zstd_internal.h b/lib/zstd_internal.h index 1358eb8ab..3561291f2 
100644 --- a/lib/zstd_internal.h +++ b/lib/zstd_internal.h @@ -50,7 +50,7 @@ /*-************************************* * Common constants ***************************************/ -#define ZSTD_OPT_DEBUG 0 // 1 = tableID=0; 3 = price func tests; 5 = check encoded sequences; 9 = full logs +#define ZSTD_OPT_DEBUG 0 // 3 = compression stats; 5 = check encoded sequences; 9 = full logs #include <stdio.h> #if defined(ZSTD_OPT_DEBUG) && ZSTD_OPT_DEBUG>=9 #define ZSTD_LOG_PARSER(...) printf(__VA_ARGS__) @@ -176,6 +176,16 @@ typedef struct { U32 rep2; } ZSTD_optimal_t; +#if ZSTD_OPT_DEBUG == 3 + #include "zstd_stats.h" +#else + typedef struct { U32 unused; } ZSTD_stats_t; + MEM_STATIC void ZSTD_statsPrint(ZSTD_stats_t* stats, U32 searchLength) { (void)stats; (void)searchLength; }; + MEM_STATIC void ZSTD_statsInit(ZSTD_stats_t* stats) { (void)stats; }; + MEM_STATIC void ZSTD_statsResetFreqs(ZSTD_stats_t* stats) { (void)stats; }; + MEM_STATIC void ZSTD_statsUpdatePrices(ZSTD_stats_t* stats, U32 litLength, const BYTE* literals, U32 offset, U32 matchLength) { (void)stats; (void)litLength; (void)literals; (void)offset; (void)matchLength; }; +#endif + typedef struct { void* buffer; U32* offsetStart; @@ -208,13 +218,7 @@ typedef struct { U32 log2litSum; U32 log2offCodeSum; U32 factor; -#if ZSTD_OPT_DEBUG == 3 - U32 realMatchSum; - U32 realLitSum; - U32 realSeqSum; - U32 realRepSum; - U32 priceFunc; -#endif + ZSTD_stats_t stats; } seqStore_t; seqStore_t ZSTD_copySeqStore(const ZSTD_CCtx* ctx); diff --git a/lib/zstd_opt.h b/lib/zstd_opt.h index e20379c11..bbfb11cab 100644 --- a/lib/zstd_opt.h +++ b/lib/zstd_opt.h @@ -130,8 +130,8 @@ FORCE_INLINE U32 ZSTD_getPrice(seqStore_t* seqStorePtr, U32 litLength, const BYT if (matchLength >= MaxML) matchLength = MaxML; price += ZSTD_getLiteralPrice(seqStorePtr, litLength, literals) + seqStorePtr->log2matchLengthSum - ZSTD_highbit(seqStorePtr->matchLengthFreq[matchLength]+1); -#if ZSTD_OPT_DEBUG == 3 - switch (seqStorePtr->priceFunc) { +#if ZSTD_OPT_DEBUG
== 333 + switch (seqStorePtr->) { default: case 0: return 1 + price + ((seqStorePtr->litSum>>5) / seqStorePtr->litLengthSum) + ((seqStorePtr->litSum<<1) / (seqStorePtr->litSum + seqStorePtr->matchSum)); diff --git a/lib/zstd_stats.h b/lib/zstd_stats.h new file mode 100644 index 000000000..8d70191c4 --- /dev/null +++ b/lib/zstd_stats.h @@ -0,0 +1,164 @@ +/* + zstd - standard compression library + Header File for static linking only + Copyright (C) 2014-2016, Yann Collet. + + BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following disclaimer + in the documentation and/or other materials provided with the + distribution. + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ + You can contact the author at : + - zstd homepage : http://www.zstd.net +*/ +#ifndef ZSTD_STATS_H +#define ZSTD_STATS_H + + +#if defined (__cplusplus) +extern "C" { +#endif + +/*-************************************* +* Dependencies +***************************************/ +//#include "zstd.h" +//#include "mem.h" + + +/*-************************************* +* Constants +***************************************/ +//#define ZSTD_MAGICNUMBER 0xFD2FB526 /* v0.6 */ + + +/*-************************************* +* Types +***************************************/ +typedef struct { + U32 priceOffset, priceOffCode, priceMatchLength, priceLiteral, priceLitLength, priceDumpsLength; + U32 totalMatchSum, totalLitSum, totalSeqSum, totalRepSum; + U32 litSum, matchLengthSum, litLengthSum, offCodeSum; + U32 matchLengthFreq[1<<MLbits]; + U32 litLengthFreq[1<<LLbits]; + U32 litFreq[1<<Litbits]; + U32 offCodeFreq[1<<Offbits]; +} ZSTD_stats_t; + + +MEM_STATIC void ZSTD_statsPrint(ZSTD_stats_t* stats, U32 searchLength) +{ + stats->totalMatchSum += stats->totalSeqSum * ((searchLength == 3) ? 3 : 4); + printf("avgMatchL=%.2f avgLitL=%.2f match=%.1f%% lit=%.1f%% reps=%d seq=%d\n", (float)stats->totalMatchSum/stats->totalSeqSum, (float)stats->totalLitSum/stats->totalSeqSum, 100.0*stats->totalMatchSum/(stats->totalMatchSum+stats->totalLitSum), 100.0*stats->totalLitSum/(stats->totalMatchSum+stats->totalLitSum), stats->totalRepSum, stats->totalSeqSum); + printf("SumBytes=%d Offset=%d OffCode=%d Match=%d Literal=%d LitLength=%d DumpsLength=%d\n", (stats->priceOffset+stats->priceOffCode+stats->priceMatchLength+stats->priceLiteral+stats->priceLitLength+stats->priceDumpsLength)/8, stats->priceOffset/8, stats->priceOffCode/8, stats->priceMatchLength/8, stats->priceLiteral/8, stats->priceLitLength/8, stats->priceDumpsLength/8); +} + +MEM_STATIC void ZSTD_statsInit(ZSTD_stats_t* stats) +{ + stats->totalLitSum = stats->totalMatchSum = stats->totalSeqSum = stats->totalRepSum = 1; + stats->priceOffset = stats->priceOffCode = stats->priceMatchLength = stats->priceLiteral = stats->priceLitLength = stats->priceDumpsLength = 0; +} + +MEM_STATIC void ZSTD_statsResetFreqs(ZSTD_stats_t* stats) +{ +
unsigned u; + + stats->litSum = (1<<Litbits); + stats->litLengthSum = (1<<LLbits); + stats->matchLengthSum = (1<<MLbits); + stats->offCodeSum = (1<<Offbits); + + for (u=0; u<=MaxLit; u++) + stats->litFreq[u] = 1; + for (u=0; u<=MaxLL; u++) + stats->litLengthFreq[u] = 1; + for (u=0; u<=MaxML; u++) + stats->matchLengthFreq[u] = 1; + for (u=0; u<=MaxOff; u++) + stats->offCodeFreq[u] = 1; +} + +MEM_STATIC void ZSTD_statsUpdatePrices(ZSTD_stats_t* stats, U32 litLength, const BYTE* literals, U32 offset, U32 matchLength) +{ + /* offset */ + BYTE offCode = offset ? (BYTE)ZSTD_highbit(offset+1) + 1 : 0; + stats->priceOffCode += ZSTD_highbit(stats->offCodeSum+1) - ZSTD_highbit(stats->offCodeFreq[offCode]+1); + stats->priceOffset += (offCode-1) + (!offCode); + + /* match Length */ + stats->priceDumpsLength += ((matchLength >= MaxML)<<3) + ((matchLength >= 255+MaxML)<<4) + ((matchLength>=(1<<15))<<3); + stats->priceMatchLength += ZSTD_highbit(stats->matchLengthSum+1) - ZSTD_highbit(stats->matchLengthFreq[(matchLength >= MaxML) ? MaxML : matchLength]+1); + + if (litLength) { + /* literals */ + U32 u; + stats->priceLiteral += litLength * ZSTD_highbit(stats->litSum+1); + for (u=0; u < litLength; u++) + stats->priceLiteral -= ZSTD_highbit(stats->litFreq[literals[u]]+1); + + /* literal Length */ + stats->priceDumpsLength += ((litLength >= MaxLL)<<3) + ((litLength >= 255+MaxLL)<<4) + ((litLength>=(1<<15))<<3); + stats->priceLitLength += ZSTD_highbit(stats->litLengthSum+1) - ZSTD_highbit(stats->litLengthFreq[(litLength >= MaxLL) ?
MaxLL : litLength]+1); + } else { + stats->priceLitLength += ZSTD_highbit(stats->litLengthSum+1) - ZSTD_highbit(stats->litLengthFreq[0]+1); + } + + + if (offset == 0) stats->totalRepSum++; + stats->totalSeqSum++; + stats->totalMatchSum += matchLength; + stats->totalLitSum += litLength; + + U32 u; + /* literals */ + stats->litSum += litLength; + for (u=0; u < litLength; u++) + stats->litFreq[literals[u]]++; + + /* literal Length */ + stats->litLengthSum++; + if (litLength >= MaxLL) + stats->litLengthFreq[MaxLL]++; + else + stats->litLengthFreq[litLength]++; + + /* match offset */ + stats->offCodeSum++; + stats->offCodeFreq[offCode]++; + + /* match Length */ + stats->matchLengthSum++; + if (matchLength >= MaxML) + stats->matchLengthFreq[MaxML]++; + else + stats->matchLengthFreq[matchLength]++; +} + + + +#if defined (__cplusplus) +} +#endif + +#endif /* ZSTD_STATIC_H */