From: Yann Collet Date: Fri, 20 May 2016 12:36:36 +0000 (+0200) Subject: huff0 dynamic reduction X-Git-Tag: v0.7.0^2~65 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=f22a0d653d3292219f5cab3f839b1368f15e5de2;p=thirdparty%2Fzstd.git huff0 dynamic reduction --- diff --git a/lib/common/bitstream.h b/lib/common/bitstream.h index 48021dfaf..694df4e3b 100644 --- a/lib/common/bitstream.h +++ b/lib/common/bitstream.h @@ -162,14 +162,12 @@ MEM_STATIC unsigned BIT_highbit32 (register U32 val) # else /* Software version */ static const unsigned DeBruijnClz[32] = { 0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30, 8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31 }; U32 v = val; - unsigned r; v |= v >> 1; v |= v >> 2; v |= v >> 4; v |= v >> 8; v |= v >> 16; - r = DeBruijnClz[ (U32) (v * 0x07C4ACDDU) >> 27]; - return r; + return DeBruijnClz[ (U32) (v * 0x07C4ACDDU) >> 27]; # endif } @@ -375,7 +373,7 @@ MEM_STATIC size_t BIT_readBitsFast(BIT_DStream_t* bitD, U32 nbBits) if status == unfinished, internal register is filled with >= (sizeof(bitD->bitContainer)*8 - 7) bits */ MEM_STATIC BIT_DStream_status BIT_reloadDStream(BIT_DStream_t* bitD) { - if (bitD->bitsConsumed > (sizeof(bitD->bitContainer)*8)) /* should never happen */ + if (bitD->bitsConsumed > (sizeof(bitD->bitContainer)*8)) /* should not happen => corruption detected */ return BIT_DStream_overflow; if (bitD->ptr >= bitD->start + sizeof(bitD->bitContainer)) { diff --git a/lib/common/fse_static.h b/lib/common/fse_static.h index 0661dbd3e..797258f75 100644 --- a/lib/common/fse_static.h +++ b/lib/common/fse_static.h @@ -64,19 +64,22 @@ extern "C" { * FSE advanced API *******************************************/ size_t FSE_countFast(unsigned* count, unsigned* maxSymbolValuePtr, const void* src, size_t srcSize); -/* same as FSE_count(), but blindly trusts that all byte values within src are <= *maxSymbolValuePtr */ +/**< same as FSE_count(), but blindly trusts that all byte values within src are <= *maxSymbolValuePtr */ + +unsigned FSE_optimalTableLog_internal(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue, unsigned minus); +/**< same as FSE_optimalTableLog(), which used `minus==2` */ size_t FSE_buildCTable_raw (FSE_CTable* ct, unsigned nbBits); -/* build a fake FSE_CTable, designed to not compress an input, where each symbol uses nbBits */ +/**< build a fake FSE_CTable, designed to not compress an input, where each symbol uses nbBits */ size_t FSE_buildCTable_rle (FSE_CTable* ct, unsigned char symbolValue); -/* build a fake FSE_CTable, designed to compress always the same symbolValue */ +/**< build a fake FSE_CTable, designed to compress always the same symbolValue */ size_t FSE_buildDTable_raw (FSE_DTable* dt, unsigned nbBits); -/* build a fake FSE_DTable, designed to read an uncompressed bitstream where each symbol uses nbBits */ +/**< build a fake FSE_DTable, designed to read an uncompressed bitstream where each symbol uses nbBits */ size_t FSE_buildDTable_rle (FSE_DTable* dt, unsigned char symbolValue); -/* build a fake FSE_DTable, designed to always generate the same symbolValue */ +/**< build a fake FSE_DTable, designed to always generate the same symbolValue */ /* ***************************************** @@ -103,7 +106,7 @@ static void FSE_encodeSymbol(BIT_CStream_t* bitC, FSE_CState_t* CStatePtr, unsig static void FSE_flushCState(BIT_CStream_t* bitC, const FSE_CState_t* CStatePtr); -/*! +/**< These functions are inner components of FSE_compress_usingCTable(). They allow the creation of custom streams, mixing multiple tables and bit sources. @@ -163,7 +166,7 @@ static unsigned char FSE_decodeSymbol(FSE_DState_t* DStatePtr, BIT_DStream_t* bi static unsigned FSE_endOfDState(const FSE_DState_t* DStatePtr); -/*! +/**< Let's now decompose FSE_decompress_usingDTable() into its unitary components. You will decode FSE-encoded symbols from the bitStream, and also any other bitFields you put in, **in reverse order**. diff --git a/lib/common/huf.h b/lib/common/huf.h index d07080b15..a06fd3e18 100644 --- a/lib/common/huf.h +++ b/lib/common/huf.h @@ -77,6 +77,7 @@ HUF_decompress() : /* **************************************** * Tool functions ******************************************/ +#define HUF_BLOCKSIZE_MAX (128 * 1024) size_t HUF_compressBound(size_t size); /**< maximum compressed size */ /* Error Management */ diff --git a/lib/common/huf_static.h b/lib/common/huf_static.h index e68ec33dd..ea3eb62b8 100644 --- a/lib/common/huf_static.h +++ b/lib/common/huf_static.h @@ -85,15 +85,17 @@ size_t HUF_decompress4X6 (void* dst, size_t dstSize, const void* cSrc, size_t cS /*! HUF_compress() does the following: 1. count symbol occurrence from source[] into table count[] using FSE_count() -2. build Huffman table from count using HUF_buildCTable() -3. save Huffman table to memory buffer using HUF_writeCTable() -4. encode the data stream using HUF_compress4X_usingCTable() +2. (optional) refine tableLog using HUF_optimalTableLog() +3. build Huffman table from count using HUF_buildCTable() +4. save Huffman table to memory buffer using HUF_writeCTable() +5. encode the data stream using HUF_compress4X_usingCTable() The following API allows targeting specific sub-functions for advanced tasks. For example, it's possible to compress several blocks using the same 'CTable', or to save and regenerate 'CTable' using external methods. */ /* FSE_count() : find it within "fse.h" */ +unsigned HUF_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue); typedef struct HUF_CElt_s HUF_CElt; /* incomplete type */ size_t HUF_buildCTable (HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue, unsigned maxNbBits); size_t HUF_writeCTable (void* dst, size_t maxDstSize, const HUF_CElt* CTable, unsigned maxSymbolValue, unsigned huffLog); @@ -137,16 +139,19 @@ size_t HUF_readCTable (HUF_CElt* CTable, unsigned maxSymbolValue, const void* sr /* ************************************************************** * Constants ****************************************************************/ -#define HUF_ABSOLUTEMAX_TABLELOG 16 /* absolute limit of HUF_MAX_TABLELOG. Beyond that value, code does not work */ -#define HUF_MAX_TABLELOG 12 /* max configured tableLog (for static allocation); can be modified up to HUF_ABSOLUTEMAX_TABLELOG */ -#define HUF_DEFAULT_TABLELOG HUF_MAX_TABLELOG /* tableLog by default, when not specified */ -#define HUF_MAX_SYMBOL_VALUE 255 -#if (HUF_MAX_TABLELOG > HUF_ABSOLUTEMAX_TABLELOG) -# error "HUF_MAX_TABLELOG is too large !" +#define HUF_TABLELOG_ABSOLUTEMAX 16 /* absolute limit of HUF_MAX_TABLELOG. Beyond that value, code does not work */ +#define HUF_TABLELOG_MAX 12 /* max configured tableLog (for static allocation); can be modified up to HUF_ABSOLUTEMAX_TABLELOG */ +#define HUF_TABLELOG_DEFAULT HUF_TABLELOG_MAX /* tableLog by default, when not specified */ +#define HUF_SYMBOLVALUE_MAX 255 +#if (HUF_TABLELOG_MAX > HUF_TABLELOG_ABSOLUTEMAX) +# error "HUF_TABLELOG_MAX is too large !" #endif +/* ************************************************************** +* Needed by zstd in both compression and decompression +****************************************************************/ /*! HUF_readStats() : Read compact Huffman tree, saved by HUF_writeCTable(). `huffWeight` is destination buffer. @@ -188,17 +193,17 @@ MEM_STATIC size_t HUF_readStats(BYTE* huffWeight, size_t hwSize, U32* rankStats, } /* collect weight stats */ - memset(rankStats, 0, (HUF_ABSOLUTEMAX_TABLELOG + 1) * sizeof(U32)); + memset(rankStats, 0, (HUF_TABLELOG_ABSOLUTEMAX + 1) * sizeof(U32)); weightTotal = 0; { U32 n; for (n=0; n= HUF_ABSOLUTEMAX_TABLELOG) return ERROR(corruption_detected); + if (huffWeight[n] >= HUF_TABLELOG_ABSOLUTEMAX) return ERROR(corruption_detected); rankStats[huffWeight[n]]++; weightTotal += (1 << huffWeight[n]) >> 1; } } /* get last non-null symbol weight (implied, total must be 2^n) */ { U32 const tableLog = BIT_highbit32(weightTotal) + 1; - if (tableLog > HUF_ABSOLUTEMAX_TABLELOG) return ERROR(corruption_detected); + if (tableLog > HUF_TABLELOG_ABSOLUTEMAX) return ERROR(corruption_detected); *tableLogPtr = tableLog; /* determine last weight */ { U32 const total = 1 << tableLog; diff --git a/lib/compress/fse_compress.c b/lib/compress/fse_compress.c index cde30acff..f1068433a 100644 --- a/lib/compress/fse_compress.c +++ b/lib/compress/fse_compress.c @@ -442,7 +442,7 @@ FSE_CTable* FSE_createCTable (unsigned maxSymbolValue, unsigned tableLog) return (FSE_CTable*)malloc(size); } -void FSE_freeCTable (FSE_CTable* ct) { free(ct); } +void FSE_freeCTable (FSE_CTable* ct) { free(ct); } /* provides the minimum logSize to safely represent a distribution */ static unsigned FSE_minTableLog(size_t srcSize, unsigned maxSymbolValue) @@ -453,9 +453,9 @@ static unsigned FSE_minTableLog(size_t srcSize, unsigned maxSymbolValue) return minBits; } -unsigned FSE_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue) +unsigned FSE_optimalTableLog_internal(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue, unsigned minus) { - U32 maxBitsSrc = BIT_highbit32((U32)(srcSize - 1)) - 2; + U32 maxBitsSrc = BIT_highbit32((U32)(srcSize - 1)) - minus; U32 tableLog = maxTableLog; U32 minBits = FSE_minTableLog(srcSize, maxSymbolValue); if (tableLog==0) tableLog = FSE_DEFAULT_TABLELOG; @@ -466,6 +466,11 @@ unsigned FSE_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxS return tableLog; } +unsigned FSE_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue) +{ + return FSE_optimalTableLog_internal(maxTableLog, srcSize, maxSymbolValue, 2); +} + /* Secondary normalization method. To be used when primary method fails. */ diff --git a/lib/compress/huf_compress.c b/lib/compress/huf_compress.c index 695b66dae..a1004fba3 100644 --- a/lib/compress/huf_compress.c +++ b/lib/compress/huf_compress.c @@ -64,7 +64,7 @@ #include /* printf (debug) */ #include "huf_static.h" #include "bitstream.h" -#include "fse.h" /* header compression */ +#include "fse_static.h" /* header compression */ /* ************************************************************** @@ -73,6 +73,15 @@ #define HUF_STATIC_ASSERT(c) { enum { HUF_static_assert = 1/(int)(!!(c)) }; } /* use only *after* variable declarations */ +/* ************************************************************** +* Utils +****************************************************************/ +unsigned HUF_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue) +{ + return FSE_optimalTableLog_internal(maxTableLog, srcSize, maxSymbolValue, 1); +} + + /* ******************************************************* * HUF : Huffman block compression *********************************************************/ @@ -94,14 +103,14 @@ typedef struct nodeElt_s { size_t HUF_writeCTable (void* dst, size_t maxDstSize, const HUF_CElt* CTable, U32 maxSymbolValue, U32 huffLog) { - BYTE bitsToWeight[HUF_MAX_TABLELOG + 1]; - BYTE huffWeight[HUF_MAX_SYMBOL_VALUE + 1]; + BYTE bitsToWeight[HUF_TABLELOG_MAX + 1]; + BYTE huffWeight[HUF_SYMBOLVALUE_MAX + 1]; U32 n; BYTE* op = (BYTE*)dst; size_t size; /* check conditions */ - if (maxSymbolValue > HUF_MAX_SYMBOL_VALUE + 1) + if (maxSymbolValue > HUF_SYMBOLVALUE_MAX + 1) return ERROR(GENERIC); /* convert to weight */ @@ -159,19 +168,19 @@ size_t HUF_writeCTable (void* dst, size_t maxDstSize, size_t HUF_readCTable (HUF_CElt* CTable, U32 maxSymbolValue, const void* src, size_t srcSize) { - BYTE huffWeight[HUF_MAX_SYMBOL_VALUE + 1]; - U32 rankVal[HUF_ABSOLUTEMAX_TABLELOG + 1]; /* large enough for values from 0 to 16 */ + BYTE huffWeight[HUF_SYMBOLVALUE_MAX + 1]; + U32 rankVal[HUF_TABLELOG_ABSOLUTEMAX + 1]; /* large enough for values from 0 to 16 */ U32 tableLog = 0; size_t readSize; U32 nbSymbols = 0; //memset(huffWeight, 0, sizeof(huffWeight)); /* is not necessary, even though some analyzer complain ... */ /* get symbol weights */ - readSize = HUF_readStats(huffWeight, HUF_MAX_SYMBOL_VALUE+1, rankVal, &nbSymbols, &tableLog, src, srcSize); + readSize = HUF_readStats(huffWeight, HUF_SYMBOLVALUE_MAX+1, rankVal, &nbSymbols, &tableLog, src, srcSize); if (HUF_isError(readSize)) return readSize; /* check result */ - if (tableLog > HUF_MAX_TABLELOG) return ERROR(tableLog_tooLarge); + if (tableLog > HUF_TABLELOG_MAX) return ERROR(tableLog_tooLarge); if (nbSymbols > maxSymbolValue+1) return ERROR(maxSymbolValue_tooSmall); /* Prepare base value per rank */ @@ -189,12 +198,12 @@ size_t HUF_readCTable (HUF_CElt* CTable, U32 maxSymbolValue, const void* src, si }} /* fill val */ - { U16 nbPerRank[HUF_MAX_TABLELOG+1] = {0}; - U16 valPerRank[HUF_MAX_TABLELOG+1] = {0}; + { U16 nbPerRank[HUF_TABLELOG_MAX+1] = {0}; + U16 valPerRank[HUF_TABLELOG_MAX+1] = {0}; { U32 n; for (n=0; n0; n--) { + U32 n; for (n=HUF_TABLELOG_MAX; n>0; n--) { valPerRank[n] = min; /* get starting value within each rank */ min += nbPerRank[n]; min >>= 1; @@ -229,7 +238,7 @@ static U32 HUF_setMaxHeight(nodeElt* huffNode, U32 lastNonNull, U32 maxNbBits) /* repay normalized cost */ { U32 const noSymbol = 0xF0F0F0F0; - U32 rankLast[HUF_MAX_TABLELOG+1]; + U32 rankLast[HUF_TABLELOG_MAX+1]; int pos; /* Get pos of last (smallest) symbol per rank */ @@ -253,7 +262,7 @@ static U32 HUF_setMaxHeight(nodeElt* huffNode, U32 lastNonNull, U32 maxNbBits) if (highTotal <= lowTotal) break; } } /* only triggered when no more rank 1 symbol left => find closest one (note : there is necessarily at least one !) */ - while ((nBitsToDecrease<=HUF_MAX_TABLELOG) && (rankLast[nBitsToDecrease] == noSymbol)) /* HUF_MAX_TABLELOG test just to please gcc 5+; but it should not be necessary */ + while ((nBitsToDecrease<=HUF_TABLELOG_MAX) && (rankLast[nBitsToDecrease] == noSymbol)) /* HUF_MAX_TABLELOG test just to please gcc 5+; but it should not be necessary */ nBitsToDecrease ++; totalCost -= 1 << (nBitsToDecrease-1); if (rankLast[nBitsToDecrease-1] == noSymbol) @@ -312,10 +321,10 @@ static void HUF_sort(nodeElt* huffNode, const U32* count, U32 maxSymbolValue) } -#define STARTNODE (HUF_MAX_SYMBOL_VALUE+1) +#define STARTNODE (HUF_SYMBOLVALUE_MAX+1) size_t HUF_buildCTable (HUF_CElt* tree, const U32* count, U32 maxSymbolValue, U32 maxNbBits) { - nodeElt huffNode0[2*HUF_MAX_SYMBOL_VALUE+1 +1]; + nodeElt huffNode0[2*HUF_SYMBOLVALUE_MAX+1 +1]; nodeElt* huffNode = huffNode0 + 1; U32 n, nonNullRank; int lowS, lowN; @@ -323,8 +332,8 @@ size_t HUF_buildCTable (HUF_CElt* tree, const U32* count, U32 maxSymbolValue, U3 U32 nodeRoot; /* safety checks */ - if (maxNbBits == 0) maxNbBits = HUF_DEFAULT_TABLELOG; - if (maxSymbolValue > HUF_MAX_SYMBOL_VALUE) return ERROR(GENERIC); + if (maxNbBits == 0) maxNbBits = HUF_TABLELOG_DEFAULT; + if (maxSymbolValue > HUF_SYMBOLVALUE_MAX) return ERROR(GENERIC); memset(huffNode0, 0, sizeof(huffNode0)); /* sort, decreasing order */ @@ -360,9 +369,9 @@ size_t HUF_buildCTable (HUF_CElt* tree, const U32* count, U32 maxSymbolValue, U3 maxNbBits = HUF_setMaxHeight(huffNode, nonNullRank, maxNbBits); /* fill result into tree (val, nbBits) */ - { U16 nbPerRank[HUF_MAX_TABLELOG+1] = {0}; - U16 valPerRank[HUF_MAX_TABLELOG+1] = {0}; - if (maxNbBits > HUF_MAX_TABLELOG) return ERROR(GENERIC); /* check fit into table */ + { U16 nbPerRank[HUF_TABLELOG_MAX+1] = {0}; + U16 valPerRank[HUF_TABLELOG_MAX+1] = {0}; + if (maxNbBits > HUF_TABLELOG_MAX) return ERROR(GENERIC); /* check fit into table */ for (n=0; n<=nonNullRank; n++) nbPerRank[huffNode[n].nbBits]++; /* determine stating value per rank */ @@ -391,10 +400,10 @@ size_t HUF_compressBound(size_t size) { return HUF_COMPRESSBOUND(size); } #define HUF_FLUSHBITS(s) (fast ? BIT_flushBitsFast(s) : BIT_flushBits(s)) #define HUF_FLUSHBITS_1(stream) \ - if (sizeof((stream)->bitContainer)*8 < HUF_MAX_TABLELOG*2+7) HUF_FLUSHBITS(stream) + if (sizeof((stream)->bitContainer)*8 < HUF_TABLELOG_MAX*2+7) HUF_FLUSHBITS(stream) #define HUF_FLUSHBITS_2(stream) \ - if (sizeof((stream)->bitContainer)*8 < HUF_MAX_TABLELOG*4+7) HUF_FLUSHBITS(stream) + if (sizeof((stream)->bitContainer)*8 < HUF_TABLELOG_MAX*4+7) HUF_FLUSHBITS(stream) size_t HUF_compress1X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable) { @@ -441,44 +450,47 @@ size_t HUF_compress1X_usingCTable(void* dst, size_t dstSize, const void* src, si size_t HUF_compress4X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable) { - size_t segmentSize = (srcSize+3)/4; /* first 3 segments */ + size_t const segmentSize = (srcSize+3)/4; /* first 3 segments */ const BYTE* ip = (const BYTE*) src; const BYTE* const iend = ip + srcSize; BYTE* const ostart = (BYTE*) dst; BYTE* const oend = ostart + dstSize; BYTE* op = ostart; - size_t errorCode; if (dstSize < 6 + 1 + 1 + 1 + 8) return 0; /* minimum space to compress successfully */ if (srcSize < 12) return 0; /* no saving possible : too small input */ op += 6; /* jumpTable */ - errorCode = HUF_compress1X_usingCTable(op, oend-op, ip, segmentSize, CTable); - if (HUF_isError(errorCode)) return errorCode; - if (errorCode==0) return 0; - MEM_writeLE16(ostart, (U16)errorCode); + { size_t const cSize = HUF_compress1X_usingCTable(op, oend-op, ip, segmentSize, CTable); + if (HUF_isError(cSize)) return cSize; + if (cSize==0) return 0; + MEM_writeLE16(ostart, (U16)cSize); + op += cSize; + } ip += segmentSize; - op += errorCode; - errorCode = HUF_compress1X_usingCTable(op, oend-op, ip, segmentSize, CTable); - if (HUF_isError(errorCode)) return errorCode; - if (errorCode==0) return 0; - MEM_writeLE16(ostart+2, (U16)errorCode); + { size_t const cSize = HUF_compress1X_usingCTable(op, oend-op, ip, segmentSize, CTable); + if (HUF_isError(cSize)) return cSize; + if (cSize==0) return 0; + MEM_writeLE16(ostart+2, (U16)cSize); + op += cSize; + } ip += segmentSize; - op += errorCode; - errorCode = HUF_compress1X_usingCTable(op, oend-op, ip, segmentSize, CTable); - if (HUF_isError(errorCode)) return errorCode; - if (errorCode==0) return 0; - MEM_writeLE16(ostart+4, (U16)errorCode); + { size_t const cSize = HUF_compress1X_usingCTable(op, oend-op, ip, segmentSize, CTable); + if (HUF_isError(cSize)) return cSize; + if (cSize==0) return 0; + MEM_writeLE16(ostart+4, (U16)cSize); + op += cSize; + } ip += segmentSize; - op += errorCode; - errorCode = HUF_compress1X_usingCTable(op, oend-op, ip, iend-ip, CTable); - if (HUF_isError(errorCode)) return errorCode; - if (errorCode==0) return 0; + { size_t const cSize = HUF_compress1X_usingCTable(op, oend-op, ip, iend-ip, CTable); + if (HUF_isError(cSize)) return cSize; + if (cSize==0) return 0; + op += cSize; + } - op += errorCode; return op-ostart; } @@ -493,43 +505,46 @@ static size_t HUF_compress_internal ( BYTE* const oend = ostart + dstSize; BYTE* op = ostart; - U32 count[HUF_MAX_SYMBOL_VALUE+1]; - HUF_CElt CTable[HUF_MAX_SYMBOL_VALUE+1]; - size_t errorCode; + U32 count[HUF_SYMBOLVALUE_MAX+1]; + HUF_CElt CTable[HUF_SYMBOLVALUE_MAX+1]; /* checks & inits */ - if (srcSize < 1) return 0; /* Uncompressed - note : 1 means rle, so first byte must be correct */ - if (dstSize < 1) return 0; /* not compressible within dst budget */ - if (srcSize > 128 * 1024) return ERROR(srcSize_wrong); /* current block size limit */ - if (huffLog > HUF_MAX_TABLELOG) return ERROR(tableLog_tooLarge); - if (!maxSymbolValue) maxSymbolValue = HUF_MAX_SYMBOL_VALUE; - if (!huffLog) huffLog = HUF_DEFAULT_TABLELOG; + if (!srcSize) return 0; /* Uncompressed (note : 1 means rle, so first byte must be correct) */ + if (!dstSize) return 0; /* cannot fit within dst budget */ + if (srcSize > HUF_BLOCKSIZE_MAX) return ERROR(srcSize_wrong); /* current block size limit */ + if (huffLog > HUF_TABLELOG_MAX) return ERROR(tableLog_tooLarge); + if (!maxSymbolValue) maxSymbolValue = HUF_SYMBOLVALUE_MAX; + if (!huffLog) huffLog = HUF_TABLELOG_DEFAULT; /* Scan input and build symbol stats */ - errorCode = FSE_count (count, &maxSymbolValue, (const BYTE*)src, srcSize); - if (HUF_isError(errorCode)) return errorCode; - if (errorCode == srcSize) { *ostart = ((const BYTE*)src)[0]; return 1; } - if (errorCode <= (srcSize >> 7)+1) return 0; /* Heuristic : not compressible enough */ + { size_t const largest = FSE_count (count, &maxSymbolValue, (const BYTE*)src, srcSize); + if (HUF_isError(largest)) return largest; + if (largest == srcSize) { *ostart = ((const BYTE*)src)[0]; return 1; } /* rle */ + if (largest <= (srcSize >> 7)+1) return 0; /* Fast heuristic : not compressible enough */ + } /* Build Huffman Tree */ - errorCode = HUF_buildCTable (CTable, count, maxSymbolValue, huffLog); - if (HUF_isError(errorCode)) return errorCode; - huffLog = (U32)errorCode; + huffLog = HUF_optimalTableLog(huffLog, srcSize, maxSymbolValue); + { size_t const maxBits = HUF_buildCTable (CTable, count, maxSymbolValue, huffLog); + if (HUF_isError(maxBits)) return maxBits; + huffLog = (U32)maxBits; + } /* Write table description header */ - errorCode = HUF_writeCTable (op, dstSize, CTable, maxSymbolValue, huffLog); - if (HUF_isError(errorCode)) return errorCode; - if (errorCode + 12 >= srcSize) return 0; /* not useful to try compression */ - op += errorCode; + { size_t const hSize = HUF_writeCTable (op, dstSize, CTable, maxSymbolValue, huffLog); + if (HUF_isError(hSize)) return hSize; + if (hSize + 12 >= srcSize) return 0; /* not useful to try compression */ + op += hSize; + } /* Compress */ - if (singleStream) - errorCode = HUF_compress1X_usingCTable(op, oend - op, src, srcSize, CTable); /* single segment */ - else - errorCode = HUF_compress4X_usingCTable(op, oend - op, src, srcSize, CTable); - if (HUF_isError(errorCode)) return errorCode; - if (errorCode==0) return 0; - op += errorCode; + { size_t const cSize = (singleStream) ? + HUF_compress1X_usingCTable(op, oend - op, src, srcSize, CTable) : /* single segment */ + HUF_compress4X_usingCTable(op, oend - op, src, srcSize, CTable); + if (HUF_isError(cSize)) return cSize; + if (cSize==0) return 0; /* uncompressible */ + op += cSize; + } /* check compressibility */ if ((size_t)(op-ostart) >= srcSize-1) @@ -556,5 +571,5 @@ size_t HUF_compress2 (void* dst, size_t dstSize, size_t HUF_compress (void* dst, size_t maxDstSize, const void* src, size_t srcSize) { - return HUF_compress2(dst, maxDstSize, src, (U32)srcSize, 255, HUF_DEFAULT_TABLELOG); + return HUF_compress2(dst, maxDstSize, src, (U32)srcSize, 255, HUF_TABLELOG_DEFAULT); } diff --git a/lib/decompress/huf_decompress.c b/lib/decompress/huf_decompress.c index e42f76300..01e9c07b2 100644 --- a/lib/decompress/huf_decompress.c +++ b/lib/decompress/huf_decompress.c @@ -92,8 +92,8 @@ typedef struct { BYTE symbol; BYTE weight; } sortedSymbol_t; size_t HUF_readDTableX2 (U16* DTable, const void* src, size_t srcSize) { - BYTE huffWeight[HUF_MAX_SYMBOL_VALUE + 1]; - U32 rankVal[HUF_ABSOLUTEMAX_TABLELOG + 1]; /* large enough for values from 0 to 16 */ + BYTE huffWeight[HUF_SYMBOLVALUE_MAX + 1]; + U32 rankVal[HUF_TABLELOG_ABSOLUTEMAX + 1]; /* large enough for values from 0 to 16 */ U32 tableLog = 0; size_t iSize; U32 nbSymbols = 0; @@ -105,7 +105,7 @@ size_t HUF_readDTableX2 (U16* DTable, const void* src, size_t srcSize) HUF_STATIC_ASSERT(sizeof(HUF_DEltX2) == sizeof(U16)); /* if compilation fails here, assertion is false */ //memset(huffWeight, 0, sizeof(huffWeight)); /* is not necessary, even though some analyzer complain ... */ - iSize = HUF_readStats(huffWeight, HUF_MAX_SYMBOL_VALUE + 1, rankVal, &nbSymbols, &tableLog, src, srcSize); + iSize = HUF_readStats(huffWeight, HUF_SYMBOLVALUE_MAX + 1, rankVal, &nbSymbols, &tableLog, src, srcSize); if (HUF_isError(iSize)) return iSize; /* check result */ @@ -148,7 +148,7 @@ static BYTE HUF_decodeSymbolX2(BIT_DStream_t* Dstream, const HUF_DEltX2* dt, con *ptr++ = HUF_decodeSymbolX2(DStreamPtr, dt, dtLog) #define HUF_DECODE_SYMBOLX2_1(ptr, DStreamPtr) \ - if (MEM_64bits() || (HUF_MAX_TABLELOG<=12)) \ + if (MEM_64bits() || (HUF_TABLELOG_MAX<=12)) \ HUF_DECODE_SYMBOLX2_0(ptr, DStreamPtr) #define HUF_DECODE_SYMBOLX2_2(ptr, DStreamPtr) \ @@ -203,7 +203,7 @@ size_t HUF_decompress1X2_usingDTable( size_t HUF_decompress1X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize) { - HUF_CREATE_STATIC_DTABLEX2(DTable, HUF_MAX_TABLELOG); + HUF_CREATE_STATIC_DTABLEX2(DTable, HUF_TABLELOG_MAX); const BYTE* ip = (const BYTE*) cSrc; size_t const errorCode = HUF_readDTableX2 (DTable, cSrc, cSrcSize); @@ -312,7 +312,7 @@ size_t HUF_decompress4X2_usingDTable( size_t HUF_decompress4X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize) { - HUF_CREATE_STATIC_DTABLEX2(DTable, HUF_MAX_TABLELOG); + HUF_CREATE_STATIC_DTABLEX2(DTable, HUF_TABLELOG_MAX); const BYTE* ip = (const BYTE*) cSrc; size_t const errorCode = HUF_readDTableX2 (DTable, cSrc, cSrcSize); @@ -335,7 +335,7 @@ static void HUF_fillDTableX4Level2(HUF_DEltX4* DTable, U32 sizeLog, const U32 co U32 nbBitsBaseline, U16 baseSeq) { HUF_DEltX4 DElt; - U32 rankVal[HUF_ABSOLUTEMAX_TABLELOG + 1]; + U32 rankVal[HUF_TABLELOG_ABSOLUTEMAX + 1]; /* get pre-calculated rankVal */ memcpy(rankVal, rankValOrigin, sizeof(rankVal)); @@ -369,14 +369,14 @@ static void HUF_fillDTableX4Level2(HUF_DEltX4* DTable, U32 sizeLog, const U32 co }} } -typedef U32 rankVal_t[HUF_ABSOLUTEMAX_TABLELOG][HUF_ABSOLUTEMAX_TABLELOG + 1]; +typedef U32 rankVal_t[HUF_TABLELOG_ABSOLUTEMAX][HUF_TABLELOG_ABSOLUTEMAX + 1]; static void HUF_fillDTableX4(HUF_DEltX4* DTable, const U32 targetLog, const sortedSymbol_t* sortedList, const U32 sortedListSize, const U32* rankStart, rankVal_t rankValOrigin, const U32 maxWeight, const U32 nbBitsBaseline) { - U32 rankVal[HUF_ABSOLUTEMAX_TABLELOG + 1]; + U32 rankVal[HUF_TABLELOG_ABSOLUTEMAX + 1]; const int scaleLog = nbBitsBaseline - targetLog; /* note : targetLog >= srcLog, hence scaleLog <= 1 */ const U32 minBits = nbBitsBaseline - maxWeight; U32 s; @@ -415,10 +415,10 @@ static void HUF_fillDTableX4(HUF_DEltX4* DTable, const U32 targetLog, size_t HUF_readDTableX4 (U32* DTable, const void* src, size_t srcSize) { - BYTE weightList[HUF_MAX_SYMBOL_VALUE + 1]; - sortedSymbol_t sortedSymbol[HUF_MAX_SYMBOL_VALUE + 1]; - U32 rankStats[HUF_ABSOLUTEMAX_TABLELOG + 1] = { 0 }; - U32 rankStart0[HUF_ABSOLUTEMAX_TABLELOG + 2] = { 0 }; + BYTE weightList[HUF_SYMBOLVALUE_MAX + 1]; + sortedSymbol_t sortedSymbol[HUF_SYMBOLVALUE_MAX + 1]; + U32 rankStats[HUF_TABLELOG_ABSOLUTEMAX + 1] = { 0 }; + U32 rankStart0[HUF_TABLELOG_ABSOLUTEMAX + 2] = { 0 }; U32* const rankStart = rankStart0+1; rankVal_t rankVal; U32 tableLog, maxW, sizeOfSort, nbSymbols; @@ -428,10 +428,10 @@ size_t HUF_readDTableX4 (U32* DTable, const void* src, size_t srcSize) HUF_DEltX4* const dt = ((HUF_DEltX4*)dtPtr) + 1; HUF_STATIC_ASSERT(sizeof(HUF_DEltX4) == sizeof(U32)); /* if compilation fails here, assertion is false */ - if (memLog > HUF_ABSOLUTEMAX_TABLELOG) return ERROR(tableLog_tooLarge); + if (memLog > HUF_TABLELOG_ABSOLUTEMAX) return ERROR(tableLog_tooLarge); //memset(weightList, 0, sizeof(weightList)); /* is not necessary, even though some analyzer complain ... */ - iSize = HUF_readStats(weightList, HUF_MAX_SYMBOL_VALUE + 1, rankStats, &nbSymbols, &tableLog, src, srcSize); + iSize = HUF_readStats(weightList, HUF_SYMBOLVALUE_MAX + 1, rankStats, &nbSymbols, &tableLog, src, srcSize); if (HUF_isError(iSize)) return iSize; /* check result */ @@ -517,7 +517,7 @@ static U32 HUF_decodeLastSymbolX4(void* op, BIT_DStream_t* DStream, const HUF_DE ptr += HUF_decodeSymbolX4(ptr, DStreamPtr, dt, dtLog) #define HUF_DECODE_SYMBOLX4_1(ptr, DStreamPtr) \ - if (MEM_64bits() || (HUF_MAX_TABLELOG<=12)) \ + if (MEM_64bits() || (HUF_TABLELOG_MAX<=12)) \ ptr += HUF_decodeSymbolX4(ptr, DStreamPtr, dt, dtLog) #define HUF_DECODE_SYMBOLX4_2(ptr, DStreamPtr) \ @@ -580,7 +580,7 @@ size_t HUF_decompress1X4_usingDTable( size_t HUF_decompress1X4 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize) { - HUF_CREATE_STATIC_DTABLEX4(DTable, HUF_MAX_TABLELOG); + HUF_CREATE_STATIC_DTABLEX4(DTable, HUF_TABLELOG_MAX); const BYTE* ip = (const BYTE*) cSrc; size_t const hSize = HUF_readDTableX4 (DTable, cSrc, cSrcSize); @@ -688,7 +688,7 @@ size_t HUF_decompress4X4_usingDTable( size_t HUF_decompress4X4 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize) { - HUF_CREATE_STATIC_DTABLEX4(DTable, HUF_MAX_TABLELOG); + HUF_CREATE_STATIC_DTABLEX4(DTable, HUF_TABLELOG_MAX); const BYTE* ip = (const BYTE*) cSrc; size_t hSize = HUF_readDTableX4 (DTable, cSrc, cSrcSize); @@ -716,7 +716,7 @@ static void HUF_fillDTableX6LevelN(HUF_DDescX6* DDescription, HUF_DSeqX6* DSeque const int scaleLog = nbBitsBaseline - sizeLog; /* note : targetLog >= (nbBitsBaseline-1), hence scaleLog <= 1 */ const int minBits = nbBitsBaseline - maxWeight; const U32 level = DDesc.nbBytes; - U32 rankVal[HUF_ABSOLUTEMAX_TABLELOG + 1]; + U32 rankVal[HUF_TABLELOG_ABSOLUTEMAX + 1]; U32 symbolStartPos, s; /* local rankVal, will be modified */ @@ -766,20 +766,20 @@ static void HUF_fillDTableX6LevelN(HUF_DDescX6* DDescription, HUF_DSeqX6* DSeque /* note : same preparation as X4 */ size_t HUF_readDTableX6 (U32* DTable, const void* src, size_t srcSize) { - BYTE weightList[HUF_MAX_SYMBOL_VALUE + 1]; - sortedSymbol_t sortedSymbol[HUF_MAX_SYMBOL_VALUE + 1]; - U32 rankStats[HUF_ABSOLUTEMAX_TABLELOG + 1] = { 0 }; - U32 rankStart0[HUF_ABSOLUTEMAX_TABLELOG + 2] = { 0 }; + BYTE weightList[HUF_SYMBOLVALUE_MAX + 1]; + sortedSymbol_t sortedSymbol[HUF_SYMBOLVALUE_MAX + 1]; + U32 rankStats[HUF_TABLELOG_ABSOLUTEMAX + 1] = { 0 }; + U32 rankStart0[HUF_TABLELOG_ABSOLUTEMAX + 2] = { 0 }; U32* const rankStart = rankStart0+1; U32 tableLog, maxW, sizeOfSort, nbSymbols; rankVal_t rankVal; const U32 memLog = DTable[0]; size_t iSize; - if (memLog > HUF_ABSOLUTEMAX_TABLELOG) return ERROR(tableLog_tooLarge); + if (memLog > HUF_TABLELOG_ABSOLUTEMAX) return ERROR(tableLog_tooLarge); //memset(weightList, 0, sizeof(weightList)); /* is not necessary, even though some analyzer complain ... */ - iSize = HUF_readStats(weightList, HUF_MAX_SYMBOL_VALUE + 1, rankStats, &nbSymbols, &tableLog, src, srcSize); + iSize = HUF_readStats(weightList, HUF_SYMBOLVALUE_MAX + 1, rankStats, &nbSymbols, &tableLog, src, srcSize); if (HUF_isError(iSize)) return iSize; /* check result */ @@ -838,7 +838,7 @@ size_t HUF_readDTableX6 (U32* DTable, const void* src, size_t srcSize) DDesc.nbBits = 0; DDesc.nbBytes = 0; HUF_fillDTableX6LevelN(DDescription, DSequence, memLog, - (const U32 (*)[HUF_ABSOLUTEMAX_TABLELOG + 1])rankVal, 0, 1, maxW, + (const U32 (*)[HUF_TABLELOG_ABSOLUTEMAX + 1])rankVal, 0, 1, maxW, sortedSymbol, sizeOfSort, rankStart0, tableLog+1, DSeq, DDesc); } @@ -879,7 +879,7 @@ static U32 HUF_decodeLastSymbolsX6(void* op, U32 const maxL, BIT_DStream_t* DStr ptr += HUF_decodeSymbolX6(ptr, DStreamPtr, dd, ds, dtLog) #define HUF_DECODE_SYMBOLX6_1(ptr, DStreamPtr) \ - if (MEM_64bits() || (HUF_MAX_TABLELOG<=12)) \ + if (MEM_64bits() || (HUF_TABLELOG_MAX<=12)) \ HUF_DECODE_SYMBOLX6_0(ptr, DStreamPtr) #define HUF_DECODE_SYMBOLX6_2(ptr, DStreamPtr) \ @@ -939,7 +939,7 @@ size_t HUF_decompress1X6_usingDTable( size_t HUF_decompress1X6 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize) { - HUF_CREATE_STATIC_DTABLEX6(DTable, HUF_MAX_TABLELOG); + HUF_CREATE_STATIC_DTABLEX6(DTable, HUF_TABLELOG_MAX); const BYTE* ip = (const BYTE*) cSrc; size_t const hSize = HUF_readDTableX6 (DTable, cSrc, cSrcSize); @@ -1068,7 +1068,7 @@ size_t HUF_decompress4X6_usingDTable( size_t HUF_decompress4X6 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize) { - HUF_CREATE_STATIC_DTABLEX6(DTable, HUF_MAX_TABLELOG); + HUF_CREATE_STATIC_DTABLEX6(DTable, HUF_TABLELOG_MAX); const BYTE* ip = (const BYTE*) cSrc; size_t const hSize = HUF_readDTableX6 (DTable, cSrc, cSrcSize);