From: Yann Collet Date: Sat, 30 Jan 2016 23:58:06 +0000 (+0100) Subject: minor compression gain X-Git-Tag: v0.5.0~1^2~3^2~14 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=e93d6ced17e6ecbd8b07cb5edd0f6046d5faa0ae;p=thirdparty%2Fzstd.git minor compression gain --- diff --git a/lib/fse.c b/lib/fse.c index 1f382ed10..606dcbcc3 100644 --- a/lib/fse.c +++ b/lib/fse.c @@ -205,7 +205,7 @@ size_t FSE_buildCTable(FSE_CTable* ct, const short* normalizedCounter, unsigned break; case -1: case 1: - symbolTT[s].deltaNbBits = tableLog << 16; + symbolTT[s].deltaNbBits = (tableLog << 16) - (1<>= 16; bitCount -= 16; - } - } + } } /* flush remaining bitStream */ if ((!writeIsSafe) && (out > oend - 2)) return ERROR(dstSize_tooSmall); /* Buffer overflow */ @@ -596,8 +592,7 @@ static size_t FSE_count_parallel(unsigned* count, unsigned* maxSymbolValuePtr, while (ipmaxSymbolValue; s--) - { + for (s=255; s>maxSymbolValue; s--) { Counting1[s] += Counting2[s] + Counting3[s] + Counting4[s]; if (Counting1[s]) return ERROR(maxSymbolValue_tooSmall); } } @@ -854,7 +849,7 @@ size_t FSE_buildCTable_raw (FSE_CTable* ct, unsigned nbBits) /* Build Symbol Transformation Table */ for (s=0; s<=maxSymbolValue; s++) { - symbolTT[s].deltaNbBits = nbBits << 16; + symbolTT[s].deltaNbBits = (nbBits << 16) - (1 << nbBits); symbolTT[s].deltaFindState = s-1; } diff --git a/lib/fse_static.h b/lib/fse_static.h index eb031639c..ca303db84 100644 --- a/lib/fse_static.h +++ b/lib/fse_static.h @@ -239,6 +239,19 @@ MEM_STATIC void FSE_initCState(FSE_CState_t* statePtr, const FSE_CTable* ct) statePtr->stateLog = tableLog; } +MEM_STATIC void FSE_initCState2(FSE_CState_t* statePtr, const FSE_CTable* ct, U32 symbol) +{ + FSE_initCState(statePtr, ct); + { + const FSE_symbolCompressionTransform symbolTT = ((const FSE_symbolCompressionTransform*)(statePtr->symbolTT))[symbol]; + const U16* stateTable = (const U16*)(statePtr->stateTable); + U32 nbBitsOut = (U32)((symbolTT.deltaNbBits + (1<<15)) >> 16); + statePtr->value = (nbBitsOut << 16) - symbolTT.deltaNbBits; + statePtr->value = stateTable[(statePtr->value >> nbBitsOut) + symbolTT.deltaFindState]; + + } +} + MEM_STATIC void FSE_encodeSymbol(BIT_CStream_t* bitC, FSE_CState_t* statePtr, U32 symbol) { const FSE_symbolCompressionTransform symbolTT = ((const FSE_symbolCompressionTransform*)(statePtr->symbolTT))[symbol]; @@ -277,6 +290,17 @@ MEM_STATIC void FSE_initDState(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD, con DStatePtr->table = dt + 1; } +MEM_STATIC size_t FSE_getStateValue(FSE_DState_t* DStatePtr) +{ + return DStatePtr->state; +} + +MEM_STATIC BYTE FSE_peakSymbol(FSE_DState_t* DStatePtr) +{ + const FSE_decode_t DInfo = ((const FSE_decode_t*)(DStatePtr->table))[DStatePtr->state]; + return DInfo.symbol; +} + MEM_STATIC BYTE FSE_decodeSymbol(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD) { const FSE_decode_t DInfo = ((const FSE_decode_t*)(DStatePtr->table))[DStatePtr->state]; diff --git a/lib/zstd_compress.c b/lib/zstd_compress.c index bc78d2dd2..eb3031dcb 100644 --- a/lib/zstd_compress.c +++ b/lib/zstd_compress.c @@ -547,9 +547,13 @@ size_t ZSTD_compressSequences(ZSTD_CCtx* zc, if ((oend-op) < MIN_SEQUENCES_SIZE) return ERROR(dstSize_tooSmall); MEM_writeLE16(op, (U16)nbSeq); op+=2; - seqHead = op; + + if (nbSeq==0) goto _check_compressibility; /* dumps : contains rests of large lengths */ + if ((oend-op) < 3 /* dumps */ + 1 /*seqHead*/) + return ERROR(dstSize_tooSmall); + seqHead = op; { size_t dumpsLength = seqStorePtr->dumps - seqStorePtr->dumpsStart; if (dumpsLength < 512) { @@ -572,9 +576,9 @@ size_t ZSTD_compressSequences(ZSTD_CCtx* zc, /* CTable for Literal Lengths */ max = MaxLL; - mostFrequent = FSE_countFast(count, &max, seqStorePtr->litLengthStart, nbSeq); + mostFrequent = FSE_countFast(count, &max, llTable, nbSeq); if ((mostFrequent == nbSeq) && (nbSeq > 2)) { - *op++ = *(seqStorePtr->litLengthStart); + *op++ = llTable[0]; FSE_buildCTable_rle(CTable_LitLength, (BYTE)max); LLtype = FSE_ENCODING_RLE; } else if ((zc->flagStaticTables) && (nbSeq < MAX_SEQ_FOR_STATIC_FSE)) { @@ -584,8 +588,10 @@ size_t ZSTD_compressSequences(ZSTD_CCtx* zc, LLtype = FSE_ENCODING_RAW; } else { size_t NCountSize; + size_t nbSeq_1 = nbSeq; U32 tableLog = FSE_optimalTableLog(LLFSELog, nbSeq, max); - FSE_normalizeCount(norm, tableLog, count, nbSeq, max); + if (count[llTable[nbSeq-1]]>1) { count[llTable[nbSeq-1]]--; nbSeq_1--; } + FSE_normalizeCount(norm, tableLog, count, nbSeq_1, max); NCountSize = FSE_writeNCount(op, oend-op, norm, max, tableLog); /* overflow protected */ if (FSE_isError(NCountSize)) return ERROR(GENERIC); op += NCountSize; @@ -603,7 +609,7 @@ size_t ZSTD_compressSequences(ZSTD_CCtx* zc, max = MaxOff; mostFrequent = FSE_countFast(count, &max, offCodeTable, nbSeq); if ((mostFrequent == nbSeq) && (nbSeq > 2)) { - *op++ = *offCodeTable; + *op++ = offCodeTable[0]; FSE_buildCTable_rle(CTable_OffsetBits, (BYTE)max); Offtype = FSE_ENCODING_RLE; } else if ((zc->flagStaticTables) && (nbSeq < MAX_SEQ_FOR_STATIC_FSE)) { @@ -613,8 +619,10 @@ size_t ZSTD_compressSequences(ZSTD_CCtx* zc, Offtype = FSE_ENCODING_RAW; } else { size_t NCountSize; + size_t nbSeq_1 = nbSeq; U32 tableLog = FSE_optimalTableLog(OffFSELog, nbSeq, max); - FSE_normalizeCount(norm, tableLog, count, nbSeq, max); + if (count[offCodeTable[nbSeq-1]]>1) { count[offCodeTable[nbSeq-1]]--; nbSeq_1--; } + FSE_normalizeCount(norm, tableLog, count, nbSeq_1, max); NCountSize = FSE_writeNCount(op, oend-op, norm, max, tableLog); /* overflow protected */ if (FSE_isError(NCountSize)) return ERROR(GENERIC); op += NCountSize; @@ -624,9 +632,9 @@ size_t ZSTD_compressSequences(ZSTD_CCtx* zc, /* CTable for MatchLengths */ max = MaxML; - mostFrequent = FSE_countFast(count, &max, seqStorePtr->matchLengthStart, nbSeq); + mostFrequent = FSE_countFast(count, &max, mlTable, nbSeq); if ((mostFrequent == nbSeq) && (nbSeq > 2)) { - *op++ = *seqStorePtr->matchLengthStart; + *op++ = *mlTable; FSE_buildCTable_rle(CTable_MatchLength, (BYTE)max); MLtype = FSE_ENCODING_RLE; } else if ((zc->flagStaticTables) && (nbSeq < MAX_SEQ_FOR_STATIC_FSE)) { @@ -659,22 +667,26 @@ size_t ZSTD_compressSequences(ZSTD_CCtx* zc, errorCode = BIT_initCStream(&blockStream, op, oend-op); if (ERR_isError(errorCode)) return ERROR(dstSize_tooSmall); /* not enough space remaining */ - FSE_initCState(&stateMatchLength, CTable_MatchLength); - FSE_initCState(&stateOffsetBits, CTable_OffsetBits); - FSE_initCState(&stateLitLength, CTable_LitLength); - for (i=(int)nbSeq-1; i>=0; i--) { + /* first symbols */ + FSE_initCState2(&stateMatchLength, CTable_MatchLength, mlTable[nbSeq-1]); + FSE_initCState2(&stateOffsetBits, CTable_OffsetBits, offCodeTable[nbSeq-1]); + FSE_initCState2(&stateLitLength, CTable_LitLength, llTable[nbSeq-1]); + BIT_addBits(&blockStream, offsetTable[nbSeq-1], offCodeTable[nbSeq-1] ? (offCodeTable[nbSeq-1]-1) : 0); + BIT_flushBits(&blockStream); + + for (i=(int)nbSeq-2; i>=0; i--) { BYTE mlCode = mlTable[i]; U32 offset = offsetTable[i]; BYTE offCode = offCodeTable[i]; /* 32b*/ /* 64b*/ - U32 nbBits = (offCode-1) * (!!offCode); + U32 nbBits = (offCode-1) + (!offCode); BYTE litLength = llTable[i]; /* (7)*/ /* (7)*/ FSE_encodeSymbol(&blockStream, &stateMatchLength, mlCode); /* 17 */ /* 17 */ if (MEM_32bits()) BIT_flushBits(&blockStream); /* 7 */ - BIT_addBits(&blockStream, offset, nbBits); /* 31 */ /* 42 */ /* 24 bits max in 32-bits mode */ - if (MEM_32bits()) BIT_flushBits(&blockStream); /* 7 */ - FSE_encodeSymbol(&blockStream, &stateOffsetBits, offCode); /* 16 */ /* 51 */ FSE_encodeSymbol(&blockStream, &stateLitLength, litLength); /* 26 */ /* 61 */ + FSE_encodeSymbol(&blockStream, &stateOffsetBits, offCode); /* 16 */ /* 51 */ + if (MEM_32bits()) BIT_flushBits(&blockStream); /* 7 */ + BIT_addBits(&blockStream, offset, nbBits); /* 31 */ /* 42 */ /* 24 bits max in 32-bits mode */ BIT_flushBits(&blockStream); /* 7 */ /* 7 */ } @@ -688,14 +700,15 @@ size_t ZSTD_compressSequences(ZSTD_CCtx* zc, } /* check compressibility */ +_check_compressibility: if ((size_t)(op-ostart) >= maxCSize) return 0; return op - ostart; } -/** ZSTD_storeSeq - Store a sequence (literal length, literals, offset code and match length) into seqStore_t +/*! ZSTD_storeSeq + Store a sequence (literal length, literals, offset code and match length code) into seqStore_t @offsetCode : distance to match, or 0 == repCode @matchCode : matchLength - MINMATCH */ diff --git a/lib/zstd_decompress.c b/lib/zstd_decompress.c index b50e474fb..cdc7d3074 100644 --- a/lib/zstd_decompress.c +++ b/lib/zstd_decompress.c @@ -426,7 +426,7 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx, break; } - if (litSize+WILDCOPY_OVERLENGTH > srcSize) /* risk reading beyond src buffer with wildcopy */ + if (lhSize+litSize+WILDCOPY_OVERLENGTH > srcSize) /* risk reading beyond src buffer with wildcopy */ { if (litSize > srcSize-lhSize) return ERROR(corruption_detected); memcpy(dctx->litBuffer, istart+lhSize, litSize); @@ -483,10 +483,12 @@ size_t ZSTD_decodeSeqHeaders(int* nbSeq, const BYTE** dumpsPtr, size_t* dumpsLen size_t dumpsLength; /* check */ - if (srcSize < 5) return ERROR(srcSize_wrong); + if (srcSize < MIN_SEQUENCES_SIZE) return ERROR(srcSize_wrong); /* SeqHead */ *nbSeq = MEM_readLE16(ip); ip+=2; + if (*nbSeq==0) return 2; + LLtype = *ip >> 6; Offtype = (*ip >> 4) & 3; MLtype = (*ip >> 2) & 3; @@ -589,8 +591,8 @@ size_t ZSTD_decodeSeqHeaders(int* nbSeq, const BYTE** dumpsPtr, size_t* dumpsLen typedef struct { size_t litLength; - size_t offset; size_t matchLength; + size_t offset; } seq_t; typedef struct { @@ -603,7 +605,6 @@ typedef struct { const BYTE* dumpsEnd; } seqState_t; - static void ZSTD_decodeSequence(seq_t* seq, seqState_t* seqState) { size_t litLength; @@ -614,7 +615,7 @@ static void ZSTD_decodeSequence(seq_t* seq, seqState_t* seqState) const BYTE* const de = seqState->dumpsEnd; /* Literal length */ - litLength = FSE_decodeSymbol(&(seqState->stateLL), &(seqState->DStream)); + litLength = FSE_peakSymbol(&(seqState->stateLL)); prevOffset = litLength ? seq->offset : seqState->prevOffset; if (litLength == MaxLL) { U32 add = *dumps++; @@ -632,17 +633,20 @@ static void ZSTD_decodeSequence(seq_t* seq, seqState_t* seqState) 1 /*fake*/, 1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 1024, 2048, 4096, 8192, 16384, 32768, 65536, 131072, 262144, 524288, 1048576, 2097152, 4194304, 8388608, 16777216, 33554432, /*fake*/ 1, 1, 1, 1, 1 }; - U32 offsetCode, nbBits; - offsetCode = FSE_decodeSymbol(&(seqState->stateOffb), &(seqState->DStream)); /* <= maxOff, by table construction */ - if (MEM_32bits()) BIT_reloadDStream(&(seqState->DStream)); - nbBits = offsetCode - 1; + U32 offsetCode = FSE_peakSymbol(&(seqState->stateOffb)); /* <= maxOff, by table construction */ + U32 nbBits = offsetCode - 1; if (offsetCode==0) nbBits = 0; /* cmove */ offset = offsetPrefix[offsetCode] + BIT_readBits(&(seqState->DStream), nbBits); if (MEM_32bits()) BIT_reloadDStream(&(seqState->DStream)); - if (offsetCode==0) offset = prevOffset; /* cmove */ + if (offsetCode==0) offset = prevOffset; /* repcode, cmove */ if (offsetCode | !litLength) seqState->prevOffset = seq->offset; /* cmove */ + FSE_decodeSymbol(&(seqState->stateOffb), &(seqState->DStream)); /* update */ } + /* Literal length update */ + FSE_decodeSymbol(&(seqState->stateLL), &(seqState->DStream)); /* update */ + if (MEM_32bits()) BIT_reloadDStream(&(seqState->DStream)); + /* MatchLength */ matchLength = FSE_decodeSymbol(&(seqState->stateML), &(seqState->DStream)); if (matchLength == MaxML) { @@ -778,7 +782,7 @@ static size_t ZSTD_decompressSequences( ip += errorCode; /* Regen sequences */ - { + if (nbSeq) { seq_t sequence; seqState_t seqState; @@ -803,16 +807,18 @@ static size_t ZSTD_decompressSequences( } /* check if reached exact end */ - if ( !BIT_endOfDStream(&(seqState.DStream)) ) return ERROR(corruption_detected); /* DStream should be entirely and exactly consumed; otherwise data is corrupted */ + if (nbSeq) + return ERROR(corruption_detected); /* DStream should be entirely and exactly consumed; otherwise data is corrupted */ + } - /* last literal segment */ - { - size_t lastLLSize = litEnd - litPtr; - if (litPtr > litEnd) return ERROR(corruption_detected); - if (op+lastLLSize > oend) return ERROR(dstSize_tooSmall); - if (op != litPtr) memcpy(op, litPtr, lastLLSize); - op += lastLLSize; - } } + /* last literal segment */ + { + size_t lastLLSize = litEnd - litPtr; + if (litPtr > litEnd) return ERROR(corruption_detected); /* too many literals already used */ + if (op+lastLLSize > oend) return ERROR(dstSize_tooSmall); + memcpy(op, litPtr, lastLLSize); + op += lastLLSize; + } return op-ostart; } diff --git a/lib/zstd_internal.h b/lib/zstd_internal.h index 7eda81366..0b993b1e8 100644 --- a/lib/zstd_internal.h +++ b/lib/zstd_internal.h @@ -102,7 +102,7 @@ static const size_t ZSTD_frameHeaderSize_min = 5; #define HufLog 12 -#define MIN_SEQUENCES_SIZE (2 /*seqNb*/ + 2 /*dumps*/ + 3 /*seqTables*/ + 1 /*bitStream*/) +#define MIN_SEQUENCES_SIZE 2 /*seqNb*/ #define MIN_CBLOCK_SIZE (1 /*litCSize*/ + MIN_SEQUENCES_SIZE) #define WILDCOPY_OVERLENGTH 8 diff --git a/programs/bench.c b/programs/bench.c index b7f367d0c..d24991431 100644 --- a/programs/bench.c +++ b/programs/bench.c @@ -308,7 +308,7 @@ static int BMK_benchMem(const void* srcBuffer, size_t srcSize, ratio = (double)srcSize / (double)cSize; DISPLAY("%2i-%-17.17s :%10i ->%10i (%5.3f),%6.1f MB/s\r", loopNb, displayName, (int)srcSize, (int)cSize, ratio, (double)srcSize / fastestC / 1000.); -#if 0 +#if 1 /* Decompression */ memset(resultBuffer, 0xD6, srcSize); /* warm result buffer */ @@ -317,18 +317,17 @@ static int BMK_benchMem(const void* srcBuffer, size_t srcSize, while (BMK_GetMilliStart() == milliTime); milliTime = BMK_GetMilliStart(); - ZSTD_decompressBegin_usingDict(refDCtx, dictBuffer, dictBufferSize); for ( ; BMK_GetMilliSpan(milliTime) < TIMELOOP; nbLoops++) { + ZSTD_decompressBegin_usingDict(refDCtx, dictBuffer, dictBufferSize); for (blockNb=0; blockNb%10i (%5.3f),%6.1f MB/s ,%6.1f MB/s\r", loopNb, displayName, (int)srcSize, (int)cSize, ratio, (double)srcSize / fastestC / 1000., (double)srcSize / fastestD / 1000.); /* CRC Checking */ +_findError: crcCheck = XXH64(resultBuffer, srcSize, 0); if (crcOrig!=crcCheck) { size_t u;