From bbe77212efebcfefb6b510f2509a674a5dd25245 Mon Sep 17 00:00:00 2001 From: Nick Terrell Date: Mon, 18 Sep 2017 16:54:53 -0700 Subject: [PATCH] [libzstd] Increase MaxOff --- lib/common/zstd_internal.h | 7 +++-- lib/compress/zstd_compress.c | 33 ++++++++++++++------ lib/decompress/zstd_decompress.c | 53 ++++++++++++++++++++++---------- tests/decodecorpus.c | 2 +- 4 files changed, 65 insertions(+), 30 deletions(-) diff --git a/lib/common/zstd_internal.h b/lib/common/zstd_internal.h index cd0dbcc27..403c0cbdb 100644 --- a/lib/common/zstd_internal.h +++ b/lib/common/zstd_internal.h @@ -123,7 +123,8 @@ typedef enum { set_basic, set_rle, set_compressed, set_repeat } symbolEncodingTy #define MaxLit ((1<longLengthPos] = MaxML; } -MEM_STATIC symbolEncodingType_e ZSTD_selectEncodingType(FSE_repeat* repeatMode, - size_t const mostFrequent, size_t nbSeq, U32 defaultNormLog) +typedef enum { + ZSTD_defaultDisallowed = 0, + ZSTD_defaultAllowed = 1 +} ZSTD_defaultPolicy_e; + +MEM_STATIC symbolEncodingType_e ZSTD_selectEncodingType( + FSE_repeat* repeatMode, size_t const mostFrequent, size_t nbSeq, + U32 defaultNormLog, ZSTD_defaultPolicy_e const isDefaultAllowed) { #define MIN_SEQ_FOR_DYNAMIC_FSE 64 #define MAX_SEQ_FOR_STATIC_FSE 1000 - - if ((mostFrequent == nbSeq) && (nbSeq > 2)) { + ZSTD_STATIC_ASSERT(ZSTD_defaultDisallowed == 0 && ZSTD_defaultAllowed != 0); + if ((mostFrequent == nbSeq) && (!isDefaultAllowed || nbSeq > 2)) { + /* Prefer set_basic over set_rle when there are 2 or less symbols, + * since RLE uses 1 byte, but set_basic uses 5-6 bits per symbol. + * If basic encoding isn't possible, always choose RLE. + */ *repeatMode = FSE_repeat_check; return set_rle; } - if ((*repeatMode == FSE_repeat_valid) && (nbSeq < MAX_SEQ_FOR_STATIC_FSE)) { + if (isDefaultAllowed && (*repeatMode == FSE_repeat_valid) && (nbSeq < MAX_SEQ_FOR_STATIC_FSE)) { return set_repeat; } - if ((nbSeq < MIN_SEQ_FOR_DYNAMIC_FSE) || (mostFrequent < (nbSeq >> (defaultNormLog-1)))) { + if (isDefaultAllowed && ((nbSeq < MIN_SEQ_FOR_DYNAMIC_FSE) || (mostFrequent < (nbSeq >> (defaultNormLog-1))))) { *repeatMode = FSE_repeat_valid; return set_basic; } @@ -1299,6 +1309,7 @@ MEM_STATIC size_t ZSTD_buildCTable(void* dst, size_t dstCapacity, count[codeTable[nbSeq-1]]--; nbSeq_1--; } + assert(nbSeq_1 > 1); CHECK_F(FSE_normalizeCount(norm, tableLog, count, nbSeq_1, max)); { size_t const NCountSize = FSE_writeNCount(op, oend - op, norm, max, tableLog); /* overflow protected */ if (FSE_isError(NCountSize)) return NCountSize; @@ -1436,7 +1447,7 @@ MEM_STATIC size_t ZSTD_compressSequences_internal(seqStore_t* seqStorePtr, /* CTable for Literal Lengths */ { U32 max = MaxLL; size_t const mostFrequent = FSE_countFast_wksp(count, &max, llCodeTable, nbSeq, entropy->workspace); - LLtype = ZSTD_selectEncodingType(&entropy->litlength_repeatMode, mostFrequent, nbSeq, LL_defaultNormLog); + LLtype = ZSTD_selectEncodingType(&entropy->litlength_repeatMode, mostFrequent, nbSeq, LL_defaultNormLog, ZSTD_defaultAllowed); { size_t const countSize = ZSTD_buildCTable(op, oend - op, CTable_LitLength, LLFSELog, (symbolEncodingType_e)LLtype, count, max, llCodeTable, nbSeq, LL_defaultNorm, LL_defaultNormLog, MaxLL, entropy->workspace, sizeof(entropy->workspace)); @@ -1446,9 +1457,11 @@ MEM_STATIC size_t ZSTD_compressSequences_internal(seqStore_t* seqStorePtr, /* CTable for Offsets */ { U32 max = MaxOff; size_t const mostFrequent = FSE_countFast_wksp(count, &max, ofCodeTable, nbSeq, entropy->workspace); - Offtype = ZSTD_selectEncodingType(&entropy->offcode_repeatMode, mostFrequent, nbSeq, OF_defaultNormLog); + /* We can only use the basic table if max <= DefaultMaxOff, otherwise the offsets are too large */ + ZSTD_defaultPolicy_e const defaultPolicy = max <= DefaultMaxOff ? ZSTD_defaultAllowed : ZSTD_defaultDisallowed; + Offtype = ZSTD_selectEncodingType(&entropy->offcode_repeatMode, mostFrequent, nbSeq, OF_defaultNormLog, defaultPolicy); { size_t const countSize = ZSTD_buildCTable(op, oend - op, CTable_OffsetBits, OffFSELog, (symbolEncodingType_e)Offtype, - count, max, ofCodeTable, nbSeq, OF_defaultNorm, OF_defaultNormLog, MaxOff, + count, max, ofCodeTable, nbSeq, OF_defaultNorm, OF_defaultNormLog, DefaultMaxOff, entropy->workspace, sizeof(entropy->workspace)); if (ZSTD_isError(countSize)) return countSize; op += countSize; @@ -1456,7 +1469,7 @@ MEM_STATIC size_t ZSTD_compressSequences_internal(seqStore_t* seqStorePtr, /* CTable for MatchLengths */ { U32 max = MaxML; size_t const mostFrequent = FSE_countFast_wksp(count, &max, mlCodeTable, nbSeq, entropy->workspace); - MLtype = ZSTD_selectEncodingType(&entropy->matchlength_repeatMode, mostFrequent, nbSeq, ML_defaultNormLog); + MLtype = ZSTD_selectEncodingType(&entropy->matchlength_repeatMode, mostFrequent, nbSeq, ML_defaultNormLog, ZSTD_defaultAllowed); { size_t const countSize = ZSTD_buildCTable(op, oend - op, CTable_MatchLength, MLFSELog, (symbolEncodingType_e)MLtype, count, max, mlCodeTable, nbSeq, ML_defaultNorm, ML_defaultNormLog, MaxML, entropy->workspace, sizeof(entropy->workspace)); diff --git a/lib/decompress/zstd_decompress.c b/lib/decompress/zstd_decompress.c index 6d6d83396..b6bfa0c49 100644 --- a/lib/decompress/zstd_decompress.c +++ b/lib/decompress/zstd_decompress.c @@ -862,6 +862,15 @@ size_t ZSTD_execSequenceLast7(BYTE* op, typedef enum { ZSTD_lo_isRegularOffset, ZSTD_lo_isLongOffset=1 } ZSTD_longOffset_e; +/* We need to add at most (ZSTD_WINDOWLOG_MAX_32 - 1) bits to read the maximum + * offset bits. But we can only read at most (STREAM_ACCUMULATOR_MIN_32 - 1) + * bits before reloading. This value is the maximum number of bytes we read + * after reloading when we are decoding long offets. + */ +#define LONG_OFFSETS_MAX_EXTRA_BITS_32 \ + (ZSTD_WINDOWLOG_MAX_32 > STREAM_ACCUMULATOR_MIN_32 \ + ? ZSTD_WINDOWLOG_MAX_32 - STREAM_ACCUMULATOR_MIN_32 \ + : 0) static seq_t ZSTD_decodeSequence(seqState_t* seqState, const ZSTD_longOffset_e longOffsets) { @@ -869,7 +878,7 @@ static seq_t ZSTD_decodeSequence(seqState_t* seqState, const ZSTD_longOffset_e l U32 const llCode = FSE_peekSymbol(&seqState->stateLL); U32 const mlCode = FSE_peekSymbol(&seqState->stateML); - U32 const ofCode = FSE_peekSymbol(&seqState->stateOffb); /* <= maxOff, by table construction */ + U32 const ofCode = FSE_peekSymbol(&seqState->stateOffb); /* <= MaxOff, by table construction */ U32 const llBits = LL_bits[llCode]; U32 const mlBits = ML_bits[mlCode]; @@ -896,7 +905,7 @@ static seq_t ZSTD_decodeSequence(seqState_t* seqState, const ZSTD_longOffset_e l 0, 1, 1, 5, 0xD, 0x1D, 0x3D, 0x7D, 0xFD, 0x1FD, 0x3FD, 0x7FD, 0xFFD, 0x1FFD, 0x3FFD, 0x7FFD, 0xFFFD, 0x1FFFD, 0x3FFFD, 0x7FFFD, 0xFFFFD, 0x1FFFFD, 0x3FFFFD, 0x7FFFFD, - 0xFFFFFD, 0x1FFFFFD, 0x3FFFFFD, 0x7FFFFFD, 0xFFFFFFD }; + 0xFFFFFD, 0x1FFFFFD, 0x3FFFFFD, 0x7FFFFFD, 0xFFFFFFD, 0x1FFFFFFD, 0x3FFFFFFD, 0x7FFFFFFD }; /* sequence */ { size_t offset; @@ -904,8 +913,10 @@ static seq_t ZSTD_decodeSequence(seqState_t* seqState, const ZSTD_longOffset_e l offset = 0; else { ZSTD_STATIC_ASSERT(ZSTD_lo_isLongOffset == 1); - if (longOffsets) { - int const extraBits = ofBits - MIN(ofBits, STREAM_ACCUMULATOR_MIN); + ZSTD_STATIC_ASSERT(LONG_OFFSETS_MAX_EXTRA_BITS_32 == 2); + assert(ofBits <= MaxOff); + if (MEM_32bits() && longOffsets) { + U32 const extraBits = ofBits - MIN(ofBits, STREAM_ACCUMULATOR_MIN_32-1); offset = OF_base[ofCode] + (BIT_readBitsFast(&seqState->DStream, ofBits - extraBits) << extraBits); if (MEM_32bits() || extraBits) BIT_reloadDStream(&seqState->DStream); if (extraBits) offset += BIT_readBitsFast(&seqState->DStream, extraBits); @@ -936,13 +947,17 @@ static seq_t ZSTD_decodeSequence(seqState_t* seqState, const ZSTD_longOffset_e l seq.matchLength = ML_base[mlCode] + ((mlCode>31) ? BIT_readBitsFast(&seqState->DStream, mlBits) : 0); /* <= 16 bits */ - if (MEM_32bits() && (mlBits+llBits>24)) BIT_reloadDStream(&seqState->DStream); + if (MEM_32bits() && (mlBits+llBits >= STREAM_ACCUMULATOR_MIN_32-LONG_OFFSETS_MAX_EXTRA_BITS_32)) + BIT_reloadDStream(&seqState->DStream); + if (MEM_64bits() && (totalBits >= STREAM_ACCUMULATOR_MIN_64-(LLFSELog+MLFSELog+OffFSELog))) + BIT_reloadDStream(&seqState->DStream); + /* Verify that there is enough bits to read the rest of the data in 64-bit mode. */ + ZSTD_STATIC_ASSERT(16+LLFSELog+MLFSELog+OffFSELog < STREAM_ACCUMULATOR_MIN_64); seq.litLength = LL_base[llCode] + ((llCode>15) ? BIT_readBitsFast(&seqState->DStream, llBits) : 0); /* <= 16 bits */ - if ( MEM_32bits() - || (totalBits > 64 - 7 - (LLFSELog+MLFSELog+OffFSELog)) ) - BIT_reloadDStream(&seqState->DStream); + if (MEM_32bits()) + BIT_reloadDStream(&seqState->DStream); DEBUGLOG(6, "seq: litL=%u, matchL=%u, offset=%u", (U32)seq.litLength, (U32)seq.matchLength, (U32)seq.offset); @@ -1102,7 +1117,6 @@ static size_t ZSTD_decompressSequences( } - HINT_INLINE seq_t ZSTD_decodeSequenceLong(seqState_t* seqState, ZSTD_longOffset_e const longOffsets) { @@ -1110,7 +1124,7 @@ seq_t ZSTD_decodeSequenceLong(seqState_t* seqState, ZSTD_longOffset_e const long U32 const llCode = FSE_peekSymbol(&seqState->stateLL); U32 const mlCode = FSE_peekSymbol(&seqState->stateML); - U32 const ofCode = FSE_peekSymbol(&seqState->stateOffb); /* <= maxOff, by table construction */ + U32 const ofCode = FSE_peekSymbol(&seqState->stateOffb); /* <= MaxOff, by table construction */ U32 const llBits = LL_bits[llCode]; U32 const mlBits = ML_bits[mlCode]; @@ -1137,7 +1151,7 @@ seq_t ZSTD_decodeSequenceLong(seqState_t* seqState, ZSTD_longOffset_e const long 0, 1, 1, 5, 0xD, 0x1D, 0x3D, 0x7D, 0xFD, 0x1FD, 0x3FD, 0x7FD, 0xFFD, 0x1FFD, 0x3FFD, 0x7FFD, 0xFFFD, 0x1FFFD, 0x3FFFD, 0x7FFFD, 0xFFFFD, 0x1FFFFD, 0x3FFFFD, 0x7FFFFD, - 0xFFFFFD, 0x1FFFFFD, 0x3FFFFFD, 0x7FFFFFD, 0xFFFFFFD }; + 0xFFFFFD, 0x1FFFFFD, 0x3FFFFFD, 0x7FFFFFD, 0xFFFFFFD, 0x1FFFFFFD, 0x3FFFFFFD, 0x7FFFFFFD }; /* sequence */ { size_t offset; @@ -1145,8 +1159,10 @@ seq_t ZSTD_decodeSequenceLong(seqState_t* seqState, ZSTD_longOffset_e const long offset = 0; else { ZSTD_STATIC_ASSERT(ZSTD_lo_isLongOffset == 1); - if (longOffsets) { - int const extraBits = ofBits - MIN(ofBits, STREAM_ACCUMULATOR_MIN); + ZSTD_STATIC_ASSERT(LONG_OFFSETS_MAX_EXTRA_BITS_32 == 2); + assert(ofBits <= MaxOff); + if (MEM_32bits() && longOffsets) { + U32 const extraBits = ofBits - MIN(ofBits, STREAM_ACCUMULATOR_MIN_32-1); offset = OF_base[ofCode] + (BIT_readBitsFast(&seqState->DStream, ofBits - extraBits) << extraBits); if (MEM_32bits() || extraBits) BIT_reloadDStream(&seqState->DStream); if (extraBits) offset += BIT_readBitsFast(&seqState->DStream, extraBits); @@ -1176,11 +1192,16 @@ seq_t ZSTD_decodeSequenceLong(seqState_t* seqState, ZSTD_longOffset_e const long } seq.matchLength = ML_base[mlCode] + ((mlCode>31) ? BIT_readBitsFast(&seqState->DStream, mlBits) : 0); /* <= 16 bits */ - if (MEM_32bits() && (mlBits+llBits>24)) BIT_reloadDStream(&seqState->DStream); + if (MEM_32bits() && (mlBits+llBits >= STREAM_ACCUMULATOR_MIN_32-LONG_OFFSETS_MAX_EXTRA_BITS_32)) + BIT_reloadDStream(&seqState->DStream); + if (MEM_64bits() && (totalBits >= STREAM_ACCUMULATOR_MIN_64-(LLFSELog+MLFSELog+OffFSELog))) + BIT_reloadDStream(&seqState->DStream); + /* Verify that there is enough bits to read the rest of the data in 64-bit mode. */ + ZSTD_STATIC_ASSERT(16+LLFSELog+MLFSELog+OffFSELog < STREAM_ACCUMULATOR_MIN_64); seq.litLength = LL_base[llCode] + ((llCode>15) ? BIT_readBitsFast(&seqState->DStream, llBits) : 0); /* <= 16 bits */ - if (MEM_32bits() || - (totalBits > 64 - 7 - (LLFSELog+MLFSELog+OffFSELog)) ) BIT_reloadDStream(&seqState->DStream); + if (MEM_32bits()) + BIT_reloadDStream(&seqState->DStream); { size_t const pos = seqState->pos + seq.litLength; seq.match = seqState->base + pos - seq.offset; /* single memory segment */ diff --git a/tests/decodecorpus.c b/tests/decodecorpus.c index 9cde2825e..ea01d2718 100644 --- a/tests/decodecorpus.c +++ b/tests/decodecorpus.c @@ -881,7 +881,7 @@ static size_t writeSequences(U32* seed, frame_t* frame, seqStore_t* seqStorePtr, frame->stats.offsetSymbolSet, 28)) { Offtype = set_repeat; } else if (!(RAND(seed) & 3)) { - FSE_buildCTable_wksp(CTable_OffsetBits, OF_defaultNorm, MaxOff, OF_defaultNormLog, scratchBuffer, sizeof(scratchBuffer)); + FSE_buildCTable_wksp(CTable_OffsetBits, OF_defaultNorm, DefaultMaxOff, OF_defaultNormLog, scratchBuffer, sizeof(scratchBuffer)); Offtype = set_basic; } else { size_t nbSeq_1 = nbSeq; -- 2.47.2