From: Danielle Rozenblit Date: Fri, 27 Jan 2023 20:04:29 +0000 (-0800) Subject: record long offsets in ZSTD_symbolEncodingTypeStats_t + add test case X-Git-Tag: v1.5.4^2~18^2~4 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=9e4c66b9e92df871c8e0da61c1796d402874fed6;p=thirdparty%2Fzstd.git record long offsets in ZSTD_symbolEncodingTypeStats_t + add test case --- diff --git a/lib/common/zstd_internal.h b/lib/common/zstd_internal.h index 12d73e209..8878aa100 100644 --- a/lib/common/zstd_internal.h +++ b/lib/common/zstd_internal.h @@ -299,7 +299,6 @@ typedef struct { BYTE* ofCode; size_t maxNbSeq; size_t maxNbLit; - BYTE* longOffsets; /* longLengthPos and longLengthType to allow us to represent either a single litLength or matchLength * in the seqStore that has a value larger than U16 (if it exists). To do so, we increment @@ -347,7 +346,7 @@ typedef struct { } ZSTD_frameSizeInfo; /* decompress & legacy */ const seqStore_t* ZSTD_getSeqStore(const ZSTD_CCtx* ctx); /* compress & dictBuilder */ -void ZSTD_seqToCodes(const seqStore_t* seqStorePtr); /* compress, dictBuilder, decodeCorpus (shouldn't get its definition from here) */ +int ZSTD_seqToCodes(const seqStore_t* seqStorePtr); /* compress, dictBuilder, decodeCorpus (shouldn't get its definition from here) */ /* custom memory allocation functions */ void* ZSTD_customMalloc(size_t size, ZSTD_customMem customMem); diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index fca22c4bb..a83b1ccdd 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -1610,8 +1610,7 @@ static size_t ZSTD_estimateCCtxSize_usingCCtxParams_internal( size_t const maxNbSeq = ZSTD_maxNbSeq(blockSize, cParams->minMatch, useExternalMatchFinder); size_t const tokenSpace = ZSTD_cwksp_alloc_size(WILDCOPY_OVERLENGTH + blockSize) + ZSTD_cwksp_aligned_alloc_size(maxNbSeq * sizeof(seqDef)) - + 3 * ZSTD_cwksp_alloc_size(maxNbSeq * sizeof(BYTE)) - + ZSTD_cwksp_alloc_size(sizeof(BYTE)); /* 
longOffsets */ + + 3 * ZSTD_cwksp_alloc_size(maxNbSeq * sizeof(BYTE)); size_t const entropySpace = ZSTD_cwksp_alloc_size(ENTROPY_WORKSPACE_SIZE); size_t const blockStateSpace = 2 * ZSTD_cwksp_alloc_size(sizeof(ZSTD_compressedBlockState_t)); size_t const matchStateSize = ZSTD_sizeof_matchState(cParams, useRowMatchFinder, /* enableDedicatedDictSearch */ 0, /* forCCtx */ 1); @@ -2110,8 +2109,6 @@ static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc, zc->seqStore.llCode = ZSTD_cwksp_reserve_buffer(ws, maxNbSeq * sizeof(BYTE)); zc->seqStore.mlCode = ZSTD_cwksp_reserve_buffer(ws, maxNbSeq * sizeof(BYTE)); zc->seqStore.ofCode = ZSTD_cwksp_reserve_buffer(ws, maxNbSeq * sizeof(BYTE)); - zc->seqStore.longOffsets = ZSTD_cwksp_reserve_buffer(ws, sizeof(BYTE)); - zc->seqStore.longOffsets[0] = 0; zc->seqStore.sequencesStart = (seqDef*)ZSTD_cwksp_reserve_aligned(ws, maxNbSeq * sizeof(seqDef)); FORWARD_IF_ERROR(ZSTD_reset_matchState( @@ -2562,16 +2559,15 @@ static void ZSTD_reduceIndex (ZSTD_matchState_t* ms, ZSTD_CCtx_params const* par /* See doc/zstd_compression_format.md for detailed format description */ -void ZSTD_seqToCodes(const seqStore_t* seqStorePtr) +int ZSTD_seqToCodes(const seqStore_t* seqStorePtr) { const seqDef* const sequences = seqStorePtr->sequencesStart; BYTE* const llCodeTable = seqStorePtr->llCode; BYTE* const ofCodeTable = seqStorePtr->ofCode; BYTE* const mlCodeTable = seqStorePtr->mlCode; - BYTE* const longOffsetsFlag = seqStorePtr->longOffsets; U32 const nbSeq = (U32)(seqStorePtr->sequences - seqStorePtr->sequencesStart); U32 u; - BYTE longOffsets = 0; + int longOffsets = 0; assert(nbSeq <= seqStorePtr->maxNbSeq); for (u=0; u<[... text lost in extraction: everything between this '<' and the next '>' was stripped as if it were an HTML tag ...]>= STREAM_ACCUMULATOR_MIN); } - longOffsetsFlag[0] = longOffsets; if (seqStorePtr->longLengthType==ZSTD_llt_literalLength) llCodeTable[seqStorePtr->longLengthPos] = MaxLL; if (seqStorePtr->longLengthType==ZSTD_llt_matchLength) mlCodeTable[seqStorePtr->longLengthPos] = MaxML; + return longOffsets; } /* ZSTD_useTargetCBlockSize(): @@
-2620,6 +2616,7 @@ typedef struct { U32 MLtype; size_t size; size_t lastCountSize; /* Accounts for bug in 1.3.4. More detail in ZSTD_entropyCompressSeqStore_internal() */ + int longOffsets; } ZSTD_symbolEncodingTypeStats_t; /* ZSTD_buildSequencesStatistics(): @@ -2650,7 +2647,7 @@ ZSTD_buildSequencesStatistics( stats.lastCountSize = 0; /* convert length/distances into codes */ - ZSTD_seqToCodes(seqStorePtr); + stats.longOffsets = ZSTD_seqToCodes(seqStorePtr); assert(op <= oend); assert(nbSeq != 0); /* ZSTD_selectEncodingType() divides by nbSeq */ /* build CTable for Literal Lengths */ @@ -2774,11 +2771,11 @@ ZSTD_entropyCompressSeqStore_internal( const BYTE* const ofCodeTable = seqStorePtr->ofCode; const BYTE* const llCodeTable = seqStorePtr->llCode; const BYTE* const mlCodeTable = seqStorePtr->mlCode; - const BYTE* const longOffsetsFlag = seqStorePtr->longOffsets; BYTE* const ostart = (BYTE*)dst; BYTE* const oend = ostart + dstCapacity; BYTE* op = ostart; size_t lastCountSize; + int longOffsets = 0; entropyWorkspace = count + (MaxSeq + 1); entropyWkspSize -= (MaxSeq + 1) * sizeof(*count); @@ -2840,10 +2837,10 @@ ZSTD_entropyCompressSeqStore_internal( *seqHead = (BYTE)((stats.LLtype<<6) + (stats.Offtype<<4) + (stats.MLtype<<2)); lastCountSize = stats.lastCountSize; op += stats.size; + longOffsets = stats.longOffsets; } - { const BYTE longOffsets = longOffsetsFlag[0]; - size_t const bitstreamSize = ZSTD_encodeSequences( + { size_t const bitstreamSize = ZSTD_encodeSequences( op, (size_t)(oend - op), CTable_MatchLength, mlCodeTable, CTable_OffsetBits, ofCodeTable, @@ -3485,7 +3482,7 @@ ZSTD_buildBlockEntropyStats_literals(void* const src, size_t srcSize, static ZSTD_symbolEncodingTypeStats_t ZSTD_buildDummySequencesStatistics(ZSTD_fseCTables_t* nextEntropy) { - ZSTD_symbolEncodingTypeStats_t stats = {set_basic, set_basic, set_basic, 0, 0}; + ZSTD_symbolEncodingTypeStats_t stats = {set_basic, set_basic, set_basic, 0, 0, 0}; nextEntropy->litlength_repeatMode = 
FSE_repeat_none; nextEntropy->offcode_repeatMode = FSE_repeat_none; nextEntropy->matchlength_repeatMode = FSE_repeat_none; diff --git a/tests/decodecorpus.c b/tests/decodecorpus.c index b04015d89..148088a76 100644 --- a/tests/decodecorpus.c +++ b/tests/decodecorpus.c @@ -186,7 +186,6 @@ BYTE SEQUENCE_LITERAL_BUFFER[ZSTD_BLOCKSIZE_MAX]; /* storeSeq expects a place to BYTE SEQUENCE_LLCODE[ZSTD_BLOCKSIZE_MAX]; BYTE SEQUENCE_MLCODE[ZSTD_BLOCKSIZE_MAX]; BYTE SEQUENCE_OFCODE[ZSTD_BLOCKSIZE_MAX]; -BYTE SEQUENCE_LONGOFFSETS[1]; U64 WKSP[HUF_WORKSPACE_SIZE_U64]; @@ -635,7 +634,6 @@ static inline void initSeqStore(seqStore_t *seqStore) { seqStore->llCode = SEQUENCE_LLCODE; seqStore->mlCode = SEQUENCE_MLCODE; seqStore->ofCode = SEQUENCE_OFCODE; - seqStore->longOffsets = SEQUENCE_LONGOFFSETS; ZSTD_resetSeqStore(seqStore); } diff --git a/tests/zstreamtest.c b/tests/zstreamtest.c index fa8a1ae37..3e5d854f1 100644 --- a/tests/zstreamtest.c +++ b/tests/zstreamtest.c @@ -2222,6 +2222,66 @@ static int basicUnitTests(U32 seed, double compressibility, int bigTests) } DISPLAYLEVEL(3, "OK \n"); + + DISPLAYLEVEL(3, "test%3i : Testing large offset with small window size: ", testNb++); + { + ZSTD_CCtx* cctx = ZSTD_createCCtx(); + ZSTD_DCtx* dctx = ZSTD_createDCtx(); + + /* Test large offset, small window size*/ + { + size_t srcSize = 21; + void* const src = CNBuffer; + size_t dstSize = ZSTD_compressBound(srcSize); + void* const dst = compressedBuffer; + size_t const kNbSequences = 4; + ZSTD_Sequence* sequences = malloc(sizeof(ZSTD_Sequence) * kNbSequences); + void* const checkBuf = malloc(srcSize); + const size_t largeDictSize = 1 << 30; + ZSTD_CDict* cdict = NULL; + ZSTD_DDict* ddict = NULL; + + /* Generate large dictionary */ + void* dictBuffer = calloc(largeDictSize, 1); + ZSTD_compressionParameters cParams = ZSTD_getCParams(1, srcSize, largeDictSize); + cParams.minMatch = ZSTD_MINMATCH_MIN; + cParams.hashLog = ZSTD_HASHLOG_MIN; + cParams.chainLog = ZSTD_CHAINLOG_MIN; + + cdict = 
ZSTD_createCDict_advanced(dictBuffer, largeDictSize, ZSTD_dlm_byRef, ZSTD_dct_rawContent, cParams, ZSTD_defaultCMem); + ddict = ZSTD_createDDict_advanced(dictBuffer, largeDictSize, ZSTD_dlm_byRef, ZSTD_dct_rawContent, ZSTD_defaultCMem); + + ZSTD_CCtx_refCDict(cctx, cdict); + ZSTD_DCtx_refDDict(dctx, ddict); + + sequences[0] = (ZSTD_Sequence) {3, 3, 3, 0}; + sequences[1] = (ZSTD_Sequence) {1 << 29, 0, 3, 0}; + sequences[2] = (ZSTD_Sequence) {1 << 29, 0, 9, 0}; + sequences[3] = (ZSTD_Sequence) {3, 0, 3, 0}; + + cSize = ZSTD_compressSequences(cctx, dst, dstSize, + sequences, kNbSequences, + src, srcSize); + + CHECK(ZSTD_isError(cSize), "Should not throw an error"); + + { + size_t dSize = ZSTD_decompressDCtx(dctx, checkBuf, srcSize, dst, cSize); + CHECK(ZSTD_isError(dSize), "Should not throw an error"); + CHECK(memcmp(src, checkBuf, srcSize) != 0, "Corruption!"); + } + + free(sequences); + free(checkBuf); + free(dictBuffer); + ZSTD_freeCDict(cdict); + ZSTD_freeDDict(ddict); + } + ZSTD_freeCCtx(cctx); + ZSTD_freeDCtx(dctx); + } + DISPLAYLEVEL(3, "OK \n"); + _end: FUZ_freeDictionary(dictionary); ZSTD_freeCStream(zc);