From: Yann Collet Date: Thu, 26 Jan 2017 00:25:38 +0000 (-0800) Subject: fixed zstdmt corruption issue when enabling overlapped sections X-Git-Tag: v1.1.3^2~19^2~4 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=bb0027405afb197aff767d1a3d9a759e88d105ed;p=thirdparty%2Fzstd.git fixed zstdmt corruption issue when enabling overlapped sections see Asana board for detailed explanation on why and how to fix it --- diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index 3c69a1ae0..95c7e1a7a 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -60,7 +60,8 @@ struct ZSTD_CCtx_s { U32 nextToUpdate; /* index from which to continue dictionary update */ U32 nextToUpdate3; /* index from which to continue dictionary update */ U32 hashLog3; /* dispatch table : larger == faster, more memory */ - U32 loadedDictEnd; + U32 loadedDictEnd; /* index of end of dictionary */ + U32 forceWindow; /* force back-references to respect limit of 1<customMem), &customMem, sizeof(customMem)); + cctx->customMem = customMem; return cctx; } @@ -118,6 +119,15 @@ size_t ZSTD_sizeof_CCtx(const ZSTD_CCtx* cctx) return sizeof(*cctx) + cctx->workSpaceSize; } +size_t ZSTD_setCCtxParameter(ZSTD_CCtx* cctx, ZSTD_CCtxParameter param, unsigned value) +{ + switch(param) + { + case ZSTD_p_forceWindow : cctx->forceWindow = value; return 0; + default: return ERROR(parameter_unknown); + } +} + const seqStore_t* ZSTD_getSeqStore(const ZSTD_CCtx* ctx) /* hidden interface */ { return &(ctx->seqStore); @@ -748,6 +758,13 @@ _check_compressibility: } +#if 0 /* for debug */ +# define STORESEQ_DEBUG +#include /* fprintf */ +U32 g_startDebug = 0; +const BYTE* g_start = NULL; +#endif + /*! ZSTD_storeSeq() : Store a sequence (literal length, literals, offset code and match length code) into seqStore_t. `offsetCode` : distance to match, or 0 == repCode. @@ -755,13 +772,14 @@ _check_compressibility: */ MEM_STATIC void ZSTD_storeSeq(seqStore_t* seqStorePtr, size_t litLength, const void* literals, U32 offsetCode, size_t matchCode) { -#if 0 /* for debug */ - static const BYTE* g_start = NULL; - const U32 pos = (U32)((const BYTE*)literals - g_start); - if (g_start==NULL) g_start = (const BYTE*)literals; - //if ((pos > 1) && (pos < 50000)) - printf("Cpos %6u :%5u literals & match %3u bytes at distance %6u \n", - pos, (U32)litLength, (U32)matchCode+MINMATCH, (U32)offsetCode); +#ifdef STORESEQ_DEBUG + if (g_startDebug) { + const U32 pos = (U32)((const BYTE*)literals - g_start); + if (g_start==NULL) g_start = (const BYTE*)literals; + if ((pos > 1895000) && (pos < 1895300)) + fprintf(stderr, "Cpos %6u :%5u literals & match %3u bytes at distance %6u \n", + pos, (U32)litLength, (U32)matchCode+MINMATCH, (U32)offsetCode); + } #endif /* copy Literals */ ZSTD_wildcopy(seqStorePtr->lit, literals, litLength); @@ -2305,7 +2323,7 @@ static size_t ZSTD_compress_generic (ZSTD_CCtx* cctx, else cctx->nextToUpdate -= correction; } - if ((U32)(ip+blockSize - cctx->base) > cctx->loadedDictEnd + maxDist) { + if ((U32)(ip+blockSize - cctx->base) > (cctx->forceWindow ? 0 : cctx->loadedDictEnd) + maxDist) { /* enforce maxDist */ U32 const newLowLimit = (U32)(ip+blockSize - cctx->base) - maxDist; if (cctx->lowLimit < newLowLimit) cctx->lowLimit = newLowLimit; diff --git a/lib/compress/zstdmt_compress.c b/lib/compress/zstdmt_compress.c index 04e0adfcd..988e133d6 100644 --- a/lib/compress/zstdmt_compress.c +++ b/lib/compress/zstdmt_compress.c @@ -238,6 +238,7 @@ void ZSTDMT_compressChunk(void* jobDescription) } else { size_t const initError = ZSTD_compressBegin_advanced(job->cctx, job->srcStart, job->dictSize, job->params, job->fullFrameSize); if (ZSTD_isError(initError)) { job->cSize = initError; goto _endJob; } + ZSTD_setCCtxParameter(job->cctx, ZSTD_p_forceWindow, 1); } if (!job->firstChunk) { /* flush frame header */ size_t const hSize = ZSTD_compressContinue(job->cctx, dstBuff.start, dstBuff.size, src, 0); diff --git a/lib/decompress/zstd_decompress.c b/lib/decompress/zstd_decompress.c index c53f3c3d1..9c04503d2 100644 --- a/lib/decompress/zstd_decompress.c +++ b/lib/decompress/zstd_decompress.c @@ -1973,7 +1973,7 @@ size_t ZSTD_setDStreamParameter(ZSTD_DStream* zds, switch(paramType) { default : return ERROR(parameter_unknown); - case ZSTDdsp_maxWindowSize : zds->maxWindowSize = paramValue ? paramValue : (U32)(-1); break; + case DStream_p_maxWindowSize : zds->maxWindowSize = paramValue ? paramValue : (U32)(-1); break; } return 0; } diff --git a/lib/zstd.h b/lib/zstd.h index 52d65206c..7a0aa3304 100644 --- a/lib/zstd.h +++ b/lib/zstd.h @@ -401,6 +401,12 @@ ZSTDLIB_API ZSTD_CCtx* ZSTD_createCCtx_advanced(ZSTD_customMem customMem); * Gives the amount of memory used by a given ZSTD_CCtx */ ZSTDLIB_API size_t ZSTD_sizeof_CCtx(const ZSTD_CCtx* cctx); +/*! ZSTD_setCCtxParameter() : + * Set advanced parameters, selected through enum ZSTD_CCtxParameter + * @result : 0, or an error code (which can be tested with ZSTD_isError()) */ +typedef enum { ZSTD_p_forceWindow } ZSTD_CCtxParameter; +size_t ZSTD_setCCtxParameter(ZSTD_CCtx* cctx, ZSTD_CCtxParameter param, unsigned value); + /*! ZSTD_createCDict_byReference() : * Create a digested dictionary for compression * Dictionary content is simply referenced, and therefore stays in dictBuffer. @@ -519,7 +525,7 @@ ZSTDLIB_API size_t ZSTD_sizeof_CStream(const ZSTD_CStream* zcs); /*===== Advanced Streaming decompression functions =====*/ -typedef enum { ZSTDdsp_maxWindowSize } ZSTD_DStreamParameter_e; +typedef enum { DStream_p_maxWindowSize } ZSTD_DStreamParameter_e; ZSTDLIB_API ZSTD_DStream* ZSTD_createDStream_advanced(ZSTD_customMem customMem); ZSTDLIB_API size_t ZSTD_initDStream_usingDict(ZSTD_DStream* zds, const void* dict, size_t dictSize); /**< note: a dict will not be used if dict == NULL or dictSize < 8 */ ZSTDLIB_API size_t ZSTD_setDStreamParameter(ZSTD_DStream* zds, ZSTD_DStreamParameter_e paramType, unsigned paramValue); diff --git a/programs/fileio.c b/programs/fileio.c index db2bb55d6..f18e418af 100644 --- a/programs/fileio.c +++ b/programs/fileio.c @@ -530,7 +530,7 @@ static dRess_t FIO_createDResources(const char* dictFileName) /* Allocation */ ress.dctx = ZSTD_createDStream(); if (ress.dctx==NULL) EXM_THROW(60, "Can't create ZSTD_DStream"); - ZSTD_setDStreamParameter(ress.dctx, ZSTDdsp_maxWindowSize, g_memLimit); + ZSTD_setDStreamParameter(ress.dctx, DStream_p_maxWindowSize, g_memLimit); ress.srcBufferSize = ZSTD_DStreamInSize(); ress.srcBuffer = malloc(ress.srcBufferSize); ress.dstBufferSize = ZSTD_DStreamOutSize(); diff --git a/tests/zstreamtest.c b/tests/zstreamtest.c index 9efba323c..2cb6d65e0 100644 --- a/tests/zstreamtest.c +++ b/tests/zstreamtest.c @@ -225,7 +225,7 @@ static int basicUnitTests(U32 seed, double compressibility, ZSTD_customMem custo inBuff2 = inBuff; DISPLAYLEVEL(3, "test%3i : decompress %u bytes : ", testNb++, COMPRESSIBLE_NOISE_LENGTH); ZSTD_initDStream_usingDict(zd, CNBuffer, 128 KB); - { size_t const r = ZSTD_setDStreamParameter(zd, ZSTDdsp_maxWindowSize, 1000000000); /* large limit */ + { size_t const r = ZSTD_setDStreamParameter(zd, DStream_p_maxWindowSize, 1000000000); /* large limit */ if (ZSTD_isError(r)) goto _output_error; } { size_t const remaining = ZSTD_decompressStream(zd, &outBuff, &inBuff); if (remaining != 0) goto _output_error; } /* should reach end of frame == 0; otherwise, some data left, or an error */ @@ -426,7 +426,7 @@ static int basicUnitTests(U32 seed, double compressibility, ZSTD_customMem custo /* Memory restriction */ DISPLAYLEVEL(3, "test%3i : maxWindowSize < frame requirement : ", testNb++); ZSTD_initDStream_usingDict(zd, CNBuffer, 128 KB); - { size_t const r = ZSTD_setDStreamParameter(zd, ZSTDdsp_maxWindowSize, 1000); /* too small limit */ + { size_t const r = ZSTD_setDStreamParameter(zd, DStream_p_maxWindowSize, 1000); /* too small limit */ if (ZSTD_isError(r)) goto _output_error; } inBuff.src = compressedBuffer; inBuff.size = cSize; @@ -466,6 +466,10 @@ static size_t findDiff(const void* buf1, const void* buf2, size_t max) if (b1[u] != b2[u]) break; } DISPLAY("Error at position %u / %u \n", (U32)u, (U32)max); + DISPLAY(" %02X %02X %02X :%02X: %02X %02X %02X %02X %02X \n", + b1[u-3], b1[u-2], b1[u-1], b1[u-0], b1[u+1], b1[u+2], b1[u+3], b1[u+4], b1[u+5]); + DISPLAY(" %02X %02X %02X :%02X: %02X %02X %02X %02X %02X \n", + b2[u-3], b2[u-2], b2[u-1], b2[u-0], b2[u+1], b2[u+2], b2[u+3], b2[u+4], b2[u+5]); return u; } @@ -902,9 +906,8 @@ static int fuzzerTests_MT(U32 seed, U32 nbTests, unsigned startTest, double comp decompressionResult = ZSTD_decompressStream(zd, &outBuff, &inBuff); CHECK (ZSTD_isError(decompressionResult), "decompression error : %s", ZSTD_getErrorName(decompressionResult)); } - CHECK (decompressionResult != 0, "frame not fully decoded"); - CHECK (outBuff.pos != totalTestSize, "decompressed data : wrong size") - CHECK (inBuff.pos != cSize, "compressed data should be fully read") + CHECK (outBuff.pos != totalTestSize, "decompressed data : wrong size (%u != %u)", (U32)outBuff.pos, (U32)totalTestSize); + CHECK (inBuff.pos != cSize, "compressed data should be fully read (%u != %u)", (U32)inBuff.pos, (U32)cSize); { U64 const crcDest = XXH64(dstBuffer, totalTestSize, 0); if (crcDest!=crcOrig) findDiff(copyBuffer, dstBuffer, totalTestSize); CHECK (crcDest!=crcOrig, "decompressed data corrupted");