From: Yann Collet
Date: Wed, 6 Jul 2016 14:12:38 +0000 (+0200)
Subject: dictBuilder protection vs huge sample sets (>2 GB)
X-Git-Tag: v0.7.3^2~17
X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=99b045b70a08dcf73c924934105a3bd55da3cf6e;p=thirdparty%2Fzstd.git

dictBuilder protection vs huge sample sets (>2 GB)
---

diff --git a/lib/dictBuilder/zdict.c b/lib/dictBuilder/zdict.c
index e81cdb3ae..f559f5583 100644
--- a/lib/dictBuilder/zdict.c
+++ b/lib/dictBuilder/zdict.c
@@ -31,6 +31,12 @@
     - Zstd homepage : https://www.zstd.net
 */
 
+/*-**************************************
+* Tuning parameters
+****************************************/
+#define ZDICT_MAX_SAMPLES_SIZE (1500U << 20)
+
+
 /*-**************************************
 * Compiler Options
 ****************************************/
@@ -481,7 +487,7 @@ static U32 ZDICT_dictSize(const dictItem* dictList)
 
 
 static size_t ZDICT_trainBuffer(dictItem* dictList, U32 dictListSize,
-                            const void* const buffer, const size_t bufferSize, /* buffer must end with noisy guard band */
+                            const void* const buffer, size_t bufferSize, /* buffer must end with noisy guard band */
                             const size_t* fileSizes, unsigned nbFiles,
                             U32 shiftRatio, unsigned maxDictSize)
 {
@@ -503,6 +509,10 @@ static size_t ZDICT_trainBuffer(dictItem* dictList, U32 dictListSize,
     if (minRatio < MINRATIO) minRatio = MINRATIO;
     memset(doneMarks, 0, bufferSize+16);
 
+    /* limit sample set size (divsufsort limitation)*/
+    if (bufferSize > ZDICT_MAX_SAMPLES_SIZE) DISPLAYLEVEL(3, "sample set too large : reduce to %u MB ...\n", (U32)(ZDICT_MAX_SAMPLES_SIZE>>20));
+    while (bufferSize > ZDICT_MAX_SAMPLES_SIZE) bufferSize -= fileSizes[--nbFiles];
+
     /* sort */
     DISPLAYLEVEL(2, "sorting %u files of total size %u MB ...\n", nbFiles, (U32)(bufferSize>>20));
     divSuftSortResult = divsufsort((const unsigned char*)buffer, suffix, (int)bufferSize, 0);
@@ -703,7 +713,6 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize,
     }
     if (compressionLevel==0) compressionLevel=g_compressionLevel_default;
     params.cParams = ZSTD_getCParams(compressionLevel, averageSampleSize, dictBufferSize);
-    //params.cParams.strategy = ZSTD_greedy;
     params.fParams.contentSizeFlag = 0;
     {   size_t const beginResult = ZSTD_compressBegin_advanced(esr.ref, dictBuffer, dictBufferSize, params, 0);
         if (ZSTD_isError(beginResult)) {