From: Yann Collet Date: Sat, 14 Oct 2017 08:21:43 +0000 (-0700) Subject: dictionary compression use correct file size estimation X-Git-Tag: v1.3.3^2~52^2~3 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=fc8d293460d6767558843ac8231c249dcc704382;p=thirdparty%2Fzstd.git Dictionary compression now uses the correct file size estimation when determining compression parameters to compress a single file. For multiple files, it still "bets" that files are going to be small. There was also a bug recently introduced in ZSTD_CCtx_loadDictionary_advanced() that made it unable to use pledgedSrcSize to determine compression parameters. --- diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index 4ceba415c..c7872e7be 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -531,7 +531,7 @@ size_t ZSTD_CCtx_loadDictionary_advanced( cctx->cdict = NULL; } else { ZSTD_compressionParameters const cParams = - ZSTD_getCParamsFromCCtxParams(cctx->requestedParams, 0, dictSize); + ZSTD_getCParamsFromCCtxParams(cctx->requestedParams, cctx->pledgedSrcSizePlusOne-1, dictSize); cctx->cdictLocal = ZSTD_createCDict_advanced( dict, dictSize, dictLoadMethod, dictMode, diff --git a/programs/fileio.c b/programs/fileio.c index 01e901ed8..17ad31b32 100644 --- a/programs/fileio.c +++ b/programs/fileio.c @@ -442,7 +442,6 @@ static cRess_t FIO_createCResources(const char* dictFileName, int cLevel, CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_p_contentSizeFlag, 1) ); CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_p_dictIDFlag, g_dictIDFlag) ); CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_p_checksumFlag, g_checksumFlag) ); - (void)srcSize; /* compression level */ CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_p_compressionLevel, cLevel) ); /* long distance matching */ @@ -468,7 +467,9 @@ static cRess_t FIO_createCResources(const char* dictFileName, int cLevel, DISPLAYLEVEL(5,"set nb threads = %u \n", g_nbThreads); CHECK( 
ZSTD_CCtx_setParameter(ress.cctx, ZSTD_p_nbThreads, g_nbThreads) ); /* dictionary */ + CHECK( ZSTD_CCtx_setPledgedSrcSize(ress.cctx, srcSize) ); /* just to load dictionary with good compression parameters */ CHECK( ZSTD_CCtx_loadDictionary(ress.cctx, dictBuffer, dictBuffSize) ); + CHECK( ZSTD_CCtx_setPledgedSrcSize(ress.cctx, ZSTD_CONTENTSIZE_UNKNOWN) ); /* reset */ } #elif defined(ZSTD_MULTITHREAD) { ZSTD_parameters params = ZSTD_getParams(cLevel, srcSize, dictBuffSize); @@ -1006,7 +1007,7 @@ int FIO_compressMultipleFilenames(const char** inFileNamesTable, unsigned nbFile size_t dfnSize = FNSPACE; char* dstFileName = (char*)malloc(FNSPACE); size_t const suffixSize = suffix ? strlen(suffix) : 0; - U64 const srcSize = (nbFiles != 1) ? 0 : UTIL_getFileSize(inFileNamesTable[0]) ; + U64 const srcSize = (nbFiles != 1) ? ZSTD_CONTENTSIZE_UNKNOWN : UTIL_getFileSize(inFileNamesTable[0]) ; cRess_t ress = FIO_createCResources(dictFileName, compressionLevel, srcSize, comprParams); /* init */ @@ -1032,9 +1033,9 @@ int FIO_compressMultipleFilenames(const char** inFileNamesTable, unsigned nbFile free(dstFileName); dfnSize = ifnSize + 20; dstFileName = (char*)malloc(dfnSize); - if (!dstFileName) + if (!dstFileName) { EXM_THROW(30, "zstd: %s", strerror(errno)); - } + } } strcpy(dstFileName, inFileNamesTable[u]); strcat(dstFileName, suffix); missed_files += FIO_compressFilename_dstFile(ress, dstFileName, inFileNamesTable[u], compressionLevel);