/** ZSTD_cycleLog() :
* condition for correct operation : hashLog > 1 */
-static U32 ZSTD_cycleLog(U32 hashLog, ZSTD_strategy strat)
+U32 ZSTD_cycleLog(U32 hashLog, ZSTD_strategy strat)
{
U32 const btScale = ((U32)strat >= (U32)ZSTD_btlazy2);
return hashLog - btScale;
* @return : 0, or an error code
*/
static size_t ZSTD_loadDictionaryContent(ZSTD_matchState_t* ms,
+ ldmState_t* ls,
ZSTD_cwksp* ws,
ZSTD_CCtx_params const* params,
const void* src, size_t srcSize,
ZSTD_window_update(&ms->window, src, srcSize);
ms->loadedDictEnd = params->forceWindow ? 0 : (U32)(iend - ms->window.base);
+ if (params->ldmParams.enableLdm && ls != NULL) {
+ ZSTD_window_update(&ls->window, src, srcSize);
+ ls->loadedDictEnd = params->forceWindow ? 0 : (U32)(iend - ls->window.base);
+ }
+
/* Assert that we the ms params match the params we're being given */
ZSTD_assertEqualCParams(params->cParams, ms->cParams);
ZSTD_overflowCorrectIfNeeded(ms, ws, params, ip, ichunk);
+ if (params->ldmParams.enableLdm && ls != NULL && srcSize >= params->ldmParams.minMatchLength)
+ ZSTD_ldm_fillHashTable(ls, (const BYTE*)src, (const BYTE*)src + srcSize, &params->ldmParams);
+
switch(params->cParams.strategy)
{
case ZSTD_fast:
bs->entropy.fse.matchlength_repeatMode = FSE_repeat_valid;
bs->entropy.fse.litlength_repeatMode = FSE_repeat_valid;
FORWARD_IF_ERROR(ZSTD_loadDictionaryContent(
- ms, ws, params, dictPtr, dictContentSize, dtlm));
+ ms, NULL, ws, params, dictPtr, dictContentSize, dtlm));
return dictID;
}
}
static size_t
ZSTD_compress_insertDictionary(ZSTD_compressedBlockState_t* bs,
ZSTD_matchState_t* ms,
+ ldmState_t* ls,
ZSTD_cwksp* ws,
const ZSTD_CCtx_params* params,
const void* dict, size_t dictSize,
/* dict restricted modes */
if (dictContentType == ZSTD_dct_rawContent)
- return ZSTD_loadDictionaryContent(ms, ws, params, dict, dictSize, dtlm);
+ return ZSTD_loadDictionaryContent(ms, ls, ws, params, dict, dictSize, dtlm);
if (MEM_readLE32(dict) != ZSTD_MAGIC_DICTIONARY) {
if (dictContentType == ZSTD_dct_auto) {
DEBUGLOG(4, "raw content dictionary detected");
return ZSTD_loadDictionaryContent(
- ms, ws, params, dict, dictSize, dtlm);
+ ms, ls, ws, params, dict, dictSize, dtlm);
}
RETURN_ERROR_IF(dictContentType == ZSTD_dct_fullDict, dictionary_wrong);
assert(0); /* impossible */
{ size_t const dictID = cdict ?
ZSTD_compress_insertDictionary(
cctx->blockState.prevCBlock, &cctx->blockState.matchState,
- &cctx->workspace, &cctx->appliedParams, cdict->dictContent,
+ &cctx->ldmState, &cctx->workspace, &cctx->appliedParams, cdict->dictContent,
cdict->dictContentSize, dictContentType, dtlm,
cctx->entropyWorkspace)
: ZSTD_compress_insertDictionary(
cctx->blockState.prevCBlock, &cctx->blockState.matchState,
- &cctx->workspace, &cctx->appliedParams, dict, dictSize,
+ &cctx->ldmState, &cctx->workspace, &cctx->appliedParams, dict, dictSize,
dictContentType, dtlm, cctx->entropyWorkspace);
FORWARD_IF_ERROR(dictID);
assert(dictID <= UINT_MAX);
params.fParams.contentSizeFlag = 1;
params.cParams = cParams;
{ size_t const dictID = ZSTD_compress_insertDictionary(
- &cdict->cBlockState, &cdict->matchState, &cdict->workspace,
+ &cdict->cBlockState, &cdict->matchState, NULL, &cdict->workspace,
&params, cdict->dictContent, cdict->dictContentSize,
dictContentType, ZSTD_dtlm_full, cdict->entropyWorkspace);
FORWARD_IF_ERROR(dictID);
typedef struct {
ZSTD_window_t window; /* State for the window round buffer management */
ldmEntry_t* hashTable;
+ U32 loadedDictEnd;
BYTE* bucketOffsets; /* Next position in bucket to insert entry */
U64 hashPower; /* Used to compute the rolling hash.
* Depends on ldmParams.minMatchLength */
*/
size_t ZSTD_referenceExternalSequences(ZSTD_CCtx* cctx, rawSeq* seq, size_t nbSeq);
+/** ZSTD_cycleLog() :
+ * condition for correct operation : hashLog > 1 */
+U32 ZSTD_cycleLog(U32 hashLog, ZSTD_strategy strat);
#endif /* ZSTD_COMPRESS_H */
return rollingHash;
}
+/** ZSTD_ldm_fillHashTable() :
+ *  Seeds a rolling hash from the first params->minMatchLength bytes at `ip`
+ *  and forwards the range [ip, iend - minMatchLength) to
+ *  ZSTD_ldm_fillLdmHashTable(), using state->window.base as base pointer.
+ *  Ranges shorter than params->minMatchLength are ignored (nothing is hashed).
+ *  Assumes iend >= ip. */
+void ZSTD_ldm_fillHashTable(
+            ldmState_t* state, const BYTE* ip,
+            const BYTE* iend, ldmParams_t const* params)
+{
+    /* The seed hash covers minMatchLength bytes starting at ip :
+     * bail out rather than hashing past iend when the range is too short. */
+    if ((size_t)(iend - ip) < params->minMatchLength) return;
+    {   U64 const startingHash = ZSTD_rollingHash_compute(ip, params->minMatchLength);
+        ZSTD_ldm_fillLdmHashTable(
+            state, startingHash, ip, iend - params->minMatchLength, state->window.base,
+            params->hashLog - params->bucketSizeLog,
+            *params);
+    }
+}
+
/** ZSTD_ldm_limitTableUpdate() :
*
* * Try invalidation after the sequence generation and test the
* the offset against maxDist directly.
*/
- ZSTD_window_enforceMaxDist(&ldmState->window, chunkEnd, maxDist, NULL, NULL);
+ ZSTD_window_enforceMaxDist(&ldmState->window, chunkEnd, maxDist, &ldmState->loadedDictEnd, NULL);
/* 3. Generate the sequences for the chunk, and get newLeftoverSize. */
newLeftoverSize = ZSTD_ldm_generateSequences_internal(
ldmState, sequences, params, chunkStart, chunkSize);
if (sequence.offset == 0)
break;
- assert(sequence.offset <= (1U << cParams->windowLog));
assert(ip + sequence.litLength + sequence.matchLength <= iend);
/* Fill tables for block compressor */
#define ZSTD_LDM_DEFAULT_WINDOW_LOG ZSTD_WINDOWLOG_LIMIT_DEFAULT
+void ZSTD_ldm_fillHashTable(
+ ldmState_t* state, const BYTE* ip,
+ const BYTE* iend, ldmParams_t const* params);
+
/**
* ZSTD_ldm_generateSequences():
*
ZSTD_window_t ldmWindow; /* A thread-safe copy of ldmState.window */
} serialState_t;
-static int ZSTDMT_serialState_reset(serialState_t* serialState, ZSTDMT_seqPool* seqPool, ZSTD_CCtx_params params, size_t jobSize)
+static int ZSTDMT_serialState_reset(serialState_t* serialState,
+ ZSTDMT_seqPool* seqPool, ZSTD_CCtx_params params,
+ size_t jobSize, const void* dict, size_t const dictSize)
{
/* Adjust parameters */
if (params.ldmParams.enableLdm) {
memset(serialState->ldmState.hashTable, 0, hashSize);
memset(serialState->ldmState.bucketOffsets, 0, bucketSize);
}
+
+    /* Update window state and fill hash table with dict.
+     * The window is always extended over the dictionary, but the hash table is
+     * only filled when the dictionary holds at least minMatchLength bytes
+     * (same guard as in ZSTD_loadDictionaryContent()), since the fill starts by
+     * hashing minMatchLength bytes from the beginning of dict. */
+    if (params.ldmParams.enableLdm && dict) {
+        ZSTD_window_update(&serialState->ldmState.window, dict, dictSize);
+        if (dictSize >= params.ldmParams.minMatchLength) {
+            ZSTD_ldm_fillHashTable(&serialState->ldmState, (const BYTE*)dict, (const BYTE*)dict + dictSize, &params.ldmParams);
+        }
+    }
+
serialState->params = params;
serialState->params.jobSize = (U32)jobSize;
return 0;
assert(avgJobSize >= 256 KB); /* condition for ZSTD_compressBound(A) + ZSTD_compressBound(B) <= ZSTD_compressBound(A+B), required to compress directly into Dst (no additional buffer) */
ZSTDMT_setBufferSize(mtctx->bufPool, ZSTD_compressBound(avgJobSize) );
- if (ZSTDMT_serialState_reset(&mtctx->serial, mtctx->seqPool, params, avgJobSize))
+ if (ZSTDMT_serialState_reset(&mtctx->serial, mtctx->seqPool, params, avgJobSize, NULL, 0))
return ERROR(memory_allocation);
FORWARD_IF_ERROR( ZSTDMT_expandJobsTable(mtctx, nbJobs) ); /* only expands if necessary */
mtctx->allJobsCompleted = 0;
mtctx->consumed = 0;
mtctx->produced = 0;
- if (ZSTDMT_serialState_reset(&mtctx->serial, mtctx->seqPool, params, mtctx->targetSectionSize))
+ if (ZSTDMT_serialState_reset(&mtctx->serial, mtctx->seqPool, params, mtctx->targetSectionSize, dict, dictSize))
return ERROR(memory_allocation);
return 0;
}
#define ZSTD_STATIC_LINKING_ONLY /* ZSTD_magicNumber, ZSTD_frameHeaderSize_max */
#include "zstd.h"
#include "zstd_errors.h" /* ZSTD_error_frameParameter_windowTooLarge */
+#include "zstd_compress_internal.h"
#if defined(ZSTD_GZCOMPRESS) || defined(ZSTD_GZDECOMPRESS)
# include <zlib.h>
/*-*************************************
* Constants
***************************************/
-#define KB *(1<<10)
-#define MB *(1<<20)
-#define GB *(1U<<30)
-
#define ADAPT_WINDOWLOG_DEFAULT 23 /* 8 MB */
#define DICTSIZE_MAX (32 MB) /* protection against large input (attack scenario) */
#define FNSPACE 30
-#define PATCHFROM_WINDOWSIZE_EXTRA_BYTES 1 KB
-
/*-*************************************
* Macros
***************************************/
return count;
}
+/* FIO_adjustMemLimitForPatchFromMode() :
+ * Sets the memory limit stored in prefs to the largest of the current limit,
+ * dictSize and maxSrcFileSize.
+ * Reports error 42 through EXM_THROW when that value exceeds UINT_MAX. */
+static void FIO_adjustMemLimitForPatchFromMode(FIO_prefs_t* const prefs,
+                                    unsigned long long const dictSize,
+                                    unsigned long long const maxSrcFileSize)
+{
+    unsigned long long const largestFile = MAX(dictSize, maxSrcFileSize);
+    unsigned long long const requiredLimit = MAX(prefs->memLimit, largestFile);
+    assert(requiredLimit != UTIL_FILESIZE_UNKNOWN);
+    if (requiredLimit > UINT_MAX) {
+        EXM_THROW(42, "Can't handle files larger than %u GB\n", UINT_MAX/(1 GB) + 1);
+    }
+    FIO_setMemLimit(prefs, (unsigned)requiredLimit);
+}
#ifndef ZSTD_NOCOMPRESS
size_t srcBufferSize;
void* dstBuffer;
size_t dstBufferSize;
+ void* dictBuffer;
+ size_t dictBufferSize;
const char* dictFileName;
ZSTD_CStream* cctx;
} cRess_t;
+/* FIO_adjustParamsForPatchFromMode() :
+ * Adjusts settings for --patch-from :
+ * - updates the memory limit through FIO_adjustMemLimitForPatchFromMode(),
+ * - sets comprParams->windowLog from maxSrcFileSize, clamped to
+ *   [ZSTD_WINDOWLOG_MIN, ZSTD_WINDOWLOG_MAX],
+ * - turns on the ldm flag when the file's window log exceeds the cycle log
+ *   computed from the chainLog and strategy that ZSTD_getCParams() returns
+ *   for (cLevel, maxSrcFileSize, dictSize),
+ * - prints tuning hints when that strategy is >= ZSTD_btopt. */
+static void FIO_adjustParamsForPatchFromMode(FIO_prefs_t* const prefs,
+                                    ZSTD_compressionParameters* comprParams,
+                                    unsigned long long const dictSize,
+                                    unsigned long long const maxSrcFileSize,
+                                    int cLevel)
+{
+    unsigned const fileWindowLog = FIO_highbit64(maxSrcFileSize) + 1;
+    ZSTD_compressionParameters const cParams = ZSTD_getCParams(cLevel, (size_t)maxSrcFileSize, (size_t)dictSize);
+    FIO_adjustMemLimitForPatchFromMode(prefs, dictSize, maxSrcFileSize);
+    if (fileWindowLog > ZSTD_WINDOWLOG_MAX) {
+        DISPLAYLEVEL(1, "Max window log exceeded by file (compression ratio will suffer)\n");
+    }
+    /* clamp on both sides : a small file must not request a windowLog below the minimum */
+    comprParams->windowLog = MAX(ZSTD_WINDOWLOG_MIN, MIN(ZSTD_WINDOWLOG_MAX, fileWindowLog));
+    /* use the chainLog selected for this level : the CLI-provided comprParams
+     * fields may be unset, and ZSTD_cycleLog() requires a log value > 1 */
+    if (fileWindowLog > ZSTD_cycleLog(cParams.chainLog, cParams.strategy)) {
+        if (!prefs->ldmFlag) {
+            DISPLAYLEVEL(1, "long mode automatically triggered\n");
+        }
+        FIO_setLdmFlag(prefs, 1);
+    }
+    if (cParams.strategy >= ZSTD_btopt) {
+        DISPLAYLEVEL(1, "[Optimal parser notes] Consider the following to improve patch size at the cost of speed:\n");
+        DISPLAYLEVEL(1, "- Use --single-thread mode in the zstd cli\n");
+        DISPLAYLEVEL(1, "- Set a larger targetLength (eg. --zstd=targetLength=4096)\n");
+        DISPLAYLEVEL(1, "- Set a larger chainLog (eg. --zstd=chainLog=%d)\n", ZSTD_CHAINLOG_MAX);
+        DISPLAYLEVEL(1, "Also consider playing around with searchLog and hashLog\n");
+    }
+}
+
static cRess_t FIO_createCResources(FIO_prefs_t* const prefs,
- const char* dictFileName, const size_t maxSrcFileSize,
+ const char* dictFileName, unsigned long long const maxSrcFileSize,
int cLevel, ZSTD_compressionParameters comprParams) {
cRess_t ress;
memset(&ress, 0, sizeof(ress));
ress.srcBufferSize = ZSTD_CStreamInSize();
ress.srcBuffer = malloc(ress.srcBufferSize);
ress.dstBufferSize = ZSTD_CStreamOutSize();
+
+ /* need to update memLimit before calling createDictBuffer
+ * because of memLimit check inside it */
+ if (prefs->patchFromMode)
+ FIO_adjustParamsForPatchFromMode(prefs, &comprParams, UTIL_getFileSize(dictFileName), maxSrcFileSize, cLevel);
ress.dstBuffer = malloc(ress.dstBufferSize);
+ ress.dictBufferSize = FIO_createDictBuffer(&ress.dictBuffer, dictFileName, prefs); /* works with dictFileName==NULL */
if (!ress.srcBuffer || !ress.dstBuffer)
EXM_THROW(31, "allocation error : not enough memory");
/* Advanced parameters, including dictionary */
- { void* dictBuffer;
- size_t const dictBuffSize = FIO_createDictBuffer(&dictBuffer, dictFileName, prefs); /* works with dictFileName==NULL */
- if (dictFileName && (dictBuffer==NULL))
- EXM_THROW(32, "allocation error : can't create dictBuffer");
- ress.dictFileName = dictFileName;
-
- if (prefs->adaptiveMode && !prefs->ldmFlag && !comprParams.windowLog)
- comprParams.windowLog = ADAPT_WINDOWLOG_DEFAULT;
-
- if (prefs->patchFromMode) {
- comprParams.windowLog = FIO_highbit64((unsigned long long)maxSrcFileSize + PATCHFROM_WINDOWSIZE_EXTRA_BYTES);
- }
-
- CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_contentSizeFlag, 1) ); /* always enable content size when available (note: supposed to be default) */
- CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_dictIDFlag, prefs->dictIDFlag) );
- CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_checksumFlag, prefs->checksumFlag) );
- CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_contentSizeFlag, prefs->contentSize) );
- /* compression level */
- CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_compressionLevel, cLevel) );
- /* max compressed block size */
- CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_targetCBlockSize, (int)prefs->targetCBlockSize) );
- /* source size hint */
- CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_srcSizeHint, (int)prefs->srcSizeHint) );
- /* long distance matching */
- CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_enableLongDistanceMatching, prefs->ldmFlag) );
- CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_ldmHashLog, prefs->ldmHashLog) );
- CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_ldmMinMatch, prefs->ldmMinMatch) );
- if (prefs->ldmBucketSizeLog != FIO_LDM_PARAM_NOTSET) {
- CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_ldmBucketSizeLog, prefs->ldmBucketSizeLog) );
- }
- if (prefs->ldmHashRateLog != FIO_LDM_PARAM_NOTSET) {
- CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_ldmHashRateLog, prefs->ldmHashRateLog) );
- }
- /* compression parameters */
- CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_windowLog, (int)comprParams.windowLog) );
- CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_chainLog, (int)comprParams.chainLog) );
- CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_hashLog, (int)comprParams.hashLog) );
- CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_searchLog, (int)comprParams.searchLog) );
- CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_minMatch, (int)comprParams.minMatch) );
- CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_targetLength, (int)comprParams.targetLength) );
- CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_strategy, comprParams.strategy) );
- CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_literalCompressionMode, (int)prefs->literalCompressionMode) );
- /* multi-threading */
+ if (dictFileName && (ress.dictBuffer==NULL))
+ EXM_THROW(32, "allocation error : can't create dictBuffer");
+ ress.dictFileName = dictFileName;
+
+ if (prefs->adaptiveMode && !prefs->ldmFlag && !comprParams.windowLog)
+ comprParams.windowLog = ADAPT_WINDOWLOG_DEFAULT;
+
+ CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_contentSizeFlag, prefs->contentSize) ); /* content size flag follows the user preference (prefs->contentSize) */
+ CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_dictIDFlag, prefs->dictIDFlag) );
+ CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_checksumFlag, prefs->checksumFlag) );
+ /* compression level */
+ CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_compressionLevel, cLevel) );
+ /* max compressed block size */
+ CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_targetCBlockSize, (int)prefs->targetCBlockSize) );
+ /* source size hint */
+ CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_srcSizeHint, (int)prefs->srcSizeHint) );
+ /* long distance matching */
+ CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_enableLongDistanceMatching, prefs->ldmFlag) );
+ CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_ldmHashLog, prefs->ldmHashLog) );
+ CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_ldmMinMatch, prefs->ldmMinMatch) );
+ if (prefs->ldmBucketSizeLog != FIO_LDM_PARAM_NOTSET) {
+ CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_ldmBucketSizeLog, prefs->ldmBucketSizeLog) );
+ }
+ if (prefs->ldmHashRateLog != FIO_LDM_PARAM_NOTSET) {
+ CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_ldmHashRateLog, prefs->ldmHashRateLog) );
+ }
+ /* compression parameters */
+ CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_windowLog, (int)comprParams.windowLog) );
+ CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_chainLog, (int)comprParams.chainLog) );
+ CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_hashLog, (int)comprParams.hashLog) );
+ CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_searchLog, (int)comprParams.searchLog) );
+ CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_minMatch, (int)comprParams.minMatch) );
+ CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_targetLength, (int)comprParams.targetLength) );
+ CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_strategy, comprParams.strategy) );
+ CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_literalCompressionMode, (int)prefs->literalCompressionMode) );
+ /* multi-threading */
#ifdef ZSTD_MULTITHREAD
- DISPLAYLEVEL(5,"set nb workers = %u \n", prefs->nbWorkers);
- CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_nbWorkers, prefs->nbWorkers) );
- CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_jobSize, prefs->blockSize) );
- if (prefs->overlapLog != FIO_OVERLAP_LOG_NOTSET) {
- DISPLAYLEVEL(3,"set overlapLog = %u \n", prefs->overlapLog);
- CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_overlapLog, prefs->overlapLog) );
- }
- CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_rsyncable, prefs->rsyncable) );
+ DISPLAYLEVEL(5,"set nb workers = %u \n", prefs->nbWorkers);
+ CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_nbWorkers, prefs->nbWorkers) );
+ CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_jobSize, prefs->blockSize) );
+ if (prefs->overlapLog != FIO_OVERLAP_LOG_NOTSET) {
+ DISPLAYLEVEL(3,"set overlapLog = %u \n", prefs->overlapLog);
+ CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_overlapLog, prefs->overlapLog) );
+ }
+ CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_rsyncable, prefs->rsyncable) );
#endif
- /* dictionary */
- CHECK( ZSTD_CCtx_loadDictionary(ress.cctx, dictBuffer, dictBuffSize) );
- free(dictBuffer);
+ /* dictionary */
+ if (prefs->patchFromMode) {
+ CHECK( ZSTD_CCtx_refPrefix(ress.cctx, ress.dictBuffer, ress.dictBufferSize) );
+ } else {
+ CHECK( ZSTD_CCtx_loadDictionary(ress.cctx, ress.dictBuffer, ress.dictBufferSize) );
}
-
+
return ress;
}
{
free(ress.srcBuffer);
free(ress.dstBuffer);
+ free(ress.dictBuffer);
ZSTD_freeCStream(ress.cctx); /* never fails */
}
const char* srcFileName, const char* dictFileName,
int compressionLevel, ZSTD_compressionParameters comprParams)
{
- cRess_t const ress = FIO_createCResources(prefs, dictFileName, (size_t)UTIL_getFileSize(srcFileName), compressionLevel, comprParams);
+ cRess_t const ress = FIO_createCResources(prefs, dictFileName, UTIL_getFileSize(srcFileName), compressionLevel, comprParams);
int const result = FIO_compressFilename_srcFile(prefs, ress, dstFileName, srcFileName, compressionLevel);
return dstFileNameBuffer;
}
-static size_t FIO_getLargestFileSize(const char** inFileNames, unsigned nbFiles)
+static unsigned long long FIO_getLargestFileSize(const char** inFileNames, unsigned nbFiles)
{
- size_t i, fileSize, maxFileSize = 0;
+ size_t i;
+ unsigned long long fileSize, maxFileSize = 0;
for (i = 0; i < nbFiles; i++) {
- fileSize = (size_t)UTIL_getFileSize(inFileNames[i]);
+ fileSize = UTIL_getFileSize(inFileNames[i]);
maxFileSize = fileSize > maxFileSize ? fileSize : maxFileSize;
}
return maxFileSize;
dRess_t ress;
memset(&ress, 0, sizeof(ress));
+ if (prefs->patchFromMode)
+ FIO_adjustMemLimitForPatchFromMode(prefs, UTIL_getFileSize(dictFileName), 0 /* just use the dict size */);
+
/* Allocation */
ress.dctx = ZSTD_createDStream();
if (ress.dctx==NULL)
selection, namely that windowSize > srcSize.
Note: cannot use both this and -D together
+ Note: `--long` mode will be automatically activated if chainLog < fileLog
+ (fileLog being the windowLog required to cover the whole file). You
+ can also manually force it.
+ Note: for all levels, you can use --patch-from in --single-thread mode
+ to improve compression ratio at the cost of speed
+ Note: for level 19, you can get increased compression ratio at the cost
+ of speed by specifying `--zstd=targetLength=` to be something large
+ (i.e 4096), and by setting a large `--zstd=chainLog=`
* `-M#`, `--memory=#`:
Set a memory usage limit. By default, Zstandard uses 128 MB for decompression
as the maximum amount of memory the decompressor is allowed to use, but you can
CLEAN_RETURN(1);
}
+ if (patchFromDictFileName != NULL && filenames->tableSize > 1) {
+ DISPLAY("error : can't use --patch-from=# on multiple files \n");
+ CLEAN_RETURN(1);
+ }
+
/* No status message in pipe mode (stdin - stdout) or multi-files mode */
if (!strcmp(filenames->fileNames[0], stdinmark) && outFileName && !strcmp(outFileName,stdoutmark) && (g_displayLevel==2)) g_displayLevel=1;
if ((filenames->tableSize > 1) & (g_displayLevel==2)) g_displayLevel=1;
/* IO Stream/File */
FIO_setNotificationLevel(g_displayLevel);
FIO_setPatchFromMode(prefs, patchFromDictFileName != NULL);
- if (patchFromDictFileName != NULL) {
- dictFileName = patchFromDictFileName;
- }
if (memLimit == 0) {
if (compressionParams.windowLog == 0) {
memLimit = (U32)1 << g_defaultMaxWindowLog;
} else {
memLimit = (U32)1 << (compressionParams.windowLog & 31);
} }
+ if (patchFromDictFileName != NULL)
+ dictFileName = patchFromDictFileName;
FIO_setMemLimit(prefs, memLimit);
if (operation==zom_compress) {
#ifndef ZSTD_NOCOMPRESS
ddict = ZSTD_createDDict(dict.buff, dict.size);
FUZZ_ASSERT(ddict);
} else {
- FUZZ_ZASSERT(ZSTD_DCtx_loadDictionary_advanced(
+ if (FUZZ_dataProducer_uint32Range(producer, 0, 1) == 0)
+ FUZZ_ZASSERT(ZSTD_DCtx_loadDictionary_advanced(
dctx, dict.buff, dict.size,
(ZSTD_dictLoadMethod_e)FUZZ_dataProducer_uint32Range(producer, 0, 1),
(ZSTD_dictContentType_e)FUZZ_dataProducer_uint32Range(producer, 0, 2)));
+ else
+ FUZZ_ZASSERT(ZSTD_DCtx_refPrefix_advanced(
+ dctx, dict.buff, dict.size,
+ (ZSTD_dictContentType_e)FUZZ_dataProducer_uint32Range(producer, 0, 2)));
}
{
void const* source, size_t sourceSize,
void const* dict, size_t dictSize,
ZSTD_dictLoadMethod_e dictLoadMethod,
- ZSTD_dictContentType_e dictContentType)
+ ZSTD_dictContentType_e dictContentType,
+ int const refPrefix)
{
ZSTD_CCtx* cctx = ZSTD_createCCtx();
- FUZZ_ZASSERT(ZSTD_CCtx_loadDictionary_advanced(
+ if (refPrefix)
+ FUZZ_ZASSERT(ZSTD_CCtx_refPrefix_advanced(
+ cctx, dict, dictSize, dictContentType));
+ else
+ FUZZ_ZASSERT(ZSTD_CCtx_loadDictionary_advanced(
cctx, dict, dictSize, dictLoadMethod, dictContentType));
size_t const compressedSize = ZSTD_compress2(
cctx, compressed, compressedCapacity, source, sourceSize);
void const* compressed, size_t compressedSize,
void const* dict, size_t dictSize,
ZSTD_dictLoadMethod_e dictLoadMethod,
- ZSTD_dictContentType_e dictContentType)
+ ZSTD_dictContentType_e dictContentType,
+ int const refPrefix)
{
ZSTD_DCtx* dctx = ZSTD_createDCtx();
- FUZZ_ZASSERT(ZSTD_DCtx_loadDictionary_advanced(
+ if (refPrefix)
+ FUZZ_ZASSERT(ZSTD_DCtx_refPrefix_advanced(
+ dctx, dict, dictSize, dictContentType));
+ else
+ FUZZ_ZASSERT(ZSTD_DCtx_loadDictionary_advanced(
dctx, dict, dictSize, dictLoadMethod, dictContentType));
size_t const resultSize = ZSTD_decompressDCtx(
dctx, result, resultCapacity, compressed, compressedSize);
int LLVMFuzzerTestOneInput(const uint8_t *src, size_t size)
{
FUZZ_dataProducer_t *producer = FUZZ_dataProducer_create(src, size);
+ int const refPrefix = FUZZ_dataProducer_uint32Range(producer, 0, 1) != 0;
ZSTD_dictLoadMethod_e const dlm =
size = FUZZ_dataProducer_uint32Range(producer, 0, 1);
ZSTD_dictContentType_e const dct =
FUZZ_ASSERT(cBuf);
size_t const cSize =
- compress(cBuf, cBufSize, src, size, src, size, dlm, dct);
+ compress(cBuf, cBufSize, src, size, src, size, dlm, dct, refPrefix);
/* compression failing is okay */
if (ZSTD_isError(cSize)) {
FUZZ_ASSERT_MSG(dct != ZSTD_dct_rawContent, "Raw must always succeed!");
goto out;
}
size_t const rSize =
- decompress(rBuf, size, cBuf, cSize, src, size, dlm, dct);
+ decompress(rBuf, size, cBuf, cSize, src, size, dlm, dct, refPrefix);
FUZZ_ASSERT_MSG(rSize == size, "Incorrect regenerated size");
FUZZ_ASSERT_MSG(!memcmp(src, rBuf, size), "Corruption!");
BYTE* const dst = (BYTE*)compressedBuffer;
ZSTD_DCtx* dctx = ZSTD_createDCtx();
-
+
/* create a large frame and then a bunch of small frames */
- size_t srcSize = ZSTD_compress((void*)dst,
+ size_t srcSize = ZSTD_compress((void*)dst,
compressedBufferSize, CNBuffer, largeFrameSrcSize, 3);
- for (i = 0; i < nbFrames; i++)
- srcSize += ZSTD_compress((void*)(dst + srcSize),
- compressedBufferSize - srcSize, CNBuffer,
+ for (i = 0; i < nbFrames; i++)
+ srcSize += ZSTD_compress((void*)(dst + srcSize),
+ compressedBufferSize - srcSize, CNBuffer,
smallFrameSrcSize, 3);
-
+
/* decompressStream and make sure that dctx size was reduced at least once */
while (consumed < srcSize) {
ZSTD_inBuffer in = {(void*)(dst + consumed), MIN(1, srcSize - consumed), 0};
}
DISPLAYLEVEL(3, "OK \n");
+    /* Compresses a 1 KB source against a 1 KB dictionary with long distance
+     * matching enabled; the test passes when compression reports no error. */
+    DISPLAYLEVEL(3, "test%3i : ldm fill dict out-of-bounds check : ", testNb++);
+    {
+        ZSTD_CCtx* const cctx = ZSTD_createCCtx();
+
+        size_t const size = (1U << 10);
+        size_t const dstCapacity = ZSTD_compressBound(size);
+        void* dict = (void*)malloc(size);
+        void* src = (void*)malloc(size);
+        void* dst = (void*)malloc(dstCapacity);
+
+        assert(cctx != NULL && dict != NULL && src != NULL && dst != NULL);
+        RDG_genBuffer(dict, size, 0.5, 0.5, seed);
+        RDG_genBuffer(src, size, 0.5, 0.5, seed);
+
+        CHECK_Z(ZSTD_CCtx_setParameter(cctx, ZSTD_c_enableLongDistanceMatching, 1));
+        /* keep the call out of assert() : it must still run when NDEBUG is defined */
+        CHECK_Z(ZSTD_compress_usingDict(cctx, dst, dstCapacity, src, size, dict, size, 3));
+
+        ZSTD_freeCCtx(cctx);
+        free(dict);
+        free(src);
+        free(dst);
+    }
+    DISPLAYLEVEL(3, "OK \n");
+
+    /* Compresses CNBuffer against a dictionary of the same size with both
+     * forceMaxWindow and long distance matching enabled; the test passes when
+     * compression reports no error. */
+    DISPLAYLEVEL(3, "test%3i : testing dict compression with enableLdm and forceMaxWindow : ", testNb++);
+    {
+        ZSTD_CCtx* const cctx = ZSTD_createCCtx();
+        void* dict = (void*)malloc(CNBuffSize);
+
+        assert(cctx != NULL && dict != NULL);
+        RDG_genBuffer(dict, CNBuffSize, 0.5, 0.5, seed);
+        RDG_genBuffer(CNBuffer, CNBuffSize, 0.6, 0.6, seed);
+
+        CHECK_Z(ZSTD_CCtx_setParameter(cctx, ZSTD_c_forceMaxWindow, 1));
+        CHECK_Z(ZSTD_CCtx_setParameter(cctx, ZSTD_c_enableLongDistanceMatching, 1));
+        /* keep the call out of assert() : it must still run when NDEBUG is defined */
+        CHECK_Z(ZSTD_compress_usingDict(cctx, compressedBuffer, compressedBufferSize,
+                                        CNBuffer, CNBuffSize, dict, CNBuffSize, 3));
+
+        ZSTD_freeCCtx(cctx);
+        free(dict);
+    }
+    DISPLAYLEVEL(3, "OK \n");
+
+    /* Note: this test takes 0.5 seconds to run.
+     * Compresses a buffer against an identical prefix, first without and then
+     * with long distance matching, round-trips both results, and checks that
+     * the ldm output is much smaller. */
+    DISPLAYLEVEL(3, "test%3i : testing refPrefix vs refPrefix + ldm (size comparison) : ", testNb++);
+    {
+        /* test a big buffer so that ldm can take effect */
+        size_t const size = 100 MB;
+        int const windowLog = 27;
+        size_t const dstSize = ZSTD_compressBound(size);
+
+        void* dict = (void*)malloc(size);
+        void* src = (void*)malloc(size);
+        void* dst = (void*)malloc(dstSize);
+        void* recon = (void*)malloc(size);
+
+        size_t refPrefixCompressedSize = 0;
+        size_t refPrefixLdmCompressedSize = 0;
+        size_t reconSize = 0;
+
+        ZSTD_CCtx* const cctx = ZSTD_createCCtx();
+        ZSTD_DCtx* const dctx = ZSTD_createDCtx();
+
+        assert(dict != NULL && src != NULL && dst != NULL && recon != NULL);
+        assert(cctx != NULL && dctx != NULL);
+
+        /* make dict and src the same incompressible data */
+        RDG_genBuffer(src, size, 0, 0, seed);
+        memcpy(dict, src, size);
+        assert(!memcmp(dict, src, size));
+
+        /* set level 1 and windowLog to cover src */
+        CHECK_Z(ZSTD_CCtx_setParameter(cctx, ZSTD_c_compressionLevel, 1));
+        CHECK_Z(ZSTD_CCtx_setParameter(cctx, ZSTD_c_windowLog, windowLog));
+
+        /* compress on level 1 using just refPrefix and no ldm */
+        CHECK_Z(ZSTD_CCtx_refPrefix(cctx, dict, size));
+        refPrefixCompressedSize = ZSTD_compress2(cctx, dst, dstSize, src, size);
+        assert(!ZSTD_isError(refPrefixCompressedSize));
+
+        /* test round trip just refPrefix */
+        CHECK_Z(ZSTD_DCtx_refPrefix(dctx, dict, size));
+        reconSize = ZSTD_decompressDCtx(dctx, recon, size, dst, refPrefixCompressedSize);
+        assert(!ZSTD_isError(reconSize));
+        assert(reconSize == size);
+        assert(!memcmp(recon, src, size));
+
+        /* compress on level 1 using refPrefix and ldm */
+        CHECK_Z(ZSTD_CCtx_refPrefix(cctx, dict, size));
+        CHECK_Z(ZSTD_CCtx_setParameter(cctx, ZSTD_c_enableLongDistanceMatching, 1));
+        refPrefixLdmCompressedSize = ZSTD_compress2(cctx, dst, dstSize, src, size);
+        assert(!ZSTD_isError(refPrefixLdmCompressedSize));
+
+        /* test round trip refPrefix + ldm */
+        CHECK_Z(ZSTD_DCtx_refPrefix(dctx, dict, size));
+        reconSize = ZSTD_decompressDCtx(dctx, recon, size, dst, refPrefixLdmCompressedSize);
+        assert(!ZSTD_isError(reconSize));
+        assert(reconSize == size);
+        assert(!memcmp(recon, src, size));
+
+        /* make sure that refPrefixCompressedSize is significantly greater */
+        assert(refPrefixCompressedSize > 10 * refPrefixLdmCompressedSize);
+        /* make sure the ldm compressed size is less than 1% of original */
+        assert((double)refPrefixLdmCompressedSize / (double)size < 0.01);
+
+        ZSTD_freeDCtx(dctx);
+        ZSTD_freeCCtx(cctx);
+        free(recon);
+        free(dict);
+        free(src);
+        free(dst);
+    }
+    DISPLAYLEVEL(3, "OK \n");
+
DISPLAYLEVEL(3, "test%3d: superblock uncompressible data, too many nocompress superblocks : ", testNb++);
{
ZSTD_CCtx* const cctx = ZSTD_createCCtx();
datagen -g1000 -P50 > tmp_dict
datagen -g1000 -P10 > tmp_patch
-zstd --memory=10000 --patch-from=tmp_dict tmp_patch -o tmp_patch_diff
-zstd -d --memory=10000 --patch-from=tmp_dict tmp_patch_diff -o tmp_patch_recon
+zstd --patch-from=tmp_dict tmp_patch -o tmp_patch_diff
+zstd -d --patch-from=tmp_dict tmp_patch_diff -o tmp_patch_recon
$DIFF -s tmp_patch_recon tmp_patch
rm -rf tmp_*
+println "\n===> patch-from recursive tests"
+
+mkdir tmp_dir
+datagen > tmp_dir/tmp1
+datagen > tmp_dir/tmp2
+datagen > tmp_dict
+zstd --patch-from=tmp_dict -r tmp_dir && die
+rm -rf tmp*
+
println "\n===> large files tests "
roundTripTest -g270000000 1