int ldmMinMatch;
int ldmBucketSizeLog;
int ldmHashRateLog;
+ size_t streamSrcSize;
size_t targetCBlockSize;
+ int srcSizeHint;
ZSTD_literalCompressionMode_e literalCompressionMode;
/* IO preferences */
ret->ldmMinMatch = 0;
ret->ldmBucketSizeLog = FIO_LDM_PARAM_NOTSET;
ret->ldmHashRateLog = FIO_LDM_PARAM_NOTSET;
+ ret->streamSrcSize = 0;
ret->targetCBlockSize = 0;
+ ret->srcSizeHint = 0;
ret->literalCompressionMode = ZSTD_lcm_auto;
return ret;
}
void FIO_setRemoveSrcFile(FIO_prefs_t* const prefs, unsigned flag);
void FIO_setSparseWrite(FIO_prefs_t* const prefs, unsigned sparse); /**< 0: no sparse; 1: disable on stdout; 2: always enabled */
void FIO_setRsyncable(FIO_prefs_t* const prefs, int rsyncable);
+ void FIO_setStreamSrcSize(FIO_prefs_t* const prefs, size_t streamSrcSize);
void FIO_setTargetCBlockSize(FIO_prefs_t* const prefs, size_t targetCBlockSize);
+void FIO_setSrcSizeHint(FIO_prefs_t* const prefs, size_t srcSizeHint);
void FIO_setLiteralCompressionMode(
FIO_prefs_t* const prefs,
ZSTD_literalCompressionMode_e mode);
Due to the chaotic nature of dynamic adaptation, compressed result is not reproducible.
_note_ : at the time of this writing, `--adapt` can remain stuck at low speed
when combined with multiple worker threads (>=2).
+ * `--stream-size=#` :
+ Sets the pledged source size of input coming from a stream. This value must be exact, as it
+ will be included in the produced frame header. Incorrect stream sizes will cause an error.
+ This information will be used to better optimize compression parameters, resulting in
+ better and potentially faster compression, especially for smaller source sizes.
+* `--size-hint=#`:
+ When handling input from a stream, `zstd` must guess how large the source size
+ will be when optimizing compression parameters. If the stream size is relatively
+ small, this guess may be a poor one, resulting in a lower compression ratio than
+ expected. This feature allows for controlling the guess when needed.
+ Exact guesses result in better compression ratios. Overestimates result in slightly
+ degraded compression ratios, while underestimates may result in significant degradation.
* `--rsyncable` :
`zstd` will periodically synchronize the compression state to make the
compressed file more rsync-friendly. There is a negligible impact to
DISPLAY( "--long[=#]: enable long distance matching with given window log (default: %u)\n", g_defaultMaxWindowLog);
DISPLAY( "--fast[=#]: switch to ultra fast compression level (default: %u)\n", 1);
DISPLAY( "--adapt : dynamically adapt compression level to I/O conditions \n");
+ DISPLAY( "--stream-size=# : optimize compression parameters for streaming input of given number of bytes \n");
+ DISPLAY( "--size-hint=# : optimize compression parameters for streaming input of approximately this size\n");
DISPLAY( "--target-compressed-block-size=# : make compressed block near targeted size \n");
#ifdef ZSTD_MULTITHREAD
DISPLAY( " -T# : spawns # compression threads (default: 1, 0==# cores) \n");
const char* suffix = ZSTD_EXTENSION;
unsigned maxDictSize = g_defaultMaxDictSize;
unsigned dictID = 0;
+ size_t streamSrcSize = 0;
size_t targetCBlockSize = 0;
+ size_t srcSizeHint = 0;
int dictCLevel = g_defaultDictCLevel;
unsigned dictSelect = g_defaultSelectivityLevel;
#ifdef UTIL_HAS_CREATEFILELIST
if (longCommandWArg(&argument, "--maxdict=")) { maxDictSize = readU32FromChar(&argument); continue; }
if (longCommandWArg(&argument, "--dictID=")) { dictID = readU32FromChar(&argument); continue; }
if (longCommandWArg(&argument, "--zstd=")) { if (!parseCompressionParameters(argument, &compressionParams)) CLEAN_RETURN(badusage(programName)); continue; }
+ if (longCommandWArg(&argument, "--stream-size=")) { streamSrcSize = readU32FromChar(&argument); continue; }
if (longCommandWArg(&argument, "--target-compressed-block-size=")) { targetCBlockSize = readU32FromChar(&argument); continue; }
+ if (longCommandWArg(&argument, "--size-hint=")) { srcSizeHint = readU32FromChar(&argument); continue; }
if (longCommandWArg(&argument, "--long")) {
unsigned ldmWindowLog = 0;
ldmFlag = 1;
FIO_setAdaptMin(prefs, adaptMin);
FIO_setAdaptMax(prefs, adaptMax);
FIO_setRsyncable(prefs, rsyncable);
+ FIO_setStreamSrcSize(prefs, streamSrcSize);
FIO_setTargetCBlockSize(prefs, targetCBlockSize);
+ FIO_setSrcSizeHint(prefs, srcSizeHint);
FIO_setLiteralCompressionMode(prefs, literalCompressionMode);
if (adaptMin > cLevel) cLevel = adaptMin;
if (adaptMax < cLevel) cLevel = adaptMax;
else
operationResult = FIO_compressMultipleFilenames(prefs, filenameTable, filenameIdx, outFileName, suffix, dictFileName, cLevel, compressionParams);
#else
- (void)suffix; (void)adapt; (void)rsyncable; (void)ultra; (void)cLevel; (void)ldmFlag; (void)literalCompressionMode; (void)targetCBlockSize; (void)srcSizeHint; /* not used when ZSTD_NOCOMPRESS set */
- (void)suffix; (void)adapt; (void)rsyncable; (void)ultra; (void)cLevel; (void)ldmFlag; (void)literalCompressionMode; (void)streamSrcSize; (void)targetCBlockSize; /* not used when ZSTD_NOCOMPRESS set */
++ (void)suffix; (void)adapt; (void)rsyncable; (void)ultra; (void)cLevel; (void)ldmFlag; (void)literalCompressionMode; (void)targetCBlockSize; (void)streamSrcSize; (void)srcSizeHint; /* not used when ZSTD_NOCOMPRESS set */
DISPLAY("Compression not supported \n");
#endif
} else { /* decompression or test */
$ZSTD -f tmp1 notHere tmp2 && die "missing file not detected!"
+ println "\n===> stream-size mode"
+
+ ./datagen -g11000 > tmp
+ println "test : basic file compression vs sized streaming compression"
+ file_size=$($ZSTD -14 -f tmp -o tmp.zst && wc -c < tmp.zst)
+ stream_size=$(cat tmp | $ZSTD -14 --stream-size=11000 | wc -c)
+ if [ "$stream_size" -gt "$file_size" ]; then  # sized stream output must not exceed the file-mode output
+ die "hinted compression larger than expected"
+ fi
+ println "test : sized streaming compression and decompression"
+ cat tmp | $ZSTD -14 -f -o tmp.zst --stream-size=11000  # compress stdin only; -o is immediately followed by its file name
+ $ZSTD -df tmp.zst -o tmp_decompress
+ cmp tmp tmp_decompress || die "difference between original and decompressed file"
+ println "test : incorrect stream size"
+ cat tmp | $ZSTD -14 -f -o tmp.zst --stream-size=11001 && die "should fail with incorrect stream size"  # pledged size is one byte more than the 11000 bytes generated
+
+
+println "\n===> size-hint mode"
+
+./datagen -g11000 > tmp
+./datagen -g11000 > tmp2
+./datagen > tmpDict
+println "test : basic file compression vs hinted streaming compression"
+file_size=$($ZSTD -14 -f tmp -o tmp.zst && wc -c < tmp.zst)
+stream_size=$(cat tmp | $ZSTD -14 --size-hint=11000 | wc -c)
+if [ "$stream_size" -ge "$file_size" ]; then  # hinted stream output must be strictly smaller than the file-mode output
+ die "hinted compression larger than expected"
+fi
+println "test : hinted streaming compression and decompression"
+cat tmp | $ZSTD -14 -f -o tmp.zst --size-hint=11000
+$ZSTD -df tmp.zst -o tmp_decompress
+cmp tmp tmp_decompress || die "difference between original and decompressed file"
+println "test : hinted streaming compression with dictionary"
+cat tmp | $ZSTD -14 -f -D tmpDict --size-hint=11000 | $ZSTD -t -D tmpDict
+println "test : multiple file compression with hints and dictionary"
+$ZSTD -14 -f -D tmpDict --size-hint=11000 tmp tmp2  # one invocation, two inputs; the cmp lines below read tmp.zst and tmp2.zst
+$ZSTD -14 -f -o tmp1_.zst -D tmpDict --size-hint=11000 tmp  # same options, one input per invocation, for comparison
+$ZSTD -14 -f -o tmp2_.zst -D tmpDict --size-hint=11000 tmp2
+cmp tmp.zst tmp1_.zst || die "first file's output differs"
+cmp tmp2.zst tmp2_.zst || die "second file's output differs"
+println "test : incorrect hinted stream sizes"
+cat tmp | $ZSTD -14 -f --size-hint=11050 | $ZSTD -t # slightly too high
+cat tmp | $ZSTD -14 -f --size-hint=10950 | $ZSTD -t # slightly too low
+cat tmp | $ZSTD -14 -f --size-hint=22000 | $ZSTD -t # considerably too high
+cat tmp | $ZSTD -14 -f --size-hint=5500 | $ZSTD -t # considerably too low
+
+
println "\n===> dictionary tests "
println "- test with raw dict (content only) "