From: Nick Magerko Date: Thu, 22 Aug 2019 18:51:41 +0000 (-0700) Subject: Fix merge conflicts X-Git-Tag: v1.4.4~1^2~72^2 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=493f95c7dffb3ef4c729514350d2d3b1b27e797f;p=thirdparty%2Fzstd.git Fix merge conflicts --- 493f95c7dffb3ef4c729514350d2d3b1b27e797f diff --cc programs/fileio.c index 0eda12649,873013a51..20e2ee2a1 --- a/programs/fileio.c +++ b/programs/fileio.c @@@ -305,8 -304,8 +305,9 @@@ struct FIO_prefs_s int ldmMinMatch; int ldmBucketSizeLog; int ldmHashRateLog; + size_t streamSrcSize; size_t targetCBlockSize; + int srcSizeHint; ZSTD_literalCompressionMode_e literalCompressionMode; /* IO preferences */ @@@ -351,8 -350,8 +352,9 @@@ FIO_prefs_t* FIO_createPreferences(void ret->ldmMinMatch = 0; ret->ldmBucketSizeLog = FIO_LDM_PARAM_NOTSET; ret->ldmHashRateLog = FIO_LDM_PARAM_NOTSET; + ret->streamSrcSize = 0; ret->targetCBlockSize = 0; + ret->srcSizeHint = 0; ret->literalCompressionMode = ZSTD_lcm_auto; return ret; } diff --cc programs/fileio.h index fd49a749d,13f6f1d05..096d90b5c --- a/programs/fileio.h +++ b/programs/fileio.h @@@ -71,8 -71,8 +71,9 @@@ void FIO_setOverlapLog(FIO_prefs_t* con void FIO_setRemoveSrcFile(FIO_prefs_t* const prefs, unsigned flag); void FIO_setSparseWrite(FIO_prefs_t* const prefs, unsigned sparse); /**< 0: no sparse; 1: disable on stdout; 2: always enabled */ void FIO_setRsyncable(FIO_prefs_t* const prefs, int rsyncable); + void FIO_setStreamSrcSize(FIO_prefs_t* const prefs, size_t streamSrcSize); void FIO_setTargetCBlockSize(FIO_prefs_t* const prefs, size_t targetCBlockSize); +void FIO_setSrcSizeHint(FIO_prefs_t* const prefs, size_t srcSizeHint); void FIO_setLiteralCompressionMode( FIO_prefs_t* const prefs, ZSTD_literalCompressionMode_e mode); diff --cc programs/zstd.1.md index f8349fa80,1bdc42654..dff4d9eac --- a/programs/zstd.1.md +++ b/programs/zstd.1.md @@@ -144,13 -144,11 +144,18 @@@ the last one takes effect Due to the chaotic nature of dynamic adaptation, 
compressed result is not reproducible. _note_ : at the time of this writing, `--adapt` can remain stuck at low speed when combined with multiple worker threads (>=2). + * `--stream-size=#` : + Sets the pledged source size of input coming from a stream. This value must be exact, as it + will be included in the produced frame header. Incorrect stream sizes will cause an error. + This information will be used to better optimize compression parameters, resulting in + better and potentially faster compression, especially for smaller source sizes. +* `--size-hint=#`: + When handling input from a stream, `zstd` must guess how large the source size + will be when optimizing compression parameters. If the stream size is relatively + small, this guess may be a poor one, resulting in a worse compression ratio than + expected. This feature allows for controlling the guess when needed. + Exact guesses result in better compression ratios. Overestimates result in slightly + degraded compression ratios, while underestimates may result in significant degradation. * `--rsyncable` : `zstd` will periodically synchronize the compression state to make the compressed file more rsync-friendly. 
There is a negligible impact to diff --cc programs/zstdcli.c index 98b9ffb90,401e1ee2c..98df728a9 --- a/programs/zstdcli.c +++ b/programs/zstdcli.c @@@ -141,7 -141,7 +141,8 @@@ static int usage_advanced(const char* p DISPLAY( "--long[=#]: enable long distance matching with given window log (default: %u)\n", g_defaultMaxWindowLog); DISPLAY( "--fast[=#]: switch to ultra fast compression level (default: %u)\n", 1); DISPLAY( "--adapt : dynamically adapt compression level to I/O conditions \n"); + DISPLAY( "--stream-size=# : optimize compression parameters for streaming input of given number of bytes \n"); + DISPLAY( "--size-hint=# optimize compression parameters for streaming input of approximately this size\n"); DISPLAY( "--target-compressed-block-size=# : make compressed block near targeted size \n"); #ifdef ZSTD_MULTITHREAD DISPLAY( " -T# : spawns # compression threads (default: 1, 0==# cores) \n"); @@@ -589,8 -589,8 +590,9 @@@ int main(int argCount, const char* argv const char* suffix = ZSTD_EXTENSION; unsigned maxDictSize = g_defaultMaxDictSize; unsigned dictID = 0; + size_t streamSrcSize = 0; size_t targetCBlockSize = 0; + size_t srcSizeHint = 0; int dictCLevel = g_defaultDictCLevel; unsigned dictSelect = g_defaultSelectivityLevel; #ifdef UTIL_HAS_CREATEFILELIST @@@ -747,8 -747,8 +749,9 @@@ if (longCommandWArg(&argument, "--maxdict=")) { maxDictSize = readU32FromChar(&argument); continue; } if (longCommandWArg(&argument, "--dictID=")) { dictID = readU32FromChar(&argument); continue; } if (longCommandWArg(&argument, "--zstd=")) { if (!parseCompressionParameters(argument, &compressionParams)) CLEAN_RETURN(badusage(programName)); continue; } + if (longCommandWArg(&argument, "--stream-size=")) { streamSrcSize = readU32FromChar(&argument); continue; } if (longCommandWArg(&argument, "--target-compressed-block-size=")) { targetCBlockSize = readU32FromChar(&argument); continue; } + if (longCommandWArg(&argument, "--size-hint=")) { srcSizeHint = 
readU32FromChar(&argument); continue; } if (longCommandWArg(&argument, "--long")) { unsigned ldmWindowLog = 0; ldmFlag = 1; @@@ -1153,8 -1153,8 +1156,9 @@@ FIO_setAdaptMin(prefs, adaptMin); FIO_setAdaptMax(prefs, adaptMax); FIO_setRsyncable(prefs, rsyncable); + FIO_setStreamSrcSize(prefs, streamSrcSize); FIO_setTargetCBlockSize(prefs, targetCBlockSize); + FIO_setSrcSizeHint(prefs, srcSizeHint); FIO_setLiteralCompressionMode(prefs, literalCompressionMode); if (adaptMin > cLevel) cLevel = adaptMin; if (adaptMax < cLevel) cLevel = adaptMax; @@@ -1164,7 -1164,7 +1168,7 @@@ else operationResult = FIO_compressMultipleFilenames(prefs, filenameTable, filenameIdx, outFileName, suffix, dictFileName, cLevel, compressionParams); #else - (void)suffix; (void)adapt; (void)rsyncable; (void)ultra; (void)cLevel; (void)ldmFlag; (void)literalCompressionMode; (void)targetCBlockSize; (void)srcSizeHint; /* not used when ZSTD_NOCOMPRESS set */ - (void)suffix; (void)adapt; (void)rsyncable; (void)ultra; (void)cLevel; (void)ldmFlag; (void)literalCompressionMode; (void)streamSrcSize; (void)targetCBlockSize; /* not used when ZSTD_NOCOMPRESS set */ ++ (void)suffix; (void)adapt; (void)rsyncable; (void)ultra; (void)cLevel; (void)ldmFlag; (void)literalCompressionMode; (void)targetCBlockSize; (void)streamSrcSize; (void)srcSizeHint; /* not used when ZSTD_NOCOMPRESS set */ DISPLAY("Compression not supported \n"); #endif } else { /* decompression or test */ diff --cc tests/playTests.sh index 2404ffb64,b74076763..ad096fddd --- a/tests/playTests.sh +++ b/tests/playTests.sh @@@ -409,36 -408,23 +408,53 @@@ println "compress multiple files includ $ZSTD -f tmp1 notHere tmp2 && die "missing file not detected!" 
+ println "\n===> stream-size mode" + + ./datagen -g11000 > tmp + println "test : basic file compression vs sized streaming compression" + file_size=$($ZSTD -14 -f tmp -o tmp.zst && wc -c < tmp.zst) + stream_size=$(cat tmp | $ZSTD -14 --stream-size=11000 | wc -c) + if [ "$stream_size" -gt "$file_size" ]; then + die "hinted compression larger than expected" + fi + println "test : sized streaming compression and decompression" + cat tmp | $ZSTD -14 -f tmp -o --stream-size=11000 tmp.zst + $ZSTD -df tmp.zst -o tmp_decompress + cmp tmp tmp_decompress || die "difference between original and decompressed file" + println "test : incorrect stream size" + cat tmp | $ZSTD -14 -f -o tmp.zst --stream-size=11001 && die "should fail with incorrect stream size" + + +println "\n===> size-hint mode" + +./datagen -g11000 > tmp +./datagen -g11000 > tmp2 +./datagen > tmpDict +println "test : basic file compression vs hinted streaming compression" +file_size=$($ZSTD -14 -f tmp -o tmp.zst && wc -c < tmp.zst) +stream_size=$(cat tmp | $ZSTD -14 --size-hint=11000 | wc -c) +if [ "$stream_size" -ge "$file_size" ]; then + die "hinted compression larger than expected" +fi +println "test : hinted streaming compression and decompression" +cat tmp | $ZSTD -14 -f -o tmp.zst --size-hint=11000 +$ZSTD -df tmp.zst -o tmp_decompress +cmp tmp tmp_decompress || die "difference between original and decompressed file" +println "test : hinted streaming compression with dictionary" +cat tmp | $ZSTD -14 -f -D tmpDict --size-hint=11000 | $ZSTD -t -D tmpDict +println "test : multiple file compression with hints and dictionary" +$ZSTD -14 -f -D tmpDict --size-hint=11000 tmp tmp2 +$ZSTD -14 -f -o tmp1_.zst -D tmpDict --size-hint=11000 tmp +$ZSTD -14 -f -o tmp2_.zst -D tmpDict --size-hint=11000 tmp2 +cmp tmp.zst tmp1_.zst || die "first file's output differs" +cmp tmp2.zst tmp2_.zst || die "second file's output differs" +println "test : incorrect hinted stream sizes" +cat tmp | $ZSTD -14 -f --size-hint=11050 | $ZSTD 
-t # slightly too high +cat tmp | $ZSTD -14 -f --size-hint=10950 | $ZSTD -t # slightly too low +cat tmp | $ZSTD -14 -f --size-hint=22000 | $ZSTD -t # considerably too high +cat tmp | $ZSTD -14 -f --size-hint=5500 | $ZSTD -t # considerably too low + + println "\n===> dictionary tests " println "- test with raw dict (content only) "