Fix merge conflicts

author Nick Magerko <nmagerko@fb.com>

Thu, 22 Aug 2019 18:51:41 +0000 (11:51 -0700)

committer Nick Magerko <nmagerko@fb.com>

Thu, 22 Aug 2019 18:51:41 +0000 (11:51 -0700)
author Nick Magerko <nmagerko@fb.com>
Thu, 22 Aug 2019 18:51:41 +0000 (11:51 -0700)
committer Nick Magerko <nmagerko@fb.com>
Thu, 22 Aug 2019 18:51:41 +0000 (11:51 -0700)
diff --cc lib/compress/zstd_compress.c
Simple merge
diff --cc programs/fileio.c

index 0eda12649394b1bdf65e30cf1fb30724713748b7,873013a514a5bb48b5a4496f621d845d067edb9e..20e2ee2a19161cd824918109ece53c950b469b9a
--- 1/programs/fileio.c
--- 2/programs/fileio.c
+++ b/programs/fileio.c
@@@ -305,8 -304,8 +305,9 @@@ struct FIO_prefs_s 
       int ldmMinMatch;
       int ldmBucketSizeLog;
       int ldmHashRateLog;
+     size_t streamSrcSize;
       size_t targetCBlockSize;
+ +    int srcSizeHint;
       ZSTD_literalCompressionMode_e literalCompressionMode;
   
       /* IO preferences */
@@@ -351,8 -350,8 +352,9 @@@ FIO_prefs_t* FIO_createPreferences(void
       ret->ldmMinMatch = 0;
       ret->ldmBucketSizeLog = FIO_LDM_PARAM_NOTSET;
       ret->ldmHashRateLog = FIO_LDM_PARAM_NOTSET;
+     ret->streamSrcSize = 0;
       ret->targetCBlockSize = 0;
+ +    ret->srcSizeHint = 0;
       ret->literalCompressionMode = ZSTD_lcm_auto;
       return ret;
   }
diff --cc programs/fileio.h

index fd49a749d96af764fa5230ee4cf5a1d819628948,13f6f1d0590ba997d1a46cf7c536e38117445396..096d90b5caae6e8412e8029beaded512848a84ac
--- 1/programs/fileio.h
--- 2/programs/fileio.h
+++ b/programs/fileio.h
@@@ -71,8 -71,8 +71,9 @@@ void FIO_setOverlapLog(FIO_prefs_t* con
   void FIO_setRemoveSrcFile(FIO_prefs_t* const prefs, unsigned flag);
   void FIO_setSparseWrite(FIO_prefs_t* const prefs, unsigned sparse);  /**< 0: no sparse; 1: disable on stdout; 2: always enabled */
   void FIO_setRsyncable(FIO_prefs_t* const prefs, int rsyncable);
+ void FIO_setStreamSrcSize(FIO_prefs_t* const prefs, size_t streamSrcSize);
   void FIO_setTargetCBlockSize(FIO_prefs_t* const prefs, size_t targetCBlockSize);
+ +void FIO_setSrcSizeHint(FIO_prefs_t* const prefs, size_t srcSizeHint);
   void FIO_setLiteralCompressionMode(
           FIO_prefs_t* const prefs,
           ZSTD_literalCompressionMode_e mode);
diff --cc programs/zstd.1.md

index f8349fa80c6c71c2435245195d451ca676cb12fa,1bdc426540bf3178ec84f070db424df4502944b5..dff4d9eac5115996cc286c2fd67e7c98e0d6ac92
--- 1/programs/zstd.1.md
--- 2/programs/zstd.1.md
+++ b/programs/zstd.1.md
@@@ -144,13 -144,11 +144,18 @@@ the last one takes effect
       Due to the chaotic nature of dynamic adaptation, compressed result is not reproducible.
       _note_ : at the time of this writing, `--adapt` can remain stuck at low speed
       when combined with multiple worker threads (>=2).
+ * `--stream-size=#` :
+     Sets the pledged source size of input coming from a stream. This value must be exact, as it
+     will be included in the produced frame header. Incorrect stream sizes will cause an error.
+     This information will be used to better optimize compression parameters, resulting in
+     better and potentially faster compression, especially for smaller source sizes.
+ +* `--size-hint=#` :
+ +    When handling input from a stream, `zstd` must guess how large the source size
+ +    will be when optimizing compression parameters. If the stream size is relatively
+ +    small, this guess may be a poor one, resulting in a lower compression ratio than
+ +    expected. This feature allows for controlling the guess when needed.
+ +    Exact guesses result in better compression ratios. Overestimates result in slightly
+ +    degraded compression ratios, while underestimates may result in significant degradation.
   * `--rsyncable` :
       `zstd` will periodically synchronize the compression state to make the
       compressed file more rsync-friendly. There is a negligible impact to
diff --cc programs/zstdcli.c

index 98b9ffb905868e9571ea3faf5c63f93dd0cbce24,401e1ee2c2f3aff7f0a8b77e0ee70934a3125c62..98df728a98b5d8aa507cedfe715169ace3bccf16
--- 1/programs/zstdcli.c
--- 2/programs/zstdcli.c
+++ b/programs/zstdcli.c
@@@ -141,7 -141,7 +141,8 @@@ static int usage_advanced(const char* p
       DISPLAY( "--long[=#]: enable long distance matching with given window log (default: %u)\n", g_defaultMaxWindowLog);
       DISPLAY( "--fast[=#]: switch to ultra fast compression level (default: %u)\n", 1);
       DISPLAY( "--adapt : dynamically adapt compression level to I/O conditions \n");
+     DISPLAY( "--stream-size=# : optimize compression parameters for streaming input of given number of bytes \n");
+ +    DISPLAY( "--size-hint=# optimize compression parameters for streaming input of approximately this size\n");
       DISPLAY( "--target-compressed-block-size=# : make compressed block near targeted size \n");
   #ifdef ZSTD_MULTITHREAD
       DISPLAY( " -T#    : spawns # compression threads (default: 1, 0==# cores) \n");
@@@ -589,8 -589,8 +590,9 @@@ int main(int argCount, const char* argv
       const char* suffix = ZSTD_EXTENSION;
       unsigned maxDictSize = g_defaultMaxDictSize;
       unsigned dictID = 0;
+     size_t streamSrcSize = 0;
       size_t targetCBlockSize = 0;
+ +    size_t srcSizeHint = 0;
       int dictCLevel = g_defaultDictCLevel;
       unsigned dictSelect = g_defaultSelectivityLevel;
   #ifdef UTIL_HAS_CREATEFILELIST
@@@ -747,8 -747,8 +749,9 @@@
                       if (longCommandWArg(&argument, "--maxdict=")) { maxDictSize = readU32FromChar(&argument); continue; }
                       if (longCommandWArg(&argument, "--dictID=")) { dictID = readU32FromChar(&argument); continue; }
                       if (longCommandWArg(&argument, "--zstd=")) { if (!parseCompressionParameters(argument, &compressionParams)) CLEAN_RETURN(badusage(programName)); continue; }
+                     if (longCommandWArg(&argument, "--stream-size=")) { streamSrcSize = readU32FromChar(&argument); continue; }
                       if (longCommandWArg(&argument, "--target-compressed-block-size=")) { targetCBlockSize = readU32FromChar(&argument); continue; }
+ +                    if (longCommandWArg(&argument, "--size-hint=")) { srcSizeHint = readU32FromChar(&argument); continue; }
                       if (longCommandWArg(&argument, "--long")) {
                           unsigned ldmWindowLog = 0;
                           ldmFlag = 1;
@@@ -1153,8 -1153,8 +1156,9 @@@
           FIO_setAdaptMin(prefs, adaptMin);
           FIO_setAdaptMax(prefs, adaptMax);
           FIO_setRsyncable(prefs, rsyncable);
+         FIO_setStreamSrcSize(prefs, streamSrcSize);
           FIO_setTargetCBlockSize(prefs, targetCBlockSize);
+ +        FIO_setSrcSizeHint(prefs, srcSizeHint);
           FIO_setLiteralCompressionMode(prefs, literalCompressionMode);
           if (adaptMin > cLevel) cLevel = adaptMin;
           if (adaptMax < cLevel) cLevel = adaptMax;
@@@ -1164,7 -1164,7 +1168,7 @@@
           else
             operationResult = FIO_compressMultipleFilenames(prefs, filenameTable, filenameIdx, outFileName, suffix, dictFileName, cLevel, compressionParams);
   #else
-         (void)suffix; (void)adapt; (void)rsyncable; (void)ultra; (void)cLevel; (void)ldmFlag; (void)literalCompressionMode; (void)targetCBlockSize; (void)srcSizeHint; /* not used when ZSTD_NOCOMPRESS set */
- -        (void)suffix; (void)adapt; (void)rsyncable; (void)ultra; (void)cLevel; (void)ldmFlag; (void)literalCompressionMode; (void)streamSrcSize; (void)targetCBlockSize; /* not used when ZSTD_NOCOMPRESS set */
++        (void)suffix; (void)adapt; (void)rsyncable; (void)ultra; (void)cLevel; (void)ldmFlag; (void)literalCompressionMode; (void)targetCBlockSize; (void)streamSrcSize; (void)srcSizeHint; /* not used when ZSTD_NOCOMPRESS set */
           DISPLAY("Compression not supported \n");
   #endif
       } else {  /* decompression or test */
diff --cc tests/playTests.sh

index 2404ffb64a3b16f5e4d324653fc4210f344ea831,b740767632ebdd69bd483b34a4ef10109b9ba8ad..ad096fdddbdf8a9eb781f78792f57ed7a77acbf5
--- 1/tests/playTests.sh
--- 2/tests/playTests.sh
+++ b/tests/playTests.sh
@@@ -409,36 -408,23 +408,53 @@@ println "compress multiple files includ
   $ZSTD -f tmp1 notHere tmp2 && die "missing file not detected!"
   
   
+ println "\n===>  stream-size mode"
+ 
+ ./datagen -g11000 > tmp
+ println "test : basic file compression vs sized streaming compression"
+ file_size=$($ZSTD -14 -f tmp -o tmp.zst && wc -c < tmp.zst)
+ stream_size=$(cat tmp | $ZSTD -14 --stream-size=11000 | wc -c)
+ if [ "$stream_size" -gt "$file_size" ]; then
+   die "hinted compression larger than expected"
+ fi
+ println "test : sized streaming compression and decompression"
+ cat tmp | $ZSTD -14 -f tmp -o --stream-size=11000 tmp.zst
+ $ZSTD -df tmp.zst -o tmp_decompress
+ cmp tmp tmp_decompress || die "difference between original and decompressed file"
+ println "test : incorrect stream size"
+ cat tmp | $ZSTD -14 -f -o tmp.zst --stream-size=11001 && die "should fail with incorrect stream size"
+ 
+ 
+ +println "\n===>  size-hint mode"
+ +
+ +./datagen -g11000 > tmp
+ +./datagen -g11000 > tmp2
+ +./datagen > tmpDict
+ +println "test : basic file compression vs hinted streaming compression"
+ +file_size=$($ZSTD -14 -f tmp -o tmp.zst && wc -c < tmp.zst)
+ +stream_size=$(cat tmp | $ZSTD -14 --size-hint=11000 | wc -c)
+ +if [ "$stream_size" -ge "$file_size" ]; then
+ +  die "hinted compression larger than expected"
+ +fi
+ +println "test : hinted streaming compression and decompression"
+ +cat tmp | $ZSTD -14 -f -o tmp.zst --size-hint=11000
+ +$ZSTD -df tmp.zst -o tmp_decompress
+ +cmp tmp tmp_decompress || die "difference between original and decompressed file"
+ +println "test : hinted streaming compression with dictionary"
+ +cat tmp | $ZSTD -14 -f -D tmpDict --size-hint=11000 | $ZSTD -t -D tmpDict
+ +println "test : multiple file compression with hints and dictionary"
+ +$ZSTD -14 -f -D tmpDict --size-hint=11000 tmp tmp2
+ +$ZSTD -14 -f -o tmp1_.zst -D tmpDict --size-hint=11000 tmp
+ +$ZSTD -14 -f -o tmp2_.zst -D tmpDict --size-hint=11000 tmp2
+ +cmp tmp.zst tmp1_.zst || die "first file's output differs"
+ +cmp tmp2.zst tmp2_.zst || die "second file's output differs"
+ +println "test : incorrect hinted stream sizes"
+ +cat tmp | $ZSTD -14 -f --size-hint=11050 | $ZSTD -t  # slightly too high
+ +cat tmp | $ZSTD -14 -f --size-hint=10950 | $ZSTD -t  # slightly too low
+ +cat tmp | $ZSTD -14 -f --size-hint=22000 | $ZSTD -t  # considerably too high
+ +cat tmp | $ZSTD -14 -f --size-hint=5500  | $ZSTD -t  # considerably too low
+ +
+ +
   println "\n===>  dictionary tests "
   
   println "- test with raw dict (content only) "
diff --cc tests/zstreamtest.c
Simple merge
author	Nick Magerko <nmagerko@fb.com>
	Thu, 22 Aug 2019 18:51:41 +0000 (11:51 -0700)
committer	Nick Magerko <nmagerko@fb.com>
	Thu, 22 Aug 2019 18:51:41 +0000 (11:51 -0700)
		1	2
lib/compress/zstd_compress.c	patch \|	diff1 \|	diff2 \|	blob \| history
programs/fileio.c	patch \|	diff1 \|	diff2 \|	blob \| history
programs/fileio.h	patch \|	diff1 \|	diff2 \|	blob \| history
programs/zstd.1.md	patch \|	diff1 \|	diff2 \|	blob \| history
programs/zstdcli.c	patch \|	diff1 \|	diff2 \|	blob \| history
tests/playTests.sh	patch \|	diff1 \|	diff2 \|	blob \| history
tests/zstreamtest.c	patch \|	diff1 \|	diff2 \|	blob \| history