git.ipfire.org Git - thirdparty/zstd.git/commitdiff
fixed ZSTD_reduceIndex()
author    Yann Collet <cyan@fb.com>
          Sat, 6 Jan 2018 11:31:26 +0000 (12:31 +0100)
committer Yann Collet <cyan@fb.com>
          Sat, 6 Jan 2018 11:31:26 +0000 (12:31 +0100)
following suggestions from @terrelln.
Also added some comments explaining the logic behind ZSTD_preserveUnsortedMark().

lib/compress/zstd_compress.c
lib/compress/zstd_lazy.c
tests/playTests.sh

lib/compress/zstd_compress.c
index a804efca5749ef12709e187b14582b88b5ed6c2d..108229dea1d7dd00571304dfebb9747d02453bfd 100644 (file)
@@ -1146,14 +1146,9 @@ static void ZSTD_reduceIndex (ZSTD_CCtx* zc, const U32 reducerValue)
         ZSTD_reduceTable(zc->hashTable, hSize, reducerValue);
     }
 
-    if (zc->appliedParams.cParams.strategy != ZSTD_btlazy2) {
-        U32 const chainSize = (U32)1 << zc->appliedParams.cParams.chainLog;
-        ZSTD_reduceTable(zc->chainTable, chainSize, reducerValue);
-    }
-
     if (zc->appliedParams.cParams.strategy != ZSTD_fast) {
         U32 const chainSize = (U32)1 << zc->appliedParams.cParams.chainLog;
-        if (zc->appliedParams.cParams.strategy != ZSTD_btlazy2)
+        if (zc->appliedParams.cParams.strategy == ZSTD_btlazy2)
             ZSTD_preserveUnsortedMark(zc->chainTable, chainSize, reducerValue);
         ZSTD_reduceTable(zc->chainTable, chainSize, reducerValue);
     }
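
The hunk above fixes two issues: the deleted block reduced the chain table a second time for every strategy other than ZSTD_btlazy2, and the inverted test (!= instead of ==) meant ZSTD_btlazy2, the one strategy that actually uses the unsorted mark, never preserved it before reduction. For reference, a minimal sketch of the behavior ZSTD_reduceTable() is presumed to have here (an assumption drawn from the surrounding comments, not code taken from this commit; U32 is zstd's uint32_t typedef):

    /* Presumed behavior of ZSTD_reduceTable() (assumption, for illustration only):
     * rescale each index by subtracting reducerValue, flooring at 0. */
    static void ZSTD_reduceTable_sketch(U32* const table, U32 const size, U32 const reducerValue)
    {
        U32 u;
        for (u = 0; u < size; u++) {
            if (table[u] < reducerValue) table[u] = 0;   /* small indices squashed to 0 */
            else table[u] -= reducerValue;
        }
    }

Under that behavior, the pre-patch code path subtracted reducerValue twice from the chain table for the lazy strategies, corrupting match indices; the patched code reduces it exactly once.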
lib/compress/zstd_lazy.c
index 9385bf423be7af70e00bf9b60961b36cd6878cd5..7261cffcf094a9231046ec4abb4fd51c4c172efb 100644 (file)
                                        The benefit is that ZSTD_DUBT_UNSORTED_MARK cannot be mishandled by a table re-use using a different strategy */
 
 /*! ZSTD_preserveUnsortedMark() :
- *  pre-emptively increase value of ZSTD_DUBT_UNSORTED_MARK
- *  before ZSTD_reduceTable()
- *  sp that final operation preserves its value */
+ *  pre-emptively increase value of ZSTD_DUBT_UNSORTED_MARK before ZSTD_reduceTable()
+ *  so that the combined operation preserves its value.
+ *  Without it, ZSTD_DUBT_UNSORTED_MARK==1 would be squashed to 0.
+ *  As a consequence, the list of unsorted elements would stop at the first element,
+ *  removing candidates and resulting in a negligible loss of compression ratio
+ *  (since overflow protection with ZSTD_reduceTable() is relatively rare).
+ *  Another potential risk is that a position will be promoted from *unsorted*
+ *  to *sorted=>smaller:0*, meaning the next candidate will be considered smaller.
+ *  This could be wrong, and result in data corruption.
+ *  On second thought, this corruption might be impossible,
+ *  because unsorted elements are always at the beginning of the list,
+ *  and squashing to zero reduces the list to a single element,
+ *  which needs to be sorted anyway.
+ *  I haven't given much thought to this possible scenario,
+ *  and just felt it was safer to implement ZSTD_preserveUnsortedMark() */
 void ZSTD_preserveUnsortedMark (U32* const table, U32 const size, U32 const reducerValue)
 {
     U32 u;
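
Read together with the new comment, the (truncated) body above suggests the following mechanism, given here as a hedged sketch rather than the commit's verbatim implementation (ZSTD_DUBT_UNSORTED_MARK is assumed to equal 1, per the context above): every entry holding the mark is pre-incremented by reducerValue, so the subtraction inside ZSTD_reduceTable() lands it back on the mark instead of squashing it to 0.

    /* ZSTD_DUBT_UNSORTED_MARK assumed == 1 (see the comment above). */
    #define ZSTD_DUBT_UNSORTED_MARK 1

    /* Sketch: bump each unsorted mark so the later "subtract reducerValue"
     * step in ZSTD_reduceTable() restores it to ZSTD_DUBT_UNSORTED_MARK
     * instead of flooring it to 0. */
    static void ZSTD_preserveUnsortedMark_sketch(U32* const table, U32 const size, U32 const reducerValue)
    {
        U32 u;
        for (u = 0; u < size; u++)
            if (table[u] == ZSTD_DUBT_UNSORTED_MARK)
                table[u] = ZSTD_DUBT_UNSORTED_MARK + reducerValue;
    }

Round trip: a mark of 1 becomes 1 + reducerValue, then 1 again after reduction, while an ordinary index i simply becomes i - reducerValue (or 0 on underflow).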
tests/playTests.sh
index ee93b10a53b53546cdcc8cf3f1728e72a332914f..bb68e6529e5756c078f0c42e242e2073f7e5c135 100755 (executable)
@@ -93,6 +93,7 @@ else
     hasMT="true"
 fi
 
+
 $ECHO "\n===>  simple tests "
 
 ./datagen > tmp