git.ipfire.org Git - thirdparty/zstd.git/commitdiff
fixed ZSTD_reduceIndex()
author    Yann Collet <cyan@fb.com>
          Sat, 6 Jan 2018 11:31:26 +0000 (12:31 +0100)
committer Yann Collet <cyan@fb.com>
          Sat, 6 Jan 2018 11:31:26 +0000 (12:31 +0100)
following suggestions from @terrelln.
Also added some comments explaining the logic behind ZSTD_preserveUnsortedMark().

lib/compress/zstd_compress.c
lib/compress/zstd_lazy.c
tests/playTests.sh

lib/compress/zstd_compress.c
index a804efca5749ef12709e187b14582b88b5ed6c2d..108229dea1d7dd00571304dfebb9747d02453bfd 100644 (file)
@@ -1146,14 +1146,9 @@ static void ZSTD_reduceIndex (ZSTD_CCtx* zc, const U32 reducerValue)
         ZSTD_reduceTable(zc->hashTable, hSize, reducerValue);
     }
 
-    if (zc->appliedParams.cParams.strategy != ZSTD_btlazy2) {
-        U32 const chainSize = (U32)1 << zc->appliedParams.cParams.chainLog;
-        ZSTD_reduceTable(zc->chainTable, chainSize, reducerValue);
-    }
-
     if (zc->appliedParams.cParams.strategy != ZSTD_fast) {
         U32 const chainSize = (U32)1 << zc->appliedParams.cParams.chainLog;
-        if (zc->appliedParams.cParams.strategy != ZSTD_btlazy2)
+        if (zc->appliedParams.cParams.strategy == ZSTD_btlazy2)
             ZSTD_preserveUnsortedMark(zc->chainTable, chainSize, reducerValue);
         ZSTD_reduceTable(zc->chainTable, chainSize, reducerValue);
     }
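
The hunk above fixes two issues: the deleted block reduced the chain table a second time for every strategy other than ZSTD_btlazy2, and the inverted test (!= instead of ==) meant ZSTD_btlazy2, the one strategy that actually uses the unsorted mark, never preserved it before reduction. For reference, a minimal sketch of the behavior ZSTD_reduceTable() is presumed to have here (an assumption drawn from the surrounding comments, not code taken from this commit; U32 is zstd's uint32_t typedef):

    /* Presumed behavior of ZSTD_reduceTable() (assumption, for illustration only):
     * rescale each index by subtracting reducerValue, flooring at 0. */
    static void ZSTD_reduceTable_sketch(U32* const table, U32 const size, U32 const reducerValue)
    {
        U32 u;
        for (u = 0; u < size; u++) {
            if (table[u] < reducerValue) table[u] = 0;   /* small indices squashed to 0 */
            else table[u] -= reducerValue;
        }
    }

Under that behavior, the pre-patch code path subtracted reducerValue twice from the chain table for the lazy strategies, corrupting match indices; the patched code reduces it exactly once.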
lib/compress/zstd_lazy.c
index 9385bf423be7af70e00bf9b60961b36cd6878cd5..7261cffcf094a9231046ec4abb4fd51c4c172efb 100644 (file)
                                        The benefit is that ZSTD_DUBT_UNSORTED_MARK cannot be mishandled by a table re-use using a different strategy */
 
 /*! ZSTD_preserveUnsortedMark() :
- *  pre-emptively increase value of ZSTD_DUBT_UNSORTED_MARK
- *  before ZSTD_reduceTable()
- *  sp that final operation preserves its value */
+ *  pre-emptively increase value of ZSTD_DUBT_UNSORTED_MARK before ZSTD_reduceTable()
+ *  so that the combined operation preserves its value.
+ *  Without it, ZSTD_DUBT_UNSORTED_MARK==1 would be squashed to 0.
+ *  As a consequence, the list of unsorted elements would stop at the first element,
+ *  removing candidates and resulting in a negligible loss of compression ratio
+ *  (since overflow protection with ZSTD_reduceTable() is relatively rare).
+ *  Another potential risk is that a position will be promoted from *unsorted*
+ *  to *sorted=>smaller:0*, meaning the next candidate will be considered smaller.
+ *  This could be wrong, and result in data corruption.
+ *  On second thought, this corruption might be impossible,
+ *  because unsorted elements are always at the beginning of the list,
+ *  and squashing to zero reduces the list to a single element,
+ *  which needs to be sorted anyway.
+ *  I haven't given much thought to this possible scenario,
+ *  and just felt it was safer to implement ZSTD_preserveUnsortedMark() */
 void ZSTD_preserveUnsortedMark (U32* const table, U32 const size, U32 const reducerValue)
 {
     U32 u;
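
Read together with the new comment, the (truncated) body above suggests the following mechanism, given here as a hedged sketch rather than the commit's verbatim implementation (ZSTD_DUBT_UNSORTED_MARK is assumed to equal 1, per the context above): every entry holding the mark is pre-incremented by reducerValue, so the subtraction inside ZSTD_reduceTable() lands it back on the mark instead of squashing it to 0.

    /* ZSTD_DUBT_UNSORTED_MARK assumed == 1 (see the comment above). */
    #define ZSTD_DUBT_UNSORTED_MARK 1

    /* Sketch: bump each unsorted mark so the later "subtract reducerValue"
     * step in ZSTD_reduceTable() restores it to ZSTD_DUBT_UNSORTED_MARK
     * instead of flooring it to 0. */
    static void ZSTD_preserveUnsortedMark_sketch(U32* const table, U32 const size, U32 const reducerValue)
    {
        U32 u;
        for (u = 0; u < size; u++)
            if (table[u] == ZSTD_DUBT_UNSORTED_MARK)
                table[u] = ZSTD_DUBT_UNSORTED_MARK + reducerValue;
    }

Round trip: a mark of 1 becomes 1 + reducerValue, then 1 again after reduction, while an ordinary index i simply becomes i - reducerValue (or 0 on underflow).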
tests/playTests.sh
index ee93b10a53b53546cdcc8cf3f1728e72a332914f..bb68e6529e5756c078f0c42e242e2073f7e5c135 100755 (executable)
@@ -93,6 +93,7 @@ else
     hasMT="true"
 fi
 
+
 $ECHO "\n===>  simple tests "
 
 ./datagen > tmp