minor : modified ZSTD_preserveUnsortedMark() to be more vectorization friendly

author Yann Collet <cyan@fb.com>

Mon, 5 Feb 2018 19:46:02 +0000 (11:46 -0800)

committer Yann Collet <cyan@fb.com>

Mon, 5 Feb 2018 19:46:02 +0000 (11:46 -0800)
author Yann Collet <cyan@fb.com>
Mon, 5 Feb 2018 19:46:02 +0000 (11:46 -0800)
committer Yann Collet <cyan@fb.com>
Mon, 5 Feb 2018 19:46:02 +0000 (11:46 -0800)
diff --git a/lib/common/fse_decompress.c b/lib/common/fse_decompress.c

index 8e3f0035f69a09a0e933ee9e6e06656d58c582d8..4c66c3b77464c1a8053ef71990b133e49d78e422 100644 (file)
--- a/lib/common/fse_decompress.c
+++ b/lib/common/fse_decompress.c
@@ -139,8 +139,8 @@ size_t FSE_buildDTable(FSE_DTable* dt, const short* normalizedCounter, unsigned
      {   U32 u;
          for (u=0; u<tableSize; u++) {
              FSE_FUNCTION_TYPE const symbol = (FSE_FUNCTION_TYPE)(tableDecode[u].symbol);
-            U16 nextState = symbolNext[symbol]++;
-            tableDecode[u].nbBits = (BYTE) (tableLog - BIT_highbit32 ((U32)nextState) );
+            U32 const nextState = symbolNext[symbol]++;
+            tableDecode[u].nbBits = (BYTE) (tableLog - BIT_highbit32(nextState) );
              tableDecode[u].newState = (U16) ( (nextState << tableDecode[u].nbBits) - tableSize);
      }   }
  
diff --git a/lib/compress/zstd_lazy.c b/lib/compress/zstd_lazy.c

index 7f59eb34a61274fd8ec2a3d7364ea9eb5782d4a8..844afc051a09c30218d15f883e89f111511fdf63 100644 (file)
--- a/lib/compress/zstd_lazy.c
+++ b/lib/compress/zstd_lazy.c
@@ -18,33 +18,46 @@
  #define ZSTD_DUBT_UNSORTED_MARK 1   /* note : index 1 will now be confused with "unsorted" if sorted as larger than its predecessor.
                                         It's not a big deal though : the candidate will just be considered unsorted, and be sorted again.
                                         Additionnally, candidate position 1 will be lost.
-                                       But candidate 1 cannot hide a large tree of candidates, so it's a moderate loss.
-                                       The benefit is that ZSTD_DUBT_UNSORTED_MARK cannot be misdhandled by a table re-use using a different strategy */
+                                       But candidate 1 cannot hide a large tree of candidates, so it's a minimal loss.
+                                       The benefit is that ZSTD_DUBT_UNSORTED_MARK cannot be mishandled after table re-use with a different strategy */
  
  /*! ZSTD_preserveUnsortedMark() :
   *  pre-emptively increase value of ZSTD_DUBT_UNSORTED_MARK before ZSTD_reduceTable()
   *  so that combined operation preserves its value.
   *  Without it, ZSTD_DUBT_UNSORTED_MARK==1 would be squashed to 0.
- *  As a consequence, the list of unsorted elements would stop on the first element,
- *  removing candidates, resulting in a negligible loss to compression ratio
+ *  As a consequence, the list of unsorted elements would stop at the first element,
+ *  removing candidates, resulting in a very small loss to compression ratio
   *  (since overflow protection with ZSTD_reduceTable() is relatively rare).
+ *
   *  Another potential risk is that a position will be promoted from *unsorted*
- *  to *sorted=>smaller:0*, meaning the next candidate will be considered smaller.
+ *  to *sorted=>smaller:0*, meaning next candidate will be considered smaller.
   *  This could be wrong, and result in data corruption.
+ *
   *  On second thought, this corruption might be impossible,
- *  because unsorted elements are always at the beginning of the list,
- *  and squashing to zero reduce the list to a single element,
+ *  because unsorted elements stand at the beginning of the list,
+ *  and squashing to zero reduces the list to a single element,
   *  which needs to be sorted anyway.
   *  I haven't spent much thoughts into this possible scenario,
- *  and just felt it was safer to implement ZSTD_preserveUnsortedMark() */
+ *  and just felt it was safer to implement ZSTD_preserveUnsortedMark()
+ *
+ * `size` : must be a positive multiple of ZSTD_ROWSIZE */
+#define ZSTD_ROWSIZE 16
  void ZSTD_preserveUnsortedMark (U32* const table, U32 const size, U32 const reducerValue)
  {
-    U32 u;
-    for (u=0; u<size; u++)
-        if (table[u] == ZSTD_DUBT_UNSORTED_MARK)
-            table[u] = ZSTD_DUBT_UNSORTED_MARK + reducerValue;
+    int cellNb = 0;
+    U32 const nbRows = size / ZSTD_ROWSIZE;
+    U32 rowNb;
+    assert((size % ZSTD_ROWSIZE) == 0);
+    for (rowNb=0 ; rowNb < nbRows ; rowNb++) {
+        int column;
+        for (column=0; column<ZSTD_ROWSIZE; column++) {
+            U32 const adder = (table[cellNb] == ZSTD_DUBT_UNSORTED_MARK) ? reducerValue : 0;
+            table[cellNb] += adder;
+            cellNb++;
+    }   }
  }
  
+
  void ZSTD_updateDUBT(
                  ZSTD_matchState_t* ms, ZSTD_compressionParameters const* cParams,
                  const BYTE* ip, const BYTE* iend,
diff --git a/lib/decompress/zstd_decompress.c b/lib/decompress/zstd_decompress.c

index 1a1fb250673996eabf5b162887e93f654d6e8ae0..afae80d1cc426d651b5c4a08f22480ecdca8c2d1 100644 (file)
--- a/lib/decompress/zstd_decompress.c
+++ b/lib/decompress/zstd_decompress.c
@@ -655,6 +655,7 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
  
  typedef union {
      FSE_decode_t realData;
+    FSE_DTable dtable;
      U32 alignedBy4;
  } FSE_decode_t4;
  
@@ -733,7 +734,6 @@ static size_t ZSTD_buildSeqTable(FSE_DTable* DTableSpace, const FSE_DTable** DTa
                                   const void* src, size_t srcSize,
                                   const FSE_decode_t4* defaultTable, U32 flagRepeatTable)
  {
-    const void* const tmpPtr = defaultTable;   /* bypass strict aliasing */
      switch(type)
      {
      case set_rle :
@@ -743,7 +743,7 @@ static size_t ZSTD_buildSeqTable(FSE_DTable* DTableSpace, const FSE_DTable** DTa
          *DTablePtr = DTableSpace;
          return 1;
      case set_basic :
-        *DTablePtr = (const FSE_DTable*)tmpPtr;
+        *DTablePtr = &defaultTable->dtable;
          return 0;
      case set_repeat:
          if (!flagRepeatTable) return ERROR(corruption_detected);
author	Yann Collet <cyan@fb.com>
	Mon, 5 Feb 2018 19:46:02 +0000 (11:46 -0800)
committer	Yann Collet <cyan@fb.com>
	Mon, 5 Feb 2018 19:46:02 +0000 (11:46 -0800)
lib/common/fse_decompress.c		patch | blob | blame | history
lib/compress/zstd_lazy.c		patch | blob | blame | history
lib/decompress/zstd_decompress.c		patch | blob | blame | history