changed PREFETCH() macro into PREFETCH_L2()

author Yann Collet <cyan@fb.com>

Tue, 13 Nov 2018 01:05:32 +0000 (17:05 -0800)

committer Yann Collet <cyan@fb.com>

Tue, 13 Nov 2018 01:05:32 +0000 (17:05 -0800)
author Yann Collet <cyan@fb.com>
Tue, 13 Nov 2018 01:05:32 +0000 (17:05 -0800)
committer Yann Collet <cyan@fb.com>
Tue, 13 Nov 2018 01:05:32 +0000 (17:05 -0800)
diff --git a/lib/common/compiler.h b/lib/common/compiler.h

index cc830b2b475126eef1d875141e16d55954c2761d..e6267e90b974a66d609ea11f1c1b43cbbd4035a4 100644 (file)
--- a/lib/common/compiler.h
+++ b/lib/common/compiler.h
@@ -92,18 +92,18 @@
   * can be disabled, by declaring NO_PREFETCH build macro */
  #if defined(NO_PREFETCH)
  #  define PREFETCH_L1(ptr)  (void)(ptr)  /* disabled */
-#  define PREFETCH(ptr)     (void)(ptr)  /* disabled */
+#  define PREFETCH_L2(ptr)  (void)(ptr)  /* disabled */
  #else
  #  if defined(_MSC_VER) && (defined(_M_X64) || defined(_M_I86))  /* _mm_prefetch() is not defined outside of x86/x64 */
  #    include <mmintrin.h>   /* https://msdn.microsoft.com/fr-fr/library/84szxsww(v=vs.90).aspx */
  #    define PREFETCH_L1(ptr)  _mm_prefetch((const char*)(ptr), _MM_HINT_T0)
-#    define PREFETCH(ptr)     _mm_prefetch((const char*)(ptr), _MM_HINT_T1)
+#    define PREFETCH_L2(ptr)  _mm_prefetch((const char*)(ptr), _MM_HINT_T1)
  #  elif defined(__GNUC__) && ( (__GNUC__ >= 4) || ( (__GNUC__ == 3) && (__GNUC_MINOR__ >= 1) ) )
  #    define PREFETCH_L1(ptr)  __builtin_prefetch((ptr), 0 /* rw==read */, 3 /* locality */)
-#    define PREFETCH(ptr)     __builtin_prefetch((ptr), 0 /* rw==read */, 2 /* locality */)
+#    define PREFETCH_L2(ptr)  __builtin_prefetch((ptr), 0 /* rw==read */, 2 /* locality */)
  #  else
  #    define PREFETCH_L1(ptr) (void)(ptr)  /* disabled */
-#    define PREFETCH(ptr)    (void)(ptr)  /* disabled */
+#    define PREFETCH_L2(ptr) (void)(ptr)  /* disabled */
  #  endif
  #endif  /* NO_PREFETCH */
  
@@ -114,7 +114,7 @@
      size_t const _size = (size_t)(s);     \
      size_t _pos;                          \
      for (_pos=0; _pos<_size; _pos+=CACHELINE_SIZE) {  \
-        PREFETCH(_ptr + _pos);            \
+        PREFETCH_L2(_ptr + _pos);         \
      }                                     \
  }
  
diff --git a/lib/compress/zstd_lazy.c b/lib/compress/zstd_lazy.c

index af615e07763d6126383bc78a02042a12cb29fc25..e86f225af4a62da4847af328d56b6491a531dd75 100644 (file)
--- a/lib/compress/zstd_lazy.c
+++ b/lib/compress/zstd_lazy.c
@@ -63,12 +63,13 @@ ZSTD_updateDUBT(ZSTD_matchState_t* ms,
  static void
  ZSTD_insertDUBT1(ZSTD_matchState_t* ms,
                   U32 current, const BYTE* inputEnd,
-                 U32 nbCompares, U32 btLow, const ZSTD_dictMode_e dictMode)
+                 U32 nbCompares, U32 btLow,
+                 const ZSTD_dictMode_e dictMode)
  {
      const ZSTD_compressionParameters* const cParams = &ms->cParams;
-    U32*   const bt = ms->chainTable;
-    U32    const btLog  = cParams->chainLog - 1;
-    U32    const btMask = (1 << btLog) - 1;
+    U32* const bt = ms->chainTable;
+    U32  const btLog  = cParams->chainLog - 1;
+    U32  const btMask = (1 << btLog) - 1;
      size_t commonLengthSmaller=0, commonLengthLarger=0;
      const BYTE* const base = ms->window.base;
      const BYTE* const dictBase = ms->window.dictBase;
@@ -80,7 +81,7 @@ ZSTD_insertDUBT1(ZSTD_matchState_t* ms,
      const BYTE* match;
      U32* smallerPtr = bt + 2*(current&btMask);
      U32* largerPtr  = smallerPtr + 1;
-    U32 matchIndex = *smallerPtr;
+    U32 matchIndex = *smallerPtr;   /* this candidate is unsorted : next sorted candidate is reached through *smallerPtr, while *largerPtr contains previous unsorted candidate (which is already saved and can be overwritten) */
      U32 dummy32;   /* to be nullified at the end */
      U32 const windowLow = ms->window.lowLimit;
  
@@ -93,6 +94,9 @@ ZSTD_insertDUBT1(ZSTD_matchState_t* ms,
          U32* const nextPtr = bt + 2*(matchIndex & btMask);
          size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger);   /* guaranteed minimum nb of common bytes */
          assert(matchIndex < current);
+        /* note : all candidates are now supposed sorted,
+         * but it's still possible to have nextPtr[1] == ZSTD_DUBT_UNSORTED_MARK
+         * when a real index has the same value as ZSTD_DUBT_UNSORTED_MARK */
  
          if ( (dictMode != ZSTD_extDict)
            || (matchIndex+matchLength >= dictLimit)  /* both in current segment*/
@@ -108,7 +112,7 @@ ZSTD_insertDUBT1(ZSTD_matchState_t* ms,
              match = dictBase + matchIndex;
              matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iend, dictEnd, prefixStart);
              if (matchIndex+matchLength >= dictLimit)
-                match = base + matchIndex;   /* to prepare for next usage of match[matchLength] */
+                match = base + matchIndex;   /* preparation for next read of match[matchLength] */
          }
  
          DEBUGLOG(8, "ZSTD_insertDUBT1: comparing %u with %u : found %u common bytes ",
@@ -258,7 +262,7 @@ ZSTD_DUBT_findBestMatch(ZSTD_matchState_t* ms,
           && (nbCandidates > 1) ) {
          DEBUGLOG(8, "ZSTD_DUBT_findBestMatch: candidate %u is unsorted",
                      matchIndex);
-        *unsortedMark = previousCandidate;
+        *unsortedMark = previousCandidate;  /* the unsortedMark becomes a reversed chain, to move up back to original position */
          previousCandidate = matchIndex;
          matchIndex = *nextCandidate;
          nextCandidate = bt + 2*(matchIndex&btMask);
@@ -266,11 +270,13 @@ ZSTD_DUBT_findBestMatch(ZSTD_matchState_t* ms,
          nbCandidates --;
      }
  
+    /* nullify last candidate if it's still unsorted
+     * simplification, detrimental to compression ratio, beneficial for speed */
      if ( (matchIndex > unsortLimit)
        && (*unsortedMark==ZSTD_DUBT_UNSORTED_MARK) ) {
          DEBUGLOG(7, "ZSTD_DUBT_findBestMatch: nullify last unsorted candidate %u",
                      matchIndex);
-        *nextCandidate = *unsortedMark = 0;   /* nullify next candidate if it's still unsorted (note : simplification, detrimental to compression ratio, beneficial for speed) */
+        *nextCandidate = *unsortedMark = 0;
      }
  
      /* batch sort stacked candidates */
@@ -285,14 +291,14 @@ ZSTD_DUBT_findBestMatch(ZSTD_matchState_t* ms,
      }
  
      /* find longest match */
-    {   size_t commonLengthSmaller=0, commonLengthLarger=0;
+    {   size_t commonLengthSmaller = 0, commonLengthLarger = 0;
          const BYTE* const dictBase = ms->window.dictBase;
          const U32 dictLimit = ms->window.dictLimit;
          const BYTE* const dictEnd = dictBase + dictLimit;
          const BYTE* const prefixStart = base + dictLimit;
          U32* smallerPtr = bt + 2*(current&btMask);
          U32* largerPtr  = bt + 2*(current&btMask) + 1;
-        U32 matchEndIdx = current+8+1;
+        U32 matchEndIdx = current + 8 + 1;
          U32 dummy32;   /* to be nullified at the end */
          size_t bestLength = 0;
  
@@ -433,7 +439,7 @@ static size_t ZSTD_BtFindBestMatch_extDict_selectMLS (
  /* *********************************
  *  Hash Chain
  ***********************************/
-#define NEXT_IN_CHAIN(d, mask)   chainTable[(d) & mask]
+#define NEXT_IN_CHAIN(d, mask)   chainTable[(d) & (mask)]
  
  /* Update chains up to ip (excluded)
     Assumption : always within prefix (i.e. not within extDict) */
@@ -497,6 +503,7 @@ size_t ZSTD_HcFindBestMatch_generic (
          size_t currentMl=0;
          if ((dictMode != ZSTD_extDict) || matchIndex >= dictLimit) {
              const BYTE* const match = base + matchIndex;
+            assert(matchIndex >= dictLimit);   /* ensures this is true if dictMode != ZSTD_extDict */
              if (match[ml] == ip[ml])   /* potentially better */
                  currentMl = ZSTD_count(ip, match, iLimit);
          } else {
author	Yann Collet <cyan@fb.com>
	Tue, 13 Nov 2018 01:05:32 +0000 (17:05 -0800)
committer	Yann Collet <cyan@fb.com>
	Tue, 13 Nov 2018 01:05:32 +0000 (17:05 -0800)
lib/common/compiler.h		patch | blob | blame | history
lib/compress/zstd_lazy.c		patch | blob | blame | history