Pass iend into ZSTD_storeSeq() to allow ZSTD_wildcopy()

author Nick Terrell <nickrterrell@gmail.com>

Fri, 20 Sep 2019 07:52:55 +0000 (00:52 -0700)

committer Nick Terrell <nickrterrell@gmail.com>

Fri, 20 Sep 2019 07:56:20 +0000 (00:56 -0700)
author Nick Terrell <nickrterrell@gmail.com>
Fri, 20 Sep 2019 07:52:55 +0000 (00:52 -0700)
committer Nick Terrell <nickrterrell@gmail.com>
Fri, 20 Sep 2019 07:56:20 +0000 (00:56 -0700)
diff --git a/lib/compress/zstd_compress_internal.h b/lib/compress/zstd_compress_internal.h

index fefa8aff59c82fec11230048c06f255f9fffda5f..579bd5d43470dc68380d193a50c84e206d76a106 100644 (file)
--- a/lib/compress/zstd_compress_internal.h
+++ b/lib/compress/zstd_compress_internal.h
@@ -344,8 +344,9 @@ MEM_STATIC size_t ZSTD_minGain(size_t srcSize, ZSTD_strategy strat)
   *  Store a sequence (litlen, litPtr, offCode and mlBase) into seqStore_t.
   *  `offCode` : distance to match + ZSTD_REP_MOVE (values <= ZSTD_REP_MOVE are repCodes).
   *  `mlBase` : matchLength - MINMATCH
+ *  Allowed to overread literals up to litLimit.
  */
-MEM_STATIC void ZSTD_storeSeq(seqStore_t* seqStorePtr, size_t litLength, const void* literals, U32 offCode, size_t mlBase)
+MEM_STATIC void ZSTD_storeSeq(seqStore_t* seqStorePtr, size_t litLength, const BYTE* literals, const BYTE* litLimit, U32 offCode, size_t mlBase)
  {
  #if defined(DEBUGLEVEL) && (DEBUGLEVEL >= 6)
      static const BYTE* g_start = NULL;
@@ -362,7 +363,11 @@ MEM_STATIC void ZSTD_storeSeq(seqStore_t* seqStorePtr, size_t litLength, const v
      /* We are guaranteed at least 8 bytes of literals space because of HASH_READ_SIZE and
       * MINMATCH.
       */
-    ZSTD_wildcopy8(seqStorePtr->lit, literals, (ptrdiff_t)litLength);
+    assert(litLimit - literals >= HASH_READ_SIZE + MINMATCH);
+    if (litLimit - literals >= WILDCOPY_OVERLENGTH)
+           ZSTD_wildcopy(seqStorePtr->lit, literals, (ptrdiff_t)litLength, ZSTD_no_overlap);
+    else
+           ZSTD_wildcopy8(seqStorePtr->lit, literals, (ptrdiff_t)litLength);
      seqStorePtr->lit += litLength;
  
      /* literal Length */
diff --git a/lib/compress/zstd_double_fast.c b/lib/compress/zstd_double_fast.c

index 54467cc31bd2c41babdb698dd9e7862b3b2ce96e..a661a48534dac3850cff49d5f1deb231fb86a5ad 100644 (file)
--- a/lib/compress/zstd_double_fast.c
+++ b/lib/compress/zstd_double_fast.c
@@ -148,7 +148,7 @@ size_t ZSTD_compressBlock_doubleFast_generic(
              const BYTE* repMatchEnd = repIndex < prefixLowestIndex ? dictEnd : iend;
              mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixLowest) + 4;
              ip++;
-            ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, 0, mLength-MINMATCH);
+            ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, 0, mLength-MINMATCH);
              goto _match_stored;
          }
  
@@ -157,7 +157,7 @@ size_t ZSTD_compressBlock_doubleFast_generic(
            && ((offset_1 > 0) & (MEM_read32(ip+1-offset_1) == MEM_read32(ip+1)))) {
              mLength = ZSTD_count(ip+1+4, ip+1+4-offset_1, iend) + 4;
              ip++;
-            ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, 0, mLength-MINMATCH);
+            ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, 0, mLength-MINMATCH);
              goto _match_stored;
          }
  
@@ -247,7 +247,7 @@ _match_found:
          offset_2 = offset_1;
          offset_1 = offset;
  
-        ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
+        ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
  
  _match_stored:
          /* match found */
@@ -278,7 +278,7 @@ _match_stored:
                          const BYTE* const repEnd2 = repIndex2 < prefixLowestIndex ? dictEnd : iend;
                          size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixLowest) + 4;
                          U32 tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset;   /* swap offset_2 <=> offset_1 */
-                        ZSTD_storeSeq(seqStore, 0, anchor, 0, repLength2-MINMATCH);
+                        ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, repLength2-MINMATCH);
                          hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = current2;
                          hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = current2;
                          ip += repLength2;
@@ -297,7 +297,7 @@ _match_stored:
                      U32 const tmpOff = offset_2; offset_2 = offset_1; offset_1 = tmpOff;  /* swap offset_2 <=> offset_1 */
                      hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = (U32)(ip-base);
                      hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = (U32)(ip-base);
-                    ZSTD_storeSeq(seqStore, 0, anchor, 0, rLength-MINMATCH);
+                    ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, rLength-MINMATCH);
                      ip += rLength;
                      anchor = ip;
                      continue;   /* faster when present ... (?) */
@@ -411,7 +411,7 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic(
              const BYTE* repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend;
              mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixStart) + 4;
              ip++;
-            ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, 0, mLength-MINMATCH);
+            ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, 0, mLength-MINMATCH);
          } else {
              if ((matchLongIndex > dictStartIndex) && (MEM_read64(matchLong) == MEM_read64(ip))) {
                  const BYTE* const matchEnd = matchLongIndex < prefixStartIndex ? dictEnd : iend;
@@ -422,7 +422,7 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic(
                  while (((ip>anchor) & (matchLong>lowMatchPtr)) && (ip[-1] == matchLong[-1])) { ip--; matchLong--; mLength++; }   /* catch up */
                  offset_2 = offset_1;
                  offset_1 = offset;
-                ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
+                ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
  
              } else if ((matchIndex > dictStartIndex) && (MEM_read32(match) == MEM_read32(ip))) {
                  size_t const h3 = ZSTD_hashPtr(ip+1, hBitsL, 8);
@@ -447,7 +447,7 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic(
                  }
                  offset_2 = offset_1;
                  offset_1 = offset;
-                ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
+                ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
  
              } else {
                  ip += ((ip-anchor) >> kSearchStrength) + 1;
@@ -479,7 +479,7 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic(
                      const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? dictEnd : iend;
                      size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixStart) + 4;
                      U32 const tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset;   /* swap offset_2 <=> offset_1 */
-                    ZSTD_storeSeq(seqStore, 0, anchor, 0, repLength2-MINMATCH);
+                    ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, repLength2-MINMATCH);
                      hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = current2;
                      hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = current2;
                      ip += repLength2;
diff --git a/lib/compress/zstd_fast.c b/lib/compress/zstd_fast.c

index 75aec8a371f345a84540b0f191e0bdc0b06a8a47..6dbefee6b7fcdc5fdfdddda45b9242321ec68e3b 100644 (file)
--- a/lib/compress/zstd_fast.c
+++ b/lib/compress/zstd_fast.c
@@ -136,7 +136,7 @@ _offset: /* Requires: ip0, match0 */
  _match: /* Requires: ip0, match0, offcode */
          /* Count the forward length */
          mLength += ZSTD_count(ip0+mLength+4, match0+mLength+4, iend) + 4;
-        ZSTD_storeSeq(seqStore, (size_t)(ip0-anchor), anchor, offcode, mLength-MINMATCH);
+        ZSTD_storeSeq(seqStore, (size_t)(ip0-anchor), anchor, iend, offcode, mLength-MINMATCH);
          /* match found */
          ip0 += mLength;
          anchor = ip0;
@@ -156,7 +156,7 @@ _match: /* Requires: ip0, match0, offcode */
                  hashTable[ZSTD_hashPtr(ip0, hlog, mls)] = (U32)(ip0-base);
                  ip0 += rLength;
                  ip1 = ip0 + 1;
-                ZSTD_storeSeq(seqStore, 0 /*litLen*/, anchor, 0 /*offCode*/, rLength-MINMATCH);
+                ZSTD_storeSeq(seqStore, 0 /*litLen*/, anchor, iend, 0 /*offCode*/, rLength-MINMATCH);
                  anchor = ip0;
                  continue;   /* faster when present (confirmed on gcc-8) ... (?) */
              }
@@ -261,7 +261,7 @@ size_t ZSTD_compressBlock_fast_dictMatchState_generic(
              const BYTE* const repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend;
              mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixStart) + 4;
              ip++;
-            ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, 0, mLength-MINMATCH);
+            ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, 0, mLength-MINMATCH);
          } else if ( (matchIndex <= prefixStartIndex) ) {
              size_t const dictHash = ZSTD_hashPtr(ip, dictHLog, mls);
              U32 const dictMatchIndex = dictHashTable[dictHash];
@@ -281,7 +281,7 @@ size_t ZSTD_compressBlock_fast_dictMatchState_generic(
                  } /* catch up */
                  offset_2 = offset_1;
                  offset_1 = offset;
-                ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
+                ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
              }
          } else if (MEM_read32(match) != MEM_read32(ip)) {
              /* it's not a match, and we're not going to check the dictionary */
@@ -296,7 +296,7 @@ size_t ZSTD_compressBlock_fast_dictMatchState_generic(
                   && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */
              offset_2 = offset_1;
              offset_1 = offset;
-            ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
+            ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
          }
  
          /* match found */
@@ -321,7 +321,7 @@ size_t ZSTD_compressBlock_fast_dictMatchState_generic(
                      const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? dictEnd : iend;
                      size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixStart) + 4;
                      U32 tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset;   /* swap offset_2 <=> offset_1 */
-                    ZSTD_storeSeq(seqStore, 0, anchor, 0, repLength2-MINMATCH);
+                    ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, repLength2-MINMATCH);
                      hashTable[ZSTD_hashPtr(ip, hlog, mls)] = current2;
                      ip += repLength2;
                      anchor = ip;
@@ -411,7 +411,7 @@ static size_t ZSTD_compressBlock_fast_extDict_generic(
              const BYTE* const repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend;
              size_t const rLength = ZSTD_count_2segments(ip+1 +4, repMatch +4, iend, repMatchEnd, prefixStart) + 4;
              ip++;
-            ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, 0, rLength-MINMATCH);
+            ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, 0, rLength-MINMATCH);
              ip += rLength;
              anchor = ip;
          } else {
@@ -427,7 +427,7 @@ static size_t ZSTD_compressBlock_fast_extDict_generic(
                  size_t mLength = ZSTD_count_2segments(ip+4, match+4, iend, matchEnd, prefixStart) + 4;
                  while (((ip>anchor) & (match>lowMatchPtr)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; }   /* catch up */
                  offset_2 = offset_1; offset_1 = offset;  /* update offset history */
-                ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
+                ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
                  ip += mLength;
                  anchor = ip;
          }   }
@@ -446,7 +446,7 @@ static size_t ZSTD_compressBlock_fast_extDict_generic(
                      const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? dictEnd : iend;
                      size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixStart) + 4;
                      { U32 const tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; }  /* swap offset_2 <=> offset_1 */
-                    ZSTD_storeSeq(seqStore, 0 /*litlen*/, anchor, 0 /*offcode*/, repLength2-MINMATCH);
+                    ZSTD_storeSeq(seqStore, 0 /*litlen*/, anchor, iend, 0 /*offcode*/, repLength2-MINMATCH);
                      hashTable[ZSTD_hashPtr(ip, hlog, mls)] = current2;
                      ip += repLength2;
                      anchor = ip;
diff --git a/lib/compress/zstd_lazy.c b/lib/compress/zstd_lazy.c

index 0af41724c7cefbddd21d690265f7280ff327505b..9ad7e03b54ca0e6ddf27c37bbb77c3acb87e8e50 100644 (file)
--- a/lib/compress/zstd_lazy.c
+++ b/lib/compress/zstd_lazy.c
@@ -810,7 +810,7 @@ ZSTD_compressBlock_lazy_generic(
          /* store sequence */
  _storeSequence:
          {   size_t const litLength = start - anchor;
-            ZSTD_storeSeq(seqStore, litLength, anchor, (U32)offset, matchLength-MINMATCH);
+            ZSTD_storeSeq(seqStore, litLength, anchor, iend, (U32)offset, matchLength-MINMATCH);
              anchor = ip = start + matchLength;
          }
  
@@ -828,7 +828,7 @@ _storeSequence:
                      const BYTE* const repEnd2 = repIndex < prefixLowestIndex ? dictEnd : iend;
                      matchLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd2, prefixLowest) + 4;
                      offset = offset_2; offset_2 = offset_1; offset_1 = (U32)offset;   /* swap offset_2 <=> offset_1 */
-                    ZSTD_storeSeq(seqStore, 0, anchor, 0, matchLength-MINMATCH);
+                    ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, matchLength-MINMATCH);
                      ip += matchLength;
                      anchor = ip;
                      continue;
@@ -843,7 +843,7 @@ _storeSequence:
                  /* store sequence */
                  matchLength = ZSTD_count(ip+4, ip+4-offset_2, iend) + 4;
                  offset = offset_2; offset_2 = offset_1; offset_1 = (U32)offset; /* swap repcodes */
-                ZSTD_storeSeq(seqStore, 0, anchor, 0, matchLength-MINMATCH);
+                ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, matchLength-MINMATCH);
                  ip += matchLength;
                  anchor = ip;
                  continue;   /* faster when present ... (?) */
@@ -1051,7 +1051,7 @@ size_t ZSTD_compressBlock_lazy_extDict_generic(
          /* store sequence */
  _storeSequence:
          {   size_t const litLength = start - anchor;
-            ZSTD_storeSeq(seqStore, litLength, anchor, (U32)offset, matchLength-MINMATCH);
+            ZSTD_storeSeq(seqStore, litLength, anchor, iend, (U32)offset, matchLength-MINMATCH);
              anchor = ip = start + matchLength;
          }
  
@@ -1066,7 +1066,7 @@ _storeSequence:
                  const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
                  matchLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd, prefixStart) + 4;
                  offset = offset_2; offset_2 = offset_1; offset_1 = (U32)offset;   /* swap offset history */
-                ZSTD_storeSeq(seqStore, 0, anchor, 0, matchLength-MINMATCH);
+                ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, matchLength-MINMATCH);
                  ip += matchLength;
                  anchor = ip;
                  continue;   /* faster when present ... (?) */
diff --git a/lib/compress/zstd_ldm.c b/lib/compress/zstd_ldm.c

index 3dcf86e6e8a32ac75399fc3bbe2442f86d76f980..fc3f4694307c7d9fdbd280a2ea60c9246f824e7b 100644 (file)
--- a/lib/compress/zstd_ldm.c
+++ b/lib/compress/zstd_ldm.c
@@ -583,7 +583,7 @@ size_t ZSTD_ldm_blockCompress(rawSeqStore_t* rawSeqStore,
                  rep[i] = rep[i-1];
              rep[0] = sequence.offset;
              /* Store the sequence */
-            ZSTD_storeSeq(seqStore, newLitLength, ip - newLitLength,
+            ZSTD_storeSeq(seqStore, newLitLength, ip - newLitLength, iend,
                            sequence.offset + ZSTD_REP_MOVE,
                            sequence.matchLength - MINMATCH);
              ip += sequence.matchLength;
diff --git a/lib/compress/zstd_opt.c b/lib/compress/zstd_opt.c

index 2da363f93ef2c5e75ff508b6c6e18512196414a3..2e50fca6ff53f5790d85aaaec737961c492fdaca 100644 (file)
--- a/lib/compress/zstd_opt.c
+++ b/lib/compress/zstd_opt.c
@@ -1098,7 +1098,7 @@ _shortestPath:   /* cur, last_pos, best_mlen, best_off have to be set */
  
                      assert(anchor + llen <= iend);
                      ZSTD_updateStats(optStatePtr, llen, anchor, offCode, mlen);
-                    ZSTD_storeSeq(seqStore, llen, anchor, offCode, mlen-MINMATCH);
+                    ZSTD_storeSeq(seqStore, llen, anchor, iend, offCode, mlen-MINMATCH);
                      anchor += advance;
                      ip = anchor;
              }   }
diff --git a/lib/decompress/zstd_decompress_block.c b/lib/decompress/zstd_decompress_block.c

index 27ce137f35dfc3f119412091803689dbd3cfd26d..cbb66c8dba0a3bb6896ef40f3c9b605f2b8bb375 100644 (file)
--- a/lib/decompress/zstd_decompress_block.c
+++ b/lib/decompress/zstd_decompress_block.c
@@ -761,10 +761,10 @@ size_t ZSTD_execSequence(BYTE* op,
       * without overlap checking.
       */
      if (sequence.offset >= WILDCOPY_VECLEN) {
-        /* Split out matchLength <= 32 since it is nearly always true. +1% on gcc-9.
-        * We copy 32 bytes here since matches are generally longer than literals.
-        * In silesia, for example ~10% of matches are longer than 16 bytes.
-        */
+        /* We bet on a full wildcopy for matches, since we expect matches to be
+         * longer than literals (in general). In silesia, ~10% of matches are longer
+         * than 16 bytes.
+         */
          ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength, ZSTD_no_overlap);
          return sequenceLength;
      }
author	Nick Terrell <nickrterrell@gmail.com>
	Fri, 20 Sep 2019 07:52:55 +0000 (00:52 -0700)
committer	Nick Terrell <nickrterrell@gmail.com>
	Fri, 20 Sep 2019 07:56:20 +0000 (00:56 -0700)
lib/compress/zstd_compress_internal.h		patch | blob | blame | history
lib/compress/zstd_double_fast.c		patch | blob | blame | history
lib/compress/zstd_fast.c		patch | blob | blame | history
lib/compress/zstd_lazy.c		patch | blob | blame | history
lib/compress/zstd_ldm.c		patch | blob | blame | history
lib/compress/zstd_opt.c		patch | blob | blame | history
lib/decompress/zstd_decompress_block.c		patch | blob | blame | history