]> git.ipfire.org Git - thirdparty/zstd.git/commitdiff
minor improvement to level 3 dictionary compression ratio
authorYann Collet <cyan@fb.com>
Wed, 16 Oct 2024 00:58:33 +0000 (17:58 -0700)
committerYann Collet <cyan@fb.com>
Wed, 16 Oct 2024 00:58:33 +0000 (17:58 -0700)
lib/compress/zstd_double_fast.c

index c04010b11d72fe899d86336b84b543c26719cf67..e2b3b4a14b2f85f91c8626354662ec1e3cd8c92e 100644 (file)
@@ -140,7 +140,6 @@ size_t ZSTD_compressBlock_doubleFast_noDict_generic(
     U32 idxl1; /* the long match index for ip1 */
 
     const BYTE* matchl0; /* the long match for ip */
-    const BYTE* matchl0_safe; /* matchl0 or safe address */
     const BYTE* matchs0; /* the short match for ip */
     const BYTE* matchl1; /* the long match for ip1 */
     const BYTE* matchs0_safe; /* matchs0 or safe address */
@@ -201,15 +200,15 @@ size_t ZSTD_compressBlock_doubleFast_noDict_generic(
              * However expression below complies into conditional move. Since
              * match is unlikely and we only *branch* on idxl0 > prefixLowestIndex
              * if there is a match, all branches become predictable. */
-            matchl0_safe = ZSTD_selectAddr(idxl0, prefixLowestIndex, matchl0, &dummy[0]);
+            {   const BYTE*  const matchl0_safe = ZSTD_selectAddr(idxl0, prefixLowestIndex, matchl0, &dummy[0]);
 
-            /* check prefix long match */
-            if (MEM_read64(matchl0_safe) == MEM_read64(ip) && matchl0_safe == matchl0) {
-                mLength = ZSTD_count(ip+8, matchl0+8, iend) + 8;
-                offset = (U32)(ip-matchl0);
-                while (((ip>anchor) & (matchl0>prefixLowest)) && (ip[-1] == matchl0[-1])) { ip--; matchl0--; mLength++; } /* catch up */
-                goto _match_found;
-            }
+                /* check prefix long match */
+                if (MEM_read64(matchl0_safe) == MEM_read64(ip) && matchl0_safe == matchl0) {
+                    mLength = ZSTD_count(ip+8, matchl0+8, iend) + 8;
+                    offset = (U32)(ip-matchl0);
+                    while (((ip>anchor) & (matchl0>prefixLowest)) && (ip[-1] == matchl0[-1])) { ip--; matchl0--; mLength++; } /* catch up */
+                    goto _match_found;
+            }   }
 
             idxl1 = hashLong[hl1];
             matchl1 = base + idxl1;
@@ -412,14 +411,12 @@ size_t ZSTD_compressBlock_doubleFast_dictMatchState_generic(
             goto _match_stored;
         }
 
-        if (matchIndexL > prefixLowestIndex) {
+        if ((matchIndexL >= prefixLowestIndex) && (MEM_read64(matchLong) == MEM_read64(ip))) {
             /* check prefix long match */
-            if (MEM_read64(matchLong) == MEM_read64(ip)) {
-                mLength = ZSTD_count(ip+8, matchLong+8, iend) + 8;
-                offset = (U32)(ip-matchLong);
-                while (((ip>anchor) & (matchLong>prefixLowest)) && (ip[-1] == matchLong[-1])) { ip--; matchLong--; mLength++; } /* catch up */
-                goto _match_found;
-            }
+            mLength = ZSTD_count(ip+8, matchLong+8, iend) + 8;
+            offset = (U32)(ip-matchLong);
+            while (((ip>anchor) & (matchLong>prefixLowest)) && (ip[-1] == matchLong[-1])) { ip--; matchLong--; mLength++; } /* catch up */
+            goto _match_found;
         } else if (dictTagsMatchL) {
             /* check dictMatchState long match */
             U32 const dictMatchIndexL = dictMatchIndexAndTagL >> ZSTD_SHORT_CACHE_TAG_BITS;
@@ -434,7 +431,7 @@ size_t ZSTD_compressBlock_doubleFast_dictMatchState_generic(
         }   }
 
         if (matchIndexS > prefixLowestIndex) {
-            /* check prefix short match */
+            /* short match  candidate */
             if (MEM_read32(match) == MEM_read32(ip)) {
                 goto _search_next_long;
             }
@@ -464,14 +461,12 @@ _search_next_long:
             hashLong[hl3] = curr + 1;
 
             /* check prefix long +1 match */
-            if (matchIndexL3 > prefixLowestIndex) {
-                if (MEM_read64(matchL3) == MEM_read64(ip+1)) {
-                    mLength = ZSTD_count(ip+9, matchL3+8, iend) + 8;
-                    ip++;
-                    offset = (U32)(ip-matchL3);
-                    while (((ip>anchor) & (matchL3>prefixLowest)) && (ip[-1] == matchL3[-1])) { ip--; matchL3--; mLength++; } /* catch up */
-                    goto _match_found;
-                }
+            if ((matchIndexL3 >= prefixLowestIndex) && (MEM_read64(matchL3) == MEM_read64(ip+1))) {
+                mLength = ZSTD_count(ip+9, matchL3+8, iend) + 8;
+                ip++;
+                offset = (U32)(ip-matchL3);
+                while (((ip>anchor) & (matchL3>prefixLowest)) && (ip[-1] == matchL3[-1])) { ip--; matchL3--; mLength++; } /* catch up */
+                goto _match_found;
             } else if (dictTagsMatchL3) {
                 /* check dict long +1 match */
                 U32 const dictMatchIndexL3 = dictMatchIndexAndTagL3 >> ZSTD_SHORT_CACHE_TAG_BITS;