git.ipfire.org Git - thirdparty/zstd.git/commitdiff
slightly improved compression ratio at levels 3 & 4
author Yann Collet <cyan@fb.com>
Wed, 16 Oct 2024 19:13:57 +0000 (12:13 -0700)
committer Yann Collet <cyan@fb.com>
Thu, 17 Oct 2024 16:37:23 +0000 (09:37 -0700)
The compression ratio benefits are small but consistent, i.e. always positive.
On the `silesia.tar` corpus, this modification saves ~75 KB at level 3.
The measured speed cost is negligible, i.e. below noise level, between 0 and -1%.

lib/compress/zstd_double_fast.c

index e2b3b4a14b2f85f91c8626354662ec1e3cd8c92e..72b541ea6c86292708db45d010e73c902c96e9fa 100644 (file)
@@ -252,19 +252,23 @@ _cleanup:
 
 _search_next_long:
 
-        /* check prefix long +1 match */
+        /* short match found: let's check for a longer one */
+        mLength = ZSTD_count(ip+4, matchs0+4, iend) + 4;
+
+        /* check long match at +1 position */
         if (idxl1 > prefixLowestIndex) {
             if (MEM_read64(matchl1) == MEM_read64(ip1)) {
-                ip = ip1;
-                mLength = ZSTD_count(ip+8, matchl1+8, iend) + 8;
-                offset = (U32)(ip-matchl1);
-                while (((ip>anchor) & (matchl1>prefixLowest)) && (ip[-1] == matchl1[-1])) { ip--; matchl1--; mLength++; } /* catch up */
-                goto _match_found;
-            }
+                size_t const llen = ZSTD_count(ip1+8, matchl1+8, iend) + 8;
+                if (llen > mLength) {
+                    ip = ip1;
+                    mLength = llen;
+                    offset = (U32)(ip-matchl1);
+                    while (((ip>anchor) & (matchl1>prefixLowest)) && (ip[-1] == matchl1[-1])) { ip--; matchl1--; mLength++; } /* catch up */
+                    goto _match_found;
+            }   }
         }
 
-        /* if no long +1 match, explore the short match we found */
-        mLength = ZSTD_count(ip+4, matchs0+4, iend) + 4;
+        /* validate short match previously found */
         offset = (U32)(ip - matchs0);
         while (((ip>anchor) & (matchs0>prefixLowest)) && (ip[-1] == matchs0[-1])) { ip--; matchs0--; mLength++; } /* catch up */