]> git.ipfire.org Git - thirdparty/zstd.git/commitdiff
improve high compression ratio for file like #3793
authorYann Collet <cyan@fb.com>
Tue, 30 Jan 2024 07:25:24 +0000 (23:25 -0800)
committerYann Collet <cyan@fb.com>
Tue, 30 Jan 2024 07:25:24 +0000 (23:25 -0800)
this works great for 32-bit arrays,
notably the synthetic ones, with extreme regularity,
unfortunately, it's not universal,
and in some cases, it's a loss.
Crucially, on average, it's a loss on silesia.
The most negatively impacted file is x-ray.
It deserves an investigation before suggesting it as an evolution.

lib/compress/zstd_opt.c

index 68537e600975cca8c8d234e3acb385d5876008e6..e16e5e4fb1387f6e8fd102f6ccba5fa98dbd10df 100644 (file)
@@ -566,7 +566,7 @@ void ZSTD_updateTree_internal(
     const BYTE* const base = ms->window.base;
     U32 const target = (U32)(ip - base);
     U32 idx = ms->nextToUpdate;
-    DEBUGLOG(6, "ZSTD_updateTree_internal, from %u to %u  (dictMode:%u)",
+    DEBUGLOG(7, "ZSTD_updateTree_internal, from %u to %u  (dictMode:%u)",
                 idx, target, dictMode);
 
     while(idx < target) {
@@ -1069,6 +1069,10 @@ listStats(const U32* table, int lastEltID)
 
 #endif
 
+#define LIT_PRICE(_p) (int)ZSTD_rawLiteralsCost(_p, 1, optStatePtr, optLevel)
+#define LL_PRICE(_l) (int)ZSTD_litLengthPrice(_l, optStatePtr, optLevel)
+#define LL_INCPRICE(_l) (LL_PRICE(_l) - LL_PRICE(_l-1))
+
 FORCE_INLINE_TEMPLATE
 ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
 size_t
@@ -1122,7 +1126,7 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
             U32 const ll0 = !litlen;
             U32 nbMatches = getAllMatches(matches, ms, &nextToUpdate3, ip, iend, rep, ll0, minMatch);
             ZSTD_optLdm_processMatchCandidate(&optLdm, matches, &nbMatches,
-                                              (U32)(ip-istart), (U32)(iend - ip));
+                                              (U32)(ip-istart), (U32)(iend-ip));
             if (!nbMatches) { ip++; continue; }
 
             /* initialize opt[0] */
@@ -1134,7 +1138,7 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
              * in every price. We include the literal length to avoid negative
              * prices when we subtract the previous literal length.
              */
-            opt[0].price = (int)ZSTD_litLengthPrice(litlen, optStatePtr, optLevel);
+            opt[0].price = LL_PRICE(litlen);
 
             /* large match -> immediate encoding */
             {   U32 const maxML = matches[nbMatches-1].len;
@@ -1155,11 +1159,12 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
 
             /* set prices for first matches starting position == 0 */
             assert(opt[0].price >= 0);
-            {   U32 const literalsPrice = (U32)opt[0].price + ZSTD_litLengthPrice(0, optStatePtr, optLevel);
+            {   U32 const literalsPrice = (U32)opt[0].price + (U32)LL_PRICE(0);
                 U32 pos;
                 U32 matchNb;
                 for (pos = 1; pos < minMatch; pos++) {
                     opt[pos].price = ZSTD_MAX_PRICE;   /* mlen, litlen and price will be fixed during forward scanning */
+                    opt[pos].mlen = 0;
                 }
                 for (matchNb = 0; matchNb < nbMatches; matchNb++) {
                     U32 const offBase = matches[matchNb].off;
@@ -1173,7 +1178,9 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
                         opt[pos].off = offBase;
                         opt[pos].litlen = litlen;
                         opt[pos].price = (int)sequencePrice;
-                }   }
+                    }
+                    opt[pos].price = ZSTD_MAX_PRICE;
+                }
                 last_pos = pos-1;
             }
         }
@@ -1187,18 +1194,38 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
             /* Fix current position with one literal if cheaper */
             {   U32 const litlen = (opt[cur-1].mlen == 0) ? opt[cur-1].litlen + 1 : 1;
                 int const price = opt[cur-1].price
-                                + (int)ZSTD_rawLiteralsCost(ip+cur-1, 1, optStatePtr, optLevel)
-                                + (int)ZSTD_litLengthPrice(litlen, optStatePtr, optLevel)
-                                - (int)ZSTD_litLengthPrice(litlen-1, optStatePtr, optLevel);
+                                + LIT_PRICE(ip+cur-1)
+                                + LL_INCPRICE(litlen);
                 assert(price < 1000000000); /* overflow check */
                 if (price <= opt[cur].price) {
                     DEBUGLOG(7, "cPos:%zi==rPos:%u : better price (%.2f<=%.2f) using literal (ll==%u) (hist:%u,%u,%u)",
                                 inr-istart, cur, ZSTD_fCost(price), ZSTD_fCost(opt[cur].price), litlen,
                                 opt[cur-1].rep[0], opt[cur-1].rep[1], opt[cur-1].rep[2]);
-                    opt[cur].mlen = 0;
-                    opt[cur].off = 0;
-                    opt[cur].litlen = litlen;
-                    opt[cur].price = price;
+                    if ( (optLevel == 2) /* additional check only for high modes */
+                      && (opt[cur].mlen > 0) /* interrupt a match */
+                      && (LL_INCPRICE(1) < 0) ) /* ll1 is cheaper than ll0 */
+                    {
+                        /* check next position, in case it would be cheaper */
+                        int with1literal = opt[cur].price + LL_INCPRICE(1);
+                        int withMoreLiterals = price + LL_INCPRICE(litlen+1);
+                        DEBUGLOG(7, "But at next rPos %u : match+1lit %.2f vs %ulits %.2f",
+                                cur+1, ZSTD_fCost(with1literal), litlen+1, ZSTD_fCost(withMoreLiterals));
+                        if (with1literal < withMoreLiterals) {
+                            DEBUGLOG(7, "==> match+1lit is cheaper (%.2f < %.2f) !!!", ZSTD_fCost(with1literal), ZSTD_fCost(withMoreLiterals));
+                            /* do not take this literal */
+                        } else {
+                            opt[cur].mlen = 0;
+                            opt[cur].off = 0;
+                            opt[cur].litlen = litlen;
+                            opt[cur].price = price;
+                        }
+                    } else {
+                        /* normal case: take the literal, it's expected to be cheaper */
+                        opt[cur].mlen = 0;
+                        opt[cur].off = 0;
+                        opt[cur].litlen = litlen;
+                        opt[cur].price = price;
+                    }
                 } else {
                     DEBUGLOG(7, "cPos:%zi==rPos:%u : literal would cost more (%.2f>%.2f) (hist:%u,%u,%u)",
                                 inr-istart, cur, ZSTD_fCost(price), ZSTD_fCost(opt[cur].price),
@@ -1236,7 +1263,7 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
             {   U32 const ll0 = (opt[cur].mlen != 0);
                 U32 const litlen = (opt[cur].mlen == 0) ? opt[cur].litlen : 0;
                 U32 const previousPrice = (U32)opt[cur].price;
-                U32 const basePrice = previousPrice + ZSTD_litLengthPrice(0, optStatePtr, optLevel);
+                U32 const basePrice = previousPrice + (U32)LL_PRICE(0);
                 U32 nbMatches = getAllMatches(matches, ms, &nextToUpdate3, inr, iend, opt[cur].rep, ll0, minMatch);
                 U32 matchNb;
 
@@ -1291,6 +1318,7 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
                             if (optLevel==0) break;  /* early update abort; gets ~+10% speed for about -0.01 ratio loss */
                         }
             }   }   }
+            opt[last_pos+1].price = ZSTD_MAX_PRICE;
         }  /* for (cur = 1; cur <= last_pos; cur++) */
 
         lastSequence = opt[last_pos];