just add some comments to zstd_opt for improved clarity

author Yann Collet <cyan@fb.com>

Fri, 19 Aug 2022 23:04:28 +0000 (16:04 -0700)

committer Yann Collet <cyan@fb.com>

Thu, 29 Dec 2022 00:24:12 +0000 (16:24 -0800)
author Yann Collet <cyan@fb.com>
Fri, 19 Aug 2022 23:04:28 +0000 (16:04 -0700)
committer Yann Collet <cyan@fb.com>
Thu, 29 Dec 2022 00:24:12 +0000 (16:24 -0800)
diff --git a/lib/compress/zstd_opt.c b/lib/compress/zstd_opt.c

index 6e08dc8c7329ba255f0a0ef4dbceb5ff73b95596..f0f3791e7d30084e6fa65b5f3b287200fc31b430 100644 (file)
--- a/lib/compress/zstd_opt.c
+++ b/lib/compress/zstd_opt.c
@@ -37,11 +37,16 @@
  #  define WEIGHT(stat,opt) (opt ? ZSTD_fracWeight(stat) : ZSTD_bitWeight(stat))
  #endif
  
+/* ZSTD_bitWeight() :
+ * provide estimated "cost" of a stat in full bits only */
  MEM_STATIC U32 ZSTD_bitWeight(U32 stat)
  {
      return (ZSTD_highbit32(stat+1) * BITCOST_MULTIPLIER);
  }
  
+/* ZSTD_fracWeight() :
+ * provide fractional-bit "cost" of a stat,
+ * using linear interpolation approximation */
  MEM_STATIC U32 ZSTD_fracWeight(U32 rawStat)
  {
      U32 const stat = rawStat + 1;
@@ -50,6 +55,7 @@ MEM_STATIC U32 ZSTD_fracWeight(U32 rawStat)
      U32 const FWeight = (stat << BITCOST_ACCURACY) >> hb;
      U32 const weight = BWeight + FWeight;
      assert(hb + BITCOST_ACCURACY < 31);
+    assert(FWeight < BITCOST_MULTIPLIER);
      return weight;
  }
  
@@ -91,17 +97,19 @@ static U32 sum_u32(const unsigned table[], size_t nbElts)
  static U32 ZSTD_downscaleStats(unsigned* table, U32 lastEltIndex, U32 shift)
  {
      U32 s, sum=0;
-    DEBUGLOG(5, "ZSTD_downscaleStats (nbElts=%u, shift=%u)", (unsigned)lastEltIndex+1, (unsigned)shift);
+    DEBUGLOG(5, "ZSTD_downscaleStats (nbElts=%u, shift=%u)",
+            (unsigned)lastEltIndex+1, (unsigned)shift );
      assert(shift < 30);
      for (s=0; s<lastEltIndex+1; s++) {
-        table[s] = 1 + (table[s] >> shift);
-        sum += table[s];
+        unsigned newStat = 1 + (table[s] >> shift);
+        sum += newStat;
+        table[s] = newStat;
      }
      return sum;
  }
  
  /* ZSTD_scaleStats() :
- * reduce all elements in table is sum too large
+ * reduce all elt frequencies in table if sum too large
   * return the resulting sum of elements */
  static U32 ZSTD_scaleStats(unsigned* table, U32 lastEltIndex, U32 logTarget)
  {
@@ -129,18 +137,22 @@ ZSTD_rescaleFreqs(optState_t* const optPtr,
      DEBUGLOG(5, "ZSTD_rescaleFreqs (srcSize=%u)", (unsigned)srcSize);
      optPtr->priceType = zop_dynamic;
  
-    if (optPtr->litLengthSum == 0) {  /* first block : init */
-        if (srcSize <= ZSTD_PREDEF_THRESHOLD) {  /* heuristic */
-            DEBUGLOG(5, "(srcSize <= ZSTD_PREDEF_THRESHOLD) => zop_predef");
+    if (optPtr->litLengthSum == 0) {  /* no literals stats collected -> first block assumed -> init */
+
+        /* heuristic: use pre-defined stats for too small inputs */
+        if (srcSize <= ZSTD_PREDEF_THRESHOLD) {
+            DEBUGLOG(5, "srcSize <= %i : use predefined stats", ZSTD_PREDEF_THRESHOLD);
              optPtr->priceType = zop_predef;
          }
  
          assert(optPtr->symbolCosts != NULL);
          if (optPtr->symbolCosts->huf.repeatMode == HUF_repeat_valid) {
-            /* huffman table presumed generated by dictionary */
+
+            /* huffman stats covering the full value set : table presumed generated by dictionary */
              optPtr->priceType = zop_dynamic;
  
              if (compressedLiterals) {
+                /* generate literals statistics from huffman table */
                  unsigned lit;
                  assert(optPtr->litFreq != NULL);
                  optPtr->litSum = 0;
@@ -188,10 +200,11 @@ ZSTD_rescaleFreqs(optState_t* const optPtr,
                      optPtr->offCodeSum += optPtr->offCodeFreq[of];
              }   }
  
-        } else {  /* not a dictionary */
+        } else {  /* huf.repeatMode != HUF_repeat_valid => presumed not a dictionary */
  
              assert(optPtr->litFreq != NULL);
              if (compressedLiterals) {
+                /* base initial cost of literals on direct frequency within src */
                  unsigned lit = MaxLit;
                  HIST_count_simple(optPtr->litFreq, &lit, src, srcSize);   /* use raw first block to init statistics */
                  optPtr->litSum = ZSTD_downscaleStats(optPtr->litFreq, MaxLit, 8);
@@ -224,10 +237,9 @@ ZSTD_rescaleFreqs(optState_t* const optPtr,
                  optPtr->offCodeSum = sum_u32(baseOFCfreqs, MaxOff+1);
              }
  
-
          }
  
-    } else {   /* new block : re-use previous statistics, scaled down */
+    } else {   /* new block : scale down accumulated statistics */
  
          if (compressedLiterals)
              optPtr->litSum = ZSTD_scaleStats(optPtr->litFreq, MaxLit, 12);
@@ -275,10 +287,11 @@ static U32 ZSTD_litLengthPrice(U32 const litLength, const optState_t* const optP
      assert(litLength <= ZSTD_BLOCKSIZE_MAX);
      if (optPtr->priceType == zop_predef)
          return WEIGHT(litLength, optLevel);
-    /* We can't compute the litLength price for sizes >= ZSTD_BLOCKSIZE_MAX
-     * because it isn't representable in the zstd format. So instead just
-     * call it 1 bit more than ZSTD_BLOCKSIZE_MAX - 1. In this case the block
-     * would be all literals.
+
+    /* ZSTD_LLcode() can't compute litLength price for sizes >= ZSTD_BLOCKSIZE_MAX
+     * because it isn't representable in the zstd format.
+     * So instead just pretend it would cost 1 bit more than ZSTD_BLOCKSIZE_MAX - 1.
+     * In such a case, the block would be all literals.
       */
      if (litLength == ZSTD_BLOCKSIZE_MAX)
          return BITCOST_MULTIPLIER + ZSTD_litLengthPrice(ZSTD_BLOCKSIZE_MAX - 1, optPtr, optLevel);
@@ -292,7 +305,7 @@ static U32 ZSTD_litLengthPrice(U32 const litLength, const optState_t* const optP
  }
  
  /* ZSTD_getMatchPrice() :
- * Provides the cost of the match part (offset + matchLength) of a sequence
+ * Provides the cost of the match part (offset + matchLength) of a sequence.
   * Must be combined with ZSTD_fullLiteralsCost() to get the full cost of a sequence.
   * @offBase : sumtype, representing an offset or a repcode, and using numeric representation of ZSTD_storeSeq()
   * @optLevel: when <2, favors small offset for decompression speed (improved cache efficiency)
@@ -308,8 +321,9 @@ ZSTD_getMatchPrice(U32 const offBase,
      U32 const mlBase = matchLength - MINMATCH;
      assert(matchLength >= MINMATCH);
  
-    if (optPtr->priceType == zop_predef)  /* fixed scheme, do not use statistics */
-        return WEIGHT(mlBase, optLevel) + ((16 + offCode) * BITCOST_MULTIPLIER);
+    if (optPtr->priceType == zop_predef)  /* fixed scheme, does not use statistics */
+        return WEIGHT(mlBase, optLevel)
+             + ((16 + offCode) * BITCOST_MULTIPLIER); /* emulated offset cost */
  
      /* dynamic statistics */
      price = (offCode * BITCOST_MULTIPLIER) + (optPtr->offCodeSumBasePrice - WEIGHT(optPtr->offCodeFreq[offCode], optLevel));
@@ -347,7 +361,7 @@ static void ZSTD_updateStats(optState_t* const optPtr,
          optPtr->litLengthSum++;
      }
  
-    /* offset code : expected to follow storeSeq() numeric representation */
+    /* offset code : follows storeSeq() numeric representation */
      {   U32 const offCode = ZSTD_highbit32(offBase);
          assert(offCode <= MaxOff);
          optPtr->offCodeFreq[offCode]++;
@@ -1352,7 +1366,7 @@ size_t ZSTD_compressBlock_btopt(
  /* ZSTD_initStats_ultra():
   * make a first compression pass, just to seed stats with more accurate starting values.
   * only works on first block, with no dictionary and no ldm.
- * this function cannot error, hence its contract must be respected.
+ * this function cannot error out, hence its narrow contract must be respected.
   */
  static void
  ZSTD_initStats_ultra(ZSTD_matchState_t* ms,
@@ -1371,7 +1385,7 @@ ZSTD_initStats_ultra(ZSTD_matchState_t* ms,
  
      ZSTD_compressBlock_opt2(ms, seqStore, tmpRep, src, srcSize, ZSTD_noDict);   /* generate stats into ms->opt*/
  
-    /* invalidate first scan from history */
+    /* invalidate first scan from history, only keep entropy stats */
      ZSTD_resetSeqStore(seqStore);
      ms->window.base -= srcSize;
      ms->window.dictLimit += (U32)srcSize;
@@ -1395,20 +1409,20 @@ size_t ZSTD_compressBlock_btultra2(
      U32 const curr = (U32)((const BYTE*)src - ms->window.base);
      DEBUGLOG(5, "ZSTD_compressBlock_btultra2 (srcSize=%zu)", srcSize);
  
-    /* 2-pass strategy:
+    /* two-pass strategy:
       * this strategy makes a first pass over first block to collect statistics
-     * and seed next round's statistics with it.
-     * After 1st pass, function forgets everything, and starts a new block.
+     * in order to seed next round's statistics with it.
+     * After 1st pass, function forgets history, and starts a new block.
       * Consequently, this can only work if no data has been previously loaded in tables,
       * aka, no dictionary, no prefix, no ldm preprocessing.
       * The compression ratio gain is generally small (~0.5% on first block),
-     * the cost is 2x cpu time on first block. */
+     * the cost is 2x cpu time on first block. */
      assert(srcSize <= ZSTD_BLOCKSIZE_MAX);
      if ( (ms->opt.litLengthSum==0)   /* first block */
        && (seqStore->sequences == seqStore->sequencesStart)  /* no ldm */
        && (ms->window.dictLimit == ms->window.lowLimit)   /* no dictionary */
-      && (curr == ms->window.dictLimit)   /* start of frame, nothing already loaded nor skipped */
-      && (srcSize > ZSTD_PREDEF_THRESHOLD)
+      && (curr == ms->window.dictLimit)    /* start of frame, nothing already loaded nor skipped */
+      && (srcSize > ZSTD_PREDEF_THRESHOLD) /* input large enough to not employ default stats */
        ) {
          ZSTD_initStats_ultra(ms, seqStore, rep, src, srcSize);
      }
author	Yann Collet <cyan@fb.com>
	Fri, 19 Aug 2022 23:04:28 +0000 (16:04 -0700)
committer	Yann Collet <cyan@fb.com>
	Thu, 29 Dec 2022 00:24:12 +0000 (16:24 -0800)