Implemented repOffset "minus 1" on ll==0

author Yann Collet <yann.collet.73@gmail.com>

Sun, 31 Jul 2016 02:01:57 +0000 (04:01 +0200)

committer Yann Collet <yann.collet.73@gmail.com>

Sun, 31 Jul 2016 02:01:57 +0000 (04:01 +0200)
author Yann Collet <yann.collet.73@gmail.com>
Sun, 31 Jul 2016 02:01:57 +0000 (04:01 +0200)
committer Yann Collet <yann.collet.73@gmail.com>
Sun, 31 Jul 2016 02:01:57 +0000 (04:01 +0200)
diff --git a/lib/compress/zstd_opt.h b/lib/compress/zstd_opt.h

index 3eac1ac879306566c99418971a5b1e9bc3571a12..1946a3ae56449a6da8cd2a356a536f43c3fcd79f 100644 (file)
--- a/lib/compress/zstd_opt.h
+++ b/lib/compress/zstd_opt.h
@@ -453,7 +453,7 @@ void ZSTD_compressBlock_opt_generic(ZSTD_CCtx* ctx,
  
          /* check repCode */
          {   U32 i;
-            for (i=0; i<ZSTD_REP_CHECK; i++) {
+            for (i=(ip == anchor); i<ZSTD_REP_CHECK; i++) {
                  if ((rep[i]<(U32)(ip-prefixStart))
                      && (MEM_readMINMATCH(ip, minMatch) == MEM_readMINMATCH(ip - rep[i], minMatch))) {
                      mlen = (U32)ZSTD_count(ip+minMatch, ip+minMatch-rep[i], iend) + minMatch;
@@ -462,7 +462,7 @@ void ZSTD_compressBlock_opt_generic(ZSTD_CCtx* ctx,
                          best_mlen = mlen; best_off = i; cur = 0; last_pos = 1;
                          goto _storeSequence;
                      }
-                    best_off = (i<=1 && ip == anchor) ? 1-i : i;
+                    best_off = i - (ip == anchor);
                      do {
                          price = ZSTD_getPrice(seqStorePtr, litlen, anchor, best_off, mlen - MINMATCH);
                          if (mlen > last_pos || price < opt[mlen].price)
@@ -544,9 +544,9 @@ void ZSTD_compressBlock_opt_generic(ZSTD_CCtx* ctx,
  
             best_mlen = minMatch;
             {   U32 i;
-               for (i=0; i<ZSTD_REP_CHECK; i++) {
+               for (i=(opt[cur].mlen != 1); i<ZSTD_REP_CHECK; i++) {  /* check rep */
                     if ((opt[cur].rep[i]<(U32)(inr-prefixStart))
-                       && (MEM_readMINMATCH(inr, minMatch) == MEM_readMINMATCH(inr - opt[cur].rep[i], minMatch))) {  /* check rep */
+                       && (MEM_readMINMATCH(inr, minMatch) == MEM_readMINMATCH(inr - opt[cur].rep[i], minMatch))) {
                         mlen = (U32)ZSTD_count(inr+minMatch, inr+minMatch - opt[cur].rep[i], iend) + minMatch;
                         ZSTD_LOG_PARSER("%d: Found REP %d/%d mlen=%d off=%d rep=%d opt[%d].off=%d\n", (int)(inr-base), i, ZSTD_REP_NUM, mlen, i, opt[cur].rep[i], cur, opt[cur].off);
  
@@ -556,7 +556,9 @@ void ZSTD_compressBlock_opt_generic(ZSTD_CCtx* ctx,
                              goto _storeSequence;
                         }
  
-                       best_off = (i<=1 && opt[cur].mlen != 1) ? 1-i : i;
+                       //best_off = ((i<=1) & (opt[cur].mlen != 1)) ? 1-i : i;
+                       best_off = i - (opt[cur].mlen != 1);
+
                         if (opt[cur].mlen == 1) {
                              litlen = opt[cur].litlen;
                              if (cur > litlen) {
@@ -661,7 +663,8 @@ _storeSequence:   /* cur, last_pos, best_mlen, best_off have to be set */
                      rep[1] = rep[0];
                      rep[0] = best_off;
                  }
-                if (litLength == 0 && offset<=1) offset = 1-offset;
+                if ((litLength == 0) & (offset==0)) offset = rep[1];  /* protection, but should never happen */
+                if ((litLength == 0) & (offset<=2)) offset--;
              }
  
              ZSTD_LOG_ENCODE("%d/%d: ENCODE literals=%d mlen=%d off=%d rep[0]=%d rep[1]=%d\n", (int)(ip-base), (int)(iend-base), (int)(litLength), (int)mlen, (int)(offset), (int)rep[0], (int)rep[1]);
@@ -746,7 +749,7 @@ void ZSTD_compressBlock_opt_extDict_generic(ZSTD_CCtx* ctx,
  
          /* check repCode */
          {   U32 i;
-            for (i=0; i<ZSTD_REP_CHECK; i++) {
+            for (i = (ip==anchor); i<ZSTD_REP_CHECK; i++) {
                  const U32 repIndex = (U32)(current - rep[i]);
                  const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
                  const BYTE* const repMatch = repBase + repIndex;
@@ -763,7 +766,7 @@ void ZSTD_compressBlock_opt_extDict_generic(ZSTD_CCtx* ctx,
                          goto _storeSequence;
                      }
  
-                    best_off = (i<=1 && ip == anchor) ? 1-i : i;
+                    best_off = i - (ip==anchor);
                      litlen = opt[0].litlen;
                      do {
                          price = ZSTD_getPrice(seqStorePtr, litlen, anchor, best_off, mlen - MINMATCH);
@@ -849,7 +852,7 @@ void ZSTD_compressBlock_opt_extDict_generic(ZSTD_CCtx* ctx,
              best_mlen = 0;
  
              {   U32 i;
-                for (i=0; i<ZSTD_REP_CHECK; i++) {
+                for (i = (opt[cur].mlen != 1); i<ZSTD_REP_CHECK; i++) {
                      const U32 repIndex = (U32)(current+cur - opt[cur].rep[i]);
                      const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
                      const BYTE* const repMatch = repBase + repIndex;
@@ -867,7 +870,7 @@ void ZSTD_compressBlock_opt_extDict_generic(ZSTD_CCtx* ctx,
                              goto _storeSequence;
                          }
  
-                        best_off = (i<=1 && opt[cur].mlen != 1) ? 1-i : i;
+                        best_off = i - (opt[cur].mlen != 1);
                          if (opt[cur].mlen == 1) {
                              litlen = opt[cur].litlen;
                              if (cur > litlen) {
@@ -973,8 +976,9 @@ _storeSequence:   /* cur, last_pos, best_mlen, best_off have to be set */
                      if (offset != 1) rep[2] = rep[1];
                      rep[1] = rep[0];
                      rep[0] = best_off;
-                 }
-                 if (litLength == 0 && offset<=1) offset = 1-offset;
+                }
+                if ((litLength==0) & (offset==0)) offset = rep[1];  /* protection, but should never happen */
+                if ((litLength==0) & (offset<=2)) offset --;
              }
  
              ZSTD_LOG_ENCODE("%d/%d: ENCODE literals=%d mlen=%d off=%d rep[0]=%d rep[1]=%d\n", (int)(ip-base), (int)(iend-base), (int)(litLength), (int)mlen, (int)(offset), (int)rep[0], (int)rep[1]);
diff --git a/lib/decompress/zstd_decompress.c b/lib/decompress/zstd_decompress.c

index e1ac2004923f96eb6502c1b13a770578e03ea1f5..958d63692847a4d38df714e9bd34826471739304 100644 (file)
--- a/lib/decompress/zstd_decompress.c
+++ b/lib/decompress/zstd_decompress.c
@@ -627,9 +627,9 @@ static seq_t ZSTD_decodeSequence(seqState_t* seqState)
          }
  
          if (ofCode <= 1) {
-            if ((llCode == 0) & (offset <= 1)) offset = 1-offset;
+            offset += (llCode==0);
              if (offset) {
-                size_t const temp = seqState->prevOffset[offset];
+                size_t const temp = (offset==3) ? seqState->prevOffset[0] - 1 : seqState->prevOffset[offset];
                  if (offset != 1) seqState->prevOffset[2] = seqState->prevOffset[1];
                  seqState->prevOffset[1] = seqState->prevOffset[0];
                  seqState->prevOffset[0] = offset = temp;
diff --git a/zstd_compression_format.md b/zstd_compression_format.md

index b4f8b8af4d5ddff6ff05d555f8dfa9d93d5432df..da5c94afd5e372cc8200c256adf4bb36ea665a1a 100644 (file)
--- a/zstd_compression_format.md
+++ b/zstd_compression_format.md
@@ -1081,11 +1081,11 @@ As seen in [Offset Codes], the first 3 values define a repeated offset.
  They are sorted in recency order, with 1 meaning "most recent one".
  
  There is an exception though, when current sequence's literal length is `0`.
-In which case, the first 2 values are swapped,
-meaning `2` refers to the most recent offset,
-while `1` refers to the second most recent offset,
+In that case, repeat codes are "pushed by one":
+code 1 becomes 2, code 2 becomes 3,
+and code 3 becomes "offset_1 - 1_byte".
  
-Repeat offsets start with the following values : 1, 4 and 8 (in order).
+On the first block, offset history is populated with the following values: 1, 4 and 8 (in order).
  
  Then each block receives its start value from previous compressed block.
  Note that non-compressed blocks are skipped,
@@ -1095,14 +1095,11 @@ they do not contribute to offset history.
  
  ###### Offset updates rules
  
-When the new offset is a normal one,
-offset history is simply translated by one position,
-with the new offset taking first spot.
+The new offset takes the lead in offset history,
+pushing the other ones back, up to its previous place if it was already present.
  
-- When repeat offset 1 (most recent) is used, history is unmodified.
-- When repeat offset 2 is used, it's swapped with offset 1.
-- When repeat offset 3 is used, it takes first spot,
-  pushing the other ones by one position.
+This means that when repeat offset 1 (most recent) is used, history is unmodified.
+When repeat offset 2 is used, it's swapped with offset 1.
  
  
  Dictionary format
author	Yann Collet <yann.collet.73@gmail.com>
	Sun, 31 Jul 2016 02:01:57 +0000 (04:01 +0200)
committer	Yann Collet <yann.collet.73@gmail.com>
	Sun, 31 Jul 2016 02:01:57 +0000 (04:01 +0200)
lib/compress/zstd_opt.h		patch \| blob \| blame \| history
lib/decompress/zstd_decompress.c		patch \| blob \| blame \| history
zstd_compression_format.md		patch \| blob \| blame \| history