/* check repCode */
{ U32 i;
- for (i=0; i<ZSTD_REP_CHECK; i++) {
+ for (i=(ip == anchor); i<ZSTD_REP_CHECK; i++) {
if ((rep[i]<(U32)(ip-prefixStart))
&& (MEM_readMINMATCH(ip, minMatch) == MEM_readMINMATCH(ip - rep[i], minMatch))) {
mlen = (U32)ZSTD_count(ip+minMatch, ip+minMatch-rep[i], iend) + minMatch;
best_mlen = mlen; best_off = i; cur = 0; last_pos = 1;
goto _storeSequence;
}
- best_off = (i<=1 && ip == anchor) ? 1-i : i;
+ best_off = i - (ip == anchor);
do {
price = ZSTD_getPrice(seqStorePtr, litlen, anchor, best_off, mlen - MINMATCH);
if (mlen > last_pos || price < opt[mlen].price)
best_mlen = minMatch;
{ U32 i;
- for (i=0; i<ZSTD_REP_CHECK; i++) {
+ for (i=(opt[cur].mlen != 1); i<ZSTD_REP_CHECK; i++) { /* check rep */
if ((opt[cur].rep[i]<(U32)(inr-prefixStart))
- && (MEM_readMINMATCH(inr, minMatch) == MEM_readMINMATCH(inr - opt[cur].rep[i], minMatch))) { /* check rep */
+ && (MEM_readMINMATCH(inr, minMatch) == MEM_readMINMATCH(inr - opt[cur].rep[i], minMatch))) {
mlen = (U32)ZSTD_count(inr+minMatch, inr+minMatch - opt[cur].rep[i], iend) + minMatch;
ZSTD_LOG_PARSER("%d: Found REP %d/%d mlen=%d off=%d rep=%d opt[%d].off=%d\n", (int)(inr-base), i, ZSTD_REP_NUM, mlen, i, opt[cur].rep[i], cur, opt[cur].off);
goto _storeSequence;
}
- best_off = (i<=1 && opt[cur].mlen != 1) ? 1-i : i;
+ //best_off = ((i<=1) & (opt[cur].mlen != 1)) ? 1-i : i;
+ best_off = i - (opt[cur].mlen != 1);
+
if (opt[cur].mlen == 1) {
litlen = opt[cur].litlen;
if (cur > litlen) {
rep[1] = rep[0];
rep[0] = best_off;
}
- if (litLength == 0 && offset<=1) offset = 1-offset;
+ if ((litLength == 0) & (offset==0)) offset = rep[1]; /* protection, but should never happen */
+ if ((litLength == 0) & (offset<=2)) offset--;
}
ZSTD_LOG_ENCODE("%d/%d: ENCODE literals=%d mlen=%d off=%d rep[0]=%d rep[1]=%d\n", (int)(ip-base), (int)(iend-base), (int)(litLength), (int)mlen, (int)(offset), (int)rep[0], (int)rep[1]);
/* check repCode */
{ U32 i;
- for (i=0; i<ZSTD_REP_CHECK; i++) {
+ for (i = (ip==anchor); i<ZSTD_REP_CHECK; i++) {
const U32 repIndex = (U32)(current - rep[i]);
const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
const BYTE* const repMatch = repBase + repIndex;
goto _storeSequence;
}
- best_off = (i<=1 && ip == anchor) ? 1-i : i;
+ best_off = i - (ip==anchor);
litlen = opt[0].litlen;
do {
price = ZSTD_getPrice(seqStorePtr, litlen, anchor, best_off, mlen - MINMATCH);
best_mlen = 0;
{ U32 i;
- for (i=0; i<ZSTD_REP_CHECK; i++) {
+ for (i = (opt[cur].mlen != 1); i<ZSTD_REP_CHECK; i++) {
const U32 repIndex = (U32)(current+cur - opt[cur].rep[i]);
const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
const BYTE* const repMatch = repBase + repIndex;
goto _storeSequence;
}
- best_off = (i<=1 && opt[cur].mlen != 1) ? 1-i : i;
+ best_off = i - (opt[cur].mlen != 1);
if (opt[cur].mlen == 1) {
litlen = opt[cur].litlen;
if (cur > litlen) {
if (offset != 1) rep[2] = rep[1];
rep[1] = rep[0];
rep[0] = best_off;
- }
- if (litLength == 0 && offset<=1) offset = 1-offset;
+ }
+ if ((litLength==0) & (offset==0)) offset = rep[1]; /* protection, but should never happen */
+ if ((litLength==0) & (offset<=2)) offset --;
}
ZSTD_LOG_ENCODE("%d/%d: ENCODE literals=%d mlen=%d off=%d rep[0]=%d rep[1]=%d\n", (int)(ip-base), (int)(iend-base), (int)(litLength), (int)mlen, (int)(offset), (int)rep[0], (int)rep[1]);
They are sorted in recency order, with 1 meaning "most recent one".
There is an exception though, when current sequence's literal length is `0`.
-In which case, the first 2 values are swapped,
-meaning `2` refers to the most recent offset,
-while `1` refers to the second most recent offset,
+In this case, repcodes are shifted by one position :
+value 1 refers to repeat offset 2, value 2 refers to repeat offset 3,
+and value 3 refers to "offset_1 - 1_byte".
-Repeat offsets start with the following values : 1, 4 and 8 (in order).
+On the first block, offset history is populated with the following values : 1, 4 and 8 (in order).
Then each block receives its start value from previous compressed block.
Note that non-compressed blocks are skipped,
###### Offset updates rules
-When the new offset is a normal one,
-offset history is simply translated by one position,
-with the new offset taking first spot.
+The new offset takes the lead in offset history,
+shifting older entries back by one position, up to the new offset's previous place if it was already present.
-- When repeat offset 1 (most recent) is used, history is unmodified.
-- When repeat offset 2 is used, it's swapped with offset 1.
-- When repeat offset 3 is used, it takes first spot,
- pushing the other ones by one position.
+It means that when repeat offset 1 (most recent) is used, history is unmodified.
+When repeat offset 2 is used, it's swapped with offset 1.
Dictionary format