* LDM helper functions
*********************************/
+/* Skips past bytesToSkip bytes in an ldm seqstore, starting at the sequence
+ * at ldmSeqStore->pos. For each sequence, literals are consumed first, then
+ * match bytes. Sequences are modified in place: litLength/matchLength are
+ * reduced by the amount skipped, and pos advances past every sequence that
+ * is consumed entirely. Stops early if the end of the seqstore is reached.
+ */
+static void ldm_skipBytesInSeqStore(rawSeqStore_t* ldmSeqStore, size_t bytesToSkip) {
+ while (bytesToSkip > 0 && ldmSeqStore->pos < ldmSeqStore->size) {
+ rawSeq* seq = ldmSeqStore->seq + ldmSeqStore->pos;
+ if (bytesToSkip <= seq->litLength) {
+ /* The whole skip fits within this sequence's literals: shrink them and stop */
+ seq->litLength -= (U32)bytesToSkip;
+ return;
+ }
+ bytesToSkip -= seq->litLength;
+ seq->litLength = 0;
+ if (bytesToSkip < seq->matchLength) {
+ seq->matchLength -= (U32)bytesToSkip;
+ return;
+ }
+ bytesToSkip -= seq->matchLength;
+ seq->matchLength = 0;
+ ldmSeqStore->pos++;
+ }
+}
+
+/* Returns the part of the sequence at ldmSeqStore->pos that fits within
+ * remainingBytes, splitting it if it crosses that boundary, and updates the
+ * seq store (pos and/or the stored sequence) to reflect the bytes consumed.
+ * Pretty much the same function as maybeSplitSequence() in zstd_ldm.c
+ */
+static rawSeq ldm_splitSequenceAndUpdateSeqStore(rawSeqStore_t* ldmSeqStore, U32 remainingBytes) {
+ rawSeq currSeq = ldmSeqStore->seq[ldmSeqStore->pos];
+ /* Case where we don't split: the whole sequence fits within remainingBytes */
+ if (remainingBytes >= currSeq.litLength + currSeq.matchLength) {
+ ldmSeqStore->pos++;
+ return currSeq;
+ }
+ /* Need a split: from here on, remainingBytes < litLength + matchLength.
+ * If the boundary falls within the literals, the returned offset is set to 0
+ * (presumably signalling "no match" to the caller - confirm); otherwise the
+ * returned match is shortened so that it ends at the boundary. */
+ if (remainingBytes <= currSeq.litLength) {
+ currSeq.offset = 0;
+ } else if (remainingBytes < currSeq.litLength + currSeq.matchLength) {
+ currSeq.matchLength = remainingBytes - currSeq.litLength;
+ }
+
+ /* After deriving currSeq, which is the sequence before the block boundary,
+ * we now must skip past remainingBytes bytes in the seqStore, which
+ * updates the entry at pos in place so that it represents the second half
+ * of the sequence after the block boundary
+ */
+ ldm_skipBytesInSeqStore(ldmSeqStore, remainingBytes);
+ return currSeq;
+}
+
/* Moves forward in rawSeqStore by nbBytes bytes, updating the fields
* 'pos' and 'posInSequence' accordingly.
*/
ldmSeqStore->posInSequence += nbBytes;
return;
} else {
+ ldmSeqStore->posInSequence += currSeq.litLength;
nbBytes -= currSeq.litLength;
}
return;
} else {
nbBytes -= currSeq.matchLength;
+ /* We have moved through this entire sequence - move the read pos
+ forward to the next sequence, and reset posInSequence */
ldmSeqStore->pos++;
ldmSeqStore->posInSequence = 0;
}
}
}
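+/* Calculates the beginning and end of a match, and updates ldmSeqStore as
+ * necessary.
+ * posInSequence can be either within the literals section, or within a match.
+ * If the remaining literals reach or pass the end of the block, no match is
+ * reported: both match positions are set to UINT_MAX.
+ */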
static void ldm_calculateMatchRange(rawSeqStore_t* ldmSeqStore,
U32* matchStartPosInBlock, U32* matchEndPosInBlock,
U32* matchOffset, U32 currPosInBlock,
- U32 remainingBytes, U32 currBlockEndPos) {
+ U32 blockBytesRemaining) {
rawSeq currSeq = ldmSeqStore->seq[ldmSeqStore->pos];
- U32 literalsBytesLeft = (ldmSeqStore->posInSequence < currSeq.litLength) ?
+ U32 currBlockEndPos = currPosInBlock + blockBytesRemaining;
+ U32 literalsBytesRemaining = (ldmSeqStore->posInSequence < currSeq.litLength) ?
currSeq.litLength - ldmSeqStore->posInSequence :
0;
+
/* In this case, the match is further in the block than currPosInBlock, and we are
currently in the literals section of the LDM */
- if (literalsBytesLeft) {
- if (literalsBytesLeft >= remainingBytes) {
+ if (literalsBytesRemaining) {
+ if (literalsBytesRemaining >= blockBytesRemaining) {
/* If there are more literal bytes than bytes remaining in block, no ldm */
*matchStartPosInBlock = UINT_MAX;
*matchEndPosInBlock = UINT_MAX;
- ldm_moveForwardBytesInSeqStore(ldmSeqStore, remainingBytes);
+ ldm_moveForwardBytesInSeqStore(ldmSeqStore, blockBytesRemaining);
return;
}
}
- *matchStartPosInBlock = currPosInBlock + currSeq.litLength;
+ /* Matches may be < MINMATCH by this process. In that case, we will reject them
+ when we are deciding whether or not to add the ldm */
+ *matchStartPosInBlock = currPosInBlock + literalsBytesRemaining;
*matchEndPosInBlock = *matchStartPosInBlock + currSeq.matchLength;
*matchOffset = currSeq.offset;
- /* Match ends after the block ends, we can't use the whole match */
if (*matchEndPosInBlock > currBlockEndPos) {
+ /* Match ends after the block ends, we can't use the whole match */
*matchEndPosInBlock = currBlockEndPos;
ldm_moveForwardBytesInSeqStore(ldmSeqStore, currBlockEndPos - currPosInBlock);
} else {
}
}
-/* Skips past srcSize bytes in an ldm seqstore */
-static void ldm_skipBytesInSeqStore(rawSeqStore_t* ldmSeqStore, size_t bytesToSkip) {
- while (bytesToSkip > 0 && ldmSeqStore->pos < ldmSeqStore->size) {
- rawSeq* seq = ldmSeqStore->seq + ldmSeqStore->pos;
- if (bytesToSkip <= seq->litLength) {
- /* Skip past srcSize literals */
- seq->litLength -= (U32)bytesToSkip;
- return;
- }
- bytesToSkip -= seq->litLength;
- seq->litLength = 0;
- if (bytesToSkip < seq->matchLength) {
- seq->matchLength -= (U32)bytesToSkip;
- return;
- }
- bytesToSkip -= seq->matchLength;
- seq->matchLength = 0;
- ldmSeqStore->pos++;
- }
-}
-
-/* Splits a sequence if it's across the boundary. May update pos in the seq store too
- * Pretty much the same function as maybeSplitSequence() in zstd_ldm.c
- */
-static rawSeq ldm_splitSequenceAndUpdateSeqStore(rawSeqStore_t* ldmSeqStore, U32 remainingBytes) {
- rawSeq currSeq = ldmSeqStore->seq[ldmSeqStore->pos];
- /* Case where don't split the match*/
- if (remainingBytes >= currSeq.litLength + currSeq.matchLength) {
- ldmSeqStore->pos++;
- return currSeq;
- }
- /* Need a split */
- if (remainingBytes <= currSeq.litLength) {
- currSeq.offset = 0;
- } else if (remainingBytes < currSeq.litLength + currSeq.matchLength) {
- currSeq.matchLength = remainingBytes - currSeq.litLength;
- }
-
- /* After deriving currSeq which is the sequence before the block boundary,
- * we now must skip past the remaining number of bytes unaccounted for,
- * and update the entry at pos in the seqStore, which represents the second half
- * of the sequence after the block boundary
- */
- ldm_skipBytesInSeqStore(ldmSeqStore, remainingBytes);
- return currSeq;
-}
-
/* Fetch the next match in the ldm seq store */
static void ldm_getNextMatch(rawSeqStore_t* ldmSeqStore,
U32* matchStartPosInBlock, U32* matchEndPosInBlock,
return;
}*/
- ldm_calculateMatchRange(ldmSeqStore, matchStartPosInBlock, matchEndPosInBlock, matchOffset, remainingBytes);
- *matchStartPosInBlock = currPosInBlock + seq.litLength;
- *matchEndPosInBlock = *matchStartPosInBlock + seq.matchLength;
- *matchOffset = seq.offset;
+ ldm_calculateMatchRange(ldmSeqStore, matchStartPosInBlock, matchEndPosInBlock, matchOffset, currPosInBlock, remainingBytes);
return;
}
ldm_moveForwardBytesInSeqStore(ldmSeqStore, posOvershoot);
}
ldm_getNextMatch(ldmSeqStore, matchStartPosInBlock,
- matchEndPosInBlock, matchOffset,
- currPosInBlock, remainingBytes);
+ matchEndPosInBlock, matchOffset,
+ currPosInBlock, remainingBytes);
}
ldm_maybeAddLdm(matches, nbMatches, *matchStartPosInBlock, *matchEndPosInBlock, *matchOffset, currPosInBlock);
}
U32 ldmEndPosInBlock = 0;
U32 ldmOffset = 0;
- /*if (ms->ldmSeqStore.size > 0 && ms->ldmSeqStore.pos != ms->ldmSeqStore.size) {
- if (ms->ldmSeqStore.base != base) {
+ if (ms->ldmSeqStore.size > 0 && ms->ldmSeqStore.pos != ms->ldmSeqStore.size) {
+ /*if (ms->ldmSeqStore.base != base) {
int baseDiff = (int)(ms->ldmSeqStore.base - base);
ms->ldmSeqStore.seq[ms->ldmSeqStore.pos].litLength += baseDiff;
ms->ldmSeqStore.base = base;
- }
+ }*/
ldm_getNextMatch(&ms->ldmSeqStore, &ldmStartPosInBlock,
&ldmEndPosInBlock, &ldmOffset,
(U32)(ip-istart), (U32)(iend-ip));
- }*/
+ }
/* init */
DEBUGLOG(5, "ZSTD_compressBlock_opt_generic: current=%u, prefix=%u, nextToUpdate=%u",
(U32)(ip - base), ms->window.dictLimit, ms->nextToUpdate);
if (ldmEndPosInBlock < srcSize) {
/* This can occur if after adding the final match in an ldm seq store within this block,
ip goes to the end of the block without activating a check for ldm_getNextMatch */
- ldm_moveForwardBytesInSeqStore(ms->ldmSeqStore, srcSize - ldmEndPosInBlock);
+ ldm_moveForwardBytesInSeqStore(&ms->ldmSeqStore, srcSize - ldmEndPosInBlock);
}
/* Return the last literals size */
return (size_t)(iend - anchor);