}
}
+/** ZSTD_ldm_gear_reset()
+ * Feeds [data, data + minMatchLength) into the hash without registering any
+ * splits. This effectively resets the hash state. This is used when skipping
+ * over data, either at the beginning of a block, or skipping sections.
+ */
+static void ZSTD_ldm_gear_reset(ldmRollingHashState_t* state,
+ BYTE const* data, size_t minMatchLength)
+{
+ U64 hash = state->rolling;
+ size_t n = 0;
+
+#define GEAR_ITER_ONCE() do { \
+ hash = (hash << 1) + ZSTD_ldm_gearTab[data[n] & 0xff]; \
+ n += 1; \
+ } while (0)
+ while (n + 3 < minMatchLength) {
+ GEAR_ITER_ONCE();
+ GEAR_ITER_ONCE();
+ GEAR_ITER_ONCE();
+ GEAR_ITER_ONCE();
+ }
+ while (n < minMatchLength) {
+ GEAR_ITER_ONCE();
+ }
+#undef GEAR_ITER_ONCE
+}
+
/** ZSTD_ldm_gear_feed():
*
* Registers in the splits array all the split points found in the first
while (ip < iend) {
size_t hashed;
unsigned n;
-
+
numSplits = 0;
hashed = ZSTD_ldm_gear_feed(&hashState, ip, iend - ip, splits, &numSplits);
/* Initialize the rolling hash state with the first minMatchLength bytes */
ZSTD_ldm_gear_init(&hashState, params);
- {
- size_t n = 0;
-
- while (n < minMatchLength) {
- numSplits = 0;
- n += ZSTD_ldm_gear_feed(&hashState, ip + n, minMatchLength - n,
- splits, &numSplits);
- }
- ip += minMatchLength;
- }
+ ZSTD_ldm_gear_reset(&hashState, ip, minMatchLength);
+ ip += minMatchLength;
while (ip < ilimit) {
size_t hashed;
for (n = 0; n < numSplits; n++) {
size_t forwardMatchLength = 0, backwardMatchLength = 0,
bestMatchLength = 0, mLength;
+ U32 offset;
BYTE const* const split = candidates[n].split;
U32 const checksum = candidates[n].checksum;
U32 const hash = candidates[n].hash;
}
/* Match found */
+ offset = (U32)(split - base) - bestEntry->offset;
mLength = forwardMatchLength + backwardMatchLength;
{
- U32 const offset = (U32)(split - base) - bestEntry->offset;
rawSeq* const seq = rawSeqStore->seq + rawSeqStore->size;
/* Out of sequence storage */
ZSTD_ldm_insertEntry(ldmState, hash, newEntry, *params);
anchor = split + forwardMatchLength;
+
+ /* If we find a match that ends after the data that we've hashed
+ * then we have a repeating, overlapping, pattern. E.g. all zeros.
+ * If one repetition of the pattern matches our `stopMask` then all
+ * repetitions will. We don't need to insert them all into out table,
+ * only the first one. So skip over overlapping matches.
+ * This is a major speed boost (20x) for compressing a single byte
+ * repeated, when that byte ends up in the table.
+ */
+ if (anchor > ip + hashed) {
+ ZSTD_ldm_gear_reset(&hashState, anchor - minMatchLength, minMatchLength);
+ /* Continue the outter loop at anchor (ip + hashed == anchor). */
+ ip = anchor - hashed;
+ break;
+ }
}
ip += hashed;