/*
* Compression parameters.
+ *
+ * LZ77_MATCH_MAX_DIST: Farthest back a match can be from current position (can be 1 - 8K).
+ * LZ77_HASH_LOG: ilog2 of the hash size (recommended to be 13 - 18, default 15).
+ * LZ77_HASH_SIZE: Hash size; with the default LZ77_HASH_LOG of 15 this is 32K.
+ * LZ77_RSTEP_SIZE: Number of bytes to read from input buffer for hashing and initial
+ * match check (default 4 bytes, which effectively makes this the min
+ * match len).
+ * LZ77_MSTEP_SIZE: Number of bytes to extend-compare a found match (default 8 bytes).
+ * LZ77_SKIP_TRIGGER: ilog2 value for adaptive skipping, i.e. to progressively skip input
+ * bytes when we can't find matches. Default is 4.
+ * Higher values (>0) will decrease compression time, but will result
+ * in worse compression ratio. Lower values will give better
+ * compression ratio (more matches found), but will increase time.
*/
#define LZ77_MATCH_MAX_DIST SZ_8K
#define LZ77_HASH_LOG 15
return (cur - start);
}
+/**
+ * lz77_encode_match() - Match encoding.
+ * @dst: compressed buffer
+ * @nib: pointer to an address in @dst
+ * @dist: match distance
+ * @len: match length
+ *
+ * Assumes all args were previously checked.
+ *
+ * Return: @dst advanced to new position
+ *
+ * Ref: MS-XCA 2.3.4 "Plain LZ77 Compression Algorithm Details" - "Processing"
+ */
static __always_inline void *lz77_encode_match(void *dst, void **nib, u16 dist, u32 len)
{
len -= 3;
if (len < 7) {
lz77_write16(dst, dist + len);
- return dst + 2;
+ return dst + sizeof(u16);
}
dist |= 7;
lz77_write16(dst, dist);
- dst += 2;
+ dst += sizeof(u16);
len -= 7;
if (!*nib) {
if (len <= 0xffff) {
lz77_write16(dst, len);
- return dst + 2;
+ return dst + sizeof(u16);
}
lz77_write16(dst, 0);
- dst += 2;
+ dst += sizeof(u16);
lz77_write32(dst, len);
- return dst + 4;
+ return dst + sizeof(u32);
}
+/**
+ * lz77_encode_literals() - Literals encoding.
+ * @start: where to start copying literals (uncompressed buffer)
+ * @end: where to stop copying (uncompressed buffer)
+ * @dst: compressed buffer
+ * @f: pointer to current flag value
+ * @fc: pointer to current flag count
+ * @fp: pointer to current flag address
+ *
+ * Batch copy literals from @start to @dst, updating flag values accordingly.
+ * Assumes all args were previously checked.
+ *
+ * Return: @dst advanced to new position
+ *
+ * Ref: MS-XCA 2.3.4 "Plain LZ77 Compression Algorithm Details" - "Processing"
+ */
static __always_inline void *lz77_encode_literals(const void *start, const void *end, void *dst,
long *f, u32 *fc, void **fp)
{
lz77_write32(*fp, *f);
*fc = 0;
*fp = dst;
- dst += 4;
+ dst += sizeof(u32);
}
} while (start < end);
rlim = end - LZ77_MSTEP_SIZE; /* read limit (for lz77_match_len()) */
dstp = dst;
flag_pos = dstp;
- dstp += 4;
+ dstp += sizeof(u32);
nib = NULL;
htable = kvcalloc(LZ77_HASH_SIZE, sizeof(*htable), GFP_KERNEL);
LZ77_PREFETCH(srcp + LZ77_RSTEP_SIZE);
+ /*
+ * Adjust @srcp so we don't get a false positive match on first iteration.
+ * Then prepare hash for first loop iteration (don't advance @srcp again).
+ */
hash = lz77_hash(lz77_read32(srcp++));
htable[hash] = 0;
hash = lz77_hash(lz77_read32(srcp));
srcp = next;
next += step;
+
+ /*
+ * Adaptive skipping.
+ *
+ * Increment @step every (1 << LZ77_SKIP_TRIGGER) bytes (16 in our case)
+ * without a match.
+ * Reset to 1 when a match is found.
+ */
step = (skip++ >> LZ77_SKIP_TRIGGER);
if (unlikely(next > rlim))
goto out;
} while (likely(match + LZ77_MATCH_MAX_DIST < srcp) ||
lz77_read32(match) != lz77_read32(srcp));
+ /*
+ * Match found. Warm/cold path; begin parsing @srcp and writing to @dstp:
+ * - flush literals
+ * - compute match length (*)
+ * - encode match
+ *
+ * (*) Current minimum match length is defined by the memory read size above, so
+ * here we already know that we have 4 matching bytes, but it's just faster to
+ * redundantly compute it again in lz77_match_len() than to adjust pointers/len.
+ */
dstp = lz77_encode_literals(anchor, srcp, dstp, &flag, &flag_count, &flag_pos);
len = lz77_match_len(match, srcp, end);
dstp = lz77_encode_match(dstp, &nib, srcp - match, len);
lz77_write32(flag_pos, flag);
flag_count = 0;
flag_pos = dstp;
- dstp += 4;
+ dstp += sizeof(u32);
}
if (unlikely(srcp > rlim))