From: Nick Terrell Date: Mon, 1 May 2017 17:25:49 +0000 (-0700) Subject: [cover] Optimize case where d <= 8 X-Git-Tag: v1.2.0^2~9^2^2~2 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=f2d9ef1dc0d491b75877a9200876a7c16dd9694d;p=thirdparty%2Fzstd.git [cover] Optimize case where d <= 8 --- diff --git a/lib/dictBuilder/cover.c b/lib/dictBuilder/cover.c index 1db42f95b..4235f112b 100644 --- a/lib/dictBuilder/cover.c +++ b/lib/dictBuilder/cover.c @@ -234,10 +234,22 @@ static size_t COVER_sum(const size_t *samplesSizes, unsigned nbSamples) { * Returns 1 if the dmer at lp is greater than the dmer at rp. */ static int COVER_cmp(COVER_ctx_t *ctx, const void *lp, const void *rp) { - const U32 lhs = *(const U32 *)lp; - const U32 rhs = *(const U32 *)rp; + U32 const lhs = *(U32 const *)lp; + U32 const rhs = *(U32 const *)rp; return memcmp(ctx->samples + lhs, ctx->samples + rhs, ctx->d); } +/** + * Faster version for d <= 8. + */ +static int COVER_cmp8(COVER_ctx_t *ctx, const void *lp, const void *rp) { + U64 const mask = (ctx->d == 8) ? (U64)-1 : (((U64)1 << (8 * ctx->d)) - 1); + U64 const lhs = MEM_readLE64(ctx->samples + *(U32 const *)lp) & mask; + U64 const rhs = MEM_readLE64(ctx->samples + *(U32 const *)rp) & mask; + if (lhs < rhs) { + return -1; + } + return (lhs > rhs); +} /** * Same as COVER_cmp() except ties are broken by pointer value @@ -251,6 +263,16 @@ static int COVER_strict_cmp(const void *lp, const void *rp) { } return result; } +/** + * Faster version for d <= 8. + */ +static int COVER_strict_cmp8(const void *lp, const void *rp) { + int result = COVER_cmp8(g_ctx, lp, rp); + if (result == 0) { + result = lp < rp ? -1 : 1; + } + return result; +} /** * Returns the first pointer in [first, last) whose element does not compare @@ -506,7 +528,7 @@ static int COVER_ctx_init(COVER_ctx_t *ctx, const void *samplesBuffer, const BYTE *const samples = (const BYTE *)samplesBuffer; const size_t totalSamplesSize = COVER_sum(samplesSizes, nbSamples); /* Checks */ - if (totalSamplesSize < d || + if (totalSamplesSize < MAX(d, sizeof(U64)) || totalSamplesSize >= (size_t)COVER_MAX_SAMPLES_SIZE) { DISPLAYLEVEL(1, "Total samples size is too large, maximum size is %u MB\n", (COVER_MAX_SAMPLES_SIZE >> 20)); @@ -520,7 +542,7 @@ static int COVER_ctx_init(COVER_ctx_t *ctx, const void *samplesBuffer, ctx->samplesSizes = samplesSizes; ctx->nbSamples = nbSamples; /* Partial suffix array */ - ctx->suffixSize = totalSamplesSize - d + 1; + ctx->suffixSize = totalSamplesSize - MAX(d, sizeof(U64)) + 1; ctx->suffix = (U32 *)malloc(ctx->suffixSize * sizeof(U32)); /* Maps index to the dmerID */ ctx->dmerAt = (U32 *)malloc(ctx->suffixSize * sizeof(U32)); @@ -554,7 +576,8 @@ static int COVER_ctx_init(COVER_ctx_t *ctx, const void *samplesBuffer, } /* qsort doesn't take an opaque pointer, so pass as a global */ g_ctx = ctx; - qsort(ctx->suffix, ctx->suffixSize, sizeof(U32), &COVER_strict_cmp); + qsort(ctx->suffix, ctx->suffixSize, sizeof(U32), + (ctx->d <= 8 ? &COVER_strict_cmp8 : &COVER_strict_cmp)); } DISPLAYLEVEL(2, "Computing frequencies\n"); /* For each dmer group (group of positions with the same first d bytes): @@ -564,8 +587,8 @@ static int COVER_ctx_init(COVER_ctx_t *ctx, const void *samplesBuffer, * 2. We calculate how many samples the dmer occurs in and save it in * freqs[dmerId]. */ - COVER_groupBy(ctx->suffix, ctx->suffixSize, sizeof(U32), ctx, &COVER_cmp, - &COVER_group); + COVER_groupBy(ctx->suffix, ctx->suffixSize, sizeof(U32), ctx, + (ctx->d <= 8 ? &COVER_cmp8 : &COVER_cmp), &COVER_group); ctx->freqs = ctx->suffix; ctx->suffix = NULL; return 1; diff --git a/programs/zstd.1 b/programs/zstd.1 index 999dc8169..5bd966a84 100644 --- a/programs/zstd.1 +++ b/programs/zstd.1 @@ -1,5 +1,5 @@ . -.TH "ZSTD" "1" "April 2017" "zstd 1.1.5" "User Commands" +.TH "ZSTD" "1" "May 2017" "zstd 1.2.0" "User Commands" . .SH "NAME" \fBzstd\fR \- zstd, zstdmt, unzstd, zstdcat \- Compress or decompress \.zst files @@ -188,7 +188,7 @@ dictionary selectivity level (default: 9) the smaller the value, the denser the . .TP \fB\-\-cover=k#,d=#\fR -Use alternate dictionary builder algorithm named cover with parameters \fIk\fR and \fId\fR with \fId\fR <= \fIk\fR\. Selects segments of size \fIk\fR with the highest score to put in the dictionary\. The score of a segment is computed by the sum of the frequencies of all the subsegments of of size \fId\fR\. Generally \fId\fR should be in the range [6, 24]\. Good values for \fIk\fR vary widely based on the input data, but a safe range is [32, 2048]\. +Use alternate dictionary builder algorithm named cover with parameters \fIk\fR and \fId\fR with \fId\fR <= \fIk\fR\. Selects segments of size \fIk\fR with the highest score to put in the dictionary\. The score of a segment is computed by the sum of the frequencies of all the subsegments of of size \fId\fR\. Generally \fId\fR should be in the range [6, 8], but no more than 24\. When \fId\fR <= 8, the dictionary builder will run significantly faster\. Good values for \fIk\fR vary widely based on the input data, but a safe range is [32, 2048]\. . .br Example: \fB\-\-train \-\-cover=k=64,d=8 FILEs\fR\. diff --git a/programs/zstd.1.md b/programs/zstd.1.md index f2d04d16f..0919da702 100644 --- a/programs/zstd.1.md +++ b/programs/zstd.1.md @@ -186,7 +186,8 @@ Typical gains range from 10% (at 64KB) to x5 better (at <1KB). Selects segments of size _k_ with the highest score to put in the dictionary. The score of a segment is computed by the sum of the frequencies of all the subsegments of of size _d_. - Generally _d_ should be in the range [6, 24]. + Generally _d_ should be in the range [6, 8], but no more than 24. + When _d_ <= 8, the dictionary builder will run significantly faster. Good values for _k_ vary widely based on the input data, but a safe range is [32, 2048].
Example: `--train --cover=k=64,d=8 FILEs`.