[cover] Optimize case where d <= 8

author Nick Terrell <terrelln@fb.com>

Mon, 1 May 2017 17:25:49 +0000 (10:25 -0700)

committer Nick Terrell <terrelln@fb.com>

Tue, 2 May 2017 18:02:43 +0000 (11:02 -0700)
author Nick Terrell <terrelln@fb.com>
Mon, 1 May 2017 17:25:49 +0000 (10:25 -0700)
committer Nick Terrell <terrelln@fb.com>
Tue, 2 May 2017 18:02:43 +0000 (11:02 -0700)
diff --git a/lib/dictBuilder/cover.c b/lib/dictBuilder/cover.c

index 1db42f95b8d488537d918545506bf1f7f341dbab..4235f112b7fb3a20807ad62c84fc8f741cf05276 100644 (file)
--- a/lib/dictBuilder/cover.c
+++ b/lib/dictBuilder/cover.c
@@ -234,10 +234,22 @@ static size_t COVER_sum(const size_t *samplesSizes, unsigned nbSamples) {
   * Returns 1 if the dmer at lp is greater than the dmer at rp.
   */
  static int COVER_cmp(COVER_ctx_t *ctx, const void *lp, const void *rp) {
-  const U32 lhs = *(const U32 *)lp;
-  const U32 rhs = *(const U32 *)rp;
+  U32 const lhs = *(U32 const *)lp;
+  U32 const rhs = *(U32 const *)rp;
    return memcmp(ctx->samples + lhs, ctx->samples + rhs, ctx->d);
  }
+/**
+ * Faster version of COVER_cmp() for d <= 8.
+ *
+ * Instead of calling memcmp(), loads one 64-bit little-endian word at each
+ * dmer's offset into ctx->samples, keeps only the low (8 x ctx->d) bits of
+ * each word, and compares the two masked values as unsigned integers.
+ *
+ * lp and rp each point to a U32 offset into ctx->samples.
+ * Returns -1 if the masked value at lp is smaller, 1 if it is larger, and 0
+ * if the two are equal.
+ *
+ * The d == 8 case gets an all-ones mask directly, because shifting a 64-bit
+ * value left by 64 is undefined in C.
+ *
+ * NOTE(review): the load is presumably a full 8 bytes regardless of d, so
+ * callers must keep 8 readable bytes at every offset they pass in - confirm
+ * against how the suffix array size is computed.
+ * NOTE(review): since the words are read little-endian, the resulting order
+ * presumably differs from memcmp()'s byte order; equality is unaffected.
+ */
+static int COVER_cmp8(COVER_ctx_t *ctx, const void *lp, const void *rp) {
+  U64 const mask = (ctx->d == 8) ? (U64)-1 : (((U64)1 << (8 * ctx->d)) - 1);
+  U64 const lhs = MEM_readLE64(ctx->samples + *(U32 const *)lp) & mask;
+  U64 const rhs = MEM_readLE64(ctx->samples + *(U32 const *)rp) & mask;
+  if (lhs < rhs) {
+    return -1;
+  }
+  return (lhs > rhs);
+}
  
  /**
   * Same as COVER_cmp() except ties are broken by pointer value
@@ -251,6 +263,16 @@ static int COVER_strict_cmp(const void *lp, const void *rp) {
    }
    return result;
  }
+/**
+ * Faster version of COVER_strict_cmp() for d <= 8.
+ *
+ * Comparator with the two-argument signature qsort() expects; the context is
+ * taken from the global g_ctx because qsort() cannot pass an opaque pointer.
+ * Delegates to COVER_cmp8() and, when the two dmers compare equal, breaks
+ * the tie by the pointer values lp and rp themselves: -1 if lp < rp,
+ * otherwise 1. The result is therefore never 0.
+ */
+static int COVER_strict_cmp8(const void *lp, const void *rp) {
+  int result = COVER_cmp8(g_ctx, lp, rp);
+  if (result == 0) {
+    result = lp < rp ? -1 : 1;
+  }
+  return result;
+}
  
  /**
   * Returns the first pointer in [first, last) whose element does not compare
@@ -506,7 +528,7 @@ static int COVER_ctx_init(COVER_ctx_t *ctx, const void *samplesBuffer,
    const BYTE *const samples = (const BYTE *)samplesBuffer;
    const size_t totalSamplesSize = COVER_sum(samplesSizes, nbSamples);
    /* Checks */
-  if (totalSamplesSize < d ||
+  if (totalSamplesSize < MAX(d, sizeof(U64)) ||
        totalSamplesSize >= (size_t)COVER_MAX_SAMPLES_SIZE) {
      DISPLAYLEVEL(1, "Total samples size is too large, maximum size is %u MB\n",
                   (COVER_MAX_SAMPLES_SIZE >> 20));
@@ -520,7 +542,7 @@ static int COVER_ctx_init(COVER_ctx_t *ctx, const void *samplesBuffer,
    ctx->samplesSizes = samplesSizes;
    ctx->nbSamples = nbSamples;
    /* Partial suffix array */
-  ctx->suffixSize = totalSamplesSize - d + 1;
+  ctx->suffixSize = totalSamplesSize - MAX(d, sizeof(U64)) + 1;
    ctx->suffix = (U32 *)malloc(ctx->suffixSize * sizeof(U32));
    /* Maps index to the dmerID */
    ctx->dmerAt = (U32 *)malloc(ctx->suffixSize * sizeof(U32));
@@ -554,7 +576,8 @@ static int COVER_ctx_init(COVER_ctx_t *ctx, const void *samplesBuffer,
      }
      /* qsort doesn't take an opaque pointer, so pass as a global */
      g_ctx = ctx;
-    qsort(ctx->suffix, ctx->suffixSize, sizeof(U32), &COVER_strict_cmp);
+    qsort(ctx->suffix, ctx->suffixSize, sizeof(U32),
+          (ctx->d <= 8 ? &COVER_strict_cmp8 : &COVER_strict_cmp));
    }
    DISPLAYLEVEL(2, "Computing frequencies\n");
    /* For each dmer group (group of positions with the same first d bytes):
@@ -564,8 +587,8 @@ static int COVER_ctx_init(COVER_ctx_t *ctx, const void *samplesBuffer,
     * 2. We calculate how many samples the dmer occurs in and save it in
     *    freqs[dmerId].
     */
-  COVER_groupBy(ctx->suffix, ctx->suffixSize, sizeof(U32), ctx, &COVER_cmp,
-                &COVER_group);
+  COVER_groupBy(ctx->suffix, ctx->suffixSize, sizeof(U32), ctx,
+                (ctx->d <= 8 ? &COVER_cmp8 : &COVER_cmp), &COVER_group);
    ctx->freqs = ctx->suffix;
    ctx->suffix = NULL;
    return 1;
diff --git a/programs/zstd.1 b/programs/zstd.1

index 999dc8169fee2b1b796dfee7a3bc31a8dd9a4a5a..5bd966a841df2654986622c789a299f560fcf268 100644 (file)
--- a/programs/zstd.1
+++ b/programs/zstd.1
@@ -1,5 +1,5 @@
  .
-.TH "ZSTD" "1" "April 2017" "zstd 1.1.5" "User Commands"
+.TH "ZSTD" "1" "May 2017" "zstd 1.2.0" "User Commands"
  .
  .SH "NAME"
  \fBzstd\fR \- zstd, zstdmt, unzstd, zstdcat \- Compress or decompress \.zst files
@@ -188,7 +188,7 @@ dictionary selectivity level (default: 9) the smaller the value, the denser the
  .
  .TP
  \fB\-\-cover=k#,d=#\fR
-Use alternate dictionary builder algorithm named cover with parameters \fIk\fR and \fId\fR with \fId\fR <= \fIk\fR\. Selects segments of size \fIk\fR with the highest score to put in the dictionary\. The score of a segment is computed by the sum of the frequencies of all the subsegments of of size \fId\fR\. Generally \fId\fR should be in the range [6, 24]\. Good values for \fIk\fR vary widely based on the input data, but a safe range is [32, 2048]\.
+Use alternate dictionary builder algorithm named cover with parameters \fIk\fR and \fId\fR with \fId\fR <= \fIk\fR\. Selects segments of size \fIk\fR with the highest score to put in the dictionary\. The score of a segment is computed by the sum of the frequencies of all the subsegments of of size \fId\fR\. Generally \fId\fR should be in the range [6, 8], but no more than 24\. When \fId\fR <= 8, the dictionary builder will run significantly faster\. Good values for \fIk\fR vary widely based on the input data, but a safe range is [32, 2048]\.
  .
  .br
  Example: \fB\-\-train \-\-cover=k=64,d=8 FILEs\fR\.
diff --git a/programs/zstd.1.md b/programs/zstd.1.md

index f2d04d16f20b85d7bcc6a3366c6ea99cba3ce863..0919da70265f6fdeec098aa37a02e907b6327999 100644 (file)
--- a/programs/zstd.1.md
+++ b/programs/zstd.1.md
@@ -186,7 +186,8 @@ Typical gains range from 10% (at 64KB) to x5 better (at <1KB).
      Selects segments of size _k_ with the highest score to put in the dictionary.
      The score of a segment is computed by the sum of the frequencies of all the
      subsegments of of size _d_.
-    Generally _d_ should be in the range [6, 24].
+    Generally _d_ should be in the range [6, 8], but no more than 24.
+    When _d_ <= 8, the dictionary builder will run significantly faster.
      Good values for _k_ vary widely based on the input data,
      but a safe range is [32, 2048].<br />
      Example: `--train --cover=k=64,d=8 FILEs`.
author	Nick Terrell <terrelln@fb.com>
	Mon, 1 May 2017 17:25:49 +0000 (10:25 -0700)
committer	Nick Terrell <terrelln@fb.com>
	Tue, 2 May 2017 18:02:43 +0000 (11:02 -0700)
lib/dictBuilder/cover.c		patch \| blob \| blame \| history
programs/zstd.1		patch \| blob \| blame \| history
programs/zstd.1.md		patch \| blob \| blame \| history