]> git.ipfire.org Git - thirdparty/zstd.git/commitdiff
Rerun cover and fastCover with optimized values
authorJennifer Liu <jenniferliu620@fb.com>
Fri, 27 Jul 2018 02:03:01 +0000 (19:03 -0700)
committerJennifer Liu <jenniferliu620@fb.com>
Fri, 27 Jul 2018 02:03:01 +0000 (19:03 -0700)
contrib/experimental_dict_builders/benchmarkDictBuilder/README.md
contrib/experimental_dict_builders/benchmarkDictBuilder/benchmark.c
contrib/experimental_dict_builders/fastCover/fastCover.c

index 1ee4b19bad54790506a13200dc2c3e1770a74c42..04866b7e6522095e3b97cc6e1c2cd163cd7d9d91 100644 (file)
@@ -13,108 +13,113 @@ Benchmark given input files: make ARG= followed by permitted arguments
 make ARG="in=../../../lib/dictBuilder in=../../../lib/compress"
 
 ###Benchmarking Result:
-
-For every f value for fast, the first one is optimize and the second one has k=200
+The first COVER row runs cover in optimize mode; the second COVER row reruns cover with the optimized d and k found by the first run.
+For every f value of fastCover, the first row runs fastCover in optimize mode and the second row reruns it with the optimized d and k found by the first run.
 
 github:
-NODICT       0.000023       2.999642
-RANDOM       0.149020       8.786957
-LEGACY       0.854277       8.989482
-FAST15       8.764078       10.609015
-FAST15       0.232610       9.135669
-FAST16       9.597777       10.474574
-FAST16       0.243698       9.346482
-FAST17       9.385449       10.611737
-FAST17       0.268376       9.605798
-FAST18       9.988885       10.626382
-FAST18       0.311769       9.130565
-FAST19       10.737259       10.411729
-FAST19       0.331885       9.271814
-FAST20       10.479782       10.388895
-FAST20       0.498416       9.194115
-FAST21       21.189883       10.376394
-FAST21       1.098532       9.244456
-FAST22       39.849935       10.432555
-FAST22       2.590561       9.410930
-FAST23       75.832399       10.614747
-FAST23       6.108487       9.484150
-FAST24       139.782714       10.611753
-FAST24       13.029406       9.379030
-COVER       55.118542       10.641263
+NODICT       0.000004       2.999642
+RANDOM       0.146096       8.786957
+LEGACY       0.956888       8.989482
+COVER       56.596152       10.641263
+COVER       4.937047       10.641263
+FAST15       17.722269       10.586461
+FAST15       0.239135       10.586461
+FAST16       18.276179       10.492503
+FAST16       0.265285       10.492503
+FAST17       18.077916       10.611737
+FAST17       0.236573       10.611737
+FAST18       19.510150       10.621586
+FAST18       0.278683       10.621586
+FAST19       18.794350       10.629626
+FAST19       0.307943       10.629626
+FAST20       19.671099       10.610308
+FAST20       0.428814       10.610308
+FAST21       36.527238       10.625733
+FAST21       0.716384       10.625733
+FAST22       83.803521       10.625281
+FAST22       1.290246       10.625281
+FAST23       158.287924       10.602342
+FAST23       3.084848       10.602342
+FAST24       283.630941       10.603379
+FAST24       8.088933       10.603379
 
 hg-commands
-NODICT       0.000012       2.425291
-RANDOM       0.083071       3.489515
-LEGACY       0.835195       3.911896
-FAST15       0.163980       3.808375
-FAST16       6.373850       4.010783
-FAST16       0.160299       3.966604
-FAST17       6.668799       4.091602
-FAST17       0.172480       4.062773
-FAST18       6.266105       4.130824
-FAST18       0.171554       4.094666
-FAST19       6.869651       4.158180
-FAST19       0.209468       4.111289
-FAST20       8.267766       4.149707
-FAST20       0.331680       4.119873
-FAST21       18.824296       4.171784
-FAST21       0.783961       4.120884
-FAST22       33.321252       4.152035
-FAST22       1.854215       4.126626
-FAST23       60.775388       4.157595
-FAST23       4.040395       4.134222
-FAST24       110.910038       4.163091
-FAST24       8.505828       4.143533
-COVER       61.654796       4.131136
+NODICT       0.000007       2.425291
+RANDOM       0.084010       3.489515
+LEGACY       0.926763       3.911896
+COVER       62.036915       4.131136
+COVER       2.194398       4.131136
+FAST15       12.169025       3.903719
+FAST15       0.156552       3.903719
+FAST16       11.886255       4.005077
+FAST16       0.155506       4.005077
+FAST17       11.886955       4.097811
+FAST17       0.176327       4.097811
+FAST18       12.544698       4.136081
+FAST18       0.171796       4.136081
+FAST19       12.920868       4.166021
+FAST19       0.207029       4.166021
+FAST20       15.771429       4.163740
+FAST20       0.258685       4.163740
+FAST21       33.165829       4.157057
+FAST21       0.663088       4.157057
+FAST22       68.779201       4.158195
+FAST22       1.568439       4.158195
+FAST23       121.921931       4.161450
+FAST23       2.498972       4.161450
+FAST24       221.990451       4.159658
+FAST24       5.793594       4.159658
 
 hg-changelog
 NODICT       0.000004       1.377613
-RANDOM       0.582067       2.096785
-LEGACY       2.739515       2.058273
-FAST15       35.682665       2.127596
-FAST15       0.931621       2.115299
-FAST16       36.557988       2.141787
-FAST16       1.008155       2.136080
-FAST17       36.272242       2.155332
-FAST17       0.906803       2.154596
-FAST18       35.542043       2.171997
-FAST18       1.063101       2.167723
-FAST19       37.756934       2.180893
-FAST19       1.257291       2.173768
-FAST20       40.273755       2.179442
-FAST20       1.630522       2.170072
-FAST21       54.606548       2.181400
-FAST21       2.321266       2.171643
-FAST22       72.454066       2.178774
-FAST22       5.092888       2.168885
-FAST23       106.753208       2.180347
-FAST23       14.722222       2.170673
-FAST24       171.083201       2.183426
-FAST24       27.575575       2.170623
-COVER       227.219660       2.188654
+RANDOM       0.549307       2.096785
+LEGACY       2.273818       2.058273
+COVER       219.640608       2.188654
+COVER       6.055391       2.188654
+FAST15       67.820700       2.127194
+FAST15       0.824624       2.127194
+FAST16       69.774209       2.145401
+FAST16       0.889737       2.145401
+FAST17       70.027355       2.157544
+FAST17       0.869004       2.157544
+FAST18       68.229652       2.173127
+FAST18       0.930689       2.173127
+FAST19       70.696241       2.179527
+FAST19       1.385515       2.179527
+FAST20       80.618172       2.183233
+FAST20       1.699632       2.183233
+FAST21       96.366254       2.180920
+FAST21       2.606553       2.180920
+FAST22       139.440758       2.184297
+FAST22       5.962606       2.184297
+FAST23       207.791930       2.187666
+FAST23       14.823301       2.187666
+FAST24       322.050385       2.189889
+FAST24       29.294918       2.189889
 
 hg-manifest
-NODICT       0.000007       1.866385
-RANDOM       1.086571       2.309485
-LEGACY       9.567507       2.506775
-FAST15       77.811380       2.380461
-FAST15       1.969718       2.317727
-FAST16       75.789019       2.469144
-FAST16       2.051283       2.375815
-FAST17       79.659040       2.539069
-FAST17       1.995394       2.501047
-FAST18       76.281105       2.578095
-FAST18       2.059272       2.564840
-FAST19       79.395382       2.590433
-FAST19       2.354158       2.591024
-FAST20       87.937568       2.597813
-FAST20       2.922189       2.597104
-FAST21       121.760549       2.598408
-FAST21       4.798981       2.600269
-FAST22       155.878461       2.594560
-FAST22       8.151807       2.601047
-FAST23       194.238003       2.596761
-FAST23       15.160578       2.592985
-FAST24       267.425904       2.597657
-FAST24       29.513286       2.600363
-COVER       930.675322       2.582597
+NODICT       0.000008       1.866385
+RANDOM       1.075766       2.309485
+LEGACY       8.688387       2.506775
+COVER       926.024689       2.582597
+COVER       33.630695       2.582597
+FAST15       152.845945       2.377689
+FAST15       2.206285       2.377689
+FAST16       147.772371       2.464814
+FAST16       1.937997       2.464814
+FAST17       147.729498       2.539834
+FAST17       1.966577       2.539834
+FAST18       144.156821       2.576924
+FAST18       1.954106       2.576924
+FAST19       145.678760       2.592479
+FAST19       2.096876       2.592479
+FAST20       159.634674       2.594551
+FAST20       2.568766       2.594551
+FAST21       228.116552       2.597128
+FAST21       4.634508       2.597128
+FAST22       288.890644       2.596971
+FAST22       6.618204       2.596971
+FAST23       377.196211       2.601416
+FAST23       13.497286       2.601416
+FAST24       503.208577       2.602830
+FAST24       29.538585       2.602830
index 9feaae59279f1ff8043a6372d0dcdb60e9e802da..a775eae3acfba19a98993a34cd750ea07c0cd960 100644 (file)
@@ -277,7 +277,8 @@ int main(int argCount, const char* argv[])
   int result = 0;
 
   /* Initialize arguments to default values */
-  const unsigned k = 200;
+  unsigned k = 200;
+  unsigned d = 8;
   const unsigned cLevel = DEFAULT_CLEVEL;
   const unsigned dictID = 0;
   const unsigned maxDictSize = g_defaultMaxDictSize;
@@ -360,16 +361,50 @@ int main(int argCount, const char* argv[])
     }
   }
 
+  /* for cover */
+  {
+    ZDICT_cover_params_t coverParam;
+    memset(&coverParam, 0, sizeof(coverParam));
+    coverParam.zParams = zParams;
+    coverParam.splitPoint = 1.0;
+    coverParam.steps = 40;
+    coverParam.nbThreads = 1;
+    const int coverOptResult = benchmarkDictBuilder(srcInfo, maxDictSize, NULL, &coverParam, NULL, NULL);
+    DISPLAYLEVEL(2, "k=%u\nd=%u\nsteps=%u\nsplit=%u\n", coverParam.k, coverParam.d, coverParam.steps, (unsigned)(coverParam.splitPoint * 100));
+    if(coverOptResult) {
+      result = 1;
+      goto _cleanup;
+    }
+
+    k = coverParam.k;
+    d = coverParam.d;
+
+    /* for COVER with k and d provided */
+    ZDICT_cover_params_t covernParam;
+    memset(&covernParam, 0, sizeof(covernParam));
+    covernParam.zParams = zParams;
+    covernParam.splitPoint = 1.0;
+    covernParam.steps = 40;
+    covernParam.nbThreads = 1;
+    covernParam.k = k;
+    covernParam.d = d;
+    const int coverResult = benchmarkDictBuilder(srcInfo, maxDictSize, NULL, &covernParam, NULL, NULL);
+    DISPLAYLEVEL(2, "k=%u\nd=%u\nsteps=%u\nsplit=%u\n", covernParam.k, covernParam.d, covernParam.steps, (unsigned)(covernParam.splitPoint * 100));
+    if(coverResult) {
+      result = 1;
+      goto _cleanup;
+    }
+  }
+
   /* for fastCover */
   for (unsigned f = 15; f < 25; f++){
     DISPLAYLEVEL(2, "current f is %u\n", f);
-    /* for fastCover (optimizing k) */
+    /* for fastCover (optimizing k and d) */
     {
       ZDICT_fastCover_params_t fastParam;
       memset(&fastParam, 0, sizeof(fastParam));
       fastParam.zParams = zParams;
       fastParam.splitPoint = 1.0;
-      fastParam.d = 8;
       fastParam.f = f;
       fastParam.steps = 40;
       fastParam.nbThreads = 1;
@@ -379,17 +414,21 @@ int main(int argCount, const char* argv[])
         result = 1;
         goto _cleanup;
       }
+
+      k = fastParam.k;
+      d = fastParam.d;
     }
 
-    /* for fastCover (with k provided) */
+
+    /* for fastCover (with k and d provided) */
     {
       ZDICT_fastCover_params_t fastParam;
       memset(&fastParam, 0, sizeof(fastParam));
       fastParam.zParams = zParams;
       fastParam.splitPoint = 1.0;
-      fastParam.d = 8;
+      fastParam.d = d;
       fastParam.f = f;
-      fastParam.k = 200;
+      fastParam.k = k;
       fastParam.steps = 40;
       fastParam.nbThreads = 1;
       const int fastOptResult = benchmarkDictBuilder(srcInfo, maxDictSize, NULL, NULL, NULL, &fastParam);
@@ -401,21 +440,7 @@ int main(int argCount, const char* argv[])
     }
   }
 
-  /* for cover */
-  {
-    ZDICT_cover_params_t coverParam;
-    memset(&coverParam, 0, sizeof(coverParam));
-    coverParam.zParams = zParams;
-    coverParam.splitPoint = 1.0;
-    coverParam.steps = 40;
-    coverParam.nbThreads = 1;
-    const int coverOptResult = benchmarkDictBuilder(srcInfo, maxDictSize, NULL, &coverParam, NULL, NULL);
-    DISPLAYLEVEL(2, "k=%u\nd=%u\nsteps=%u\nsplit=%u\n", coverParam.k, coverParam.d, coverParam.steps, (unsigned)(coverParam.splitPoint * 100));
-    if(coverOptResult) {
-      result = 1;
-      goto _cleanup;
-    }
-  }
+
 
 
   /* Free allocated memory */
index 6f990e0c2b7a73b49baac3901cf22ffd1f8f9fa7..d6b3254ec8a786f752335ca476bda585c83815ad 100644 (file)
@@ -267,7 +267,7 @@ static void FASTCOVER_computeFrequency(U32 *freqs, unsigned f, FASTCOVER_ctx_t *
     size_t currSampleStart = ctx->offsets[i];
     size_t currSampleEnd = ctx->offsets[i+1];
     start = currSampleStart;
-    while (start + f < currSampleEnd) {
+    while (start + ctx->d <= currSampleEnd) {
       const size_t dmerIndex = FASTCOVER_hashPtrToIndex(ctx->samples + start, f, ctx->d);
       /* if no dmer with same hash value has been seen in current sample */
       if (inCurrSample[dmerIndex] == 0) {