From: Jennifer Liu Date: Fri, 27 Jul 2018 02:03:01 +0000 (-0700) Subject: Rerun cover and fastCover with optimized values X-Git-Tag: v0.0.29~58^2~1 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=759c543312fd722c6f351513411d6d57742c7e4e;p=thirdparty%2Fzstd.git Rerun cover and fastCover with optimized values --- diff --git a/contrib/experimental_dict_builders/benchmarkDictBuilder/README.md b/contrib/experimental_dict_builders/benchmarkDictBuilder/README.md index 1ee4b19ba..04866b7e6 100644 --- a/contrib/experimental_dict_builders/benchmarkDictBuilder/README.md +++ b/contrib/experimental_dict_builders/benchmarkDictBuilder/README.md @@ -13,108 +13,113 @@ Benchmark given input files: make ARG= followed by permitted arguments make ARG="in=../../../lib/dictBuilder in=../../../lib/compress" ###Benchmarking Result: - -For every f value for fast, the first one is optimize and the second one has k=200 +The first COVER row is the optimizing run of cover; the second COVER row reruns cover with the optimized d and k found by the first run. +For every f value of fastCover, the first row is the optimizing run of fastCover and the second row reruns fastCover with the optimized d and k found by the first run. 
github: -NODICT 0.000023 2.999642 -RANDOM 0.149020 8.786957 -LEGACY 0.854277 8.989482 -FAST15 8.764078 10.609015 -FAST15 0.232610 9.135669 -FAST16 9.597777 10.474574 -FAST16 0.243698 9.346482 -FAST17 9.385449 10.611737 -FAST17 0.268376 9.605798 -FAST18 9.988885 10.626382 -FAST18 0.311769 9.130565 -FAST19 10.737259 10.411729 -FAST19 0.331885 9.271814 -FAST20 10.479782 10.388895 -FAST20 0.498416 9.194115 -FAST21 21.189883 10.376394 -FAST21 1.098532 9.244456 -FAST22 39.849935 10.432555 -FAST22 2.590561 9.410930 -FAST23 75.832399 10.614747 -FAST23 6.108487 9.484150 -FAST24 139.782714 10.611753 -FAST24 13.029406 9.379030 -COVER 55.118542 10.641263 +NODICT 0.000004 2.999642 +RANDOM 0.146096 8.786957 +LEGACY 0.956888 8.989482 +COVER 56.596152 10.641263 +COVER 4.937047 10.641263 +FAST15 17.722269 10.586461 +FAST15 0.239135 10.586461 +FAST16 18.276179 10.492503 +FAST16 0.265285 10.492503 +FAST17 18.077916 10.611737 +FAST17 0.236573 10.611737 +FAST18 19.510150 10.621586 +FAST18 0.278683 10.621586 +FAST19 18.794350 10.629626 +FAST19 0.307943 10.629626 +FAST20 19.671099 10.610308 +FAST20 0.428814 10.610308 +FAST21 36.527238 10.625733 +FAST21 0.716384 10.625733 +FAST22 83.803521 10.625281 +FAST22 1.290246 10.625281 +FAST23 158.287924 10.602342 +FAST23 3.084848 10.602342 +FAST24 283.630941 10.603379 +FAST24 8.088933 10.603379 hg-commands -NODICT 0.000012 2.425291 -RANDOM 0.083071 3.489515 -LEGACY 0.835195 3.911896 -FAST15 0.163980 3.808375 -FAST16 6.373850 4.010783 -FAST16 0.160299 3.966604 -FAST17 6.668799 4.091602 -FAST17 0.172480 4.062773 -FAST18 6.266105 4.130824 -FAST18 0.171554 4.094666 -FAST19 6.869651 4.158180 -FAST19 0.209468 4.111289 -FAST20 8.267766 4.149707 -FAST20 0.331680 4.119873 -FAST21 18.824296 4.171784 -FAST21 0.783961 4.120884 -FAST22 33.321252 4.152035 -FAST22 1.854215 4.126626 -FAST23 60.775388 4.157595 -FAST23 4.040395 4.134222 -FAST24 110.910038 4.163091 -FAST24 8.505828 4.143533 -COVER 61.654796 4.131136 +NODICT 0.000007 2.425291 +RANDOM 0.084010 
3.489515 +LEGACY 0.926763 3.911896 +COVER 62.036915 4.131136 +COVER 2.194398 4.131136 +FAST15 12.169025 3.903719 +FAST15 0.156552 3.903719 +FAST16 11.886255 4.005077 +FAST16 0.155506 4.005077 +FAST17 11.886955 4.097811 +FAST17 0.176327 4.097811 +FAST18 12.544698 4.136081 +FAST18 0.171796 4.136081 +FAST19 12.920868 4.166021 +FAST19 0.207029 4.166021 +FAST20 15.771429 4.163740 +FAST20 0.258685 4.163740 +FAST21 33.165829 4.157057 +FAST21 0.663088 4.157057 +FAST22 68.779201 4.158195 +FAST22 1.568439 4.158195 +FAST23 121.921931 4.161450 +FAST23 2.498972 4.161450 +FAST24 221.990451 4.159658 +FAST24 5.793594 4.159658 hg-changelog NODICT 0.000004 1.377613 -RANDOM 0.582067 2.096785 -LEGACY 2.739515 2.058273 -FAST15 35.682665 2.127596 -FAST15 0.931621 2.115299 -FAST16 36.557988 2.141787 -FAST16 1.008155 2.136080 -FAST17 36.272242 2.155332 -FAST17 0.906803 2.154596 -FAST18 35.542043 2.171997 -FAST18 1.063101 2.167723 -FAST19 37.756934 2.180893 -FAST19 1.257291 2.173768 -FAST20 40.273755 2.179442 -FAST20 1.630522 2.170072 -FAST21 54.606548 2.181400 -FAST21 2.321266 2.171643 -FAST22 72.454066 2.178774 -FAST22 5.092888 2.168885 -FAST23 106.753208 2.180347 -FAST23 14.722222 2.170673 -FAST24 171.083201 2.183426 -FAST24 27.575575 2.170623 -COVER 227.219660 2.188654 +RANDOM 0.549307 2.096785 +LEGACY 2.273818 2.058273 +COVER 219.640608 2.188654 +COVER 6.055391 2.188654 +FAST15 67.820700 2.127194 +FAST15 0.824624 2.127194 +FAST16 69.774209 2.145401 +FAST16 0.889737 2.145401 +FAST17 70.027355 2.157544 +FAST17 0.869004 2.157544 +FAST18 68.229652 2.173127 +FAST18 0.930689 2.173127 +FAST19 70.696241 2.179527 +FAST19 1.385515 2.179527 +FAST20 80.618172 2.183233 +FAST20 1.699632 2.183233 +FAST21 96.366254 2.180920 +FAST21 2.606553 2.180920 +FAST22 139.440758 2.184297 +FAST22 5.962606 2.184297 +FAST23 207.791930 2.187666 +FAST23 14.823301 2.187666 +FAST24 322.050385 2.189889 +FAST24 29.294918 2.189889 hg-manifest -NODICT 0.000007 1.866385 -RANDOM 1.086571 2.309485 -LEGACY 9.567507 2.506775 
-FAST15 77.811380 2.380461 -FAST15 1.969718 2.317727 -FAST16 75.789019 2.469144 -FAST16 2.051283 2.375815 -FAST17 79.659040 2.539069 -FAST17 1.995394 2.501047 -FAST18 76.281105 2.578095 -FAST18 2.059272 2.564840 -FAST19 79.395382 2.590433 -FAST19 2.354158 2.591024 -FAST20 87.937568 2.597813 -FAST20 2.922189 2.597104 -FAST21 121.760549 2.598408 -FAST21 4.798981 2.600269 -FAST22 155.878461 2.594560 -FAST22 8.151807 2.601047 -FAST23 194.238003 2.596761 -FAST23 15.160578 2.592985 -FAST24 267.425904 2.597657 -FAST24 29.513286 2.600363 -COVER 930.675322 2.582597 +NODICT 0.000008 1.866385 +RANDOM 1.075766 2.309485 +LEGACY 8.688387 2.506775 +COVER 926.024689 2.582597 +COVER 33.630695 2.582597 +FAST15 152.845945 2.377689 +FAST15 2.206285 2.377689 +FAST16 147.772371 2.464814 +FAST16 1.937997 2.464814 +FAST17 147.729498 2.539834 +FAST17 1.966577 2.539834 +FAST18 144.156821 2.576924 +FAST18 1.954106 2.576924 +FAST19 145.678760 2.592479 +FAST19 2.096876 2.592479 +FAST20 159.634674 2.594551 +FAST20 2.568766 2.594551 +FAST21 228.116552 2.597128 +FAST21 4.634508 2.597128 +FAST22 288.890644 2.596971 +FAST22 6.618204 2.596971 +FAST23 377.196211 2.601416 +FAST23 13.497286 2.601416 +FAST24 503.208577 2.602830 +FAST24 29.538585 2.602830 diff --git a/contrib/experimental_dict_builders/benchmarkDictBuilder/benchmark.c b/contrib/experimental_dict_builders/benchmarkDictBuilder/benchmark.c index 9feaae592..a775eae3a 100644 --- a/contrib/experimental_dict_builders/benchmarkDictBuilder/benchmark.c +++ b/contrib/experimental_dict_builders/benchmarkDictBuilder/benchmark.c @@ -277,7 +277,8 @@ int main(int argCount, const char* argv[]) int result = 0; /* Initialize arguments to default values */ - const unsigned k = 200; + unsigned k = 200; + unsigned d = 8; const unsigned cLevel = DEFAULT_CLEVEL; const unsigned dictID = 0; const unsigned maxDictSize = g_defaultMaxDictSize; @@ -360,16 +361,50 @@ int main(int argCount, const char* argv[]) } } + /* for cover */ + { + ZDICT_cover_params_t 
coverParam; + memset(&coverParam, 0, sizeof(coverParam)); + coverParam.zParams = zParams; + coverParam.splitPoint = 1.0; + coverParam.steps = 40; + coverParam.nbThreads = 1; + const int coverOptResult = benchmarkDictBuilder(srcInfo, maxDictSize, NULL, &coverParam, NULL, NULL); + DISPLAYLEVEL(2, "k=%u\nd=%u\nsteps=%u\nsplit=%u\n", coverParam.k, coverParam.d, coverParam.steps, (unsigned)(coverParam.splitPoint * 100)); + if(coverOptResult) { + result = 1; + goto _cleanup; + } + + k = coverParam.k; + d = coverParam.d; + + /* for COVER with k and d provided */ + ZDICT_cover_params_t covernParam; + memset(&covernParam, 0, sizeof(covernParam)); + covernParam.zParams = zParams; + covernParam.splitPoint = 1.0; + covernParam.steps = 40; + covernParam.nbThreads = 1; + covernParam.k = k; + covernParam.d = d; + const int coverResult = benchmarkDictBuilder(srcInfo, maxDictSize, NULL, &covernParam, NULL, NULL); + DISPLAYLEVEL(2, "k=%u\nd=%u\nsteps=%u\nsplit=%u\n", covernParam.k, covernParam.d, covernParam.steps, (unsigned)(covernParam.splitPoint * 100)); + if(coverResult) { + result = 1; + goto _cleanup; + } + } + /* for fastCover */ for (unsigned f = 15; f < 25; f++){ DISPLAYLEVEL(2, "current f is %u\n", f); - /* for fastCover (optimizing k) */ + /* for fastCover (optimizing k and d) */ { ZDICT_fastCover_params_t fastParam; memset(&fastParam, 0, sizeof(fastParam)); fastParam.zParams = zParams; fastParam.splitPoint = 1.0; - fastParam.d = 8; fastParam.f = f; fastParam.steps = 40; fastParam.nbThreads = 1; @@ -379,17 +414,21 @@ int main(int argCount, const char* argv[]) result = 1; goto _cleanup; } + + k = fastParam.k; + d = fastParam.d; } - /* for fastCover (with k provided) */ + + /* for fastCover (with k and d provided) */ { ZDICT_fastCover_params_t fastParam; memset(&fastParam, 0, sizeof(fastParam)); fastParam.zParams = zParams; fastParam.splitPoint = 1.0; - fastParam.d = 8; + fastParam.d = d; fastParam.f = f; - fastParam.k = 200; + fastParam.k = k; fastParam.steps = 40; 
fastParam.nbThreads = 1; const int fastOptResult = benchmarkDictBuilder(srcInfo, maxDictSize, NULL, NULL, NULL, &fastParam); @@ -401,21 +440,7 @@ int main(int argCount, const char* argv[]) } } - /* for cover */ - { - ZDICT_cover_params_t coverParam; - memset(&coverParam, 0, sizeof(coverParam)); - coverParam.zParams = zParams; - coverParam.splitPoint = 1.0; - coverParam.steps = 40; - coverParam.nbThreads = 1; - const int coverOptResult = benchmarkDictBuilder(srcInfo, maxDictSize, NULL, &coverParam, NULL, NULL); - DISPLAYLEVEL(2, "k=%u\nd=%u\nsteps=%u\nsplit=%u\n", coverParam.k, coverParam.d, coverParam.steps, (unsigned)(coverParam.splitPoint * 100)); - if(coverOptResult) { - result = 1; - goto _cleanup; - } - } + /* Free allocated memory */ diff --git a/contrib/experimental_dict_builders/fastCover/fastCover.c b/contrib/experimental_dict_builders/fastCover/fastCover.c index 6f990e0c2..d6b3254ec 100644 --- a/contrib/experimental_dict_builders/fastCover/fastCover.c +++ b/contrib/experimental_dict_builders/fastCover/fastCover.c @@ -267,7 +267,7 @@ static void FASTCOVER_computeFrequency(U32 *freqs, unsigned f, FASTCOVER_ctx_t * size_t currSampleStart = ctx->offsets[i]; size_t currSampleEnd = ctx->offsets[i+1]; start = currSampleStart; - while (start + f < currSampleEnd) { + while (start + ctx->d <= currSampleEnd) { const size_t dmerIndex = FASTCOVER_hashPtrToIndex(ctx->samples + start, f, ctx->d); /* if no dmer with same hash value has been seen in current sample */ if (inCurrSample[dmerIndex] == 0) {