From: Jennifer Liu Date: Mon, 30 Jul 2018 19:54:22 +0000 (-0700) Subject: Increment frequency for every dmer occurence within same sample instead of at most... X-Git-Tag: v0.0.29~56^2 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=refs%2Fpull%2F1257%2Fhead;p=thirdparty%2Fzstd.git Increment frequency for every dmer occurence within same sample instead of at most once per sample --- diff --git a/contrib/experimental_dict_builders/benchmarkDictBuilder/README.md b/contrib/experimental_dict_builders/benchmarkDictBuilder/README.md index 20fbde954..1fdd323c2 100644 --- a/contrib/experimental_dict_builders/benchmarkDictBuilder/README.md +++ b/contrib/experimental_dict_builders/benchmarkDictBuilder/README.md @@ -18,109 +18,109 @@ make ARG="in=../../../lib/dictBuilder in=../../../lib/compress" - Fourth column is chosen d and fifth column is chosen k github: -NODICT 0.000005 2.999642 -RANDOM 0.141553 8.786957 -LEGACY 0.904340 8.989482 -COVER 53.621302 10.641263 8 1298 -COVER 4.085037 10.641263 8 1298 -FAST15 17.636211 10.586461 8 1778 -FAST15 0.221236 10.586461 8 1778 -FAST16 18.716259 10.492503 6 1778 -FAST16 0.251522 10.492503 6 1778 -FAST17 17.614391 10.611737 8 1778 -FAST17 0.241011 10.611737 8 1778 -FAST18 19.926270 10.621586 8 1778 -FAST18 0.287195 10.621586 8 1778 -FAST19 19.626808 10.629626 8 1778 -FAST19 0.340191 10.629626 8 1778 -FAST20 18.918657 10.610308 8 1778 -FAST20 0.463307 10.610308 8 1778 -FAST21 20.502362 10.625733 8 1778 -FAST21 0.638202 10.625733 8 1778 -FAST22 22.702695 10.625281 8 1778 -FAST22 1.353399 10.625281 8 1778 -FAST23 28.041990 10.602342 8 1778 -FAST23 3.029502 10.602342 8 1778 -FAST24 35.662961 10.603379 8 1778 -FAST24 6.524258 10.603379 8 1778 +NODICT 0.000004 2.999642 +RANDOM 0.161907 8.786957 +LEGACY 0.960128 8.989482 +COVER 69.031037 10.641263 8 1298 +COVER 7.017782 10.641263 8 1298 +FAST15 24.710713 10.547583 8 1874 +FAST15 0.271657 10.547583 8 1874 +FAST16 23.906902 10.690723 8 1106 +FAST16 0.315039 10.690723 8 1106 +FAST17 25.384572 10.642322 8 1106 +FAST17 0.319237 10.642322 8 1106 +FAST18 21.935494 10.491283 8 1826 +FAST18 0.255488 10.491283 8 1826 +FAST19 21.349385 10.522182 8 1826 +FAST19 0.311369 10.522182 8 1826 +FAST20 23.124955 10.487431 8 1826 +FAST20 0.317411 10.487431 8 1826 +FAST21 27.311387 10.491047 8 1778 +FAST21 0.398483 10.491047 8 1778 +FAST22 23.993620 10.502191 8 1826 +FAST22 0.329767 10.502191 8 1826 +FAST23 27.793381 10.502191 8 1826 +FAST23 0.359659 10.502191 8 1826 +FAST24 29.281399 10.509461 8 1826 +FAST24 0.398369 10.509461 8 1826 hg-commands: -NODICT 0.000005 2.425291 -RANDOM 0.080469 3.489515 -LEGACY 0.794417 3.911896 -COVER 54.198788 4.131136 8 386 -COVER 2.191729 4.131136 8 386 -FAST15 11.852793 3.903719 6 1106 -FAST15 0.175406 3.903719 6 1106 -FAST16 12.863315 4.005077 8 530 -FAST16 0.158410 4.005077 8 530 -FAST17 11.977917 4.097811 8 818 -FAST17 0.162381 4.097811 8 818 -FAST18 11.749304 4.136081 8 770 -FAST18 0.173242 4.136081 8 770 -FAST19 11.905785 4.166021 8 530 -FAST19 0.186403 4.166021 8 530 -FAST20 13.293999 4.163740 8 482 -FAST20 0.241508 4.163740 8 482 -FAST21 16.623177 4.157057 8 434 -FAST21 0.372647 4.157057 8 434 -FAST22 20.918409 4.158195 8 290 -FAST22 0.570431 4.158195 8 290 -FAST23 21.762805 4.161450 8 434 -FAST23 1.162206 4.161450 8 434 -FAST24 29.133745 4.159658 8 338 -FAST24 3.054376 4.159658 8 338 +NODICT 0.000007 2.425291 +RANDOM 0.083477 3.489515 +LEGACY 0.941867 3.911896 +COVER 67.314295 4.131136 8 386 +COVER 2.757895 4.131136 8 386 +FAST15 13.466983 3.920128 6 1106 +FAST15 0.162656 3.920128 6 1106 +FAST16 12.618110 4.032422 8 674 +FAST16 0.159073 4.032422 8 674 +FAST17 12.883772 4.063581 8 1490 +FAST17 0.183131 4.063581 8 1490 +FAST18 13.904432 4.085034 8 290 +FAST18 0.161078 4.085034 8 290 +FAST19 13.762269 4.097054 8 578 +FAST19 0.179906 4.097054 8 578 +FAST20 15.303927 4.101575 8 434 +FAST20 0.213146 4.101575 8 434 +FAST21 19.619482 4.104879 8 530 +FAST21 0.289158 4.104879 8 530 +FAST22 23.187937 4.102448 8 530 +FAST22 0.335220 4.102448 8 530 +FAST23 24.946655 4.095162 8 914 +FAST23 0.396927 4.095162 8 914 +FAST24 27.634065 4.114624 8 722 +FAST24 0.434278 4.114624 8 722 hg-changelog: -NODICT 0.000006 1.377613 -RANDOM 0.601346 2.096785 -LEGACY 2.544973 2.058273 -COVER 222.639708 2.188654 8 98 -COVER 6.072892 2.188654 8 98 -FAST15 70.394523 2.127194 8 866 -FAST15 0.899766 2.127194 8 866 -FAST16 69.845529 2.145401 8 338 -FAST16 0.881569 2.145401 8 338 -FAST17 69.382431 2.157544 8 194 -FAST17 0.943291 2.157544 8 194 -FAST18 71.348283 2.173127 8 98 -FAST18 1.034765 2.173127 8 98 -FAST19 71.380923 2.179527 8 98 -FAST19 1.254700 2.179527 8 98 -FAST20 72.802714 2.183233 6 98 -FAST20 1.368704 2.183233 6 98 -FAST21 82.042339 2.180920 8 98 -FAST21 2.213864 2.180920 8 98 -FAST22 90.666200 2.184297 8 98 -FAST22 3.590399 2.184297 8 98 -FAST23 108.926377 2.187666 6 98 -FAST23 8.723759 2.187666 6 98 -FAST24 134.296232 2.189889 6 98 -FAST24 19.396532 2.189889 6 98 +NODICT 0.000027 1.377613 +RANDOM 0.676272 2.096785 +LEGACY 2.871887 2.058273 +COVER 226.371004 2.188654 8 98 +COVER 5.359820 2.188654 8 98 +FAST15 66.776425 2.130548 6 386 +FAST15 0.796836 2.130548 6 386 +FAST16 64.405113 2.144136 8 194 +FAST16 0.778969 2.144136 8 194 +FAST17 65.062292 2.155745 8 98 +FAST17 0.822089 2.155745 8 98 +FAST18 65.819104 2.172062 6 98 +FAST18 0.804247 2.172062 6 98 +FAST19 66.184016 2.179446 6 98 +FAST19 0.883526 2.179446 6 98 +FAST20 72.900924 2.187017 6 98 +FAST20 0.908220 2.187017 6 98 +FAST21 77.869945 2.183583 6 146 +FAST21 0.932666 2.183583 6 146 +FAST22 84.041413 2.182030 6 98 +FAST22 1.092310 2.182030 6 98 +FAST23 89.539265 2.185291 8 98 +FAST23 1.294779 2.185291 8 98 +FAST24 97.193482 2.184939 6 98 +FAST24 1.270493 2.184939 6 98 hg-manifest: -NODICT 0.000005 1.866385 -RANDOM 0.982192 2.309485 -LEGACY 9.507729 2.506775 -COVER 922.742066 2.582597 8 434 -COVER 36.500276 2.582597 8 434 -FAST15 163.886717 2.377689 8 1682 -FAST15 2.107328 2.377689 8 1682 -FAST16 152.684592 2.464814 8 1538 -FAST16 2.157789 2.464814 8 1538 -FAST17 154.463459 2.539834 6 1826 -FAST17 2.282455 2.539834 6 1826 -FAST18 155.540044 2.576924 8 1922 -FAST18 2.101807 2.576924 8 1922 -FAST19 152.650343 2.592479 6 290 -FAST19 2.359461 2.592479 6 290 -FAST20 174.623634 2.594551 8 194 -FAST20 2.870022 2.594551 8 194 -FAST21 219.876653 2.597128 6 194 -FAST21 4.386269 2.597128 6 194 -FAST22 247.986803 2.596971 6 386 -FAST22 6.201144 2.596971 6 386 -FAST23 276.051806 2.601416 8 194 -FAST23 11.613477 2.601416 8 194 -FAST24 328.234024 2.602830 6 194 -FAST24 26.710364 2.602830 6 194 +NODICT 0.000004 1.866385 +RANDOM 0.969045 2.309485 +LEGACY 8.849052 2.506775 +COVER 905.855524 2.582597 8 434 +COVER 34.951973 2.582597 8 434 +FAST15 154.816926 2.391764 6 1826 +FAST15 1.932845 2.391764 6 1826 +FAST16 142.197120 2.480738 6 1922 +FAST16 1.759330 2.480738 6 1922 +FAST17 147.276099 2.548313 6 1682 +FAST17 1.819175 2.548313 6 1682 +FAST18 164.543366 2.567448 6 386 +FAST18 2.728845 2.567448 6 386 +FAST19 195.670852 2.581170 8 338 +FAST19 2.439487 2.581170 8 338 +FAST20 195.716408 2.587062 6 194 +FAST20 2.056303 2.587062 6 194 +FAST21 211.483191 2.590136 6 242 +FAST21 2.983587 2.590136 6 242 +FAST22 239.562966 2.591033 6 194 +FAST22 3.355746 2.591033 6 194 +FAST23 264.547195 2.590403 8 434 +FAST23 3.667851 2.590403 8 434 +FAST24 296.258379 2.591723 6 290 +FAST24 3.858688 2.591723 6 290 diff --git a/contrib/experimental_dict_builders/fastCover/fastCover.c b/contrib/experimental_dict_builders/fastCover/fastCover.c index 3c1aa951c..cf71075ab 100644 --- a/contrib/experimental_dict_builders/fastCover/fastCover.c +++ b/contrib/experimental_dict_builders/fastCover/fastCover.c @@ -266,25 +266,17 @@ static void FASTCOVER_ctx_destroy(FASTCOVER_ctx_t *ctx) { * Calculate for frequency of hash value of each dmer in ctx->samples */ static void FASTCOVER_computeFrequency(U32 *freqs, unsigned f, FASTCOVER_ctx_t *ctx){ - /* inCurrSample keeps track of this hash value has already be seen in previous dmers in the same sample*/ - BYTE* inCurrSample = (BYTE *)malloc((1 << f) * sizeof(BYTE)); size_t start; /* start of current dmer */ for (unsigned i = 0; i < ctx->nbTrainSamples; i++) { - memset(inCurrSample, 0, (1 << f) * sizeof(*inCurrSample)); /* Reset inCurrSample for each sample */ size_t currSampleStart = ctx->offsets[i]; size_t currSampleEnd = ctx->offsets[i+1]; start = currSampleStart; while (start + ctx->d <= currSampleEnd) { const size_t dmerIndex = FASTCOVER_hashPtrToIndex(ctx->samples + start, f, ctx->d); - /* if no dmer with same hash value has been seen in current sample */ - if (inCurrSample[dmerIndex] == 0) { - inCurrSample[dmerIndex]++; - freqs[dmerIndex]++; - } + freqs[dmerIndex]++; start++; } } - free(inCurrSample); } /**