]> git.ipfire.org Git - thirdparty/zstd.git/commitdiff
Increment frequency for every dmer occurrence within same sample instead of at most... 1257/head
author: Jennifer Liu <jenniferliu620@fb.com>, Mon, 30 Jul 2018 19:54:22 +0000 (12:54 -0700)
committer: Jennifer Liu <jenniferliu620@fb.com>, Mon, 30 Jul 2018 19:54:22 +0000 (12:54 -0700)
contrib/experimental_dict_builders/benchmarkDictBuilder/README.md
contrib/experimental_dict_builders/fastCover/fastCover.c

index 20fbde954c64e3df5bb1790680c6142efeda3d69..1fdd323c210b9ea0c72c6e0bce0745814ac10e28 100644 (file)
@@ -18,109 +18,109 @@ make ARG="in=../../../lib/dictBuilder in=../../../lib/compress"
 - Fourth column is chosen d and fifth column is chosen k
 
 github:
-NODICT       0.000005       2.999642        
-RANDOM       0.141553       8.786957        
-LEGACY       0.904340       8.989482        
-COVER       53.621302       10.641263        8          1298
-COVER       4.085037       10.641263        8          1298
-FAST15       17.636211       10.586461        8          1778
-FAST15       0.221236       10.586461        8          1778
-FAST16       18.716259       10.492503        6          1778
-FAST16       0.251522       10.492503        6          1778
-FAST17       17.614391       10.611737        8          1778
-FAST17       0.241011       10.611737        8          1778
-FAST18       19.926270       10.621586        8          1778
-FAST18       0.287195       10.621586        8          1778
-FAST19       19.626808       10.629626        8          1778
-FAST19       0.340191       10.629626        8          1778
-FAST20       18.918657       10.610308        8          1778
-FAST20       0.463307       10.610308        8          1778
-FAST21       20.502362       10.625733        8          1778
-FAST21       0.638202       10.625733        8          1778
-FAST22       22.702695       10.625281        8          1778
-FAST22       1.353399       10.625281        8          1778
-FAST23       28.041990       10.602342        8          1778
-FAST23       3.029502       10.602342        8          1778
-FAST24       35.662961       10.603379        8          1778
-FAST24       6.524258       10.603379        8          1778
+NODICT       0.000004       2.999642        
+RANDOM       0.161907       8.786957        
+LEGACY       0.960128       8.989482        
+COVER       69.031037       10.641263        8          1298
+COVER       7.017782       10.641263        8          1298
+FAST15       24.710713       10.547583        8          1874
+FAST15       0.271657       10.547583        8          1874
+FAST16       23.906902       10.690723        8          1106
+FAST16       0.315039       10.690723        8          1106
+FAST17       25.384572       10.642322        8          1106
+FAST17       0.319237       10.642322        8          1106
+FAST18       21.935494       10.491283        8          1826
+FAST18       0.255488       10.491283        8          1826
+FAST19       21.349385       10.522182        8          1826
+FAST19       0.311369       10.522182        8          1826
+FAST20       23.124955       10.487431        8          1826
+FAST20       0.317411       10.487431        8          1826
+FAST21       27.311387       10.491047        8          1778
+FAST21       0.398483       10.491047        8          1778
+FAST22       23.993620       10.502191        8          1826
+FAST22       0.329767       10.502191        8          1826
+FAST23       27.793381       10.502191        8          1826
+FAST23       0.359659       10.502191        8          1826
+FAST24       29.281399       10.509461        8          1826
+FAST24       0.398369       10.509461        8          1826
 
 hg-commands:
-NODICT       0.000005       2.425291        
-RANDOM       0.080469       3.489515        
-LEGACY       0.794417       3.911896        
-COVER       54.198788       4.131136        8          386
-COVER       2.191729       4.131136        8          386
-FAST15       11.852793       3.903719        6          1106
-FAST15       0.175406       3.903719        6          1106
-FAST16       12.863315       4.005077        8          530
-FAST16       0.158410       4.005077        8          530
-FAST17       11.977917       4.097811        8          818
-FAST17       0.162381       4.097811        8          818
-FAST18       11.749304       4.136081        8          770
-FAST18       0.173242       4.136081        8          770
-FAST19       11.905785       4.166021        8          530
-FAST19       0.186403       4.166021        8          530
-FAST20       13.293999       4.163740        8          482
-FAST20       0.241508       4.163740        8          482
-FAST21       16.623177       4.157057        8          434
-FAST21       0.372647       4.157057        8          434
-FAST22       20.918409       4.158195        8          290
-FAST22       0.570431       4.158195        8          290
-FAST23       21.762805       4.161450        8          434
-FAST23       1.162206       4.161450        8          434
-FAST24       29.133745       4.159658        8          338
-FAST24       3.054376       4.159658        8          338
+NODICT       0.000007       2.425291        
+RANDOM       0.083477       3.489515        
+LEGACY       0.941867       3.911896        
+COVER       67.314295       4.131136        8          386
+COVER       2.757895       4.131136        8          386
+FAST15       13.466983       3.920128        6          1106
+FAST15       0.162656       3.920128        6          1106
+FAST16       12.618110       4.032422        8          674
+FAST16       0.159073       4.032422        8          674
+FAST17       12.883772       4.063581        8          1490
+FAST17       0.183131       4.063581        8          1490
+FAST18       13.904432       4.085034        8          290
+FAST18       0.161078       4.085034        8          290
+FAST19       13.762269       4.097054        8          578
+FAST19       0.179906       4.097054        8          578
+FAST20       15.303927       4.101575        8          434
+FAST20       0.213146       4.101575        8          434
+FAST21       19.619482       4.104879        8          530
+FAST21       0.289158       4.104879        8          530
+FAST22       23.187937       4.102448        8          530
+FAST22       0.335220       4.102448        8          530
+FAST23       24.946655       4.095162        8          914
+FAST23       0.396927       4.095162        8          914
+FAST24       27.634065       4.114624        8          722
+FAST24       0.434278       4.114624        8          722
 
 hg-changelog:
-NODICT       0.000006       1.377613        
-RANDOM       0.601346       2.096785        
-LEGACY       2.544973       2.058273        
-COVER       222.639708       2.188654        8          98
-COVER       6.072892       2.188654        8          98
-FAST15       70.394523       2.127194        8          866
-FAST15       0.899766       2.127194        8          866
-FAST16       69.845529       2.145401        8          338
-FAST16       0.881569       2.145401        8          338
-FAST17       69.382431       2.157544        8          194
-FAST17       0.943291       2.157544        8          194
-FAST18       71.348283       2.173127        8          98
-FAST18       1.034765       2.173127        8          98
-FAST19       71.380923       2.179527        8          98
-FAST19       1.254700       2.179527        8          98
-FAST20       72.802714       2.183233        6          98
-FAST20       1.368704       2.183233        6          98
-FAST21       82.042339       2.180920        8          98
-FAST21       2.213864       2.180920        8          98
-FAST22       90.666200       2.184297        8          98
-FAST22       3.590399       2.184297        8          98
-FAST23       108.926377       2.187666        6          98
-FAST23       8.723759       2.187666        6          98
-FAST24       134.296232       2.189889        6          98
-FAST24       19.396532       2.189889        6          98
+NODICT       0.000027       1.377613        
+RANDOM       0.676272       2.096785        
+LEGACY       2.871887       2.058273        
+COVER       226.371004       2.188654        8          98
+COVER       5.359820       2.188654        8          98
+FAST15       66.776425       2.130548        6          386
+FAST15       0.796836       2.130548        6          386
+FAST16       64.405113       2.144136        8          194
+FAST16       0.778969       2.144136        8          194
+FAST17       65.062292       2.155745        8          98
+FAST17       0.822089       2.155745        8          98
+FAST18       65.819104       2.172062        6          98
+FAST18       0.804247       2.172062        6          98
+FAST19       66.184016       2.179446        6          98
+FAST19       0.883526       2.179446        6          98
+FAST20       72.900924       2.187017        6          98
+FAST20       0.908220       2.187017        6          98
+FAST21       77.869945       2.183583        6          146
+FAST21       0.932666       2.183583        6          146
+FAST22       84.041413       2.182030        6          98
+FAST22       1.092310       2.182030        6          98
+FAST23       89.539265       2.185291        8          98
+FAST23       1.294779       2.185291        8          98
+FAST24       97.193482       2.184939        6          98
+FAST24       1.270493       2.184939        6          98
 
 hg-manifest:
-NODICT       0.000005       1.866385        
-RANDOM       0.982192       2.309485        
-LEGACY       9.507729       2.506775        
-COVER       922.742066       2.582597        8          434
-COVER       36.500276       2.582597        8          434
-FAST15       163.886717       2.377689        8          1682
-FAST15       2.107328       2.377689        8          1682
-FAST16       152.684592       2.464814        8          1538
-FAST16       2.157789       2.464814        8          1538
-FAST17       154.463459       2.539834        6          1826
-FAST17       2.282455       2.539834        6          1826
-FAST18       155.540044       2.576924        8          1922
-FAST18       2.101807       2.576924        8          1922
-FAST19       152.650343       2.592479        6          290
-FAST19       2.359461       2.592479        6          290
-FAST20       174.623634       2.594551        8          194
-FAST20       2.870022       2.594551        8          194
-FAST21       219.876653       2.597128        6          194
-FAST21       4.386269       2.597128        6          194
-FAST22       247.986803       2.596971        6          386
-FAST22       6.201144       2.596971        6          386
-FAST23       276.051806       2.601416        8          194
-FAST23       11.613477       2.601416        8          194
-FAST24       328.234024       2.602830        6          194
-FAST24       26.710364       2.602830        6          194
+NODICT       0.000004       1.866385        
+RANDOM       0.969045       2.309485        
+LEGACY       8.849052       2.506775        
+COVER       905.855524       2.582597        8          434
+COVER       34.951973       2.582597        8          434
+FAST15       154.816926       2.391764        6          1826
+FAST15       1.932845       2.391764        6          1826
+FAST16       142.197120       2.480738        6          1922
+FAST16       1.759330       2.480738        6          1922
+FAST17       147.276099       2.548313        6          1682
+FAST17       1.819175       2.548313        6          1682
+FAST18       164.543366       2.567448        6          386
+FAST18       2.728845       2.567448        6          386
+FAST19       195.670852       2.581170        8          338
+FAST19       2.439487       2.581170        8          338
+FAST20       195.716408       2.587062        6          194
+FAST20       2.056303       2.587062        6          194
+FAST21       211.483191       2.590136        6          242
+FAST21       2.983587       2.590136        6          242
+FAST22       239.562966       2.591033        6          194
+FAST22       3.355746       2.591033        6          194
+FAST23       264.547195       2.590403        8          434
+FAST23       3.667851       2.590403        8          434
+FAST24       296.258379       2.591723        6          290
+FAST24       3.858688       2.591723        6          290
index 3c1aa951cf74f612e425aeb40a9554c67dff7e1d..cf71075ab64401fff38c694e3dc49d9d52139f84 100644 (file)
@@ -266,25 +266,17 @@ static void FASTCOVER_ctx_destroy(FASTCOVER_ctx_t *ctx) {
  * Calculate for frequency of hash value of each dmer in ctx->samples
  */
 static void FASTCOVER_computeFrequency(U32 *freqs, unsigned f, FASTCOVER_ctx_t *ctx){
-  /* inCurrSample keeps track of this hash value has already be seen in previous dmers in the same sample*/
-  BYTE* inCurrSample = (BYTE *)malloc((1 << f) * sizeof(BYTE));
   size_t start; /* start of current dmer */
   for (unsigned i = 0; i < ctx->nbTrainSamples; i++) {
-    memset(inCurrSample, 0, (1 << f) * sizeof(*inCurrSample)); /* Reset inCurrSample for each sample */
     size_t currSampleStart = ctx->offsets[i];
     size_t currSampleEnd = ctx->offsets[i+1];
     start = currSampleStart;
     while (start + ctx->d <= currSampleEnd) {
       const size_t dmerIndex = FASTCOVER_hashPtrToIndex(ctx->samples + start, f, ctx->d);
-      /* if no dmer with same hash value has been seen in current sample */
-      if (inCurrSample[dmerIndex] == 0) {
-        inCurrSample[dmerIndex]++;
-        freqs[dmerIndex]++;
-      }
+      freqs[dmerIndex]++;
       start++;
     }
   }
-  free(inCurrSample);
 }
 
 /**