git.ipfire.org Git - thirdparty/zstd.git/commitdiff
Initial benchmarking result for fastCover
author Jennifer Liu <jenniferliu620@fb.com>
Thu, 26 Jul 2018 00:05:54 +0000 (17:05 -0700)
committer Jennifer Liu <jenniferliu620@fb.com>
Thu, 26 Jul 2018 00:05:54 +0000 (17:05 -0700)
contrib/experimental_dict_builders/benchmarkDictBuilder/Makefile
contrib/experimental_dict_builders/benchmarkDictBuilder/README.md
contrib/experimental_dict_builders/benchmarkDictBuilder/benchmark.c

index 72ce04f2a56bd2f4854b0290322b1c1084f53ce1..681494888d97c837407130c200e69a59f32bfdc5 100644 (file)
@@ -2,9 +2,10 @@ ARG :=
 
 CC ?= gcc
 CFLAGS ?= -O3
-INCLUDES := -I ../randomDictBuilder -I ../../../programs -I ../../../lib/common -I ../../../lib -I ../../../lib/dictBuilder
+INCLUDES := -I ../randomDictBuilder -I ../fastCover -I ../../../programs -I ../../../lib/common -I ../../../lib -I ../../../lib/dictBuilder
 
 RANDOM_FILE := ../randomDictBuilder/random.c
+FAST_FILE := ../fastCover/fastCover.c
 IO_FILE := ../randomDictBuilder/io.c
 
 all: run clean
@@ -21,8 +22,8 @@ test: benchmarkTest clean
 benchmarkTest: benchmark test.sh
        sh test.sh
 
-benchmark: benchmark.o io.o random.o libzstd.a
-       $(CC) $(CFLAGS) benchmark.o io.o random.o libzstd.a -o benchmark
+benchmark: benchmark.o io.o random.o fastCover.o libzstd.a
+       $(CC) $(CFLAGS) benchmark.o io.o random.o fastCover.o libzstd.a -o benchmark
 
 benchmark.o: benchmark.c
        $(CC) $(CFLAGS) $(INCLUDES) -c benchmark.c
@@ -30,6 +31,9 @@ benchmark.o: benchmark.c
 random.o: $(RANDOM_FILE)
        $(CC) $(CFLAGS) $(INCLUDES) -c $(RANDOM_FILE)
 
+fastCover.o: $(FAST_FILE)
+       $(CC) $(CFLAGS) $(INCLUDES) -c $(FAST_FILE)
+
 io.o: $(IO_FILE)
        $(CC) $(CFLAGS) $(INCLUDES) -c $(IO_FILE)
 
index de783a0ecdaf044db8e3a363f3beaa545b43577c..e02d592c454fa208f197df5a6104792019baf5bd 100644 (file)
@@ -18,30 +18,34 @@ github:
 | Algorithm     | Speed(sec)    | Compression Ratio  |
 | ------------- |:-------------:| ------------------:|
 | nodict        | 0.000004      |  2.999642          |
-| random        | 0.180238      |  8.786957          |
-| cover         | 33.891987     |  10.430999         |
-| legacy        | 1.077569      |  8.989482          |
+| random        | 0.135459      |  8.786957          |
+| cover         | 50.341079     |  10.641263         |
+| legacy        | 0.866283      |  8.989482          |
+| fastCover     | 13.450947     |  10.215174         |
 
 hg-commands
 | Algorithm     | Speed(sec)    | Compression Ratio  |
 | ------------- |:-------------:| ------------------:|
-| nodict        | 0.000006      |  2.425291          |
-| random        | 0.088735      |  3.489515          |
-| cover         | 35.447300     |  4.030274          |
-| legacy        | 1.048509      |  3.911896          |
+| nodict        | 0.000020      |  2.425291          |
+| random        | 0.088828      |  3.489515          |
+| cover         | 60.028672     |  4.131136          |
+| legacy        | 0.852481      |  3.911896          |
+| fastCover     | 9.524284      |  3.977229          |
 
-hg-manifest
+hg-changelog
 | Algorithm     | Speed(sec)    | Compression Ratio  |
 | ------------- |:-------------:| ------------------:|
-| nodict        | 0.000005      |  1.866385          |
-| random        | 1.148231      |  2.309485          |
-| cover         | 509.685257    |  2.575331          |
-| legacy        | 10.705866     |  2.506775          |
+| nodict        | 0.000004      |  1.377613          |
+| random        | 0.621812      |  2.096785          |
+| cover         | 217.510962    |  2.188654          |
+| legacy        | 2.559194      |  2.058273          |
+| fastCover     | 51.132516     |  2.124185          |
 
-hg-changelog
+hg-manifest
 | Algorithm     | Speed(sec)    | Compression Ratio  |
 | ------------- |:-------------:| ------------------:|
-| nodict        | 0.000005      |  1.377613          |
-| random        | 0.706434      |  2.096785          |
-| cover         | 122.815783    |  2.175706          |
-| legacy        | 3.010318      |  2.058273          |
+| nodict        | 0.000005      |  1.866385          |
+| random        | 1.035220      |  2.309485          |
+| cover         | 930.480173    |  2.582597          |
+| legacy        | 8.916513      |  2.506775          |
+| fastCover     | 116.871089    |  2.525689          |
index 640419649a3170d2e7d5b7bd406c2fa8a537e1df..865ecb34d671b230804956fb629dae933d58f9e0 100644 (file)
@@ -5,6 +5,7 @@
 #include <ctype.h>
 #include <time.h>
 #include "random.h"
+#include "fastCover.h"
 #include "dictBuilder.h"
 #include "zstd_internal.h" /* includes zstd.h */
 #include "io.h"
@@ -71,10 +72,11 @@ typedef struct {
  */
 dictInfo* createDictFromFiles(sampleInfo *info, unsigned maxDictSize,
                   ZDICT_random_params_t *randomParams, ZDICT_cover_params_t *coverParams,
-                  ZDICT_legacy_params_t *legacyParams) {
+                  ZDICT_legacy_params_t *legacyParams, ZDICT_fastCover_params_t *fastParams) {
     unsigned const displayLevel = randomParams ? randomParams->zParams.notificationLevel :
                                   coverParams ? coverParams->zParams.notificationLevel :
                                   legacyParams ? legacyParams->zParams.notificationLevel :
+                                  fastParams ? fastParams->zParams.notificationLevel :
                                   DEFAULT_DISPLAYLEVEL;   /* no dict */
     void* const dictBuffer = malloc(maxDictSize);
 
@@ -94,6 +96,9 @@ dictInfo* createDictFromFiles(sampleInfo *info, unsigned maxDictSize,
         } else if(legacyParams) {
           dictSize = ZDICT_trainFromBuffer_legacy(dictBuffer, maxDictSize, info->srcBuffer,
                                                info->samplesSizes, info->nbSamples, *legacyParams);
+        } else if(fastParams) {
+          dictSize = ZDICT_optimizeTrainFromBuffer_fastCover(dictBuffer, maxDictSize, info->srcBuffer,
+                                                info->samplesSizes, info->nbSamples, fastParams);
         } else {
           dictSize = 0;
         }
@@ -216,25 +221,29 @@ void freeDictInfo(dictInfo* info) {
  *  @return 0 if benchmark successfully, 1 otherwise
  */
 int benchmarkDictBuilder(sampleInfo *srcInfo, unsigned maxDictSize, ZDICT_random_params_t *randomParam,
-                        ZDICT_cover_params_t *coverParam, ZDICT_legacy_params_t *legacyParam) {
+                        ZDICT_cover_params_t *coverParam, ZDICT_legacy_params_t *legacyParam,
+                        ZDICT_fastCover_params_t *fastParam) {
   /* Local variables */
   const unsigned displayLevel = randomParam ? randomParam->zParams.notificationLevel :
                                 coverParam ? coverParam->zParams.notificationLevel :
                                 legacyParam ? legacyParam->zParams.notificationLevel :
+                                fastParam ? fastParam->zParams.notificationLevel:
                                 DEFAULT_DISPLAYLEVEL;   /* no dict */
   const char* name = randomParam ? "RANDOM" :
                     coverParam ? "COVER" :
                     legacyParam ? "LEGACY" :
+                    fastParam ? "FAST":
                     "NODICT";    /* no dict */
   const unsigned cLevel = randomParam ? randomParam->zParams.compressionLevel :
                           coverParam ? coverParam->zParams.compressionLevel :
                           legacyParam ? legacyParam->zParams.compressionLevel :
+                          fastParam ? fastParam->zParams.compressionLevel:
                           DEFAULT_CLEVEL;   /* no dict */
   int result = 0;
 
   /* Calculate speed */
   const UTIL_time_t begin = UTIL_getTime();
-  dictInfo* dInfo = createDictFromFiles(srcInfo, maxDictSize, randomParam, coverParam, legacyParam);
+  dictInfo* dInfo = createDictFromFiles(srcInfo, maxDictSize, randomParam, coverParam, legacyParam, fastParam);
   const U64 timeMicro = UTIL_clockSpanMicro(begin);
   const double timeSec = timeMicro / (double)SEC_TO_MICRO;
   if (!dInfo) {
@@ -269,7 +278,6 @@ int main(int argCount, const char* argv[])
 
   /* Initialize arguments to default values */
   const unsigned k = 200;
-  const unsigned d = 6;
   const unsigned cLevel = DEFAULT_CLEVEL;
   const unsigned dictID = 0;
   const unsigned maxDictSize = g_defaultMaxDictSize;
@@ -319,7 +327,7 @@ int main(int argCount, const char* argv[])
 
   /* with no dict */
   {
-    const int noDictResult = benchmarkDictBuilder(srcInfo, maxDictSize, NULL, NULL, NULL);
+    const int noDictResult = benchmarkDictBuilder(srcInfo, maxDictSize, NULL, NULL, NULL, NULL);
     if(noDictResult) {
       result = 1;
       goto _cleanup;
@@ -331,7 +339,7 @@ int main(int argCount, const char* argv[])
     ZDICT_random_params_t randomParam;
     randomParam.zParams = zParams;
     randomParam.k = k;
-    const int randomResult = benchmarkDictBuilder(srcInfo, maxDictSize, &randomParam, NULL, NULL);
+    const int randomResult = benchmarkDictBuilder(srcInfo, maxDictSize, &randomParam, NULL, NULL, NULL);
     if(randomResult) {
       result = 1;
       goto _cleanup;
@@ -344,10 +352,9 @@ int main(int argCount, const char* argv[])
     memset(&coverParam, 0, sizeof(coverParam));
     coverParam.zParams = zParams;
     coverParam.splitPoint = 1.0;
-    coverParam.d = d;
     coverParam.steps = 40;
     coverParam.nbThreads = 1;
-    const int coverOptResult = benchmarkDictBuilder(srcInfo, maxDictSize, NULL, &coverParam, NULL);
+    const int coverOptResult = benchmarkDictBuilder(srcInfo, maxDictSize, NULL, &coverParam, NULL, NULL);
     if(coverOptResult) {
       result = 1;
       goto _cleanup;
@@ -359,13 +366,30 @@ int main(int argCount, const char* argv[])
     ZDICT_legacy_params_t legacyParam;
     legacyParam.zParams = zParams;
     legacyParam.selectivityLevel = 9;
-    const int legacyResult = benchmarkDictBuilder(srcInfo, maxDictSize, NULL, NULL, &legacyParam);
+    const int legacyResult = benchmarkDictBuilder(srcInfo, maxDictSize, NULL, NULL, &legacyParam, NULL);
     if(legacyResult) {
       result = 1;
       goto _cleanup;
     }
   }
 
+  /* for fastCover */
+  {
+    ZDICT_fastCover_params_t fastParam;
+    memset(&fastParam, 0, sizeof(fastParam));
+    fastParam.zParams = zParams;
+    fastParam.splitPoint = 1.0;
+    fastParam.d = 8;
+    fastParam.f = 23;
+    fastParam.steps = 40;
+    fastParam.nbThreads = 1;
+    const int fastOptResult = benchmarkDictBuilder(srcInfo, maxDictSize, NULL, NULL, NULL, &fastParam);
+    if(fastOptResult) {
+      result = 1;
+      goto _cleanup;
+    }
+  }
+
   /* Free allocated memory */
 _cleanup:
   UTIL_freeFileList(extendedFileList, fileNamesBuf);