]> git.ipfire.org Git - thirdparty/zstd.git/commitdiff
added some tests for correctness, time, and compression ratio
authorPaul Cruz <paulcruz74@fb.com>
Thu, 13 Jul 2017 20:50:23 +0000 (13:50 -0700)
committerPaul Cruz <paulcruz74@fb.com>
Thu, 13 Jul 2017 20:50:23 +0000 (13:50 -0700)
contrib/adaptive-compression/Makefile
contrib/adaptive-compression/datagencli.c [new file with mode: 0644]
contrib/adaptive-compression/test-correctness.sh [new file with mode: 0755]
contrib/adaptive-compression/test-performance.sh [new file with mode: 0755]

index ed1a55ad453309290c318204e278e3487db65134..f2059a193b1056aa71de4be8d3b8cfabfc42624f 100644 (file)
@@ -19,13 +19,24 @@ CFLAGS   += $(DEBUGFLAGS)
 CFLAGS   += $(MOREFLAGS)
 FLAGS     = $(CPPFLAGS) $(CFLAGS) $(LDFLAGS)
 
-all: adapt
+all: adapt datagen
 
 adapt: $(ZSTD_FILES) adapt.c
        $(CC) $(FLAGS) $^ -o $@
 
+datagen : $(PRGDIR)/datagen.c datagencli.c
+       $(CC)      $(FLAGS) $^ -o $@$(EXT)
+
+# The test targets run scripts and never create a file named after themselves:
+# declare them phony so that a stray file with the same name cannot disable them.
+.PHONY: test-adapt-correctness test-adapt-performance
+
+# Builds datagen and adapt, then runs the round-trip script.
+test-adapt-correctness: datagen adapt
+       @./test-correctness.sh
+       @echo "test correctness complete"
+
+# Builds datagen and adapt, then runs the timing / compressed-size script.
+test-adapt-performance: datagen adapt
+       @./test-performance.sh
+       @echo "test performance complete"
+
 clean:
-       @$(RM) -f adapt
+       @$(RM) -f adapt datagen
        @$(RM) -rf *.dSYM
        @$(RM) -f tmp*
        @$(RM) -f tests/*.zst
diff --git a/contrib/adaptive-compression/datagencli.c b/contrib/adaptive-compression/datagencli.c
new file mode 100644 (file)
index 0000000..8a81939
--- /dev/null
@@ -0,0 +1,129 @@
+/**
+ * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree. An additional grant
+ * of patent rights can be found in the PATENTS file in the same directory.
+ */
+
+
+/*-************************************
+*  Dependencies
+**************************************/
+#include "util.h"      /* Compiler options */
+#include <stdio.h>     /* fprintf, stderr */
+#include "datagen.h"   /* RDG_generate */
+
+
+/*-************************************
+*  Constants
+**************************************/
+#define KB *(1 <<10)
+#define MB *(1 <<20)
+#define GB *(1U<<30)
+
+#define SIZE_DEFAULT ((64 KB) + 1)
+#define SEED_DEFAULT 0
+#define COMPRESSIBILITY_DEFAULT 50
+
+
+/*-************************************
+*  Macros
+**************************************/
+#define DISPLAY(...)         fprintf(stderr, __VA_ARGS__)
+#define DISPLAYLEVEL(l, ...) if (displayLevel>=l) { DISPLAY(__VA_ARGS__); }
+static unsigned displayLevel = 2;
+
+
+/*-*******************************************************
+*  Command line
+*********************************************************/
+/* usage() :
+ * Prints the help screen on stderr (through DISPLAY).
+ * `programName` is inserted in the synopsis line.
+ * @return : always 0. */
+static int usage(const char* programName)
+{
+    /* Adjacent string literals are concatenated by the compiler :
+     * the whole screen is emitted with a single formatted call. */
+    DISPLAY( "Compressible data generator\n"
+             "Usage :\n"
+             "      %s [args]\n"
+             "\n"
+             "Arguments :\n"
+             " -g#    : generate # data (default:%i)\n"
+             " -s#    : Select seed (default:%i)\n"
+             " -P#    : Select compressibility in %% (default:%i%%)\n"
+             " -h     : display help and exit\n",
+             programName,
+             SIZE_DEFAULT,
+             SEED_DEFAULT,
+             COMPRESSIBILITY_DEFAULT);
+    return 0;
+}
+
+
+/* readDecimal() :
+ * Parses the run of decimal digits found at *cursor and moves *cursor just
+ * past it. An empty run yields 0.
+ * Overflow is not detected : the value silently wraps (unsigned arithmetic). */
+static U64 readDecimal(const char** cursor)
+{
+    const char* p = *cursor;
+    U64 value = 0;
+    while ((*p>='0') && (*p<='9')) {
+        value = (value * 10) + (U64)(*p - '0');
+        p++;
+    }
+    *cursor = p;
+    return value;
+}
+
+
+/* main() :
+ * Parses the command line, then hands the request to RDG_genStdout().
+ *
+ * Recognized options (several can be aggregated behind one '-', e.g. -g10MBs42) :
+ *   -g# : amount of data to generate ; accepts the suffixes K, M, G,
+ *         each optionally followed by B
+ *   -s# : seed
+ *   -P# : compressibility, in percent, capped at 100
+ *   -L# : (hidden) literal distribution probability, in percent, capped at 100
+ *   -v  : verbose, raises displayLevel to 4
+ *   -h  : print help and exit
+ * Arguments which do not start with '-' are ignored.
+ *
+ * @return : 0 on success and after -h ;
+ *           1 when an unknown option letter is met (help is printed first),
+ *           so that calling scripts can detect a mistyped command line. */
+int main(int argc, const char** argv)
+{
+    unsigned probaU32 = COMPRESSIBILITY_DEFAULT;
+    double litProba = 0.0;
+    U64 size = SIZE_DEFAULT;
+    U32 seed = SEED_DEFAULT;
+    const char* const programName = argv[0];
+
+    int argNb;
+    for (argNb=1; argNb<argc; argNb++) {
+        const char* argument = argv[argNb];
+
+        if (!argument) continue;          /* Protection against a NULL entry */
+        if (*argument!='-') continue;     /* Only options are interpreted */
+        argument++;
+
+        /* Each case must advance `argument`, otherwise this loop never ends */
+        while (*argument!=0) {
+            switch(*argument)
+            {
+            case 'h':
+                return usage(programName);
+            case 'g':
+                argument++;
+                size = readDecimal(&argument);
+                if (*argument=='K') { size <<= 10; argument++; }
+                if (*argument=='M') { size <<= 20; argument++; }
+                if (*argument=='G') { size <<= 30; argument++; }
+                if (*argument=='B') { argument++; }
+                break;
+            case 's':
+                argument++;
+                seed = (U32)readDecimal(&argument);
+                break;
+            case 'P':
+                argument++;
+                probaU32 = (unsigned)readDecimal(&argument);
+                if (probaU32>100) probaU32 = 100;
+                break;
+            case 'L':   /* hidden argument : Literal distribution probability */
+                argument++;
+                litProba = 0.;
+                /* accumulated as a double : cannot wrap, hence parsed inline */
+                while ((*argument>='0') && (*argument<='9')) {
+                    litProba = (litProba * 10) + (*argument - '0');
+                    argument++;
+                }
+                if (litProba>100.) litProba = 100.;
+                litProba /= 100.;
+                break;
+            case 'v':
+                displayLevel = 4;
+                argument++;
+                break;
+            default:
+                /* unknown option : show help, but report the failure */
+                usage(programName);
+                return 1;
+            }
+        }
+    }
+
+    DISPLAYLEVEL(4, "Compressible data Generator \n");
+    if (probaU32!=COMPRESSIBILITY_DEFAULT) {
+        /* probaU32 is unsigned : %u, not %i */
+        DISPLAYLEVEL(3, "Compressibility : %u%%\n", probaU32);
+    }
+    DISPLAYLEVEL(3, "Seed = %u \n", seed);
+
+    /* compressibility is passed as a ratio in [0, 1] */
+    RDG_genStdout(size, (double)probaU32/100, litProba, seed);
+    DISPLAYLEVEL(1, "\n");
+
+    return 0;
+}
diff --git a/contrib/adaptive-compression/test-correctness.sh b/contrib/adaptive-compression/test-correctness.sh
new file mode 100755 (executable)
index 0000000..6277faf
--- /dev/null
@@ -0,0 +1,205 @@
+echo "correctness tests -- general"
+./datagen -g1GB > tmp
+./adapt -otmp.zst tmp
+zstd -d tmp.zst -o tmp2
+diff -q tmp tmp2
+rm tmp*
+
+./datagen -g500MB > tmp
+./adapt -otmp.zst tmp
+zstd -d tmp.zst -o tmp2
+diff -q tmp tmp2
+rm tmp*
+
+./datagen -g250MB > tmp
+./adapt -otmp.zst tmp
+zstd -d tmp.zst -o tmp2
+diff -q tmp tmp2
+rm tmp*
+
+./datagen -g125MB > tmp
+./adapt -otmp.zst tmp
+zstd -d tmp.zst -o tmp2
+diff -q tmp tmp2
+rm tmp*
+
+./datagen -g50MB > tmp
+./adapt -otmp.zst tmp
+zstd -d tmp.zst -o tmp2
+diff -q tmp tmp2
+rm tmp*
+
+./datagen -g25MB > tmp
+./adapt -otmp.zst tmp
+zstd -d tmp.zst -o tmp2
+diff -q tmp tmp2
+rm tmp*
+
+./datagen -g10MB > tmp
+./adapt -otmp.zst tmp
+zstd -d tmp.zst -o tmp2
+diff -q tmp tmp2
+rm tmp*
+
+./datagen -g5MB > tmp
+./adapt -otmp.zst tmp
+zstd -d tmp.zst -o tmp2
+diff -q tmp tmp2
+rm tmp*
+
+./datagen -g500KB > tmp
+./adapt -otmp.zst tmp
+zstd -d tmp.zst -o tmp2
+diff -q tmp tmp2
+rm tmp*
+
+echo -e "\ncorrectness tests -- streaming"
+./datagen -g1GB > tmp
+cat tmp | ./adapt > tmp.zst
+zstd -d tmp.zst -o tmp2
+diff -q tmp tmp2
+rm tmp*
+
+./datagen -g100MB > tmp
+cat tmp | ./adapt > tmp.zst
+zstd -d tmp.zst -o tmp2
+diff -q tmp tmp2
+rm tmp*
+
+./datagen -g10MB > tmp
+cat tmp | ./adapt > tmp.zst
+zstd -d tmp.zst -o tmp2
+diff -q tmp tmp2
+rm tmp*
+
+./datagen -g1MB > tmp
+cat tmp | ./adapt > tmp.zst
+zstd -d tmp.zst -o tmp2
+diff -q tmp tmp2
+rm tmp*
+
+./datagen -g100KB > tmp
+cat tmp | ./adapt > tmp.zst
+zstd -d tmp.zst -o tmp2
+diff -q tmp tmp2
+rm tmp*
+
+./datagen -g10KB > tmp
+cat tmp | ./adapt > tmp.zst
+zstd -d tmp.zst -o tmp2
+diff -q tmp tmp2
+rm tmp*
+
+echo -e "\ncorrectness tests -- read limit"
+./datagen -g1GB > tmp
+pv -L 50m -q tmp | ./adapt > tmp.zst
+zstd -d tmp.zst -o tmp2
+diff -q tmp tmp2
+rm tmp*
+
+./datagen -g100MB > tmp
+pv -L 50m -q tmp | ./adapt > tmp.zst
+zstd -d tmp.zst -o tmp2
+diff -q tmp tmp2
+rm tmp*
+
+./datagen -g10MB > tmp
+pv -L 50m -q tmp | ./adapt > tmp.zst
+zstd -d tmp.zst -o tmp2
+diff -q tmp tmp2
+rm tmp*
+
+./datagen -g1MB > tmp
+pv -L 50m -q tmp | ./adapt > tmp.zst
+zstd -d tmp.zst -o tmp2
+diff -q tmp tmp2
+rm tmp*
+
+./datagen -g100KB > tmp
+pv -L 50m -q tmp | ./adapt > tmp.zst
+zstd -d tmp.zst -o tmp2
+diff -q tmp tmp2
+rm tmp*
+
+./datagen -g10KB > tmp
+pv -L 50m -q tmp | ./adapt > tmp.zst
+zstd -d tmp.zst -o tmp2
+diff -q tmp tmp2
+rm tmp*
+
+echo -e "\ncorrectness tests -- write limit"
+./datagen -g1GB > tmp
+pv -q tmp | ./adapt | pv -L 5m -q > tmp.zst
+zstd -d tmp.zst -o tmp2
+diff -q tmp tmp2
+rm tmp*
+
+./datagen -g100MB > tmp
+pv -q tmp | ./adapt | pv -L 5m -q > tmp.zst
+zstd -d tmp.zst -o tmp2
+diff -q tmp tmp2
+rm tmp*
+
+./datagen -g10MB > tmp
+pv -q tmp | ./adapt | pv -L 5m -q > tmp.zst
+zstd -d tmp.zst -o tmp2
+diff -q tmp tmp2
+rm tmp*
+
+./datagen -g1MB > tmp
+pv -q tmp | ./adapt | pv -L 5m -q > tmp.zst
+zstd -d tmp.zst -o tmp2
+diff -q tmp tmp2
+rm tmp*
+
+./datagen -g100KB > tmp
+pv -q tmp | ./adapt | pv -L 5m -q > tmp.zst
+zstd -d tmp.zst -o tmp2
+diff -q tmp tmp2
+rm tmp*
+
+./datagen -g10KB > tmp
+pv -q tmp | ./adapt | pv -L 5m -q > tmp.zst
+zstd -d tmp.zst -o tmp2
+diff -q tmp tmp2
+rm tmp*
+
+echo -e "\ncorrectness tests -- read and write limits"
+./datagen -g1GB > tmp
+pv -L 50m -q tmp | ./adapt | pv -L 5m -q > tmp.zst
+zstd -d tmp.zst -o tmp2
+diff -q tmp tmp2
+rm tmp*
+
+./datagen -g100MB > tmp
+pv -L 50m -q tmp | ./adapt | pv -L 5m -q > tmp.zst
+zstd -d tmp.zst -o tmp2
+diff -q tmp tmp2
+rm tmp*
+
+./datagen -g10MB > tmp
+pv -L 50m -q tmp | ./adapt | pv -L 5m -q > tmp.zst
+zstd -d tmp.zst -o tmp2
+diff -q tmp tmp2
+rm tmp*
+
+./datagen -g1MB > tmp
+pv -L 50m -q tmp | ./adapt | pv -L 5m -q > tmp.zst
+zstd -d tmp.zst -o tmp2
+diff -q tmp tmp2
+rm tmp*
+
+./datagen -g100KB > tmp
+pv -L 50m -q tmp | ./adapt | pv -L 5m -q > tmp.zst
+zstd -d tmp.zst -o tmp2
+diff -q tmp tmp2
+rm tmp*
+
+./datagen -g10KB > tmp
+pv -L 50m -q tmp | ./adapt | pv -L 5m -q > tmp.zst
+zstd -d tmp.zst -o tmp2
+diff -q tmp tmp2
+rm tmp*
+
+
+make clean
diff --git a/contrib/adaptive-compression/test-performance.sh b/contrib/adaptive-compression/test-performance.sh
new file mode 100755 (executable)
index 0000000..6a88325
--- /dev/null
@@ -0,0 +1,34 @@
+echo "testing time"
+./datagen -g1GB > tmp
+time ./adapt -otmp1.zst tmp
+time zstd -1 -o tmp2.zst tmp
+rm tmp*
+
+./datagen -g2GB > tmp
+time ./adapt -otmp1.zst tmp
+time zstd -1 -o tmp2.zst tmp
+rm tmp*
+
+./datagen -g4GB > tmp
+time ./adapt -otmp1.zst tmp
+time zstd -1 -o tmp2.zst tmp
+rm tmp*
+
+echo -e "\ntesting compression ratio"
+./datagen -g1GB > tmp
+time ./adapt -otmp1.zst tmp
+time zstd -1 -o tmp2.zst tmp
+ls -l tmp1.zst tmp2.zst
+rm tmp*
+
+./datagen -g2GB > tmp
+time ./adapt -otmp1.zst tmp
+time zstd -1 -o tmp2.zst tmp
+ls -l tmp1.zst tmp2.zst
+rm tmp*
+
+./datagen -g4GB > tmp
+time ./adapt -otmp1.zst tmp
+time zstd -1 -o tmp2.zst tmp
+ls -l tmp1.zst tmp2.zst
+rm tmp*