first sketch for largeNbDicts test program

author Yann Collet <cyan@fb.com>

Mon, 27 Aug 2018 02:29:12 +0000 (19:29 -0700)

committer Yann Collet <cyan@fb.com>

Mon, 27 Aug 2018 02:29:12 +0000 (19:29 -0700)
author Yann Collet <cyan@fb.com>
Mon, 27 Aug 2018 02:29:12 +0000 (19:29 -0700)
committer Yann Collet <cyan@fb.com>
Mon, 27 Aug 2018 02:29:12 +0000 (19:29 -0700)
diff --git a/contrib/largeNbDicts/Makefile b/contrib/largeNbDicts/Makefile

new file mode 100644 (file)

index 0000000..082f010
--- /dev/null
+++ b/contrib/largeNbDicts/Makefile
@@ -0,0 +1,30 @@
+# ################################################################
+# Copyright (c) 2018-present, Yann Collet, Facebook, Inc.
+# All rights reserved.
+#
+# This source code is licensed under both the BSD-style license (found in the
+# LICENSE file in the root directory of this source tree) and the GPLv2 (found
+# in the COPYING file in the root directory of this source tree).
+# ################################################################
+
+
+CPPFLAGS+= -I../../lib -I../../lib/common -I../../lib/dictBuilder -I../../programs
+
+CFLAGS  ?= -O3
+DEBUGFLAGS= -Wall -Wextra -Wcast-qual -Wcast-align -Wshadow \
+            -Wstrict-aliasing=1 -Wswitch-enum -Wdeclaration-after-statement \
+            -Wstrict-prototypes -Wundef -Wpointer-arith -Wformat-security \
+            -Wvla -Wformat=2 -Winit-self -Wfloat-equal -Wwrite-strings \
+            -Wredundant-decls
+CFLAGS  += $(DEBUGFLAGS) $(MOREFLAGS)
+
+
+default: largeNbDicts
+
+largeNbDicts: LDFLAGS += -lzstd
+largeNbDicts: largeNbDicts.c
+       $(CC) $(CPPFLAGS) $(CFLAGS) $^ $(LDFLAGS) -o $@
+
+
+clean:
+       $(RM) largeNbDicts
diff --git a/contrib/largeNbDicts/largeNbDicts b/contrib/largeNbDicts/largeNbDicts

new file mode 100755 (executable)

index 0000000..40416f0

Binary files /dev/null and b/contrib/largeNbDicts/largeNbDicts differ
diff --git a/contrib/largeNbDicts/largeNbDicts.c b/contrib/largeNbDicts/largeNbDicts.c

new file mode 100644 (file)

index 0000000..749d966
--- /dev/null
+++ b/contrib/largeNbDicts/largeNbDicts.c
@@ -0,0 +1,289 @@
+/*
+ * Copyright (c) 2018-present, Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+/* largeNbDicts
+ * This is a benchmark test tool
+ * dedicated to the specific case of dictionary decompression
+ * using a very large nb of dictionaries
+ * thus generating many cache-misses.
+ * It's created in a bid to investigate performance and find optimizations. */
+
+
+/*---  Dependencies  ---*/
+
+#include <stddef.h>   /* size_t */
+#include <stdlib.h>   /* malloc, free */
+#include <stdio.h>    /* printf */
+#include <assert.h>   /* assert */
+
+#include "util.h"
+#define ZSTD_STATIC_LINKING_ONLY
+#include "zstd.h"
+#include "zdict.h"
+
+
+/*---  Constants  --- */
+
+#define KB  *(1<<10)
+#define MB  *(1<<20)
+
+#define BLOCKSIZE (4 KB)
+#define DICTSIZE  (4 KB)
+#define COMP_LEVEL 3
+
+#define DISPLAY_LEVEL_DEFAULT 3
+
+
+/*---  Display Macros  ---*/
+
+#define DISPLAY(...)         fprintf(stdout, __VA_ARGS__)
+#define DISPLAYLEVEL(l, ...) { if (g_displayLevel>=l) { DISPLAY(__VA_ARGS__); } }
+static int g_displayLevel = DISPLAY_LEVEL_DEFAULT;   /* 0 : no display,  1: errors,  2 : + result + interaction + warnings,  3 : + progression,  4 : + information */
+
+
+/*---  buffer_t  ---*/
+
+typedef struct {
+    void* ptr;
+    size_t size;
+    size_t capacity;
+} buffer_t;
+
+static const buffer_t kBuffNull = { NULL, 0, 0 };
+
+
+static buffer_t fillBuffer_fromHandle(buffer_t buff, FILE* f)
+{
+    size_t const readSize = fread(buff.ptr, 1, buff.capacity, f);
+    buff.size = readSize;
+    return buff;
+}
+
+static void freeBuffer(buffer_t buff)
+{
+    free(buff.ptr);
+}
+
+/* @return : kBuffNull if any error */
+static buffer_t createBuffer_fromHandle(FILE* f, size_t bufferSize)
+{
+    void* const buffer = malloc(bufferSize);
+    if (buffer==NULL) return kBuffNull;
+
+    {   buffer_t buff = { buffer, 0, bufferSize };
+        buff = fillBuffer_fromHandle(buff, f);
+        if (buff.size != buff.capacity) {
+            freeBuffer(buff);
+            return kBuffNull;
+        }
+        return buff;
+    }
+}
+
+/* @return : kBuffNull if any error */
+static buffer_t createBuffer_fromFile(const char* fileName)
+{
+    U64 const fileSize = UTIL_getFileSize(fileName);
+    size_t const bufferSize = (size_t) fileSize;
+
+    if (fileSize == UTIL_FILESIZE_UNKNOWN) return kBuffNull;
+    assert((U64)bufferSize == fileSize);   /* check overflow */
+
+    {   buffer_t buff;
+        FILE* const f = fopen(fileName, "rb");
+        if (f == NULL) return kBuffNull;
+
+        buff = createBuffer_fromHandle(f, bufferSize);
+        fclose(f);   /* do nothing specific if fclose() fails */
+        return buff;
+    }
+}
+
+
+/*---  buffer_collection_t  ---*/
+
+typedef struct {
+    void** buffers;
+    size_t* capacities;
+    size_t nbBuffers;
+} buffer_collection_t;
+
+static const buffer_collection_t kNullCollection = { NULL, NULL, 0 };
+
+static void freeCollection(buffer_collection_t collection)
+{
+    free(collection.buffers);
+    free(collection.capacities);
+}
+
+/* returns .buffers=NULL if operation fails */
+buffer_collection_t splitBuffer(buffer_t srcBuffer, size_t blockSize)
+{
+    size_t const nbBlocks = (srcBuffer.size + (blockSize-1)) / blockSize;
+
+    void** const buffers = malloc(nbBlocks * sizeof(void*));
+    size_t* const capacities = malloc(nbBlocks * sizeof(size_t*));
+    if ((buffers==NULL) || capacities==NULL) {
+        free(buffers);
+        free(capacities);
+        return kNullCollection;
+    }
+
+    char* newBlockPtr = (char*)srcBuffer.ptr;
+    char* const srcEnd = newBlockPtr + srcBuffer.size;
+    assert(nbBlocks >= 1);
+    for (size_t blockNb = 0; blockNb < nbBlocks-1; blockNb++) {
+        buffers[blockNb] = newBlockPtr;
+        capacities[blockNb] = blockSize;
+        newBlockPtr += blockSize;
+    }
+
+    /* last block */
+    assert(newBlockPtr <= srcEnd);
+    size_t const lastBlockSize = (srcEnd - newBlockPtr);
+    buffers[nbBlocks-1] = newBlockPtr;
+    capacities[nbBlocks-1] = lastBlockSize;
+
+    buffer_collection_t result;
+    result.buffers = buffers;
+    result.capacities = capacities;
+    result.nbBuffers = nbBlocks;
+    return result;
+}
+
+
+
+/*---  ddict_collection_t  ---*/
+
+typedef struct {
+    ZSTD_DDict** ddicts;
+    size_t nbDDict;
+} ddict_collection_t;
+
+static const ddict_collection_t kNullDDictCollection = { NULL, 0 };
+
+static void freeDDictCollection(ddict_collection_t ddictc)
+{
+    for (size_t dictNb=0; dictNb < ddictc.nbDDict; dictNb++) {
+        ZSTD_freeDDict(ddictc.ddicts[dictNb]);
+    }
+    free(ddictc.ddicts);
+}
+
+/* returns .buffers=NULL if operation fails */
+static ddict_collection_t createDDictCollection(const void* dictBuffer, size_t dictSize, size_t nbDDict)
+{
+    ZSTD_DDict** const ddicts = malloc(nbDDict * sizeof(ZSTD_DDict*));
+    if (ddicts==NULL) return kNullDDictCollection;
+    for (size_t dictNb=0; dictNb < nbDDict; dictNb++) {
+        ddicts[dictNb] = ZSTD_createDDict(dictBuffer, dictSize);
+        assert(ddicts[dictNb] != NULL);
+    }
+    ddict_collection_t ddictc;
+    ddictc.ddicts = ddicts;
+    ddictc.nbDDict = nbDDict;
+    return ddictc;
+}
+
+
+
+/*---  Benchmark  --- */
+
+
+/* bench() :
+ * @return : 0 is success, 1+ otherwise */
+int bench(const char* fileName)
+{
+    int result = 0;
+
+    DISPLAYLEVEL(3, "loading %s... \n", fileName);
+    buffer_t const srcBuffer = createBuffer_fromFile(fileName);
+    if (srcBuffer.ptr == NULL) {
+        DISPLAYLEVEL(1," error reading file %s \n", fileName);
+        return 1;
+    }
+    DISPLAYLEVEL(3, "created src buffer of size %.1f MB \n",
+                    (double)(srcBuffer.size) / (1 MB));
+
+    buffer_collection_t const srcBlockBuffers = splitBuffer(srcBuffer, BLOCKSIZE);
+    assert(srcBlockBuffers.buffers != NULL);
+    unsigned const nbBlocks = (unsigned)srcBlockBuffers.nbBuffers;
+    DISPLAYLEVEL(3, "splitting input into %u blocks of max size %u bytes \n",
+                    nbBlocks, BLOCKSIZE);
+
+    size_t const dstBlockSize = ZSTD_compressBound(BLOCKSIZE);
+    size_t const dstBufferCapacity = nbBlocks * dstBlockSize;
+    void* const dstPtr = malloc(dstBufferCapacity);
+    assert(dstPtr != NULL);
+    buffer_t dstBuffer;
+    dstBuffer.ptr = dstPtr;
+    dstBuffer.capacity = dstBufferCapacity;
+    dstBuffer.size = dstBufferCapacity;
+
+    buffer_collection_t const dstBlockBuffers = splitBuffer(dstBuffer, dstBlockSize);
+    assert(dstBlockBuffers.buffers != NULL);
+
+    DISPLAYLEVEL(3, "creating dictionary, of target size %u bytes \n", DICTSIZE);
+    void* const dictBuffer = malloc(DICTSIZE);
+    if (dictBuffer == NULL) { result = 1; goto _cleanup; }
+
+    size_t const dictSize = ZDICT_trainFromBuffer(dictBuffer, DICTSIZE,
+                                                srcBuffer.ptr,
+                                                srcBlockBuffers.capacities,
+                                                nbBlocks);
+    if (ZSTD_isError(dictSize)) {
+        DISPLAYLEVEL(1, "error creating dictionary \n");
+        result = 1;
+        goto _cleanup;
+    }
+
+    size_t const dictMem = ZSTD_estimateDDictSize(dictSize, ZSTD_dlm_byCopy);
+    size_t const allDictMem = dictMem * nbBlocks;
+    DISPLAYLEVEL(3, "generating %u dictionaries, using %.1f MB of memory \n",
+                    nbBlocks, (double)allDictMem / (1 MB));
+
+    ZSTD_CDict* const cdict = ZSTD_createCDict(dictBuffer, dictSize, COMP_LEVEL);
+    do {
+        ddict_collection_t const dictionaries = createDDictCollection(dictBuffer, dictSize, nbBlocks);
+        assert(dictionaries.ddicts != NULL);
+
+        freeDDictCollection(dictionaries);
+    } while(0);
+    ZSTD_freeCDict(cdict);
+
+_cleanup:
+    free(dictBuffer);
+    freeCollection(dstBlockBuffers);
+    freeBuffer(dstBuffer);
+    freeCollection(srcBlockBuffers);
+    freeBuffer(srcBuffer);
+
+    return result;
+}
+
+
+
+
+/*---  Command Line ---*/
+
+int bad_usage(const char* exeName)
+{
+    DISPLAY (" bad usage : \n");
+    DISPLAY (" %s filename \n", exeName);
+    return 1;
+}
+
+int main (int argc, const char** argv)
+{
+    const char* const exeName = argv[0];
+
+    if (argc != 2) return bad_usage(exeName);
+    return bench(argv[1]);
+}
diff --git a/lib/Makefile b/lib/Makefile

index 01689c6d53306d554defddf4a78889f6ee325ed8..cf8e45b0f0523a0a5ea51438ec504083f749ccc0 100644 (file)
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -23,7 +23,7 @@ ifeq ($(OS),Windows_NT)   # MinGW assumed
  CPPFLAGS   += -D__USE_MINGW_ANSI_STDIO   # compatibility with %zu formatting
  endif
  CFLAGS  ?= -O3
-DEBUGFLAGS = -Wall -Wextra -Wcast-qual -Wcast-align -Wshadow \
+DEBUGFLAGS= -Wall -Wextra -Wcast-qual -Wcast-align -Wshadow \
              -Wstrict-aliasing=1 -Wswitch-enum -Wdeclaration-after-statement \
              -Wstrict-prototypes -Wundef -Wpointer-arith -Wformat-security \
              -Wvla -Wformat=2 -Winit-self -Wfloat-equal -Wwrite-strings \
diff --git a/lib/dictBuilder/zdict.h b/lib/dictBuilder/zdict.h

index 4094669d12dd05c378cf8b05b20e385ed581bae0..3b3a6527c4d912cd1013da98a5a48992030a3bd7 100644 (file)
--- a/lib/dictBuilder/zdict.h
+++ b/lib/dictBuilder/zdict.h
@@ -52,7 +52,8 @@ extern "C" {
   *        It's recommended that total size of all samples be about ~x100 times the target size of dictionary.
   */
  ZDICTLIB_API size_t ZDICT_trainFromBuffer(void* dictBuffer, size_t dictBufferCapacity,
-                                    const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples);
+                                    const void* samplesBuffer,
+                                    const size_t* samplesSizes, unsigned nbSamples);
  
  
  /*======   Helper functions   ======*/
diff --git a/programs/bench.h b/programs/bench.h

index f6a53fc6a3609548da609b5d5f6245104d7ab3d6..184cc3e006f3b7c34e002961160d3177e59a14f5 100644 (file)
--- a/programs/bench.h
+++ b/programs/bench.h
@@ -233,7 +233,7 @@ typedef size_t (*BMK_initFn_t)(void* initPayload);
   * srcSizes - an array of the sizes of above buffers
   * dstBuffers - an array of buffers to be written into by benchFn
   * dstCapacities - an array of the capacities of above buffers
- * blockResults - store the return value of benchFn for each block. Optional. Use NULL if this result is not requested.
+ * blockResults - Optional: store the return value of benchFn for each block. Use NULL if this result is not requested.
   * nbLoops - defines number of times benchFn is run.
   * @return: a variant, which express either an error, or can generate a valid BMK_runTime_t result.
   *          Use BMK_isSuccessful_runOutcome() to check if function was successful.
author	Yann Collet <cyan@fb.com>
	Mon, 27 Aug 2018 02:29:12 +0000 (19:29 -0700)
committer	Yann Collet <cyan@fb.com>
	Mon, 27 Aug 2018 02:29:12 +0000 (19:29 -0700)
contrib/largeNbDicts/Makefile	[new file with mode: 0644]	patch \| blob
contrib/largeNbDicts/largeNbDicts	[new file with mode: 0755]	patch \| blob
contrib/largeNbDicts/largeNbDicts.c	[new file with mode: 0644]	patch \| blob
lib/Makefile		patch \| blob \| blame \| history
lib/dictBuilder/zdict.h		patch \| blob \| blame \| history
programs/bench.h		patch \| blob \| blame \| history