]> git.ipfire.org Git - thirdparty/zstd.git/commitdiff
largeNbDicts compatible with multiple source files
author Yann Collet <cyan@fb.com>
Thu, 30 Aug 2018 18:02:08 +0000 (11:02 -0700)
committer Yann Collet <cyan@fb.com>
Thu, 30 Aug 2018 21:38:49 +0000 (14:38 -0700)
splitting is disabled by default, but can be re-enabled using usual command -B#
update commands to look like zstd ones

contrib/largeNbDicts/Makefile
contrib/largeNbDicts/largeNbDicts.c
programs/bench.c
programs/util.h

index 0f8dbb3a06b6edfa2419cf951e5423f0703c9df3..cf5293991beec6e63847b1622b17d92b8cf4b113 100644 (file)
@@ -16,7 +16,7 @@ CPPFLAGS+= -I$(LIBDIR) -I$(LIBDIR)/common -I$(LIBDIR)/dictBuilder -I$(PROGDIR)
 
 CFLAGS  ?= -O3
 DEBUGFLAGS= -Wall -Wextra -Wcast-qual -Wcast-align -Wshadow \
-            -Wstrict-aliasing=1 -Wswitch-enum -Wdeclaration-after-statement \
+            -Wstrict-aliasing=1 -Wswitch-enum \
             -Wstrict-prototypes -Wundef -Wpointer-arith -Wformat-security \
             -Wvla -Wformat=2 -Winit-self -Wfloat-equal -Wwrite-strings \
             -Wredundant-decls
index 193362493326dbf351c36f1a150295b3de2b14dc..5b982bd429afeac32c11509a0258af73d2083d81 100644 (file)
 #define KB  *(1<<10)
 #define MB  *(1<<20)
 
-#define BLOCKSIZE_DEFAULT (4 KB)
+#define BLOCKSIZE_DEFAULT 0  /* no slicing into blocks */
 #define DICTSIZE  (4 KB)
 #define CLEVEL_DEFAULT 3
 
+#define BENCH_TIME_DEFAULT_S   6
+#define RUN_TIME_DEFAULT_MS    1000
+#define BENCH_TIME_DEFAULT_MS (BENCH_TIME_DEFAULT_S * RUN_TIME_DEFAULT_MS)
+
 #define DISPLAY_LEVEL_DEFAULT 3
 
+#define BENCH_SIZE_MAX (1200 MB)
+
+
+/*---  Macros  ---*/
+#define CONTROL(c)   assert(c)
+
 
 /*---  Display Macros  ---*/
 
@@ -59,12 +69,17 @@ typedef struct {
 
 static const buffer_t kBuffNull = { NULL, 0, 0 };
 
-
-static buffer_t fillBuffer_fromHandle(buffer_t buff, FILE* f)
+/* @return : kBuffNull if any error */
+static buffer_t createBuffer(size_t capacity)
 {
-    size_t const readSize = fread(buff.ptr, 1, buff.capacity, f);
-    buff.size = readSize;
-    return buff;
+    void* const ptr = malloc(capacity);
+    if (ptr==NULL) return kBuffNull;
+
+    buffer_t buffer;
+    buffer.ptr = ptr;
+    buffer.capacity = capacity;
+    buffer.size = 0;
+    return buffer;
 }
 
 static void freeBuffer(buffer_t buff)
@@ -72,20 +87,24 @@ static void freeBuffer(buffer_t buff)
     free(buff.ptr);
 }
 
+
+static void fillBuffer_fromHandle(buffer_t* buff, FILE* f)
+{
+    size_t const readSize = fread(buff->ptr, 1, buff->capacity, f);
+    buff->size = readSize;
+}
+
 /* @return : kBuffNull if any error */
 static buffer_t createBuffer_fromHandle(FILE* f, size_t bufferSize)
 {
-    void* const buffer = malloc(bufferSize);
-    if (buffer==NULL) return kBuffNull;
-
-    {   buffer_t buff = { buffer, 0, bufferSize };
-        buff = fillBuffer_fromHandle(buff, f);
-        if (buff.size != buff.capacity) {
-            freeBuffer(buff);
-            return kBuffNull;
-        }
-        return buff;
+    buffer_t buff = createBuffer(bufferSize);
+    if (buff.ptr == NULL) return kBuffNull;
+    fillBuffer_fromHandle(&buff, f);
+    if (buff.size != buff.capacity) {
+        freeBuffer(buff);
+        return kBuffNull;
     }
+    return buff;
 }
 
 /* @return : kBuffNull if any error */
@@ -107,98 +126,257 @@ static buffer_t createBuffer_fromFile(const char* fileName)
     }
 }
 
+static buffer_t
+createDictionaryBuffer(const char* dictionaryName,
+                       const void* srcBuffer,
+                       const size_t* srcBlockSizes, unsigned nbBlocks)
+{
+    if (dictionaryName) {
+        DISPLAYLEVEL(3, "loading dictionary %s \n", dictionaryName);
+        return createBuffer_fromFile(dictionaryName);
+    } else {
+        DISPLAYLEVEL(3, "creating dictionary, of target size %u bytes \n", DICTSIZE);
+        void* const dictBuffer = malloc(DICTSIZE);
+        assert(dictBuffer != NULL);
+
+        size_t const dictSize = ZDICT_trainFromBuffer(dictBuffer, DICTSIZE,
+                                                    srcBuffer,
+                                                    srcBlockSizes,
+                                                    nbBlocks);
+        assert(!ZSTD_isError(dictSize));
+
+        buffer_t result;
+        result.ptr = dictBuffer;
+        result.capacity = DICTSIZE;
+        result.size = dictSize;
+        return result;
+    }
+}
+
+
+/*! loadFiles() :
+ *  Loads `buffer`, with content from files listed within `fileNamesTable`.
+ *  Fills `buffer` entirely.
+ * @return : 0 on success, !=0 on error */
+static int loadFiles(void* buffer, size_t bufferSize,
+                     size_t* fileSizes,
+                     const char* const * fileNamesTable, unsigned nbFiles)
+{
+    size_t pos = 0, totalSize = 0;
+
+    for (unsigned n=0; n<nbFiles; n++) {
+        U64 fileSize = UTIL_getFileSize(fileNamesTable[n]);
+        if (UTIL_isDirectory(fileNamesTable[n])) {
+            fileSizes[n] = 0; fileSize = 0;
+            continue;
+        }
+        if (fileSize == UTIL_FILESIZE_UNKNOWN) {
+            fileSizes[n] = 0; fileSize = 0;
+            continue;
+        }
+
+        FILE* const f = fopen(fileNamesTable[n], "rb");
+        assert(f!=NULL);
+
+        assert(pos <= bufferSize);
+        assert(fileSize <= bufferSize - pos);
+
+        {   size_t const readSize = fread(((char*)buffer)+pos, 1, (size_t)fileSize, f);
+            assert(readSize == fileSize);
+            pos += readSize;
+        }
+        fileSizes[n] = (size_t)fileSize;
+        totalSize += (size_t)fileSize;
+        fclose(f);
+    }
+
+    assert(totalSize == bufferSize);
+    return 0;
+}
+
 
-/*---  buffer_collection_t  ---*/
+
+/*---  slice_collection_t  ---*/
 
 typedef struct {
-    void** buffers;
+    void** slicePtrs;
     size_t* capacities;
-    size_t nbBuffers;
-} buffer_collection_t;
+    size_t nbSlices;
+} slice_collection_t;
 
-static const buffer_collection_t kNullCollection = { NULL, NULL, 0 };
+static const slice_collection_t kNullCollection = { NULL, NULL, 0 };
 
-static void freeCollection(buffer_collection_t collection)
+static void freeSliceCollection(slice_collection_t collection)
 {
-    free(collection.buffers);
+    free(collection.slicePtrs);
     free(collection.capacities);
 }
 
-/* returns .buffers=NULL if operation fails */
-buffer_collection_t splitBuffer(buffer_t srcBuffer, size_t blockSize)
+/* shrinkSizes() :
+ * downsizes sizes of slices within collection, according to `newSizes`.
+ * every `newSizes` entry must be <= its corresponding collection capacity */
+void shrinkSizes(slice_collection_t collection,
+                 const size_t* newSizes)  /* presumed same size as collection */
 {
-    size_t const nbBlocks = (srcBuffer.size + (blockSize-1)) / blockSize;
+    size_t const nbSlices = collection.nbSlices;
+    for (size_t blockNb = 0; blockNb < nbSlices; blockNb++) {
+        assert(newSizes[blockNb] <= collection.capacities[blockNb]);
+        collection.capacities[blockNb] = newSizes[blockNb];
+    }
+}
 
-    void** const buffers = malloc(nbBlocks * sizeof(void*));
-    size_t* const capacities = malloc(nbBlocks * sizeof(size_t*));
-    if ((buffers==NULL) || capacities==NULL) {
-        free(buffers);
+
+slice_collection_t splitSlices(slice_collection_t srcSlices, size_t blockSize)
+{
+    if (blockSize==0) blockSize = (size_t)(-1);   /* means "do not cut" */
+    size_t nbBlocks = 0;
+    for (size_t ssnb=0; ssnb < srcSlices.nbSlices; ssnb++) {
+        size_t pos = 0;
+        while (pos <= srcSlices.capacities[ssnb]) {
+            nbBlocks++;
+            pos += blockSize;
+        }
+    }
+
+    void** const sliceTable = (void**)malloc(nbBlocks * sizeof(*sliceTable));
+    size_t* const capacities = (size_t*)malloc(nbBlocks * sizeof(*capacities));
+    if (sliceTable == NULL || capacities == NULL) {
+        free(sliceTable);
         free(capacities);
         return kNullCollection;
     }
 
-    char* newBlockPtr = (char*)srcBuffer.ptr;
-    char* const srcEnd = newBlockPtr + srcBuffer.size;
-    assert(nbBlocks >= 1);
-    for (size_t blockNb = 0; blockNb < nbBlocks-1; blockNb++) {
-        buffers[blockNb] = newBlockPtr;
-        capacities[blockNb] = blockSize;
-        newBlockPtr += blockSize;
+    size_t blockNb = 0;
+    for (size_t ssnb=0; ssnb < srcSlices.nbSlices; ssnb++) {
+        size_t pos = 0;
+        char* const ptr = (char*)srcSlices.slicePtrs[ssnb];
+        while (pos < srcSlices.capacities[ssnb]) {
+            size_t const size = MIN(blockSize, srcSlices.capacities[ssnb] - pos);
+            sliceTable[blockNb] = ptr + pos;
+            capacities[blockNb] = size;
+            blockNb++;
+            pos += blockSize;
+        }
     }
+    assert(blockNb == nbBlocks);
 
-    /* last block */
-    assert(newBlockPtr <= srcEnd);
-    size_t const lastBlockSize = (srcEnd - newBlockPtr);
-    buffers[nbBlocks-1] = newBlockPtr;
-    capacities[nbBlocks-1] = lastBlockSize;
-
-    buffer_collection_t result;
-    result.buffers = buffers;
+    slice_collection_t result;
+    result.nbSlices = nbBlocks;
+    result.slicePtrs = sliceTable;
     result.capacities = capacities;
-    result.nbBuffers = nbBlocks;
     return result;
 }
 
-/* shrinkSizes() :
- * update sizes in buffer collection */
-void shrinkSizes(buffer_collection_t collection,
-                 const size_t* sizes)  /* presumed same size as collection */
+
+static size_t sliceCollection_totalCapacity(slice_collection_t sc)
 {
-    size_t const nbBlocks = collection.nbBuffers;
-    for (size_t blockNb = 0; blockNb < nbBlocks; blockNb++) {
-        assert(sizes[blockNb] <= collection.capacities[blockNb]);
-        collection.capacities[blockNb] = sizes[blockNb];
-    }
+    size_t totalSize = 0;
+    for (size_t n=0; n<sc.nbSlices; n++)
+        totalSize += sc.capacities[n];
+    return totalSize;
 }
 
-/*---  dictionary creation  ---*/
 
-buffer_t createDictionary(const char* dictionary,
-                        const void* srcBuffer, size_t* srcBlockSizes, unsigned nbBlocks)
+/* ---  buffer collection  --- */
+
+typedef struct {
+    buffer_t buffer;
+    slice_collection_t slices;
+} buffer_collection_t;
+
+
+static void freeBufferCollection(buffer_collection_t bc)
 {
-    if (dictionary) {
-        DISPLAYLEVEL(3, "loading dictionary %s \n", dictionary);
-        return createBuffer_fromFile(dictionary);
-    } else {
-        DISPLAYLEVEL(3, "creating dictionary, of target size %u bytes \n", DICTSIZE);
-        void* const dictBuffer = malloc(DICTSIZE);
-        assert(dictBuffer != NULL);
+    freeBuffer(bc.buffer);
+    freeSliceCollection(bc.slices);
+}
 
-        size_t const dictSize = ZDICT_trainFromBuffer(dictBuffer, DICTSIZE,
-                                                    srcBuffer,
-                                                    srcBlockSizes,
-                                                    nbBlocks);
-        assert(!ZSTD_isError(dictSize));
 
-        buffer_t result;
-        result.ptr = dictBuffer;
-        result.capacity = DICTSIZE;
-        result.size = dictSize;
-        return result;
+static buffer_collection_t
+createBufferCollection_fromSliceCollectionSizes(slice_collection_t sc)
+{
+    size_t const bufferSize = sliceCollection_totalCapacity(sc);
+
+    buffer_t buffer = createBuffer(bufferSize);
+    CONTROL(buffer.ptr != NULL);
+
+    size_t const nbSlices = sc.nbSlices;
+    void** const slices = (void**)malloc(nbSlices * sizeof(*slices));
+    CONTROL(slices != NULL);
+
+    size_t* const capacities = (size_t*)malloc(nbSlices * sizeof(*capacities));
+    CONTROL(capacities != NULL);
+
+    char* const ptr = (char*)buffer.ptr;
+    size_t pos = 0;
+    for (size_t n=0; n < nbSlices; n++) {
+        capacities[n] = sc.capacities[n];
+        slices[n] = ptr + pos;
+        pos += capacities[n];
+    }
+
+    buffer_collection_t result;
+    result.buffer = buffer;
+    result.slices.nbSlices = nbSlices;
+    result.slices.capacities = capacities;
+    result.slices.slicePtrs = slices;
+    return result;
+}
+
+
+/* @return : a buffer_collection_t holding all loaded files; every failure is checked with assert() */
+static buffer_collection_t
+createBufferCollection_fromFiles(const char* const * fileNamesTable, unsigned nbFiles)
+{
+    U64 const totalSizeToLoad = UTIL_getTotalFileSize(fileNamesTable, nbFiles);
+    assert(totalSizeToLoad <= BENCH_SIZE_MAX);
+    size_t const loadedSize = (size_t)totalSizeToLoad;
+    void* const srcBuffer = malloc(loadedSize);
+    assert(srcBuffer != NULL);
+
+    assert(nbFiles > 0);
+    size_t* const fileSizes = (size_t*)calloc(nbFiles, sizeof(*fileSizes));
+    assert(fileSizes != NULL);
+
+    /* Load input buffer */
+    int const errorCode = loadFiles(srcBuffer, loadedSize,
+                                    fileSizes,
+                                    fileNamesTable, nbFiles);
+    assert(errorCode == 0);
+
+    void** sliceTable = (void**)malloc(nbFiles * sizeof(*sliceTable));
+    assert(sliceTable != NULL);
+
+    char* const ptr = (char*)srcBuffer;
+    size_t pos = 0;
+    unsigned fileNb = 0;
+    for ( ; (pos < loadedSize) && (fileNb < nbFiles); fileNb++) {
+        sliceTable[fileNb] = ptr + pos;
+        pos += fileSizes[fileNb];
     }
+    assert(pos == loadedSize);
+    assert(fileNb == nbFiles);
+
+
+    buffer_t buffer;
+    buffer.ptr = srcBuffer;
+    buffer.capacity = loadedSize;
+    buffer.size = loadedSize;
+
+    slice_collection_t slices;
+    slices.slicePtrs = sliceTable;
+    slices.capacities = fileSizes;
+    slices.nbSlices = nbFiles;
+
+    buffer_collection_t bc;
+    bc.buffer = buffer;
+    bc.slices = slices;
+    return bc;
 }
 
 
+
+
 /*---  ddict_collection_t  ---*/
 
 typedef struct {
@@ -260,12 +438,12 @@ void shuffleDictionaries(ddict_collection_t dicts)
  *        or 0 if error.
  */
 static size_t compressBlocks(size_t* cSizes,   /* optional (can be NULL). If present, must contain at least nbBlocks fields */
-                             buffer_collection_t dstBlockBuffers,
-                             buffer_collection_t srcBlockBuffers,
+                             slice_collection_t dstBlockBuffers,
+                             slice_collection_t srcBlockBuffers,
                              ZSTD_CDict* cdict, int cLevel)
 {
-    size_t const nbBlocks = srcBlockBuffers.nbBuffers;
-    assert(dstBlockBuffers.nbBuffers == srcBlockBuffers.nbBuffers);
+    size_t const nbBlocks = srcBlockBuffers.nbSlices;
+    assert(dstBlockBuffers.nbSlices == srcBlockBuffers.nbSlices);
 
     ZSTD_CCtx* const cctx = ZSTD_createCCtx();
     assert(cctx != NULL);
@@ -275,16 +453,16 @@ static size_t compressBlocks(size_t* cSizes,   /* optional (can be NULL). If pre
         size_t cBlockSize;
         if (cdict == NULL) {
             cBlockSize = ZSTD_compressCCtx(cctx,
-                            dstBlockBuffers.buffers[blockNb], dstBlockBuffers.capacities[blockNb],
-                            srcBlockBuffers.buffers[blockNb], srcBlockBuffers.capacities[blockNb],
+                            dstBlockBuffers.slicePtrs[blockNb], dstBlockBuffers.capacities[blockNb],
+                            srcBlockBuffers.slicePtrs[blockNb], srcBlockBuffers.capacities[blockNb],
                             cLevel);
         } else {
             cBlockSize = ZSTD_compress_usingCDict(cctx,
-                            dstBlockBuffers.buffers[blockNb], dstBlockBuffers.capacities[blockNb],
-                            srcBlockBuffers.buffers[blockNb], srcBlockBuffers.capacities[blockNb],
+                            dstBlockBuffers.slicePtrs[blockNb], dstBlockBuffers.capacities[blockNb],
+                            srcBlockBuffers.slicePtrs[blockNb], srcBlockBuffers.capacities[blockNb],
                             cdict);
         }
-        assert(!ZSTD_isError(cBlockSize));
+        CONTROL(!ZSTD_isError(cBlockSize));
         if (cSizes) cSizes[blockNb] = cBlockSize;
         totalCSize += cBlockSize;
     }
@@ -337,31 +515,32 @@ size_t decompress(const void* src, size_t srcSize, void* dst, size_t dstCapacity
 }
 
 
-#define BENCH_TIME_DEFAULT_MS 6000
-#define RUN_TIME_DEFAULT_MS   1000
-
-static int benchMem(buffer_collection_t dstBlocks,
-                    buffer_collection_t srcBlocks,
-                    ddict_collection_t dictionaries)
+static int benchMem(slice_collection_t dstBlocks,
+                    slice_collection_t srcBlocks,
+                    ddict_collection_t dictionaries,
+                    int nbRounds)
 {
-    assert(dstBlocks.nbBuffers == srcBlocks.nbBuffers);
+    assert(dstBlocks.nbSlices == srcBlocks.nbSlices);
+
+    unsigned const ms_per_round = RUN_TIME_DEFAULT_MS;
+    unsigned const total_time_ms = nbRounds * ms_per_round;
 
     double bestSpeed = 0.;
 
     BMK_timedFnState_t* const benchState =
-            BMK_createTimedFnState(BENCH_TIME_DEFAULT_MS, RUN_TIME_DEFAULT_MS);
+            BMK_createTimedFnState(total_time_ms, ms_per_round);
     decompressInstructions di = createDecompressInstructions(dictionaries);
 
     for (;;) {
         BMK_runOutcome_t const outcome = BMK_benchTimedFn(benchState,
                                 decompress, &di,
                                 NULL, NULL,
-                                dstBlocks.nbBuffers,
-                                (const void* const *)srcBlocks.buffers, srcBlocks.capacities,
-                                dstBlocks.buffers, dstBlocks.capacities,
+                                dstBlocks.nbSlices,
+                                (const void* const *)srcBlocks.slicePtrs, srcBlocks.capacities,
+                                dstBlocks.slicePtrs, dstBlocks.capacities,
                                 NULL);
+        CONTROL(BMK_isSuccessful_runOutcome(outcome));
 
-        assert(BMK_isSuccessful_runOutcome(outcome));
         BMK_runTime_t const result = BMK_extract_runTime(outcome);
         U64 const dTime_ns = result.nanoSecPerRun;
         double const dTime_sec = (double)dTime_ns / 1000000000;
@@ -381,65 +560,87 @@ static int benchMem(buffer_collection_t dstBlocks,
 }
 
 
-/* bench() :
- * fileName : file to load for benchmarking purpose
- * dictionary : optional (can be NULL), file to load as dictionary,
+/*! bench() :
+ *  fileNameTable : table of `nbFiles` file names to load for benchmarking purpose
+ *  dictionary : optional (can be NULL), file to load as dictionary,
  *              if none provided : will be calculated on the fly by the program.
  * @return : 0 is success, 1+ otherwise */
-int bench(const char* fileName, const char* dictionary,
-          size_t blockSize, int clevel, unsigned nbDictMax)
+int bench(const char** fileNameTable, unsigned nbFiles,
+          const char* dictionary,
+          size_t blockSize, int clevel, unsigned nbDictMax, int nbRounds)
 {
     int result = 0;
 
-    DISPLAYLEVEL(3, "loading %s... \n", fileName);
-    buffer_t const srcBuffer = createBuffer_fromFile(fileName);
-    assert(srcBuffer.ptr != NULL);
+    DISPLAYLEVEL(3, "loading %u files... \n", nbFiles);
+    buffer_collection_t const srcs = createBufferCollection_fromFiles(fileNameTable, nbFiles);
+    CONTROL(srcs.buffer.ptr != NULL);
+    buffer_t srcBuffer = srcs.buffer;
     size_t const srcSize = srcBuffer.size;
     DISPLAYLEVEL(3, "created src buffer of size %.1f MB \n",
                     (double)srcSize / (1 MB));
 
-    buffer_collection_t const srcBlockBuffers = splitBuffer(srcBuffer, blockSize);
-    assert(srcBlockBuffers.buffers != NULL);
-    unsigned const nbBlocks = (unsigned)srcBlockBuffers.nbBuffers;
-    DISPLAYLEVEL(3, "split input into %u blocks of max size %u bytes \n",
-                    nbBlocks, (unsigned)blockSize);
+    slice_collection_t const srcSlices = splitSlices(srcs.slices, blockSize);
+    unsigned const nbBlocks = (unsigned)(srcSlices.nbSlices);
+    DISPLAYLEVEL(3, "split input into %u blocks ", nbBlocks);
+    if (blockSize)
+        DISPLAYLEVEL(3, "of max size %u bytes ", (unsigned)blockSize);
+    DISPLAYLEVEL(3, "\n");
+
+
+    size_t* const dstCapacities = malloc(nbBlocks * sizeof(*dstCapacities));
+    CONTROL(dstCapacities != NULL);
+    size_t dstBufferCapacity = 0;
+    for (size_t bnb=0; bnb<nbBlocks; bnb++) {
+        dstCapacities[bnb] = ZSTD_compressBound(srcSlices.capacities[bnb]);
+        dstBufferCapacity += dstCapacities[bnb];
+    }
 
-    size_t const dstBlockSize = ZSTD_compressBound(blockSize);
-    size_t const dstBufferCapacity = nbBlocks * dstBlockSize;
-    void* const dstPtr = malloc(dstBufferCapacity);
-    assert(dstPtr != NULL);
-    buffer_t dstBuffer;
-    dstBuffer.ptr = dstPtr;
-    dstBuffer.capacity = dstBufferCapacity;
-    dstBuffer.size = dstBufferCapacity;
+    buffer_t dstBuffer = createBuffer(dstBufferCapacity);
+    CONTROL(dstBuffer.ptr != NULL);
+
+    void** const sliceTable = (void**)malloc(nbBlocks * sizeof(*sliceTable));
+    CONTROL(sliceTable != NULL);
+
+    {   char* const ptr = (char*)dstBuffer.ptr;
+        size_t pos = 0;
+        for (size_t snb=0; snb < nbBlocks; snb++) {
+            sliceTable[snb] = ptr + pos;
+            pos += dstCapacities[snb];
+    }   }
+
+    slice_collection_t dstSlices;
+    dstSlices.capacities = dstCapacities;
+    dstSlices.slicePtrs = sliceTable;
+    dstSlices.nbSlices = nbBlocks;
 
-    buffer_collection_t const dstBlockBuffers = splitBuffer(dstBuffer, dstBlockSize);
-    assert(dstBlockBuffers.buffers != NULL);
 
     /* dictionary determination */
-    buffer_t const dictBuffer = createDictionary(dictionary,
+    buffer_t const dictBuffer = createDictionaryBuffer(dictionary,
                                 srcBuffer.ptr,
-                                srcBlockBuffers.capacities, nbBlocks);
-    assert(dictBuffer.ptr != NULL);
+                                srcSlices.capacities, nbBlocks);
+    CONTROL(dictBuffer.ptr != NULL);
 
     ZSTD_CDict* const cdict = ZSTD_createCDict(dictBuffer.ptr, dictBuffer.size, clevel);
-    assert(cdict != NULL);
+    CONTROL(cdict != NULL);
 
-    size_t const cTotalSizeNoDict = compressBlocks(NULL, dstBlockBuffers, srcBlockBuffers, NULL, clevel);
-    assert(cTotalSizeNoDict != 0);
+    size_t const cTotalSizeNoDict = compressBlocks(NULL, dstSlices, srcSlices, NULL, clevel);
+    CONTROL(cTotalSizeNoDict != 0);
     DISPLAYLEVEL(3, "compressing at level %u without dictionary : Ratio=%.2f  (%u bytes) \n",
                     clevel,
                     (double)srcSize / cTotalSizeNoDict, (unsigned)cTotalSizeNoDict);
 
     size_t* const cSizes = malloc(nbBlocks * sizeof(size_t));
-    assert(cSizes != NULL);
+    CONTROL(cSizes != NULL);
 
-    size_t const cTotalSize = compressBlocks(cSizes, dstBlockBuffers, srcBlockBuffers, cdict, clevel);
-    assert(cTotalSize != 0);
+    size_t const cTotalSize = compressBlocks(cSizes, dstSlices, srcSlices, cdict, clevel);
+    CONTROL(cTotalSize != 0);
     DISPLAYLEVEL(3, "compressed using a %u bytes dictionary : Ratio=%.2f  (%u bytes) \n",
                     (unsigned)dictBuffer.size,
                     (double)srcSize / cTotalSize, (unsigned)cTotalSize);
 
+    /* now dstSlices contain the real compressed size of each block, instead of the maximum capacity */
+    shrinkSizes(dstSlices, cSizes);
+
     size_t const dictMem = ZSTD_estimateDDictSize(dictBuffer.size, ZSTD_dlm_byCopy);
     unsigned const nbDicts = nbDictMax ? nbDictMax : nbBlocks;
     size_t const allDictMem = dictMem * nbDicts;
@@ -447,43 +648,31 @@ int bench(const char* fileName, const char* dictionary,
                     nbDicts, (double)allDictMem / (1 MB));
 
     ddict_collection_t const dictionaries = createDDictCollection(dictBuffer.ptr, dictBuffer.size, nbDicts);
-    assert(dictionaries.ddicts != NULL);
+    CONTROL(dictionaries.ddicts != NULL);
 
     shuffleDictionaries(dictionaries);
-    // for (size_t u = 0; u < dictionaries.nbDDict; u++) DISPLAY("dict address : %p \n", dictionaries.ddicts[u]);   /* check dictionary addresses */
 
-    void* const resultPtr = malloc(srcSize);
-    assert(resultPtr != NULL);
-    buffer_t resultBuffer;
-    resultBuffer.ptr = resultPtr;
-    resultBuffer.capacity = srcSize;
-    resultBuffer.size = srcSize;
+    buffer_collection_t resultCollection = createBufferCollection_fromSliceCollectionSizes(srcSlices);
+    CONTROL(resultCollection.buffer.ptr != NULL);
 
-    buffer_collection_t const resultBlockBuffers = splitBuffer(resultBuffer, blockSize);
-    assert(resultBlockBuffers.buffers != NULL);
-
-    shrinkSizes(dstBlockBuffers, cSizes);
-
-    result = benchMem(resultBlockBuffers, dstBlockBuffers, dictionaries);
+    result = benchMem(resultCollection.slices, dstSlices, dictionaries, nbRounds);
 
     /* free all heap objects in reverse order */
-    freeCollection(resultBlockBuffers);
-    free(resultPtr);
+    freeBufferCollection(resultCollection);
     freeDDictCollection(dictionaries);
     free(cSizes);
     ZSTD_freeCDict(cdict);
     freeBuffer(dictBuffer);
-    freeCollection(dstBlockBuffers);
+    freeSliceCollection(dstSlices);
     freeBuffer(dstBuffer);
-    freeCollection(srcBlockBuffers);
-    freeBuffer(srcBuffer);
+    freeSliceCollection(srcSlices);
+    freeBufferCollection(srcs);
 
     return result;
 }
 
 
 
-
 /* ---  Command Line  --- */
 
 /*! readU32FromChar() :
@@ -533,33 +722,57 @@ int bad_usage(const char* exeName)
     DISPLAY (" bad usage : \n");
     DISPLAY (" %s filename [Options] \n", exeName);
     DISPLAY ("Options : \n");
-    DISPLAY ("--clevel=#     : use compression level # (default: %u) \n", CLEVEL_DEFAULT);
-    DISPLAY ("--blockSize=#  : cut input into blocks of size # (default: %u) \n", BLOCKSIZE_DEFAULT);
-    DISPLAY ("--dictionary=# : use # as a dictionary (default: create one) \n");
-    DISPLAY ("--nbDicts=#    : set nb of dictionaries to # (default: one per block) \n");
+    DISPLAY ("-r             : recursively load all files in subdirectories (default: off) \n");
+    DISPLAY ("-B#            : split input into blocks of size # (default: no split) \n");
+    DISPLAY ("-#             : use compression level # (default: %u) \n", CLEVEL_DEFAULT);
+    DISPLAY ("-D #           : use # as a dictionary (default: create one) \n");
+    DISPLAY ("-i#            : nb benchmark rounds (default: %u) \n", BENCH_TIME_DEFAULT_S);
+    DISPLAY ("--nbDicts=#    : create # dictionaries for bench (default: one per block) \n");
     return 1;
 }
 
 int main (int argc, const char** argv)
 {
+    int recursiveMode = 0;
+    int nbRounds = BENCH_TIME_DEFAULT_S;
     const char* const exeName = argv[0];
 
     if (argc < 2) return bad_usage(exeName);
-    const char* const fileName = argv[1];
+
+    const char** nameTable = (const char**)malloc(argc * sizeof(const char*));
+    assert(nameTable != NULL);
+    unsigned nameIdx = 0;
 
     const char* dictionary = NULL;
     int cLevel = CLEVEL_DEFAULT;
     size_t blockSize = BLOCKSIZE_DEFAULT;
-    size_t nbDicts = 0;  /* auto, 1 dict per block */
+    size_t nbDicts = 0;  /* determine nbDicts automatically: 1 dictionary per block */
 
-    for (int argNb = 2; argNb < argc ; argNb++) {
+    for (int argNb = 1; argNb < argc ; argNb++) {
         const char* argument = argv[argNb];
-        if (longCommandWArg(&argument, "--clevel=")) { cLevel = readU32FromChar(&argument); continue; }
-        if (longCommandWArg(&argument, "--blockSize=")) { blockSize = readU32FromChar(&argument); continue; }
+        if (!strcmp(argument, "-r")) { recursiveMode = 1; continue; }
+        if (!strcmp(argument, "-D")) { argNb++; assert(argNb < argc); dictionary = argv[argNb]; continue; }
+        if (longCommandWArg(&argument, "-i")) { nbRounds = readU32FromChar(&argument); continue; }
         if (longCommandWArg(&argument, "--dictionary=")) { dictionary = argument; continue; }
+        if (longCommandWArg(&argument, "-B")) { blockSize = readU32FromChar(&argument); continue; }
+        if (longCommandWArg(&argument, "--blockSize=")) { blockSize = readU32FromChar(&argument); continue; }
         if (longCommandWArg(&argument, "--nbDicts=")) { nbDicts = readU32FromChar(&argument); continue; }
-        return bad_usage(exeName);
+        if (longCommandWArg(&argument, "--clevel=")) { cLevel = readU32FromChar(&argument); continue; }
+        if (longCommandWArg(&argument, "-")) { cLevel = readU32FromChar(&argument); continue; }
+        /* anything that's not a command is a filename */
+        nameTable[nameIdx++] = argument;
+    }
+
+    const char** filenameTable = nameTable;
+    unsigned nbFiles = nameIdx;
+    char* buffer_containing_filenames = NULL;
+
+    if (recursiveMode) {
+#ifndef UTIL_HAS_CREATEFILELIST
+        assert(0);   /* missing capability, do not run */
+#endif
+        filenameTable = UTIL_createFileList(nameTable, nameIdx, &buffer_containing_filenames, &nbFiles, 1 /* follow_links */);
     }
 
-    return bench(fileName, dictionary, blockSize, cLevel, nbDicts);
+    return bench(filenameTable, nbFiles, dictionary, blockSize, cLevel, nbDicts, nbRounds);
 }
index 5ff9afac588abbdebff84d36f626d3036b304786..326c1c1c56e5973a59171e731469517d1b9b7872 100644 (file)
@@ -951,8 +951,9 @@ static size_t BMK_findMaxMem(U64 requiredMem)
  *  Loads `buffer` with content of files listed within `fileNamesTable`.
  *  At most, fills `buffer` entirely. */
 static int BMK_loadFiles(void* buffer, size_t bufferSize,
-                          size_t* fileSizes, const char* const * const fileNamesTable,
-                          unsigned nbFiles, int displayLevel)
+                         size_t* fileSizes,
+                         const char* const * fileNamesTable, unsigned nbFiles,
+                         int displayLevel)
 {
     size_t pos = 0, totalSize = 0;
     unsigned n;
@@ -973,9 +974,10 @@ static int BMK_loadFiles(void* buffer, size_t bufferSize,
         if (f==NULL) EXM_THROW_INT(10, "impossible to open file %s", fileNamesTable[n]);
         DISPLAYUPDATE(2, "Loading %s...       \r", fileNamesTable[n]);
         if (fileSize > bufferSize-pos) fileSize = bufferSize-pos, nbFiles=n;   /* buffer too small - stop after this file */
-        { size_t const readSize = fread(((char*)buffer)+pos, 1, (size_t)fileSize, f);
-          if (readSize != (size_t)fileSize) EXM_THROW_INT(11, "could not read %s", fileNamesTable[n]);
-          pos += readSize; }
+        {   size_t const readSize = fread(((char*)buffer)+pos, 1, (size_t)fileSize, f);
+            if (readSize != (size_t)fileSize) EXM_THROW_INT(11, "could not read %s", fileNamesTable[n]);
+            pos += readSize;
+        }
         fileSizes[n] = (size_t)fileSize;
         totalSize += (size_t)fileSize;
         fclose(f);
index 4392a5bd0ecbb9fa1870c8f03cc5f9e4e4bc804f..76000d9915ffde774961fa207ba04ad2b9f56e2c 100644 (file)
@@ -526,7 +526,10 @@ UTIL_STATIC int UTIL_prepareFileList(const char *dirName, char** bufStart, size_
  * After finishing usage of the list the structures should be freed with UTIL_freeFileList(params: return value, allocatedBuffer)
  * In case of error UTIL_createFileList returns NULL and UTIL_freeFileList should not be called.
  */
-UTIL_STATIC const char** UTIL_createFileList(const char **inputNames, unsigned inputNamesNb, char** allocatedBuffer, unsigned* allocatedNamesNb, int followLinks)
+UTIL_STATIC const char**
+UTIL_createFileList(const char **inputNames, unsigned inputNamesNb,
+                    char** allocatedBuffer, unsigned* allocatedNamesNb,
+                    int followLinks)
 {
     size_t pos;
     unsigned i, nbFiles;